kmeans + naive bayes
This commit is contained in:
@@ -10,7 +10,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -914,11 +914,162 @@
|
||||
"games_crime_dur = sns.jointplot(data = games_dur, x = \"Year\", y = 'Violent_US')\n",
|
||||
"plt.close(1)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Kmeans\n",
|
||||
"This executes the kmeans algorithm on the Critic/User scores and total units shipped for video games"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 75,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"0.2907407242071878\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"0 2\n",
|
||||
"1 2\n",
|
||||
"2 2\n",
|
||||
"3 2\n",
|
||||
"4 2\n",
|
||||
" ..\n",
|
||||
"23596 1\n",
|
||||
"23597 1\n",
|
||||
"23598 1\n",
|
||||
"23599 1\n",
|
||||
"23600 1\n",
|
||||
"Name: Kmean_Labels, Length: 23601, dtype: int32"
|
||||
]
|
||||
},
|
||||
"execution_count": 75,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from sklearn.cluster import KMeans\n",
|
||||
"from sklearn import metrics\n",
|
||||
"gammas_train_kmeans = KMeans(n_clusters=10, random_state=420, n_init=\"auto\").fit(\n",
|
||||
" gammas[[\"Critic_Score\", \"User_Score\", \"Total_Shipped\"]]\n",
|
||||
")\n",
|
||||
"gammas_labels = gammas_train_kmeans.labels_\n",
|
||||
"\n",
|
||||
"silh_score = metrics.silhouette_score(\n",
|
||||
" gammas[[\"Critic_Score\", \"User_Score\", \"Total_Shipped\"]],\n",
|
||||
" gammas_labels,\n",
|
||||
" metric=\"euclidean\",\n",
|
||||
")\n",
|
||||
"print(silh_score)\n",
|
||||
"gammas[\"Kmean_Labels\"] = gammas_labels\n",
|
||||
"gammas[\"Kmean_Labels\"]\n",
|
||||
"# print(gammas_train.head())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Splitting"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 76,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from sklearn.model_selection import train_test_split\n",
|
||||
"# gammas = pd.read_csv(\"../datasets/videogames/games_cleanish.csv\")\n",
|
||||
"gammas_train, gammas_test = train_test_split(gammas, test_size=0.20, random_state=69)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Naive Bayes\n",
|
||||
"Using the classifier on the video game data set"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 78,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Number of mislabeled points out of a total 4721 points : 302\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from sklearn.naive_bayes import GaussianNB\n",
|
||||
"gnb = GaussianNB()\n",
|
||||
"silly_columns = [\"Critic_Score\", \"User_Score\", \"Total_Shipped\"]\n",
|
||||
"prediction = gnb.fit(X=gammas_train[silly_columns], y=gammas_train[\"Kmean_Labels\"]).predict(gammas_test[silly_columns])\n",
|
||||
"len(prediction)\n",
|
||||
"y_test = gammas_test[\"Kmean_Labels\"]\n",
|
||||
"len(y_test)\n",
|
||||
"print(\"Number of mislabeled points out of a total %d points : %d\"\n",
|
||||
" % (gammas_test.shape[0], (y_test != prediction).sum()))\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 46,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Number of mislabeled points out of a total 75 points : 4\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"array([2, 1, 0, 2, 0, 2, 0, 1, 1, 1, 2, 1, 1, 1, 1, 0, 1, 1, 0, 0, 2, 1,\n",
|
||||
" 0, 0, 2, 0, 0, 1, 1, 0, 2, 1, 0, 2, 2, 1, 0, 1, 1, 1, 2, 0, 2, 0,\n",
|
||||
" 0, 1, 2, 2, 2, 2, 1, 2, 1, 1, 2, 2, 2, 2, 1, 2, 1, 0, 2, 1, 1, 1,\n",
|
||||
" 1, 2, 0, 0, 2, 1, 0, 0, 1])"
|
||||
]
|
||||
},
|
||||
"execution_count": 46,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from sklearn.datasets import load_iris\n",
|
||||
"X, y = load_iris(return_X_y=True)\n",
|
||||
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=0)\n",
|
||||
"gnb = GaussianNB()\n",
|
||||
"y_pred = gnb.fit(X_train, y_train).predict(X_test)\n",
|
||||
"print(\"Number of mislabeled points out of a total %d points : %d\"\n",
|
||||
" % (X_test.shape[0], (y_test != y_pred).sum()))\n",
|
||||
"y_test"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"display_name": "jewpidor",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
@@ -932,12 +1083,12 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.0"
|
||||
"version": "3.10.9"
|
||||
},
|
||||
"orig_nbformat": 4,
|
||||
"vscode": {
|
||||
"interpreter": {
|
||||
"hash": "c261aea317cc0286b3b3261fbba9abdec21eaa57589985bb7a274bf54d6cc0a7"
|
||||
"hash": "70ce2434745d4d40671ff71d794558676bf30253e5dd946148d83d754be8251d"
|
||||
}
|
||||
}
|
||||
},
|
||||
|
||||
Reference in New Issue
Block a user