kmeans + naive Bayes

This commit is contained in:
LinlyBoi
2023-05-15 19:11:38 +03:00
parent 65d268a902
commit 701c3c6a87

View File

@@ -10,7 +10,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 8, "execution_count": 3,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -914,11 +914,162 @@
"games_crime_dur = sns.jointplot(data = games_dur, x = \"Year\", y = 'Violent_US')\n", "games_crime_dur = sns.jointplot(data = games_dur, x = \"Year\", y = 'Violent_US')\n",
"plt.close(1)" "plt.close(1)"
] ]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"# Kmeans\n",
"This executes the kmeans algorithm on the Critic/User scores and total units shipped for video games"
]
},
{
"cell_type": "code",
"execution_count": 75,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.2907407242071878\n"
]
},
{
"data": {
"text/plain": [
"0 2\n",
"1 2\n",
"2 2\n",
"3 2\n",
"4 2\n",
" ..\n",
"23596 1\n",
"23597 1\n",
"23598 1\n",
"23599 1\n",
"23600 1\n",
"Name: Kmean_Labels, Length: 23601, dtype: int32"
]
},
"execution_count": 75,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from sklearn.cluster import KMeans\n",
"from sklearn import metrics\n",
"gammas_train_kmeans = KMeans(n_clusters=10, random_state=420, n_init=\"auto\").fit(\n",
" gammas[[\"Critic_Score\", \"User_Score\", \"Total_Shipped\"]]\n",
")\n",
"gammas_labels = gammas_train_kmeans.labels_\n",
"\n",
"silh_score = metrics.silhouette_score(\n",
" gammas[[\"Critic_Score\", \"User_Score\", \"Total_Shipped\"]],\n",
" gammas_labels,\n",
" metric=\"euclidean\",\n",
")\n",
"print(silh_score)\n",
"gammas[\"Kmean_Labels\"] = gammas_labels\n",
"gammas[\"Kmean_Labels\"]\n",
"# print(gammas_train.head())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Splitting"
]
},
{
"cell_type": "code",
"execution_count": 76,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.model_selection import train_test_split\n",
"# gammas = pd.read_csv(\"../datasets/videogames/games_cleanish.csv\")\n",
"gammas_train, gammas_test = train_test_split(gammas, test_size=0.20, random_state=69)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"# Naive Bayes\n",
"Using the classifier on the video game data set"
]
},
{
"cell_type": "code",
"execution_count": 78,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Number of mislabeled points out of a total 4721 points : 302\n"
]
}
],
"source": [
"from sklearn.naive_bayes import GaussianNB\n",
"gnb = GaussianNB()\n",
"silly_columns = [\"Critic_Score\", \"User_Score\", \"Total_Shipped\"]\n",
"prediction = gnb.fit(X=gammas_train[silly_columns], y=gammas_train[\"Kmean_Labels\"]).predict(gammas_test[silly_columns])\n",
"len(prediction)\n",
"y_test = gammas_test[\"Kmean_Labels\"]\n",
"len(y_test)\n",
"print(\"Number of mislabeled points out of a total %d points : %d\"\n",
" % (gammas_test.shape[0], (y_test != prediction).sum()))\n"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Number of mislabeled points out of a total 75 points : 4\n"
]
},
{
"data": {
"text/plain": [
"array([2, 1, 0, 2, 0, 2, 0, 1, 1, 1, 2, 1, 1, 1, 1, 0, 1, 1, 0, 0, 2, 1,\n",
" 0, 0, 2, 0, 0, 1, 1, 0, 2, 1, 0, 2, 2, 1, 0, 1, 1, 1, 2, 0, 2, 0,\n",
" 0, 1, 2, 2, 2, 2, 1, 2, 1, 1, 2, 2, 2, 2, 1, 2, 1, 0, 2, 1, 1, 1,\n",
" 1, 2, 0, 0, 2, 1, 0, 0, 1])"
]
},
"execution_count": 46,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from sklearn.datasets import load_iris\n",
"X, y = load_iris(return_X_y=True)\n",
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=0)\n",
"gnb = GaussianNB()\n",
"y_pred = gnb.fit(X_train, y_train).predict(X_test)\n",
"print(\"Number of mislabeled points out of a total %d points : %d\"\n",
" % (X_test.shape[0], (y_test != y_pred).sum()))\n",
"y_test"
]
} }
], ],
"metadata": { "metadata": {
"kernelspec": { "kernelspec": {
"display_name": "Python 3", "display_name": "jewpidor",
"language": "python", "language": "python",
"name": "python3" "name": "python3"
}, },
@@ -932,12 +1083,12 @@
"name": "python", "name": "python",
"nbconvert_exporter": "python", "nbconvert_exporter": "python",
"pygments_lexer": "ipython3", "pygments_lexer": "ipython3",
"version": "3.11.0" "version": "3.10.9"
}, },
"orig_nbformat": 4, "orig_nbformat": 4,
"vscode": { "vscode": {
"interpreter": { "interpreter": {
"hash": "c261aea317cc0286b3b3261fbba9abdec21eaa57589985bb7a274bf54d6cc0a7" "hash": "70ce2434745d4d40671ff71d794558676bf30253e5dd946148d83d754be8251d"
} }
} }
}, },