DUMDUM PYDOR 2: Electrid DAETh

This commit is contained in:
2023-05-15 21:49:51 +03:00
parent 5cf650e9dc
commit 1e552f6c6e

View File

@@ -11,7 +11,7 @@
},
{
"cell_type": "code",
"execution_count": 24,
"execution_count": 89,
"metadata": {},
"outputs": [],
"source": [
@@ -39,7 +39,7 @@
},
{
"cell_type": "code",
"execution_count": 25,
"execution_count": 90,
"metadata": {},
"outputs": [
{
@@ -100,7 +100,7 @@
},
{
"cell_type": "code",
"execution_count": 26,
"execution_count": 91,
"metadata": {},
"outputs": [],
"source": [
@@ -122,7 +122,7 @@
},
{
"cell_type": "code",
"execution_count": 27,
"execution_count": 92,
"metadata": {},
"outputs": [
{
@@ -174,7 +174,7 @@
},
{
"cell_type": "code",
"execution_count": 28,
"execution_count": 93,
"metadata": {},
"outputs": [],
"source": [
@@ -191,7 +191,7 @@
},
{
"cell_type": "code",
"execution_count": 29,
"execution_count": 94,
"metadata": {},
"outputs": [
{
@@ -490,7 +490,7 @@
},
{
"cell_type": "code",
"execution_count": 44,
"execution_count": 124,
"metadata": {},
"outputs": [
{
@@ -517,43 +517,43 @@
"8 1.157204\n",
"9 2.335800\n",
"Name: Critic_Score_Norm, dtype: float64\n",
" Rank Name Genre ESRB_Rating Platform \\\n",
"53206 53207.0 Capcom Beat 'Em Up Bundle Fighting T XOne \n",
"3500 3501.0 Napoleon: Total War Strategy T PC \n",
"5916 5917.0 Tom Clancy's HAWX 2 Action T X360 \n",
"10144 10145.0 Hot Wheels: Battle Force 5 Racing E10 DS \n",
"13424 13425.0 Wordfish Puzzle E DS \n",
" Rank Name Genre ESRB_Rating \\\n",
"610 611.0 The Elder Scrolls III: Morrowind Role-Playing T \n",
"7004 7005.0 Assassin's Creed II: Discovery Platform T \n",
"1732 1733.0 WWE SmackDown vs Raw 2008 Fighting T \n",
"1590 1591.0 Sonic Heroes Platform E \n",
"9414 9415.0 Brink Shooter T \n",
"\n",
" Publisher Developer Critic_Score User_Score \\\n",
"53206 Capcom Capcom 8.053846 5.000000 \n",
"3500 Sega The Creative Assembly 8.500000 8.437968 \n",
"5916 Ubisoft Ubisoft Bucharest 4.750000 9.131273 \n",
"10144 Activision Sidhe Interactive 5.033333 7.557451 \n",
"13424 Ubisoft Ubisoft 5.563636 3.463415 \n",
" Platform Publisher Developer Critic_Score \\\n",
"610 XB Bethesda Softworks Bethesda Softworks 8.2 \n",
"7004 DS Ubisoft Griptonite Games 7.3 \n",
"1732 PS3 THQ Yuke's Media Creations 7.3 \n",
"1590 GC Sega Sonic Team 7.2 \n",
"9414 PC Bethesda Softworks Splash Damage 7.0 \n",
"\n",
" Total_Shipped NA_Sales Year bin_Critic_Score bin_value \\\n",
"53206 0.030000 0.00 2018.0 larg 8.5 \n",
"3500 0.667833 0.02 2010.0 larg 8.5 \n",
"5916 0.360000 0.24 2010.0 epik 5.5 \n",
"10144 0.150000 0.12 2009.0 epik 5.5 \n",
"13424 0.080000 0.07 2008.0 epik 5.5 \n",
" User_Score Total_Shipped NA_Sales PAL_Sales Year bin_Critic_Score \\\n",
"610 9.376923 2.86000 2.090 0.63 2002.0 larg \n",
"7004 7.633333 0.28000 0.150 0.11 2009.0 larg \n",
"1732 9.588889 1.32625 0.620 0.50 2007.0 larg \n",
"1590 9.466667 1.42000 0.315 0.24 2004.0 larg \n",
"9414 8.094444 0.18000 0.060 0.09 2011.0 epik \n",
"\n",
" Critic_Score_Norm \n",
"53206 0.697422 \n",
"3500 1.073018 \n",
"5916 -2.083936 \n",
"10144 -1.845410 \n",
"13424 -1.398972 \n",
" bin_value Critic_Score_Norm \n",
"610 8.5 0.820462 \n",
"7004 8.5 0.062793 \n",
"1732 8.5 0.062793 \n",
"1590 8.5 -0.021392 \n",
"9414 5.5 -0.189763 \n",
" Dissim | Entry 1 | Entry 2 | Entry 3 | Entry 4 | Entry 5 |\n",
"Entry 1 | 0.00000 | 8.75026 | 10.4348 | 10.5905 | 11.0438 |\n",
"Entry 1 | 0.00000 | 8.00716 | 5.56410 | 3.33063 | 10.2811 |\n",
"\n",
"Entry 2 | 8.75026 | 0.00000 | 5.80109 | 5.62036 | 7.26823 |\n",
"Entry 2 | 8.00716 | 0.00000 | 3.04825 | 5.45178 | 3.65854 |\n",
"\n",
"Entry 3 | 10.4348 | 5.80109 | 0.00000 | 1.91640 | 6.11254 |\n",
"Entry 3 | 5.56410 | 3.04825 | 0.00000 | 3.03339 | 5.40210 |\n",
"\n",
"Entry 4 | 10.5905 | 5.62036 | 1.91640 | 0.00000 | 4.27189 |\n",
"Entry 4 | 3.33063 | 5.45178 | 3.03339 | 0.00000 | 7.84707 |\n",
"\n",
"Entry 5 | 11.0438 | 7.26823 | 6.11254 | 4.27189 | 0.00000 |\n",
"Entry 5 | 10.2811 | 3.65854 | 5.40210 | 7.84707 | 0.00000 |\n",
"\n"
]
}
@@ -604,7 +604,7 @@
},
{
"cell_type": "code",
"execution_count": 31,
"execution_count": 96,
"metadata": {},
"outputs": [],
"source": [
@@ -644,7 +644,7 @@
},
{
"cell_type": "code",
"execution_count": 32,
"execution_count": 97,
"metadata": {},
"outputs": [
{
@@ -683,7 +683,7 @@
},
{
"cell_type": "code",
"execution_count": 33,
"execution_count": 98,
"metadata": {},
"outputs": [
{
@@ -722,7 +722,7 @@
},
{
"cell_type": "code",
"execution_count": 34,
"execution_count": 99,
"metadata": {},
"outputs": [
{
@@ -753,7 +753,7 @@
},
{
"cell_type": "code",
"execution_count": 35,
"execution_count": 100,
"metadata": {},
"outputs": [
{
@@ -784,7 +784,7 @@
},
{
"cell_type": "code",
"execution_count": 36,
"execution_count": 101,
"metadata": {},
"outputs": [
{
@@ -807,7 +807,7 @@
},
{
"cell_type": "code",
"execution_count": 37,
"execution_count": 102,
"metadata": {},
"outputs": [
{
@@ -840,7 +840,7 @@
},
{
"cell_type": "code",
"execution_count": 38,
"execution_count": 103,
"metadata": {},
"outputs": [
{
@@ -877,7 +877,7 @@
},
{
"cell_type": "code",
"execution_count": 39,
"execution_count": 104,
"metadata": {},
"outputs": [
{
@@ -907,7 +907,7 @@
},
{
"cell_type": "code",
"execution_count": 40,
"execution_count": 105,
"metadata": {},
"outputs": [
{
@@ -938,34 +938,34 @@
},
{
"cell_type": "code",
"execution_count": 45,
"execution_count": 113,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.2907407242071878\n"
"0.27944729305334054\n"
]
},
{
"data": {
"text/plain": [
"19 2\n",
"20 2\n",
"21 2\n",
"23 2\n",
"24 2\n",
"19 4\n",
"20 4\n",
"21 4\n",
"24 4\n",
"25 4\n",
" ..\n",
"55778 1\n",
"55779 1\n",
"55788 1\n",
"55789 1\n",
"55790 1\n",
"Name: Kmean_Labels, Length: 23601, dtype: int32"
"55090 6\n",
"55423 8\n",
"55490 6\n",
"55528 6\n",
"55653 6\n",
"Name: Kmean_Labels, Length: 6116, dtype: int32"
]
},
"execution_count": 45,
"execution_count": 113,
"metadata": {},
"output_type": "execute_result"
}
@@ -999,7 +999,7 @@
},
{
"cell_type": "code",
"execution_count": 46,
"execution_count": 107,
"metadata": {},
"outputs": [],
"source": [
@@ -1019,14 +1019,14 @@
},
{
"cell_type": "code",
"execution_count": 53,
"execution_count": 108,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Number of mislabeled points out of a total 4721 points : 302\n"
"Number of mislabeled points out of a total 1224 points : 56\n"
]
}
],
@@ -1054,43 +1054,80 @@
},
{
"cell_type": "code",
"execution_count": 58,
"execution_count": 125,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.tree import DecisionTreeClassifier # Import Decision Tree Classifier\n",
"from sklearn.model_selection import train_test_split # Import train_test_split function\n",
"from sklearn import metrics #Import scikit-learn metrics module for accuracy calculation\n",
"from sklearn.preprocessing import OneHotEncoder\n",
"from sklearn import preprocessing\n",
"from sklearn.compose import make_column_transformer"
]
},
{
"cell_type": "code",
"execution_count": 62,
"execution_count": 126,
"metadata": {},
"outputs": [
{
"ename": "TypeError",
"evalue": "no supported conversion for types: (dtype('float64'), dtype('O'))",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[1;32mIn[62], line 12\u001b[0m\n\u001b[0;32m 8\u001b[0m predikt_col \u001b[39m=\u001b[39m gammas[\u001b[39m\"\u001b[39m\u001b[39mNA_Sales\u001b[39m\u001b[39m\"\u001b[39m]\n\u001b[0;32m 10\u001b[0m transformer \u001b[39m=\u001b[39m make_column_transformer((OneHotEncoder(), [\u001b[39m\"\u001b[39m\u001b[39mGenre\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39m\"\u001b[39m\u001b[39mESRB_Rating\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39m\"\u001b[39m\u001b[39mPlatform\u001b[39m\u001b[39m\"\u001b[39m]), remainder \u001b[39m=\u001b[39m \u001b[39m'\u001b[39m\u001b[39mpassthrough\u001b[39m\u001b[39m'\u001b[39m)\n\u001b[1;32m---> 12\u001b[0m predikt_transform \u001b[39m=\u001b[39m transformer\u001b[39m.\u001b[39;49mfit_transform(gammas)\n\u001b[0;32m 14\u001b[0m predikt_df \u001b[39m=\u001b[39m pd\u001b[39m.\u001b[39mDataFrame(predikt_transform, columns\u001b[39m=\u001b[39mtransformer\u001b[39m.\u001b[39mget_feature_names_out())\n\u001b[0;32m 16\u001b[0m \u001b[39m# Checking dimensions\u001b[39;00m\n",
"File \u001b[1;32mc:\\Python311\\Lib\\site-packages\\sklearn\\utils\\_set_output.py:140\u001b[0m, in \u001b[0;36m_wrap_method_output.<locals>.wrapped\u001b[1;34m(self, X, *args, **kwargs)\u001b[0m\n\u001b[0;32m 138\u001b[0m \u001b[39m@wraps\u001b[39m(f)\n\u001b[0;32m 139\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mwrapped\u001b[39m(\u001b[39mself\u001b[39m, X, \u001b[39m*\u001b[39margs, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs):\n\u001b[1;32m--> 140\u001b[0m data_to_wrap \u001b[39m=\u001b[39m f(\u001b[39mself\u001b[39;49m, X, \u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n\u001b[0;32m 141\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39misinstance\u001b[39m(data_to_wrap, \u001b[39mtuple\u001b[39m):\n\u001b[0;32m 142\u001b[0m \u001b[39m# only wrap the first output for cross decomposition\u001b[39;00m\n\u001b[0;32m 143\u001b[0m \u001b[39mreturn\u001b[39;00m (\n\u001b[0;32m 144\u001b[0m _wrap_data_with_container(method, data_to_wrap[\u001b[39m0\u001b[39m], X, \u001b[39mself\u001b[39m),\n\u001b[0;32m 145\u001b[0m \u001b[39m*\u001b[39mdata_to_wrap[\u001b[39m1\u001b[39m:],\n\u001b[0;32m 146\u001b[0m )\n",
"File \u001b[1;32mc:\\Python311\\Lib\\site-packages\\sklearn\\compose\\_column_transformer.py:751\u001b[0m, in \u001b[0;36mColumnTransformer.fit_transform\u001b[1;34m(self, X, y)\u001b[0m\n\u001b[0;32m 748\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_validate_output(Xs)\n\u001b[0;32m 749\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_record_output_indices(Xs)\n\u001b[1;32m--> 751\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_hstack(\u001b[39mlist\u001b[39;49m(Xs))\n",
"File \u001b[1;32mc:\\Python311\\Lib\\site-packages\\sklearn\\compose\\_column_transformer.py:840\u001b[0m, in \u001b[0;36mColumnTransformer._hstack\u001b[1;34m(self, Xs)\u001b[0m\n\u001b[0;32m 834\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mValueError\u001b[39;00m \u001b[39mas\u001b[39;00m e:\n\u001b[0;32m 835\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\n\u001b[0;32m 836\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mFor a sparse output, all columns should \u001b[39m\u001b[39m\"\u001b[39m\n\u001b[0;32m 837\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mbe a numeric or convertible to a numeric.\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[0;32m 838\u001b[0m ) \u001b[39mfrom\u001b[39;00m \u001b[39me\u001b[39;00m\n\u001b[1;32m--> 840\u001b[0m \u001b[39mreturn\u001b[39;00m sparse\u001b[39m.\u001b[39;49mhstack(converted_Xs)\u001b[39m.\u001b[39mtocsr()\n\u001b[0;32m 841\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m 842\u001b[0m Xs \u001b[39m=\u001b[39m [f\u001b[39m.\u001b[39mtoarray() \u001b[39mif\u001b[39;00m sparse\u001b[39m.\u001b[39missparse(f) \u001b[39melse\u001b[39;00m f \u001b[39mfor\u001b[39;00m f \u001b[39min\u001b[39;00m Xs]\n",
"File \u001b[1;32m~\\AppData\\Roaming\\Python\\Python311\\site-packages\\scipy\\sparse\\_construct.py:535\u001b[0m, in \u001b[0;36mhstack\u001b[1;34m(blocks, format, dtype)\u001b[0m\n\u001b[0;32m 505\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mhstack\u001b[39m(blocks, \u001b[39mformat\u001b[39m\u001b[39m=\u001b[39m\u001b[39mNone\u001b[39;00m, dtype\u001b[39m=\u001b[39m\u001b[39mNone\u001b[39;00m):\n\u001b[0;32m 506\u001b[0m \u001b[39m \u001b[39m\u001b[39m\"\"\"\u001b[39;00m\n\u001b[0;32m 507\u001b[0m \u001b[39m Stack sparse matrices horizontally (column wise)\u001b[39;00m\n\u001b[0;32m 508\u001b[0m \n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 533\u001b[0m \n\u001b[0;32m 534\u001b[0m \u001b[39m \"\"\"\u001b[39;00m\n\u001b[1;32m--> 535\u001b[0m \u001b[39mreturn\u001b[39;00m bmat([blocks], \u001b[39mformat\u001b[39;49m\u001b[39m=\u001b[39;49m\u001b[39mformat\u001b[39;49m, dtype\u001b[39m=\u001b[39;49mdtype)\n",
"File \u001b[1;32m~\\AppData\\Roaming\\Python\\Python311\\site-packages\\scipy\\sparse\\_construct.py:682\u001b[0m, in \u001b[0;36mbmat\u001b[1;34m(blocks, format, dtype)\u001b[0m\n\u001b[0;32m 680\u001b[0m \u001b[39mif\u001b[39;00m dtype \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m 681\u001b[0m all_dtypes \u001b[39m=\u001b[39m [blk\u001b[39m.\u001b[39mdtype \u001b[39mfor\u001b[39;00m blk \u001b[39min\u001b[39;00m blocks[block_mask]]\n\u001b[1;32m--> 682\u001b[0m dtype \u001b[39m=\u001b[39m upcast(\u001b[39m*\u001b[39;49mall_dtypes) \u001b[39mif\u001b[39;00m all_dtypes \u001b[39melse\u001b[39;00m \u001b[39mNone\u001b[39;00m\n\u001b[0;32m 684\u001b[0m row_offsets \u001b[39m=\u001b[39m np\u001b[39m.\u001b[39mappend(\u001b[39m0\u001b[39m, np\u001b[39m.\u001b[39mcumsum(brow_lengths))\n\u001b[0;32m 685\u001b[0m col_offsets \u001b[39m=\u001b[39m np\u001b[39m.\u001b[39mappend(\u001b[39m0\u001b[39m, np\u001b[39m.\u001b[39mcumsum(bcol_lengths))\n",
"File \u001b[1;32m~\\AppData\\Roaming\\Python\\Python311\\site-packages\\scipy\\sparse\\_sputils.py:53\u001b[0m, in \u001b[0;36mupcast\u001b[1;34m(*args)\u001b[0m\n\u001b[0;32m 50\u001b[0m _upcast_memo[\u001b[39mhash\u001b[39m(args)] \u001b[39m=\u001b[39m t\n\u001b[0;32m 51\u001b[0m \u001b[39mreturn\u001b[39;00m t\n\u001b[1;32m---> 53\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mTypeError\u001b[39;00m(\u001b[39m'\u001b[39m\u001b[39mno supported conversion for types: \u001b[39m\u001b[39m%r\u001b[39;00m\u001b[39m'\u001b[39m \u001b[39m%\u001b[39m (args,))\n",
"\u001b[1;31mTypeError\u001b[0m: no supported conversion for types: (dtype('float64'), dtype('O'))"
"name": "stdout",
"output_type": "stream",
"text": [
" Genre ESRB_Rating Platform bin_Critic_Score\n",
"19 0 3 17 1\n",
"20 0 3 18 1\n",
"21 7 0 2 1\n",
"24 12 0 0 1\n",
"25 13 0 2 1\n",
"... ... ... ... ...\n",
"55090 15 3 14 1\n",
"55423 8 1 12 1\n",
"55490 0 3 12 1\n",
"55528 1 0 12 0\n",
"55653 19 5 18 1\n",
"\n",
"[6116 rows x 4 columns]\n",
"1 3650\n",
"0 2238\n",
"2 228\n",
"Name: bin_Critic_Score, dtype: int64\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\hellom\\AppData\\Local\\Temp\\ipykernel_7948\\1163177163.py:12: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" to_be_nodes[\"Genre\"] = le.fit_transform(gammas[\"Genre\"])\n",
"C:\\Users\\hellom\\AppData\\Local\\Temp\\ipykernel_7948\\1163177163.py:13: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" to_be_nodes[\"ESRB_Rating\"] = le.fit_transform(gammas[\"ESRB_Rating\"])\n",
"C:\\Users\\hellom\\AppData\\Local\\Temp\\ipykernel_7948\\1163177163.py:14: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" to_be_nodes[\"Platform\"] = le.fit_transform(gammas[\"Platform\"])\n",
"C:\\Users\\hellom\\AppData\\Local\\Temp\\ipykernel_7948\\1163177163.py:15: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" to_be_nodes[\"bin_Critic_Score\"] = le.fit_transform(gammas[\"bin_Critic_Score\"])\n"
]
}
],
"source": [
"# Columnising dataset\n",
"node_cols = [\"Genre\", \"ESRB_Rating\", \"Platform\", \"Critic_Score\", \"User_Score\"]\n",
"node_cols = [\"Genre\", \"ESRB_Rating\", \"Platform\", \"bin_Critic_Score\"]\n",
"\n",
"# Columns to be considered as nodes\n",
"to_be_nodes = gammas[node_cols]\n",
@@ -1098,41 +1135,54 @@
"# Attribute to be predicted\n",
"predikt_col = gammas[\"NA_Sales\"]\n",
"\n",
"transformer = make_column_transformer((OneHotEncoder(), [\"Genre\", \"ESRB_Rating\", \"Platform\"]), remainder = 'passthrough')\n",
"le = preprocessing.LabelEncoder()\n",
"\n",
"predikt_transform = transformer.fit_transform(gammas)\n",
"to_be_nodes[\"Genre\"] = le.fit_transform(gammas[\"Genre\"])\n",
"to_be_nodes[\"ESRB_Rating\"] = le.fit_transform(gammas[\"ESRB_Rating\"])\n",
"to_be_nodes[\"Platform\"] = le.fit_transform(gammas[\"Platform\"])\n",
"\n",
"predikt_df = pd.DataFrame(predikt_transform, columns=transformer.get_feature_names_out())\n",
"\n",
"# Checking dimensions\n",
"print(to_be_nodes.shape)\n",
"\n",
"predikt_df.head()"
"print(to_be_nodes)\n",
"print(to_be_nodes[\"bin_Critic_Score\"].value_counts())"
]
},
{
"cell_type": "code",
"execution_count": 50,
"execution_count": 127,
"metadata": {},
"outputs": [],
"source": [
"# Chopping down trees\n",
"node_train, node_test, predikt_train, predikt_test = train_test_split(to_be_nodes, predikt_col, test_size = 0.2, random_state =69)"
]
},
{
"cell_type": "code",
"execution_count": 128,
"metadata": {},
"outputs": [
{
"ename": "ValueError",
"evalue": "Found input variables with inconsistent numbers of samples: [18880, 23601]",
"evalue": "Unknown label type: 'continuous'",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[1;32mIn[50], line 2\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[39m# Chopping down trees\u001b[39;00m\n\u001b[1;32m----> 2\u001b[0m node_train, node_test, predikt_train, predikt_test \u001b[39m=\u001b[39m train_test_split(to_be_nodes, predikt_col, test_size \u001b[39m=\u001b[39;49m \u001b[39m0.2\u001b[39;49m, random_state \u001b[39m=\u001b[39;49m\u001b[39m69\u001b[39;49m)\n",
"File \u001b[1;32mc:\\Python311\\Lib\\site-packages\\sklearn\\model_selection\\_split.py:2559\u001b[0m, in \u001b[0;36mtrain_test_split\u001b[1;34m(test_size, train_size, random_state, shuffle, stratify, *arrays)\u001b[0m\n\u001b[0;32m 2556\u001b[0m \u001b[39mif\u001b[39;00m n_arrays \u001b[39m==\u001b[39m \u001b[39m0\u001b[39m:\n\u001b[0;32m 2557\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\u001b[39m\"\u001b[39m\u001b[39mAt least one array required as input\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[1;32m-> 2559\u001b[0m arrays \u001b[39m=\u001b[39m indexable(\u001b[39m*\u001b[39;49marrays)\n\u001b[0;32m 2561\u001b[0m n_samples \u001b[39m=\u001b[39m _num_samples(arrays[\u001b[39m0\u001b[39m])\n\u001b[0;32m 2562\u001b[0m n_train, n_test \u001b[39m=\u001b[39m _validate_shuffle_split(\n\u001b[0;32m 2563\u001b[0m n_samples, test_size, train_size, default_test_size\u001b[39m=\u001b[39m\u001b[39m0.25\u001b[39m\n\u001b[0;32m 2564\u001b[0m )\n",
"File \u001b[1;32mc:\\Python311\\Lib\\site-packages\\sklearn\\utils\\validation.py:443\u001b[0m, in \u001b[0;36mindexable\u001b[1;34m(*iterables)\u001b[0m\n\u001b[0;32m 424\u001b[0m \u001b[39m\u001b[39m\u001b[39m\"\"\"Make arrays indexable for cross-validation.\u001b[39;00m\n\u001b[0;32m 425\u001b[0m \n\u001b[0;32m 426\u001b[0m \u001b[39mChecks consistent length, passes through None, and ensures that everything\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 439\u001b[0m \u001b[39m sparse matrix, or dataframe) or `None`.\u001b[39;00m\n\u001b[0;32m 440\u001b[0m \u001b[39m\"\"\"\u001b[39;00m\n\u001b[0;32m 442\u001b[0m result \u001b[39m=\u001b[39m [_make_indexable(X) \u001b[39mfor\u001b[39;00m X \u001b[39min\u001b[39;00m iterables]\n\u001b[1;32m--> 443\u001b[0m check_consistent_length(\u001b[39m*\u001b[39;49mresult)\n\u001b[0;32m 444\u001b[0m \u001b[39mreturn\u001b[39;00m result\n",
"File \u001b[1;32mc:\\Python311\\Lib\\site-packages\\sklearn\\utils\\validation.py:397\u001b[0m, in \u001b[0;36mcheck_consistent_length\u001b[1;34m(*arrays)\u001b[0m\n\u001b[0;32m 395\u001b[0m uniques \u001b[39m=\u001b[39m np\u001b[39m.\u001b[39munique(lengths)\n\u001b[0;32m 396\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mlen\u001b[39m(uniques) \u001b[39m>\u001b[39m \u001b[39m1\u001b[39m:\n\u001b[1;32m--> 397\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\n\u001b[0;32m 398\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mFound input variables with inconsistent numbers of samples: \u001b[39m\u001b[39m%r\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[0;32m 399\u001b[0m \u001b[39m%\u001b[39m [\u001b[39mint\u001b[39m(l) \u001b[39mfor\u001b[39;00m l \u001b[39min\u001b[39;00m lengths]\n\u001b[0;32m 400\u001b[0m )\n",
"\u001b[1;31mValueError\u001b[0m: Found input variables with inconsistent numbers of samples: [18880, 23601]"
"Cell \u001b[1;32mIn[128], line 3\u001b[0m\n\u001b[0;32m 1\u001b[0m classifier_obj \u001b[39m=\u001b[39m DecisionTreeClassifier()\n\u001b[1;32m----> 3\u001b[0m classifier_obj \u001b[39m=\u001b[39m classifier_obj\u001b[39m.\u001b[39;49mfit(node_train, predikt_train)\n\u001b[0;32m 5\u001b[0m predikt_result \u001b[39m=\u001b[39m classifier_obj\u001b[39m.\u001b[39mpredict(node_test)\n\u001b[0;32m 7\u001b[0m \u001b[39mprint\u001b[39m(\u001b[39m\"\u001b[39m\u001b[39mACCURACY FOR MODEL PRE: \u001b[39m\u001b[39m\"\u001b[39m, metrics\u001b[39m.\u001b[39maccuracy_score(predikt_test, predikt_result))\n",
"File \u001b[1;32mc:\\Python311\\Lib\\site-packages\\sklearn\\tree\\_classes.py:889\u001b[0m, in \u001b[0;36mDecisionTreeClassifier.fit\u001b[1;34m(self, X, y, sample_weight, check_input)\u001b[0m\n\u001b[0;32m 859\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mfit\u001b[39m(\u001b[39mself\u001b[39m, X, y, sample_weight\u001b[39m=\u001b[39m\u001b[39mNone\u001b[39;00m, check_input\u001b[39m=\u001b[39m\u001b[39mTrue\u001b[39;00m):\n\u001b[0;32m 860\u001b[0m \u001b[39m \u001b[39m\u001b[39m\"\"\"Build a decision tree classifier from the training set (X, y).\u001b[39;00m\n\u001b[0;32m 861\u001b[0m \n\u001b[0;32m 862\u001b[0m \u001b[39m Parameters\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 886\u001b[0m \u001b[39m Fitted estimator.\u001b[39;00m\n\u001b[0;32m 887\u001b[0m \u001b[39m \"\"\"\u001b[39;00m\n\u001b[1;32m--> 889\u001b[0m \u001b[39msuper\u001b[39;49m()\u001b[39m.\u001b[39;49mfit(\n\u001b[0;32m 890\u001b[0m X,\n\u001b[0;32m 891\u001b[0m y,\n\u001b[0;32m 892\u001b[0m sample_weight\u001b[39m=\u001b[39;49msample_weight,\n\u001b[0;32m 893\u001b[0m check_input\u001b[39m=\u001b[39;49mcheck_input,\n\u001b[0;32m 894\u001b[0m )\n\u001b[0;32m 895\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\n",
"File \u001b[1;32mc:\\Python311\\Lib\\site-packages\\sklearn\\tree\\_classes.py:224\u001b[0m, in \u001b[0;36mBaseDecisionTree.fit\u001b[1;34m(self, X, y, sample_weight, check_input)\u001b[0m\n\u001b[0;32m 221\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mn_outputs_ \u001b[39m=\u001b[39m y\u001b[39m.\u001b[39mshape[\u001b[39m1\u001b[39m]\n\u001b[0;32m 223\u001b[0m \u001b[39mif\u001b[39;00m is_classification:\n\u001b[1;32m--> 224\u001b[0m check_classification_targets(y)\n\u001b[0;32m 225\u001b[0m y \u001b[39m=\u001b[39m np\u001b[39m.\u001b[39mcopy(y)\n\u001b[0;32m 227\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mclasses_ \u001b[39m=\u001b[39m []\n",
"File \u001b[1;32mc:\\Python311\\Lib\\site-packages\\sklearn\\utils\\multiclass.py:218\u001b[0m, in \u001b[0;36mcheck_classification_targets\u001b[1;34m(y)\u001b[0m\n\u001b[0;32m 210\u001b[0m y_type \u001b[39m=\u001b[39m type_of_target(y, input_name\u001b[39m=\u001b[39m\u001b[39m\"\u001b[39m\u001b[39my\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[0;32m 211\u001b[0m \u001b[39mif\u001b[39;00m y_type \u001b[39mnot\u001b[39;00m \u001b[39min\u001b[39;00m [\n\u001b[0;32m 212\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mbinary\u001b[39m\u001b[39m\"\u001b[39m,\n\u001b[0;32m 213\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mmulticlass\u001b[39m\u001b[39m\"\u001b[39m,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 216\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mmultilabel-sequences\u001b[39m\u001b[39m\"\u001b[39m,\n\u001b[0;32m 217\u001b[0m ]:\n\u001b[1;32m--> 218\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\u001b[39m\"\u001b[39m\u001b[39mUnknown label type: \u001b[39m\u001b[39m%r\u001b[39;00m\u001b[39m\"\u001b[39m \u001b[39m%\u001b[39m y_type)\n",
"\u001b[1;31mValueError\u001b[0m: Unknown label type: 'continuous'"
]
}
],
"source": [
"# Chopping down trees\n",
"node_train, node_test, predikt_train, predikt_test = train_test_split(to_be_nodes, predikt_col, test_size = 0.2, random_state =69)"
"classifier_obj = DecisionTreeClassifier()\n",
"\n",
"classifier_obj = classifier_obj.fit(node_train, predikt_train)\n",
"\n",
"predikt_result = classifier_obj.predict(node_test)\n",
"\n",
"print(\"ACCURACY FOR MODEL PRE: \", metrics.accuracy_score(predikt_test, predikt_result))"
]
}
],