From 1e552f6c6e342bffb23b227b2c8b8489a7403eb0 Mon Sep 17 00:00:00 2001 From: Supermjork Date: Mon, 15 May 2023 21:49:51 +0300 Subject: [PATCH] DUMDUM PYDOR 2: Electrid DAETh --- dwarves/Mining_HQ.ipynb | 246 ++++++++++++++++++++++++---------------- 1 file changed, 148 insertions(+), 98 deletions(-) diff --git a/dwarves/Mining_HQ.ipynb b/dwarves/Mining_HQ.ipynb index f89ac97..21445fb 100644 --- a/dwarves/Mining_HQ.ipynb +++ b/dwarves/Mining_HQ.ipynb @@ -11,7 +11,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 89, "metadata": {}, "outputs": [], "source": [ @@ -39,7 +39,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 90, "metadata": {}, "outputs": [ { @@ -100,7 +100,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 91, "metadata": {}, "outputs": [], "source": [ @@ -122,7 +122,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 92, "metadata": {}, "outputs": [ { @@ -174,7 +174,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 93, "metadata": {}, "outputs": [], "source": [ @@ -191,7 +191,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 94, "metadata": {}, "outputs": [ { @@ -490,7 +490,7 @@ }, { "cell_type": "code", - "execution_count": 44, + "execution_count": 124, "metadata": {}, "outputs": [ { @@ -517,43 +517,43 @@ "8 1.157204\n", "9 2.335800\n", "Name: Critic_Score_Norm, dtype: float64\n", - " Rank Name Genre ESRB_Rating Platform \\\n", - "53206 53207.0 Capcom Beat 'Em Up Bundle Fighting T XOne \n", - "3500 3501.0 Napoleon: Total War Strategy T PC \n", - "5916 5917.0 Tom Clancy's HAWX 2 Action T X360 \n", - "10144 10145.0 Hot Wheels: Battle Force 5 Racing E10 DS \n", - "13424 13425.0 Wordfish Puzzle E DS \n", + " Rank Name Genre ESRB_Rating \\\n", + "610 611.0 The Elder Scrolls III: Morrowind Role-Playing T \n", + "7004 7005.0 Assassin's Creed II: Discovery Platform T \n", + "1732 1733.0 WWE SmackDown vs Raw 2008 Fighting T \n", + "1590 1591.0 Sonic Heroes Platform E \n", + "9414 9415.0 Brink Shooter T \n", "\n", - " Publisher Developer Critic_Score User_Score \\\n", - "53206 Capcom Capcom 8.053846 5.000000 \n", - "3500 Sega The Creative Assembly 8.500000 8.437968 \n", - "5916 Ubisoft Ubisoft Bucharest 4.750000 9.131273 \n", - "10144 Activision Sidhe Interactive 5.033333 7.557451 \n", - "13424 Ubisoft Ubisoft 5.563636 3.463415 \n", + " Platform Publisher Developer Critic_Score \\\n", + "610 XB Bethesda Softworks Bethesda Softworks 8.2 \n", + "7004 DS Ubisoft Griptonite Games 7.3 \n", + "1732 PS3 THQ Yuke's Media Creations 7.3 \n", + "1590 GC Sega Sonic Team 7.2 \n", + "9414 PC Bethesda Softworks Splash Damage 7.0 \n", "\n", - " Total_Shipped NA_Sales Year bin_Critic_Score bin_value \\\n", - "53206 0.030000 0.00 2018.0 larg 8.5 \n", - "3500 0.667833 0.02 2010.0 larg 8.5 \n", - "5916 0.360000 0.24 2010.0 epik 5.5 \n", - "10144 0.150000 0.12 2009.0 epik 5.5 \n", - "13424 0.080000 0.07 2008.0 epik 5.5 \n", + " User_Score Total_Shipped NA_Sales PAL_Sales Year bin_Critic_Score \\\n", + "610 9.376923 2.86000 2.090 0.63 2002.0 larg \n", + "7004 7.633333 0.28000 0.150 0.11 2009.0 larg \n", + "1732 9.588889 1.32625 0.620 0.50 2007.0 larg \n", + "1590 9.466667 1.42000 0.315 0.24 2004.0 larg \n", + "9414 8.094444 0.18000 0.060 0.09 2011.0 epik \n", "\n", - " Critic_Score_Norm \n", - "53206 0.697422 \n", - "3500 1.073018 \n", - "5916 -2.083936 \n", - "10144 -1.845410 \n", - "13424 -1.398972 \n", + " bin_value Critic_Score_Norm \n", + "610 8.5 0.820462 \n", + "7004 8.5 0.062793 \n", + "1732 8.5 0.062793 \n", + "1590 8.5 -0.021392 \n", + "9414 5.5 -0.189763 \n", " Dissim | Entry 1 | Entry 2 | Entry 3 | Entry 4 | Entry 5 |\n", - "Entry 1 | 0.00000 | 8.75026 | 10.4348 | 10.5905 | 11.0438 |\n", + "Entry 1 | 0.00000 | 8.00716 | 5.56410 | 3.33063 | 10.2811 |\n", "\n", - "Entry 2 | 8.75026 | 0.00000 | 5.80109 | 5.62036 | 7.26823 |\n", + "Entry 2 | 8.00716 | 0.00000 | 3.04825 | 5.45178 | 3.65854 |\n", "\n", - "Entry 3 | 10.4348 | 5.80109 | 0.00000 | 1.91640 | 6.11254 |\n", + "Entry 3 | 5.56410 | 3.04825 | 0.00000 | 3.03339 | 5.40210 |\n", "\n", - "Entry 4 | 10.5905 | 5.62036 | 1.91640 | 0.00000 | 4.27189 |\n", + "Entry 4 | 3.33063 | 5.45178 | 3.03339 | 0.00000 | 7.84707 |\n", "\n", - "Entry 5 | 11.0438 | 7.26823 | 6.11254 | 4.27189 | 0.00000 |\n", + "Entry 5 | 10.2811 | 3.65854 | 5.40210 | 7.84707 | 0.00000 |\n", "\n" ] } @@ -604,7 +604,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 96, "metadata": {}, "outputs": [], "source": [ @@ -644,7 +644,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 97, "metadata": {}, "outputs": [ { @@ -683,7 +683,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 98, "metadata": {}, "outputs": [ { @@ -722,7 +722,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 99, "metadata": {}, "outputs": [ { @@ -753,7 +753,7 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 100, "metadata": {}, "outputs": [ { @@ -784,7 +784,7 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 101, "metadata": {}, "outputs": [ { @@ -807,7 +807,7 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 102, "metadata": {}, "outputs": [ { @@ -840,7 +840,7 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 103, "metadata": {}, "outputs": [ { @@ -877,7 +877,7 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 104, "metadata": {}, "outputs": [ { @@ -907,7 +907,7 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 105, "metadata": {}, "outputs": [ { @@ -938,34 +938,34 @@ }, { "cell_type": "code", - "execution_count": 45, + "execution_count": 113, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "0.2907407242071878\n" + "0.27944729305334054\n" ] }, { "data": { "text/plain": [ - "19 2\n", - "20 2\n", - "21 2\n", - "23 2\n", - "24 2\n", + "19 4\n", + "20 4\n", + "21 4\n", + "24 4\n", + "25 4\n", " ..\n", - "55778 1\n", - "55779 1\n", - "55788 1\n", - "55789 1\n", - "55790 1\n", - "Name: Kmean_Labels, Length: 23601, dtype: int32" + "55090 6\n", + "55423 8\n", + "55490 6\n", + "55528 6\n", + "55653 6\n", + "Name: Kmean_Labels, Length: 6116, dtype: int32" ] }, - "execution_count": 45, + "execution_count": 113, "metadata": {}, "output_type": "execute_result" } @@ -999,7 +999,7 @@ }, { "cell_type": "code", - "execution_count": 46, + "execution_count": 107, "metadata": {}, "outputs": [], "source": [ @@ -1019,14 +1019,14 @@ }, { "cell_type": "code", - "execution_count": 53, + "execution_count": 108, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Number of mislabeled points out of a total 4721 points : 302\n" + "Number of mislabeled points out of a total 1224 points : 56\n" ] } ], @@ -1054,43 +1054,80 @@ }, { "cell_type": "code", - "execution_count": 58, + "execution_count": 125, "metadata": {}, "outputs": [], "source": [ "from sklearn.tree import DecisionTreeClassifier # Import Decision Tree Classifier\n", "from sklearn.model_selection import train_test_split # Import train_test_split function\n", "from sklearn import metrics #Import scikit-learn metrics module for accuracy calculation\n", - "from sklearn.preprocessing import OneHotEncoder\n", + "from sklearn import preprocessing\n", "from sklearn.compose import make_column_transformer" ] }, { "cell_type": "code", - "execution_count": 62, + "execution_count": 126, "metadata": {}, "outputs": [ { - "ename": "TypeError", - "evalue": "no supported conversion for types: (dtype('float64'), dtype('O'))", - "output_type": "error", - "traceback": [ - "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[1;32mIn[62], line 12\u001b[0m\n\u001b[0;32m 8\u001b[0m predikt_col \u001b[39m=\u001b[39m gammas[\u001b[39m\"\u001b[39m\u001b[39mNA_Sales\u001b[39m\u001b[39m\"\u001b[39m]\n\u001b[0;32m 10\u001b[0m transformer \u001b[39m=\u001b[39m make_column_transformer((OneHotEncoder(), [\u001b[39m\"\u001b[39m\u001b[39mGenre\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39m\"\u001b[39m\u001b[39mESRB_Rating\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39m\"\u001b[39m\u001b[39mPlatform\u001b[39m\u001b[39m\"\u001b[39m]), remainder \u001b[39m=\u001b[39m \u001b[39m'\u001b[39m\u001b[39mpassthrough\u001b[39m\u001b[39m'\u001b[39m)\n\u001b[1;32m---> 12\u001b[0m predikt_transform \u001b[39m=\u001b[39m transformer\u001b[39m.\u001b[39;49mfit_transform(gammas)\n\u001b[0;32m 14\u001b[0m predikt_df \u001b[39m=\u001b[39m pd\u001b[39m.\u001b[39mDataFrame(predikt_transform, columns\u001b[39m=\u001b[39mtransformer\u001b[39m.\u001b[39mget_feature_names_out())\n\u001b[0;32m 16\u001b[0m \u001b[39m# Checking dimensions\u001b[39;00m\n", - "File \u001b[1;32mc:\\Python311\\Lib\\site-packages\\sklearn\\utils\\_set_output.py:140\u001b[0m, in \u001b[0;36m_wrap_method_output..wrapped\u001b[1;34m(self, X, *args, **kwargs)\u001b[0m\n\u001b[0;32m 138\u001b[0m \u001b[39m@wraps\u001b[39m(f)\n\u001b[0;32m 139\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mwrapped\u001b[39m(\u001b[39mself\u001b[39m, X, \u001b[39m*\u001b[39margs, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs):\n\u001b[1;32m--> 140\u001b[0m data_to_wrap \u001b[39m=\u001b[39m f(\u001b[39mself\u001b[39;49m, X, \u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n\u001b[0;32m 141\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39misinstance\u001b[39m(data_to_wrap, \u001b[39mtuple\u001b[39m):\n\u001b[0;32m 142\u001b[0m \u001b[39m# only wrap the first output for cross decomposition\u001b[39;00m\n\u001b[0;32m 143\u001b[0m \u001b[39mreturn\u001b[39;00m (\n\u001b[0;32m 144\u001b[0m _wrap_data_with_container(method, data_to_wrap[\u001b[39m0\u001b[39m], X, \u001b[39mself\u001b[39m),\n\u001b[0;32m 145\u001b[0m \u001b[39m*\u001b[39mdata_to_wrap[\u001b[39m1\u001b[39m:],\n\u001b[0;32m 146\u001b[0m )\n", - "File \u001b[1;32mc:\\Python311\\Lib\\site-packages\\sklearn\\compose\\_column_transformer.py:751\u001b[0m, in \u001b[0;36mColumnTransformer.fit_transform\u001b[1;34m(self, X, y)\u001b[0m\n\u001b[0;32m 748\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_validate_output(Xs)\n\u001b[0;32m 749\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_record_output_indices(Xs)\n\u001b[1;32m--> 751\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_hstack(\u001b[39mlist\u001b[39;49m(Xs))\n", - "File \u001b[1;32mc:\\Python311\\Lib\\site-packages\\sklearn\\compose\\_column_transformer.py:840\u001b[0m, in \u001b[0;36mColumnTransformer._hstack\u001b[1;34m(self, Xs)\u001b[0m\n\u001b[0;32m 834\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mValueError\u001b[39;00m \u001b[39mas\u001b[39;00m e:\n\u001b[0;32m 835\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\n\u001b[0;32m 836\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mFor a sparse output, all columns should \u001b[39m\u001b[39m\"\u001b[39m\n\u001b[0;32m 837\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mbe a numeric or convertible to a numeric.\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[0;32m 838\u001b[0m ) \u001b[39mfrom\u001b[39;00m \u001b[39me\u001b[39;00m\n\u001b[1;32m--> 840\u001b[0m \u001b[39mreturn\u001b[39;00m sparse\u001b[39m.\u001b[39;49mhstack(converted_Xs)\u001b[39m.\u001b[39mtocsr()\n\u001b[0;32m 841\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m 842\u001b[0m Xs \u001b[39m=\u001b[39m [f\u001b[39m.\u001b[39mtoarray() \u001b[39mif\u001b[39;00m sparse\u001b[39m.\u001b[39missparse(f) \u001b[39melse\u001b[39;00m f \u001b[39mfor\u001b[39;00m f \u001b[39min\u001b[39;00m Xs]\n", - "File \u001b[1;32m~\\AppData\\Roaming\\Python\\Python311\\site-packages\\scipy\\sparse\\_construct.py:535\u001b[0m, in \u001b[0;36mhstack\u001b[1;34m(blocks, format, dtype)\u001b[0m\n\u001b[0;32m 505\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mhstack\u001b[39m(blocks, \u001b[39mformat\u001b[39m\u001b[39m=\u001b[39m\u001b[39mNone\u001b[39;00m, dtype\u001b[39m=\u001b[39m\u001b[39mNone\u001b[39;00m):\n\u001b[0;32m 506\u001b[0m \u001b[39m \u001b[39m\u001b[39m\"\"\"\u001b[39;00m\n\u001b[0;32m 507\u001b[0m \u001b[39m Stack sparse matrices horizontally (column wise)\u001b[39;00m\n\u001b[0;32m 508\u001b[0m \n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 533\u001b[0m \n\u001b[0;32m 534\u001b[0m \u001b[39m \"\"\"\u001b[39;00m\n\u001b[1;32m--> 535\u001b[0m \u001b[39mreturn\u001b[39;00m bmat([blocks], \u001b[39mformat\u001b[39;49m\u001b[39m=\u001b[39;49m\u001b[39mformat\u001b[39;49m, dtype\u001b[39m=\u001b[39;49mdtype)\n", - "File \u001b[1;32m~\\AppData\\Roaming\\Python\\Python311\\site-packages\\scipy\\sparse\\_construct.py:682\u001b[0m, in \u001b[0;36mbmat\u001b[1;34m(blocks, format, dtype)\u001b[0m\n\u001b[0;32m 680\u001b[0m \u001b[39mif\u001b[39;00m dtype \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m 681\u001b[0m all_dtypes \u001b[39m=\u001b[39m [blk\u001b[39m.\u001b[39mdtype \u001b[39mfor\u001b[39;00m blk \u001b[39min\u001b[39;00m blocks[block_mask]]\n\u001b[1;32m--> 682\u001b[0m dtype \u001b[39m=\u001b[39m upcast(\u001b[39m*\u001b[39;49mall_dtypes) \u001b[39mif\u001b[39;00m all_dtypes \u001b[39melse\u001b[39;00m \u001b[39mNone\u001b[39;00m\n\u001b[0;32m 684\u001b[0m row_offsets \u001b[39m=\u001b[39m np\u001b[39m.\u001b[39mappend(\u001b[39m0\u001b[39m, np\u001b[39m.\u001b[39mcumsum(brow_lengths))\n\u001b[0;32m 685\u001b[0m col_offsets \u001b[39m=\u001b[39m np\u001b[39m.\u001b[39mappend(\u001b[39m0\u001b[39m, np\u001b[39m.\u001b[39mcumsum(bcol_lengths))\n", - "File \u001b[1;32m~\\AppData\\Roaming\\Python\\Python311\\site-packages\\scipy\\sparse\\_sputils.py:53\u001b[0m, in \u001b[0;36mupcast\u001b[1;34m(*args)\u001b[0m\n\u001b[0;32m 50\u001b[0m _upcast_memo[\u001b[39mhash\u001b[39m(args)] \u001b[39m=\u001b[39m t\n\u001b[0;32m 51\u001b[0m \u001b[39mreturn\u001b[39;00m t\n\u001b[1;32m---> 53\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mTypeError\u001b[39;00m(\u001b[39m'\u001b[39m\u001b[39mno supported conversion for types: \u001b[39m\u001b[39m%r\u001b[39;00m\u001b[39m'\u001b[39m \u001b[39m%\u001b[39m (args,))\n", - "\u001b[1;31mTypeError\u001b[0m: no supported conversion for types: (dtype('float64'), dtype('O'))" + "name": "stdout", + "output_type": "stream", + "text": [ + " Genre ESRB_Rating Platform bin_Critic_Score\n", + "19 0 3 17 1\n", + "20 0 3 18 1\n", + "21 7 0 2 1\n", + "24 12 0 0 1\n", + "25 13 0 2 1\n", + "... ... ... ... ...\n", + "55090 15 3 14 1\n", + "55423 8 1 12 1\n", + "55490 0 3 12 1\n", + "55528 1 0 12 0\n", + "55653 19 5 18 1\n", + "\n", + "[6116 rows x 4 columns]\n", + "1 3650\n", + "0 2238\n", + "2 228\n", + "Name: bin_Critic_Score, dtype: int64\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\hellom\\AppData\\Local\\Temp\\ipykernel_7948\\1163177163.py:12: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " to_be_nodes[\"Genre\"] = le.fit_transform(gammas[\"Genre\"])\n", + "C:\\Users\\hellom\\AppData\\Local\\Temp\\ipykernel_7948\\1163177163.py:13: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " to_be_nodes[\"ESRB_Rating\"] = le.fit_transform(gammas[\"ESRB_Rating\"])\n", + "C:\\Users\\hellom\\AppData\\Local\\Temp\\ipykernel_7948\\1163177163.py:14: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " to_be_nodes[\"Platform\"] = le.fit_transform(gammas[\"Platform\"])\n", + "C:\\Users\\hellom\\AppData\\Local\\Temp\\ipykernel_7948\\1163177163.py:15: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " to_be_nodes[\"bin_Critic_Score\"] = le.fit_transform(gammas[\"bin_Critic_Score\"])\n" ] } ], "source": [ "# Columnising dataset\n", - "node_cols = [\"Genre\", \"ESRB_Rating\", \"Platform\", \"Critic_Score\", \"User_Score\"]\n", + "node_cols = [\"Genre\", \"ESRB_Rating\", \"Platform\", \"bin_Critic_Score\"]\n", "\n", "# Columns to be considered as nodes\n", "to_be_nodes = gammas[node_cols]\n", @@ -1098,41 +1135,54 @@ "# Attribute to be predicted\n", "predikt_col = gammas[\"NA_Sales\"]\n", "\n", - "transformer = make_column_transformer((OneHotEncoder(), [\"Genre\", \"ESRB_Rating\", \"Platform\"]), remainder = 'passthrough')\n", + "le = preprocessing.LabelEncoder()\n", "\n", - "predikt_transform = transformer.fit_transform(gammas)\n", + "to_be_nodes[\"Genre\"] = le.fit_transform(gammas[\"Genre\"])\n", + "to_be_nodes[\"ESRB_Rating\"] = le.fit_transform(gammas[\"ESRB_Rating\"])\n", + "to_be_nodes[\"Platform\"] = le.fit_transform(gammas[\"Platform\"])\n", "\n", - "predikt_df = pd.DataFrame(predikt_transform, columns=transformer.get_feature_names_out())\n", - "\n", - "# Checking dimensions\n", - "print(to_be_nodes.shape)\n", - "\n", - "predikt_df.head()" + "print(to_be_nodes)\n", + "print(to_be_nodes[\"bin_Critic_Score\"].value_counts())" ] }, { "cell_type": "code", - "execution_count": 50, + "execution_count": 127, + "metadata": {}, + "outputs": [], + "source": [ + "# Chopping down trees\n", + "node_train, node_test, predikt_train, predikt_test = train_test_split(to_be_nodes, predikt_col, test_size = 0.2, random_state =69)" + ] + }, + { + "cell_type": "code", + "execution_count": 128, "metadata": {}, "outputs": [ { "ename": "ValueError", - "evalue": "Found input variables with inconsistent numbers of samples: [18880, 23601]", + "evalue": "Unknown label type: 'continuous'", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[1;32mIn[50], line 2\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[39m# Chopping down trees\u001b[39;00m\n\u001b[1;32m----> 2\u001b[0m node_train, node_test, predikt_train, predikt_test \u001b[39m=\u001b[39m train_test_split(to_be_nodes, predikt_col, test_size \u001b[39m=\u001b[39;49m \u001b[39m0.2\u001b[39;49m, random_state \u001b[39m=\u001b[39;49m\u001b[39m69\u001b[39;49m)\n", - "File \u001b[1;32mc:\\Python311\\Lib\\site-packages\\sklearn\\model_selection\\_split.py:2559\u001b[0m, in \u001b[0;36mtrain_test_split\u001b[1;34m(test_size, train_size, random_state, shuffle, stratify, *arrays)\u001b[0m\n\u001b[0;32m 2556\u001b[0m \u001b[39mif\u001b[39;00m n_arrays \u001b[39m==\u001b[39m \u001b[39m0\u001b[39m:\n\u001b[0;32m 2557\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\u001b[39m\"\u001b[39m\u001b[39mAt least one array required as input\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[1;32m-> 2559\u001b[0m arrays \u001b[39m=\u001b[39m indexable(\u001b[39m*\u001b[39;49marrays)\n\u001b[0;32m 2561\u001b[0m n_samples \u001b[39m=\u001b[39m _num_samples(arrays[\u001b[39m0\u001b[39m])\n\u001b[0;32m 2562\u001b[0m n_train, n_test \u001b[39m=\u001b[39m _validate_shuffle_split(\n\u001b[0;32m 2563\u001b[0m n_samples, test_size, train_size, default_test_size\u001b[39m=\u001b[39m\u001b[39m0.25\u001b[39m\n\u001b[0;32m 2564\u001b[0m )\n", - "File \u001b[1;32mc:\\Python311\\Lib\\site-packages\\sklearn\\utils\\validation.py:443\u001b[0m, in \u001b[0;36mindexable\u001b[1;34m(*iterables)\u001b[0m\n\u001b[0;32m 424\u001b[0m \u001b[39m\u001b[39m\u001b[39m\"\"\"Make arrays indexable for cross-validation.\u001b[39;00m\n\u001b[0;32m 425\u001b[0m \n\u001b[0;32m 426\u001b[0m \u001b[39mChecks consistent length, passes through None, and ensures that everything\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 439\u001b[0m \u001b[39m sparse matrix, or dataframe) or `None`.\u001b[39;00m\n\u001b[0;32m 440\u001b[0m \u001b[39m\"\"\"\u001b[39;00m\n\u001b[0;32m 442\u001b[0m result \u001b[39m=\u001b[39m [_make_indexable(X) \u001b[39mfor\u001b[39;00m X \u001b[39min\u001b[39;00m iterables]\n\u001b[1;32m--> 443\u001b[0m check_consistent_length(\u001b[39m*\u001b[39;49mresult)\n\u001b[0;32m 444\u001b[0m \u001b[39mreturn\u001b[39;00m result\n", - "File \u001b[1;32mc:\\Python311\\Lib\\site-packages\\sklearn\\utils\\validation.py:397\u001b[0m, in \u001b[0;36mcheck_consistent_length\u001b[1;34m(*arrays)\u001b[0m\n\u001b[0;32m 395\u001b[0m uniques \u001b[39m=\u001b[39m np\u001b[39m.\u001b[39munique(lengths)\n\u001b[0;32m 396\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mlen\u001b[39m(uniques) \u001b[39m>\u001b[39m \u001b[39m1\u001b[39m:\n\u001b[1;32m--> 397\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\n\u001b[0;32m 398\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mFound input variables with inconsistent numbers of samples: \u001b[39m\u001b[39m%r\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[0;32m 399\u001b[0m \u001b[39m%\u001b[39m [\u001b[39mint\u001b[39m(l) \u001b[39mfor\u001b[39;00m l \u001b[39min\u001b[39;00m lengths]\n\u001b[0;32m 400\u001b[0m )\n", - "\u001b[1;31mValueError\u001b[0m: Found input variables with inconsistent numbers of samples: [18880, 23601]" + "Cell \u001b[1;32mIn[128], line 3\u001b[0m\n\u001b[0;32m 1\u001b[0m classifier_obj \u001b[39m=\u001b[39m DecisionTreeClassifier()\n\u001b[1;32m----> 3\u001b[0m classifier_obj \u001b[39m=\u001b[39m classifier_obj\u001b[39m.\u001b[39;49mfit(node_train, predikt_train)\n\u001b[0;32m 5\u001b[0m predikt_result \u001b[39m=\u001b[39m classifier_obj\u001b[39m.\u001b[39mpredict(node_test)\n\u001b[0;32m 7\u001b[0m \u001b[39mprint\u001b[39m(\u001b[39m\"\u001b[39m\u001b[39mACCURACY FOR MODEL PRE: \u001b[39m\u001b[39m\"\u001b[39m, metrics\u001b[39m.\u001b[39maccuracy_score(predikt_test, predikt_result))\n", + "File \u001b[1;32mc:\\Python311\\Lib\\site-packages\\sklearn\\tree\\_classes.py:889\u001b[0m, in \u001b[0;36mDecisionTreeClassifier.fit\u001b[1;34m(self, X, y, sample_weight, check_input)\u001b[0m\n\u001b[0;32m 859\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mfit\u001b[39m(\u001b[39mself\u001b[39m, X, y, sample_weight\u001b[39m=\u001b[39m\u001b[39mNone\u001b[39;00m, check_input\u001b[39m=\u001b[39m\u001b[39mTrue\u001b[39;00m):\n\u001b[0;32m 860\u001b[0m \u001b[39m \u001b[39m\u001b[39m\"\"\"Build a decision tree classifier from the training set (X, y).\u001b[39;00m\n\u001b[0;32m 861\u001b[0m \n\u001b[0;32m 862\u001b[0m \u001b[39m Parameters\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 886\u001b[0m \u001b[39m Fitted estimator.\u001b[39;00m\n\u001b[0;32m 887\u001b[0m \u001b[39m \"\"\"\u001b[39;00m\n\u001b[1;32m--> 889\u001b[0m \u001b[39msuper\u001b[39;49m()\u001b[39m.\u001b[39;49mfit(\n\u001b[0;32m 890\u001b[0m X,\n\u001b[0;32m 891\u001b[0m y,\n\u001b[0;32m 892\u001b[0m sample_weight\u001b[39m=\u001b[39;49msample_weight,\n\u001b[0;32m 893\u001b[0m check_input\u001b[39m=\u001b[39;49mcheck_input,\n\u001b[0;32m 894\u001b[0m )\n\u001b[0;32m 895\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\n", + "File \u001b[1;32mc:\\Python311\\Lib\\site-packages\\sklearn\\tree\\_classes.py:224\u001b[0m, in \u001b[0;36mBaseDecisionTree.fit\u001b[1;34m(self, X, y, sample_weight, check_input)\u001b[0m\n\u001b[0;32m 221\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mn_outputs_ \u001b[39m=\u001b[39m y\u001b[39m.\u001b[39mshape[\u001b[39m1\u001b[39m]\n\u001b[0;32m 223\u001b[0m \u001b[39mif\u001b[39;00m is_classification:\n\u001b[1;32m--> 224\u001b[0m check_classification_targets(y)\n\u001b[0;32m 225\u001b[0m y \u001b[39m=\u001b[39m np\u001b[39m.\u001b[39mcopy(y)\n\u001b[0;32m 227\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mclasses_ \u001b[39m=\u001b[39m []\n", + "File \u001b[1;32mc:\\Python311\\Lib\\site-packages\\sklearn\\utils\\multiclass.py:218\u001b[0m, in \u001b[0;36mcheck_classification_targets\u001b[1;34m(y)\u001b[0m\n\u001b[0;32m 210\u001b[0m y_type \u001b[39m=\u001b[39m type_of_target(y, input_name\u001b[39m=\u001b[39m\u001b[39m\"\u001b[39m\u001b[39my\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[0;32m 211\u001b[0m \u001b[39mif\u001b[39;00m y_type \u001b[39mnot\u001b[39;00m \u001b[39min\u001b[39;00m [\n\u001b[0;32m 212\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mbinary\u001b[39m\u001b[39m\"\u001b[39m,\n\u001b[0;32m 213\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mmulticlass\u001b[39m\u001b[39m\"\u001b[39m,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 216\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mmultilabel-sequences\u001b[39m\u001b[39m\"\u001b[39m,\n\u001b[0;32m 217\u001b[0m ]:\n\u001b[1;32m--> 218\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\u001b[39m\"\u001b[39m\u001b[39mUnknown label type: \u001b[39m\u001b[39m%r\u001b[39;00m\u001b[39m\"\u001b[39m \u001b[39m%\u001b[39m y_type)\n", + "\u001b[1;31mValueError\u001b[0m: Unknown label type: 'continuous'" ] } ], "source": [ - "# Chopping down trees\n", - "node_train, node_test, predikt_train, predikt_test = train_test_split(to_be_nodes, predikt_col, test_size = 0.2, random_state =69)" + "classifier_obj = DecisionTreeClassifier()\n", + "\n", + "classifier_obj = classifier_obj.fit(node_train, predikt_train)\n", + "\n", + "predikt_result = classifier_obj.predict(node_test)\n", + "\n", + "print(\"ACCURACY FOR MODEL PRE: \", metrics.accuracy_score(predikt_test, predikt_result))" ] } ],