DUMDUM PYDOR 2: Electrid DAETh

2023-05-15 21:49:51 +03:00
parent 5cf650e9dc
commit 1e552f6c6e
1 changed files with 148 additions and 98 deletions
--- a/dwarves/Mining_HQ.ipynb
+++ b/dwarves/Mining_HQ.ipynb
@@ -11,7 +11,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 24,
+   "execution_count": 89,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -39,7 +39,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 25,
+   "execution_count": 90,
   "metadata": {},
   "outputs": [
    {
@@ -100,7 +100,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 26,
+   "execution_count": 91,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -122,7 +122,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 27,
+   "execution_count": 92,
   "metadata": {},
   "outputs": [
    {
@@ -174,7 +174,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 28,
+   "execution_count": 93,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -191,7 +191,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 29,
+   "execution_count": 94,
   "metadata": {},
   "outputs": [
    {
@@ -490,7 +490,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 44,
+   "execution_count": 124,
   "metadata": {},
   "outputs": [
    {
@@ -517,43 +517,43 @@
      "8    1.157204\n",
      "9    2.335800\n",
      "Name: Critic_Score_Norm, dtype: float64\n",
-      "          Rank                        Name     Genre ESRB_Rating Platform  \\\n",
-      "53206  53207.0   Capcom Beat 'Em Up Bundle  Fighting           T     XOne   \n",
-      "3500    3501.0         Napoleon: Total War  Strategy           T       PC   \n",
-      "5916    5917.0         Tom Clancy's HAWX 2    Action           T     X360   \n",
-      "10144  10145.0  Hot Wheels: Battle Force 5    Racing         E10       DS   \n",
-      "13424  13425.0                    Wordfish    Puzzle           E       DS   \n",
+      "        Rank                              Name         Genre ESRB_Rating  \\\n",
+      "610    611.0  The Elder Scrolls III: Morrowind  Role-Playing           T   \n",
+      "7004  7005.0    Assassin's Creed II: Discovery      Platform           T   \n",
+      "1732  1733.0         WWE SmackDown vs Raw 2008      Fighting           T   \n",
+      "1590  1591.0                      Sonic Heroes      Platform           E   \n",
+      "9414  9415.0                             Brink       Shooter           T   \n",
      "\n",
-      "        Publisher              Developer  Critic_Score  User_Score  \\\n",
-      "53206      Capcom                 Capcom      8.053846    5.000000   \n",
-      "3500         Sega  The Creative Assembly      8.500000    8.437968   \n",
-      "5916      Ubisoft      Ubisoft Bucharest      4.750000    9.131273   \n",
-      "10144  Activision      Sidhe Interactive      5.033333    7.557451   \n",
-      "13424     Ubisoft                Ubisoft      5.563636    3.463415   \n",
+      "     Platform           Publisher               Developer  Critic_Score  \\\n",
+      "610        XB  Bethesda Softworks      Bethesda Softworks           8.2   \n",
+      "7004       DS             Ubisoft        Griptonite Games           7.3   \n",
+      "1732      PS3                 THQ  Yuke's Media Creations           7.3   \n",
+      "1590       GC                Sega              Sonic Team           7.2   \n",
+      "9414       PC  Bethesda Softworks           Splash Damage           7.0   \n",
      "\n",
-      "       Total_Shipped  NA_Sales    Year bin_Critic_Score  bin_value  \\\n",
-      "53206       0.030000      0.00  2018.0             larg        8.5   \n",
-      "3500        0.667833      0.02  2010.0             larg        8.5   \n",
-      "5916        0.360000      0.24  2010.0             epik        5.5   \n",
-      "10144       0.150000      0.12  2009.0             epik        5.5   \n",
-      "13424       0.080000      0.07  2008.0             epik        5.5   \n",
+      "      User_Score  Total_Shipped  NA_Sales  PAL_Sales    Year bin_Critic_Score  \\\n",
+      "610     9.376923        2.86000     2.090       0.63  2002.0             larg   \n",
+      "7004    7.633333        0.28000     0.150       0.11  2009.0             larg   \n",
+      "1732    9.588889        1.32625     0.620       0.50  2007.0             larg   \n",
+      "1590    9.466667        1.42000     0.315       0.24  2004.0             larg   \n",
+      "9414    8.094444        0.18000     0.060       0.09  2011.0             epik   \n",
      "\n",
-      "       Critic_Score_Norm  \n",
-      "53206           0.697422  \n",
-      "3500            1.073018  \n",
-      "5916           -2.083936  \n",
-      "10144          -1.845410  \n",
-      "13424          -1.398972  \n",
+      "      bin_value  Critic_Score_Norm  \n",
+      "610         8.5           0.820462  \n",
+      "7004        8.5           0.062793  \n",
+      "1732        8.5           0.062793  \n",
+      "1590        8.5          -0.021392  \n",
+      "9414        5.5          -0.189763  \n",
      "  Dissim  |  Entry 1 | Entry 2 | Entry 3 | Entry 4 | Entry 5 |\n",
-      "Entry  1  |  0.00000 | 8.75026 | 10.4348 | 10.5905 | 11.0438 |\n",
+      "Entry  1  |  0.00000 | 8.00716 | 5.56410 | 3.33063 | 10.2811 |\n",
      "\n",
-      "Entry  2  |  8.75026 | 0.00000 | 5.80109 | 5.62036 | 7.26823 |\n",
+      "Entry  2  |  8.00716 | 0.00000 | 3.04825 | 5.45178 | 3.65854 |\n",
      "\n",
-      "Entry  3  |  10.4348 | 5.80109 | 0.00000 | 1.91640 | 6.11254 |\n",
+      "Entry  3  |  5.56410 | 3.04825 | 0.00000 | 3.03339 | 5.40210 |\n",
      "\n",
-      "Entry  4  |  10.5905 | 5.62036 | 1.91640 | 0.00000 | 4.27189 |\n",
+      "Entry  4  |  3.33063 | 5.45178 | 3.03339 | 0.00000 | 7.84707 |\n",
      "\n",
-      "Entry  5  |  11.0438 | 7.26823 | 6.11254 | 4.27189 | 0.00000 |\n",
+      "Entry  5  |  10.2811 | 3.65854 | 5.40210 | 7.84707 | 0.00000 |\n",
      "\n"
     ]
    }
@@ -604,7 +604,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 31,
+   "execution_count": 96,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -644,7 +644,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 32,
+   "execution_count": 97,
   "metadata": {},
   "outputs": [
    {
@@ -683,7 +683,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 33,
+   "execution_count": 98,
   "metadata": {},
   "outputs": [
    {
@@ -722,7 +722,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 34,
+   "execution_count": 99,
   "metadata": {},
   "outputs": [
    {
@@ -753,7 +753,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 35,
+   "execution_count": 100,
   "metadata": {},
   "outputs": [
    {
@@ -784,7 +784,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 36,
+   "execution_count": 101,
   "metadata": {},
   "outputs": [
    {
@@ -807,7 +807,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 37,
+   "execution_count": 102,
   "metadata": {},
   "outputs": [
    {
@@ -840,7 +840,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 38,
+   "execution_count": 103,
   "metadata": {},
   "outputs": [
    {
@@ -877,7 +877,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 39,
+   "execution_count": 104,
   "metadata": {},
   "outputs": [
    {
@@ -907,7 +907,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 40,
+   "execution_count": 105,
   "metadata": {},
   "outputs": [
    {
@@ -938,34 +938,34 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 45,
+   "execution_count": 113,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "0.2907407242071878\n"
+      "0.27944729305334054\n"
     ]
    },
    {
     "data": {
      "text/plain": [
-       "19       2\n",
-       "20       2\n",
-       "21       2\n",
-       "23       2\n",
-       "24       2\n",
+       "19       4\n",
+       "20       4\n",
+       "21       4\n",
+       "24       4\n",
+       "25       4\n",
       "        ..\n",
-       "55778    1\n",
-       "55779    1\n",
-       "55788    1\n",
-       "55789    1\n",
-       "55790    1\n",
-       "Name: Kmean_Labels, Length: 23601, dtype: int32"
+       "55090    6\n",
+       "55423    8\n",
+       "55490    6\n",
+       "55528    6\n",
+       "55653    6\n",
+       "Name: Kmean_Labels, Length: 6116, dtype: int32"
      ]
     },
-     "execution_count": 45,
+     "execution_count": 113,
     "metadata": {},
     "output_type": "execute_result"
    }
@@ -999,7 +999,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 46,
+   "execution_count": 107,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -1019,14 +1019,14 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 53,
+   "execution_count": 108,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "Number of mislabeled points out of a total 4721 points : 302\n"
+      "Number of mislabeled points out of a total 1224 points : 56\n"
     ]
    }
   ],
@@ -1054,43 +1054,80 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 58,
+   "execution_count": 125,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.tree import DecisionTreeClassifier # Import Decision Tree Classifier\n",
    "from sklearn.model_selection import train_test_split # Import train_test_split function\n",
    "from sklearn import metrics #Import scikit-learn metrics module for accuracy calculation\n",
-    "from sklearn.preprocessing import OneHotEncoder\n",
+    "from sklearn import preprocessing\n",
    "from sklearn.compose import make_column_transformer"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 62,
+   "execution_count": 126,
   "metadata": {},
   "outputs": [
    {
-     "ename": "TypeError",
-     "evalue": "no supported conversion for types: (dtype('float64'), dtype('O'))",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
-      "\u001b[1;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
-      "Cell \u001b[1;32mIn[62], line 12\u001b[0m\n\u001b[0;32m      8\u001b[0m predikt_col \u001b[39m=\u001b[39m gammas[\u001b[39m\"\u001b[39m\u001b[39mNA_Sales\u001b[39m\u001b[39m\"\u001b[39m]\n\u001b[0;32m     10\u001b[0m transformer \u001b[39m=\u001b[39m make_column_transformer((OneHotEncoder(), [\u001b[39m\"\u001b[39m\u001b[39mGenre\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39m\"\u001b[39m\u001b[39mESRB_Rating\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39m\"\u001b[39m\u001b[39mPlatform\u001b[39m\u001b[39m\"\u001b[39m]), remainder \u001b[39m=\u001b[39m \u001b[39m'\u001b[39m\u001b[39mpassthrough\u001b[39m\u001b[39m'\u001b[39m)\n\u001b[1;32m---> 12\u001b[0m predikt_transform \u001b[39m=\u001b[39m transformer\u001b[39m.\u001b[39;49mfit_transform(gammas)\n\u001b[0;32m     14\u001b[0m predikt_df \u001b[39m=\u001b[39m pd\u001b[39m.\u001b[39mDataFrame(predikt_transform, columns\u001b[39m=\u001b[39mtransformer\u001b[39m.\u001b[39mget_feature_names_out())\n\u001b[0;32m     16\u001b[0m \u001b[39m# Checking dimensions\u001b[39;00m\n",
-      "File \u001b[1;32mc:\\Python311\\Lib\\site-packages\\sklearn\\utils\\_set_output.py:140\u001b[0m, in \u001b[0;36m_wrap_method_output.<locals>.wrapped\u001b[1;34m(self, X, *args, **kwargs)\u001b[0m\n\u001b[0;32m    138\u001b[0m \u001b[39m@wraps\u001b[39m(f)\n\u001b[0;32m    139\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mwrapped\u001b[39m(\u001b[39mself\u001b[39m, X, \u001b[39m*\u001b[39margs, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs):\n\u001b[1;32m--> 140\u001b[0m     data_to_wrap \u001b[39m=\u001b[39m f(\u001b[39mself\u001b[39;49m, X, \u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n\u001b[0;32m    141\u001b[0m     \u001b[39mif\u001b[39;00m \u001b[39misinstance\u001b[39m(data_to_wrap, \u001b[39mtuple\u001b[39m):\n\u001b[0;32m    142\u001b[0m         \u001b[39m# only wrap the first output for cross decomposition\u001b[39;00m\n\u001b[0;32m    143\u001b[0m         \u001b[39mreturn\u001b[39;00m (\n\u001b[0;32m    144\u001b[0m             _wrap_data_with_container(method, data_to_wrap[\u001b[39m0\u001b[39m], X, \u001b[39mself\u001b[39m),\n\u001b[0;32m    145\u001b[0m             \u001b[39m*\u001b[39mdata_to_wrap[\u001b[39m1\u001b[39m:],\n\u001b[0;32m    146\u001b[0m         )\n",
-      "File \u001b[1;32mc:\\Python311\\Lib\\site-packages\\sklearn\\compose\\_column_transformer.py:751\u001b[0m, in \u001b[0;36mColumnTransformer.fit_transform\u001b[1;34m(self, X, y)\u001b[0m\n\u001b[0;32m    748\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_validate_output(Xs)\n\u001b[0;32m    749\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_record_output_indices(Xs)\n\u001b[1;32m--> 751\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_hstack(\u001b[39mlist\u001b[39;49m(Xs))\n",
-      "File \u001b[1;32mc:\\Python311\\Lib\\site-packages\\sklearn\\compose\\_column_transformer.py:840\u001b[0m, in \u001b[0;36mColumnTransformer._hstack\u001b[1;34m(self, Xs)\u001b[0m\n\u001b[0;32m    834\u001b[0m     \u001b[39mexcept\u001b[39;00m \u001b[39mValueError\u001b[39;00m \u001b[39mas\u001b[39;00m e:\n\u001b[0;32m    835\u001b[0m         \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\n\u001b[0;32m    836\u001b[0m             \u001b[39m\"\u001b[39m\u001b[39mFor a sparse output, all columns should \u001b[39m\u001b[39m\"\u001b[39m\n\u001b[0;32m    837\u001b[0m             \u001b[39m\"\u001b[39m\u001b[39mbe a numeric or convertible to a numeric.\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[0;32m    838\u001b[0m         ) \u001b[39mfrom\u001b[39;00m \u001b[39me\u001b[39;00m\n\u001b[1;32m--> 840\u001b[0m     \u001b[39mreturn\u001b[39;00m sparse\u001b[39m.\u001b[39;49mhstack(converted_Xs)\u001b[39m.\u001b[39mtocsr()\n\u001b[0;32m    841\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m    842\u001b[0m     Xs \u001b[39m=\u001b[39m [f\u001b[39m.\u001b[39mtoarray() \u001b[39mif\u001b[39;00m sparse\u001b[39m.\u001b[39missparse(f) \u001b[39melse\u001b[39;00m f \u001b[39mfor\u001b[39;00m f \u001b[39min\u001b[39;00m Xs]\n",
-      "File \u001b[1;32m~\\AppData\\Roaming\\Python\\Python311\\site-packages\\scipy\\sparse\\_construct.py:535\u001b[0m, in \u001b[0;36mhstack\u001b[1;34m(blocks, format, dtype)\u001b[0m\n\u001b[0;32m    505\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mhstack\u001b[39m(blocks, \u001b[39mformat\u001b[39m\u001b[39m=\u001b[39m\u001b[39mNone\u001b[39;00m, dtype\u001b[39m=\u001b[39m\u001b[39mNone\u001b[39;00m):\n\u001b[0;32m    506\u001b[0m \u001b[39m    \u001b[39m\u001b[39m\"\"\"\u001b[39;00m\n\u001b[0;32m    507\u001b[0m \u001b[39m    Stack sparse matrices horizontally (column wise)\u001b[39;00m\n\u001b[0;32m    508\u001b[0m \n\u001b[1;32m   (...)\u001b[0m\n\u001b[0;32m    533\u001b[0m \n\u001b[0;32m    534\u001b[0m \u001b[39m    \"\"\"\u001b[39;00m\n\u001b[1;32m--> 535\u001b[0m     \u001b[39mreturn\u001b[39;00m bmat([blocks], \u001b[39mformat\u001b[39;49m\u001b[39m=\u001b[39;49m\u001b[39mformat\u001b[39;49m, dtype\u001b[39m=\u001b[39;49mdtype)\n",
-      "File \u001b[1;32m~\\AppData\\Roaming\\Python\\Python311\\site-packages\\scipy\\sparse\\_construct.py:682\u001b[0m, in \u001b[0;36mbmat\u001b[1;34m(blocks, format, dtype)\u001b[0m\n\u001b[0;32m    680\u001b[0m \u001b[39mif\u001b[39;00m dtype \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m    681\u001b[0m     all_dtypes \u001b[39m=\u001b[39m [blk\u001b[39m.\u001b[39mdtype \u001b[39mfor\u001b[39;00m blk \u001b[39min\u001b[39;00m blocks[block_mask]]\n\u001b[1;32m--> 682\u001b[0m     dtype \u001b[39m=\u001b[39m upcast(\u001b[39m*\u001b[39;49mall_dtypes) \u001b[39mif\u001b[39;00m all_dtypes \u001b[39melse\u001b[39;00m \u001b[39mNone\u001b[39;00m\n\u001b[0;32m    684\u001b[0m row_offsets \u001b[39m=\u001b[39m np\u001b[39m.\u001b[39mappend(\u001b[39m0\u001b[39m, np\u001b[39m.\u001b[39mcumsum(brow_lengths))\n\u001b[0;32m    685\u001b[0m col_offsets \u001b[39m=\u001b[39m np\u001b[39m.\u001b[39mappend(\u001b[39m0\u001b[39m, np\u001b[39m.\u001b[39mcumsum(bcol_lengths))\n",
-      "File \u001b[1;32m~\\AppData\\Roaming\\Python\\Python311\\site-packages\\scipy\\sparse\\_sputils.py:53\u001b[0m, in \u001b[0;36mupcast\u001b[1;34m(*args)\u001b[0m\n\u001b[0;32m     50\u001b[0m         _upcast_memo[\u001b[39mhash\u001b[39m(args)] \u001b[39m=\u001b[39m t\n\u001b[0;32m     51\u001b[0m         \u001b[39mreturn\u001b[39;00m t\n\u001b[1;32m---> 53\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mTypeError\u001b[39;00m(\u001b[39m'\u001b[39m\u001b[39mno supported conversion for types: \u001b[39m\u001b[39m%r\u001b[39;00m\u001b[39m'\u001b[39m \u001b[39m%\u001b[39m (args,))\n",
-      "\u001b[1;31mTypeError\u001b[0m: no supported conversion for types: (dtype('float64'), dtype('O'))"
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "       Genre  ESRB_Rating  Platform  bin_Critic_Score\n",
+      "19         0            3        17                 1\n",
+      "20         0            3        18                 1\n",
+      "21         7            0         2                 1\n",
+      "24        12            0         0                 1\n",
+      "25        13            0         2                 1\n",
+      "...      ...          ...       ...               ...\n",
+      "55090     15            3        14                 1\n",
+      "55423      8            1        12                 1\n",
+      "55490      0            3        12                 1\n",
+      "55528      1            0        12                 0\n",
+      "55653     19            5        18                 1\n",
+      "\n",
+      "[6116 rows x 4 columns]\n",
+      "1    3650\n",
+      "0    2238\n",
+      "2     228\n",
+      "Name: bin_Critic_Score, dtype: int64\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "C:\\Users\\hellom\\AppData\\Local\\Temp\\ipykernel_7948\\1163177163.py:12: SettingWithCopyWarning: \n",
+      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+      "Try using .loc[row_indexer,col_indexer] = value instead\n",
+      "\n",
+      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+      "  to_be_nodes[\"Genre\"] = le.fit_transform(gammas[\"Genre\"])\n",
+      "C:\\Users\\hellom\\AppData\\Local\\Temp\\ipykernel_7948\\1163177163.py:13: SettingWithCopyWarning: \n",
+      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+      "Try using .loc[row_indexer,col_indexer] = value instead\n",
+      "\n",
+      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+      "  to_be_nodes[\"ESRB_Rating\"] = le.fit_transform(gammas[\"ESRB_Rating\"])\n",
+      "C:\\Users\\hellom\\AppData\\Local\\Temp\\ipykernel_7948\\1163177163.py:14: SettingWithCopyWarning: \n",
+      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+      "Try using .loc[row_indexer,col_indexer] = value instead\n",
+      "\n",
+      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+      "  to_be_nodes[\"Platform\"] = le.fit_transform(gammas[\"Platform\"])\n",
+      "C:\\Users\\hellom\\AppData\\Local\\Temp\\ipykernel_7948\\1163177163.py:15: SettingWithCopyWarning: \n",
+      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+      "Try using .loc[row_indexer,col_indexer] = value instead\n",
+      "\n",
+      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+      "  to_be_nodes[\"bin_Critic_Score\"] = le.fit_transform(gammas[\"bin_Critic_Score\"])\n"
     ]
    }
   ],
   "source": [
    "# Columnising dataset\n",
-    "node_cols = [\"Genre\", \"ESRB_Rating\", \"Platform\", \"Critic_Score\", \"User_Score\"]\n",
+    "node_cols = [\"Genre\", \"ESRB_Rating\", \"Platform\", \"bin_Critic_Score\"]\n",
    "\n",
    "# Columns to be considered as nodes\n",
    "to_be_nodes = gammas[node_cols]\n",
@@ -1098,41 +1135,54 @@
    "# Attribute to be predicted\n",
    "predikt_col = gammas[\"NA_Sales\"]\n",
    "\n",
-    "transformer = make_column_transformer((OneHotEncoder(), [\"Genre\", \"ESRB_Rating\", \"Platform\"]), remainder = 'passthrough')\n",
+    "le = preprocessing.LabelEncoder()\n",
    "\n",
-    "predikt_transform = transformer.fit_transform(gammas)\n",
+    "to_be_nodes[\"Genre\"] = le.fit_transform(gammas[\"Genre\"])\n",
+    "to_be_nodes[\"ESRB_Rating\"] = le.fit_transform(gammas[\"ESRB_Rating\"])\n",
+    "to_be_nodes[\"Platform\"] = le.fit_transform(gammas[\"Platform\"])\n",
    "\n",
-    "predikt_df = pd.DataFrame(predikt_transform, columns=transformer.get_feature_names_out())\n",
-    "\n",
-    "# Checking dimensions\n",
-    "print(to_be_nodes.shape)\n",
-    "\n",
-    "predikt_df.head()"
+    "print(to_be_nodes)\n",
+    "print(to_be_nodes[\"bin_Critic_Score\"].value_counts())"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 50,
+   "execution_count": 127,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Chopping down trees\n",
+    "node_train, node_test, predikt_train, predikt_test = train_test_split(to_be_nodes, predikt_col, test_size = 0.2, random_state =69)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 128,
   "metadata": {},
   "outputs": [
    {
     "ename": "ValueError",
-     "evalue": "Found input variables with inconsistent numbers of samples: [18880, 23601]",
+     "evalue": "Unknown label type: 'continuous'",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mValueError\u001b[0m                                Traceback (most recent call last)",
-      "Cell \u001b[1;32mIn[50], line 2\u001b[0m\n\u001b[0;32m      1\u001b[0m \u001b[39m# Chopping down trees\u001b[39;00m\n\u001b[1;32m----> 2\u001b[0m node_train, node_test, predikt_train, predikt_test \u001b[39m=\u001b[39m train_test_split(to_be_nodes, predikt_col, test_size \u001b[39m=\u001b[39;49m \u001b[39m0.2\u001b[39;49m, random_state \u001b[39m=\u001b[39;49m\u001b[39m69\u001b[39;49m)\n",
-      "File \u001b[1;32mc:\\Python311\\Lib\\site-packages\\sklearn\\model_selection\\_split.py:2559\u001b[0m, in \u001b[0;36mtrain_test_split\u001b[1;34m(test_size, train_size, random_state, shuffle, stratify, *arrays)\u001b[0m\n\u001b[0;32m   2556\u001b[0m \u001b[39mif\u001b[39;00m n_arrays \u001b[39m==\u001b[39m \u001b[39m0\u001b[39m:\n\u001b[0;32m   2557\u001b[0m     \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\u001b[39m\"\u001b[39m\u001b[39mAt least one array required as input\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[1;32m-> 2559\u001b[0m arrays \u001b[39m=\u001b[39m indexable(\u001b[39m*\u001b[39;49marrays)\n\u001b[0;32m   2561\u001b[0m n_samples \u001b[39m=\u001b[39m _num_samples(arrays[\u001b[39m0\u001b[39m])\n\u001b[0;32m   2562\u001b[0m n_train, n_test \u001b[39m=\u001b[39m _validate_shuffle_split(\n\u001b[0;32m   2563\u001b[0m     n_samples, test_size, train_size, default_test_size\u001b[39m=\u001b[39m\u001b[39m0.25\u001b[39m\n\u001b[0;32m   2564\u001b[0m )\n",
-      "File \u001b[1;32mc:\\Python311\\Lib\\site-packages\\sklearn\\utils\\validation.py:443\u001b[0m, in \u001b[0;36mindexable\u001b[1;34m(*iterables)\u001b[0m\n\u001b[0;32m    424\u001b[0m \u001b[39m\u001b[39m\u001b[39m\"\"\"Make arrays indexable for cross-validation.\u001b[39;00m\n\u001b[0;32m    425\u001b[0m \n\u001b[0;32m    426\u001b[0m \u001b[39mChecks consistent length, passes through None, and ensures that everything\u001b[39;00m\n\u001b[1;32m   (...)\u001b[0m\n\u001b[0;32m    439\u001b[0m \u001b[39m    sparse matrix, or dataframe) or `None`.\u001b[39;00m\n\u001b[0;32m    440\u001b[0m \u001b[39m\"\"\"\u001b[39;00m\n\u001b[0;32m    442\u001b[0m result \u001b[39m=\u001b[39m [_make_indexable(X) \u001b[39mfor\u001b[39;00m X \u001b[39min\u001b[39;00m iterables]\n\u001b[1;32m--> 443\u001b[0m check_consistent_length(\u001b[39m*\u001b[39;49mresult)\n\u001b[0;32m    444\u001b[0m \u001b[39mreturn\u001b[39;00m result\n",
-      "File \u001b[1;32mc:\\Python311\\Lib\\site-packages\\sklearn\\utils\\validation.py:397\u001b[0m, in \u001b[0;36mcheck_consistent_length\u001b[1;34m(*arrays)\u001b[0m\n\u001b[0;32m    395\u001b[0m uniques \u001b[39m=\u001b[39m np\u001b[39m.\u001b[39munique(lengths)\n\u001b[0;32m    396\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mlen\u001b[39m(uniques) \u001b[39m>\u001b[39m \u001b[39m1\u001b[39m:\n\u001b[1;32m--> 397\u001b[0m     \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\n\u001b[0;32m    398\u001b[0m         \u001b[39m\"\u001b[39m\u001b[39mFound input variables with inconsistent numbers of samples: \u001b[39m\u001b[39m%r\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[0;32m    399\u001b[0m         \u001b[39m%\u001b[39m [\u001b[39mint\u001b[39m(l) \u001b[39mfor\u001b[39;00m l \u001b[39min\u001b[39;00m lengths]\n\u001b[0;32m    400\u001b[0m     )\n",
-      "\u001b[1;31mValueError\u001b[0m: Found input variables with inconsistent numbers of samples: [18880, 23601]"
+      "Cell \u001b[1;32mIn[128], line 3\u001b[0m\n\u001b[0;32m      1\u001b[0m classifier_obj \u001b[39m=\u001b[39m DecisionTreeClassifier()\n\u001b[1;32m----> 3\u001b[0m classifier_obj \u001b[39m=\u001b[39m classifier_obj\u001b[39m.\u001b[39;49mfit(node_train, predikt_train)\n\u001b[0;32m      5\u001b[0m predikt_result \u001b[39m=\u001b[39m classifier_obj\u001b[39m.\u001b[39mpredict(node_test)\n\u001b[0;32m      7\u001b[0m \u001b[39mprint\u001b[39m(\u001b[39m\"\u001b[39m\u001b[39mACCURACY FOR MODEL PRE: \u001b[39m\u001b[39m\"\u001b[39m, metrics\u001b[39m.\u001b[39maccuracy_score(predikt_test, predikt_result))\n",
+      "File \u001b[1;32mc:\\Python311\\Lib\\site-packages\\sklearn\\tree\\_classes.py:889\u001b[0m, in \u001b[0;36mDecisionTreeClassifier.fit\u001b[1;34m(self, X, y, sample_weight, check_input)\u001b[0m\n\u001b[0;32m    859\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mfit\u001b[39m(\u001b[39mself\u001b[39m, X, y, sample_weight\u001b[39m=\u001b[39m\u001b[39mNone\u001b[39;00m, check_input\u001b[39m=\u001b[39m\u001b[39mTrue\u001b[39;00m):\n\u001b[0;32m    860\u001b[0m \u001b[39m    \u001b[39m\u001b[39m\"\"\"Build a decision tree classifier from the training set (X, y).\u001b[39;00m\n\u001b[0;32m    861\u001b[0m \n\u001b[0;32m    862\u001b[0m \u001b[39m    Parameters\u001b[39;00m\n\u001b[1;32m   (...)\u001b[0m\n\u001b[0;32m    886\u001b[0m \u001b[39m        Fitted estimator.\u001b[39;00m\n\u001b[0;32m    887\u001b[0m \u001b[39m    \"\"\"\u001b[39;00m\n\u001b[1;32m--> 889\u001b[0m     \u001b[39msuper\u001b[39;49m()\u001b[39m.\u001b[39;49mfit(\n\u001b[0;32m    890\u001b[0m         X,\n\u001b[0;32m    891\u001b[0m         y,\n\u001b[0;32m    892\u001b[0m         sample_weight\u001b[39m=\u001b[39;49msample_weight,\n\u001b[0;32m    893\u001b[0m         check_input\u001b[39m=\u001b[39;49mcheck_input,\n\u001b[0;32m    894\u001b[0m     )\n\u001b[0;32m    895\u001b[0m     \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\n",
+      "File \u001b[1;32mc:\\Python311\\Lib\\site-packages\\sklearn\\tree\\_classes.py:224\u001b[0m, in \u001b[0;36mBaseDecisionTree.fit\u001b[1;34m(self, X, y, sample_weight, check_input)\u001b[0m\n\u001b[0;32m    221\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mn_outputs_ \u001b[39m=\u001b[39m y\u001b[39m.\u001b[39mshape[\u001b[39m1\u001b[39m]\n\u001b[0;32m    223\u001b[0m \u001b[39mif\u001b[39;00m is_classification:\n\u001b[1;32m--> 224\u001b[0m     check_classification_targets(y)\n\u001b[0;32m    225\u001b[0m     y \u001b[39m=\u001b[39m np\u001b[39m.\u001b[39mcopy(y)\n\u001b[0;32m    227\u001b[0m     \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mclasses_ \u001b[39m=\u001b[39m []\n",
+      "File \u001b[1;32mc:\\Python311\\Lib\\site-packages\\sklearn\\utils\\multiclass.py:218\u001b[0m, in \u001b[0;36mcheck_classification_targets\u001b[1;34m(y)\u001b[0m\n\u001b[0;32m    210\u001b[0m y_type \u001b[39m=\u001b[39m type_of_target(y, input_name\u001b[39m=\u001b[39m\u001b[39m\"\u001b[39m\u001b[39my\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[0;32m    211\u001b[0m \u001b[39mif\u001b[39;00m y_type \u001b[39mnot\u001b[39;00m \u001b[39min\u001b[39;00m [\n\u001b[0;32m    212\u001b[0m     \u001b[39m\"\u001b[39m\u001b[39mbinary\u001b[39m\u001b[39m\"\u001b[39m,\n\u001b[0;32m    213\u001b[0m     \u001b[39m\"\u001b[39m\u001b[39mmulticlass\u001b[39m\u001b[39m\"\u001b[39m,\n\u001b[1;32m   (...)\u001b[0m\n\u001b[0;32m    216\u001b[0m     \u001b[39m\"\u001b[39m\u001b[39mmultilabel-sequences\u001b[39m\u001b[39m\"\u001b[39m,\n\u001b[0;32m    217\u001b[0m ]:\n\u001b[1;32m--> 218\u001b[0m     \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\u001b[39m\"\u001b[39m\u001b[39mUnknown label type: \u001b[39m\u001b[39m%r\u001b[39;00m\u001b[39m\"\u001b[39m \u001b[39m%\u001b[39m y_type)\n",
+      "\u001b[1;31mValueError\u001b[0m: Unknown label type: 'continuous'"
     ]
    }
   ],
   "source": [
-    "# Chopping down trees\n",
-    "node_train, node_test, predikt_train, predikt_test = train_test_split(to_be_nodes, predikt_col, test_size = 0.2, random_state =69)"
+    "classifier_obj = DecisionTreeClassifier()\n",
+    "\n",
+    "classifier_obj = classifier_obj.fit(node_train, predikt_train)\n",
+    "\n",
+    "predikt_result = classifier_obj.predict(node_test)\n",
+    "\n",
+    "print(\"ACCURACY FOR MODEL PRE: \", metrics.accuracy_score(predikt_test, predikt_result))"
   ]
  }
 ],