{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Import Modules\n",
    "The dwarves!"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "ename": "SyntaxError",
     "evalue": "invalid syntax (2807776577.py, line 1)",
     "output_type": "error",
     "traceback": [
      "\u001b[0;36m  Cell \u001b[0;32mIn[4], line 1\u001b[0;36m\u001b[0m\n\u001b[0;31m    import digger, gunner, scout from dwarves\u001b[0m\n\u001b[0m                                 ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m invalid syntax\n"
     ]
    }
   ],
   "source": [
    "from dwarves import digger, gunner, scout \n",
    "import pandas as pd\n"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Pre-Processing\n",
    "This section demonstrates how the acquired datasets were cleaned."
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Game Datasets"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "ename": "NameError",
     "evalue": "name 'digger' is not defined",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
      "Cell \u001b[0;32mIn[2], line 4\u001b[0m\n\u001b[1;32m      1\u001b[0m games_review \u001b[39m=\u001b[39m pd\u001b[39m.\u001b[39mread_csv(\u001b[39m\"\u001b[39m\u001b[39mdatasets/videogames/Games.xls\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[1;32m      2\u001b[0m games_sales \u001b[39m=\u001b[39m pd\u001b[39m.\u001b[39mread_csv(\u001b[39m\"\u001b[39m\u001b[39mdatasets/videogames/vgsales.csv\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[0;32m----> 4\u001b[0m games_review_phase1 \u001b[39m=\u001b[39m digger\u001b[39m.\u001b[39mslice_column(games_review, \u001b[39m\"\u001b[39m\u001b[39mGameName\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39m\"\u001b[39m\u001b[39mReview\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[1;32m      5\u001b[0m games_review_final \u001b[39m=\u001b[39m digger\u001b[39m.\u001b[39mslice_column(games_review, \u001b[39m\"\u001b[39m\u001b[39mGameName\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39m\"\u001b[39m\u001b[39m(Import)\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[1;32m      7\u001b[0m games_merged_dat \u001b[39m=\u001b[39m digger\u001b[39m.\u001b[39mwrite_joined_df(games_sales, games_review_final)\n",
      "\u001b[0;31mNameError\u001b[0m: name 'digger' is not defined"
     ]
    }
   ],
   "source": [
    "\n",
    "# Load the raw review and sales tables.\n",
    "# NOTE(review): Games.xls is parsed with read_csv, so it is presumably delimited text\n",
    "# despite the .xls extension -- confirm, or switch to pd.read_excel.\n",
    "games_review = pd.read_csv(\"datasets/videogames/Games.xls\")\n",
    "games_sales = pd.read_csv(\"datasets/videogames/vgsales.csv\")\n",
    "\n",
    "# digger.slice_column is applied to GameName twice (\"Review\", then \"(Import)\").\n",
    "# The second pass now consumes the first pass's result; previously it restarted from\n",
    "# games_review, so games_review_phase1 was computed and then never used.\n",
    "games_review_phase1 = digger.slice_column(games_review, \"GameName\", \"Review\")\n",
    "games_review_final = digger.slice_column(games_review_phase1, \"GameName\", \"(Import)\")\n",
    "\n",
    "games_merged_dat = digger.write_joined_df(games_sales, games_review_final)\n",
    "\n",
    "# Preview the merged dataset; a bare last expression gets the notebook's rich table display.\n",
    "games_merged_dat.head(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "# Read the merged games CSV, pass it through digger.slam_dunk on Critic_Score with the\n",
    "# three labels below (presumably a binning step -- confirm in digger), and save the result.\n",
    "# NOTE(review): games_merged.csv is written by a later cell of this notebook, so a fresh\n",
    "# top-to-bottom run relies on the file already being on disk.\n",
    "labels = [\"smol\", \"epik\", \"larg\"]\n",
    "gammas = digger.slam_dunk(\n",
    "    pd.read_csv(\"datasets/videogames/games_merged.csv\"),\n",
    "    \"Critic_Score\",\n",
    "    labels=labels,\n",
    ")\n",
    "gammas.to_csv(\"datasets/videogames/games_output.csv\", index=False)"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Crime Datasets"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "# Raw crime tables: the US report is a CSV, the Canadian data an Excel workbook.\n",
    "crime_US = pd.read_csv(\"datasets/crime/report.csv\")\n",
    "crime_CA = pd.read_excel(\"datasets/crime/clean_crime_canada_dataset.xlsx\")\n",
    "\n",
    "# Column names later handed to gunner.drop_kick together with the merged games frame.\n",
    "NA_col_list = [\n",
    "    \"JP_Sales\", \"Other_Sales\", \"Global_Sales\",\n",
    "    \"GameName\", \"Review\", \"Console\", \"Score\",\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "# Count missing values per column. Printing the raw isnull() mask only shows a truncated\n",
    "# True/False grid that says nothing about how much data is actually missing.\n",
    "print(crime_US.isnull().sum())\n",
    "print(crime_CA.isnull().sum())\n",
    "\n",
    "# Year bounds derived from both crime tables (US keyed by report_year, CA by year).\n",
    "# NOTE(review): index 0 is bound to year_max and index 1 to year_min -- confirm this\n",
    "# matches the return order of gunner.year_interval.\n",
    "year_interval = gunner.year_interval(crime_US, crime_CA, \"report_year\", \"year\")\n",
    "year_max, year_min = year_interval[0], year_interval[1]\n",
    "\n",
    "# intersect_by_year returns the US result first and the CA result second\n",
    "# (presumably both restricted to the shared years -- confirm in gunner).\n",
    "crime_intersect = gunner.intersect_by_year(crime_US, crime_CA, \"report_year\", \"year\")\n",
    "crime_US_intersect, crime_CA_intersect = crime_intersect[0], crime_intersect[1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "# gunner.drop_kick receives NA_col_list and the merged frame (presumably dropping those\n",
    "# columns -- confirm in gunner); its result replaces games_merged_dat and is saved to CSV.\n",
    "games_merged_dat = gunner.drop_kick(NA_col_list, games_merged_dat)\n",
    "games_merged_dat.to_csv(\"datasets/videogames/games_merged.csv\", index=False)\n",
    "\n",
    "# Split the games on Year using the crime-derived year_interval and name the three parts.\n",
    "sale_tri_split = gunner.trisect_by_year(games_merged_dat, \"Year\", year_interval)\n",
    "games_sales_split_pre, games_sales_split_dur, games_sales_split_pos = (\n",
    "    sale_tri_split[0],\n",
    "    sale_tri_split[1],\n",
    "    sale_tri_split[2],\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "# games_sales_split_pre/dur/pos are the same objects as sale_tri_split[0..2]; the earlier\n",
    "# version of this cell printed their heads and .info() twice, once through each name.\n",
    "splits = (games_sales_split_pre, games_sales_split_dur, games_sales_split_pos)\n",
    "\n",
    "print(\"Acquired Datasets:\\n\")\n",
    "for split in splits:\n",
    "    print(split.head(5))\n",
    "\n",
    "print(\"Dataset Info:\\n\")\n",
    "for split in splits:\n",
    "    split.info()\n",
    "\n",
    "print(\"Yer forsaken Statistical Description:\\n\", games_sales_split_dur.describe())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Plotting\n",
    "Engineer.py steps up to the job"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Importing and setting up environment"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "import pandas as pd\n",
    "import mining_hq\n",
    "from numpy import count_nonzero\n",
    "\n",
    "# Year-split game sales frames, read off the mining_hq module.\n",
    "games_pre, games_dur, games_pos = (\n",
    "    mining_hq.games_sales_split_pre,\n",
    "    mining_hq.games_sales_split_dur,\n",
    "    mining_hq.games_sales_split_pos,\n",
    ")\n",
    "\n",
    "# Year-intersected crime tables. This rebinds crime_US / crime_CA, which the\n",
    "# pre-processing section used for the raw tables.\n",
    "crime_US, crime_CA = mining_hq.crime_US_intersect, mining_hq.crime_CA_intersect\n",
    "\n",
    "# Seaborn theme: \"ticks\" style with the top and right spines switched off.\n",
    "custom_params = {\"axes.spines.right\": False, \"axes.spines.top\": False}\n",
    "sns.set_theme(style=\"ticks\", rc=custom_params)"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Game Sales pre 2000s"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### General Plot"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "# `palette` only takes effect together with `hue`; with no hue variable seaborn ignores it.\n",
    "# Pick one shade from the \"flare\" palette and pass it as `color` so the styling applies.\n",
    "plt.xticks(rotation=90)\n",
    "games_fig_pre = sns.histplot(\n",
    "    data=games_pre,\n",
    "    x=\"Year\",\n",
    "    color=sns.color_palette(\"flare\")[0],\n",
    "    kde=True,\n",
    ")\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### According to Genre"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "# Histogram of Year for games_pre, stacked by Genre, with KDE curves; x tick labels rotated 90 degrees.\n",
    "plt.xticks(rotation=90)\n",
    "games_fig2_pre = sns.histplot(\n",
    "    data=games_pre,\n",
    "    x=\"Year\",\n",
    "    hue=\"Genre\",\n",
    "    multiple=\"stack\",\n",
    "    kde=True,\n",
    ")\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Game Sales during 2000s"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### General Plot"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "# Histogram of Year for games_dur with a KDE curve; x tick labels rotated 90 degrees.\n",
    "plt.xticks(rotation=90)\n",
    "games_fig_dur = sns.histplot(\n",
    "    data=games_dur,\n",
    "    x=\"Year\",\n",
    "    kde=True,\n",
    ")\n",
    "plt.show()\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### According to Genre"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "# Histogram of Year for games_dur, stacked by Genre, with KDE curves; x tick labels rotated 90 degrees.\n",
    "plt.xticks(rotation=90)\n",
    "games_fig2_dur = sns.histplot(\n",
    "    data=games_dur,\n",
    "    x=\"Year\",\n",
    "    hue=\"Genre\",\n",
    "    multiple=\"stack\",\n",
    "    kde=True,\n",
    ")\n",
    "plt.show()"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Crime in Canada and US"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "# Histogram of the year column of the Canadian crime table; x tick labels rotated 90 degrees.\n",
    "plt.xticks(rotation=90)\n",
    "crime_CA_fig = sns.histplot(x=\"year\", data=crime_CA)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "# Histogram of the report_year column of the US crime table; x tick labels rotated 90 degrees.\n",
    "plt.xticks(rotation=90)\n",
    "crime_US_fig = sns.histplot(x=\"report_year\", data=crime_US)\n",
    "plt.show()"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Plotting Crime in both CA and US together"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "# Overlay both countries on one set of axes so the cell matches its heading; the previous\n",
    "# code only repeated the US histogram and never plotted the Canadian table.\n",
    "crime_both_fig, crime_both_ax = plt.subplots()\n",
    "sns.histplot(data=crime_US, x=\"report_year\", color=\"C0\", label=\"US\", ax=crime_both_ax)\n",
    "sns.histplot(data=crime_CA, x=\"year\", color=\"C1\", label=\"CA\", ax=crime_both_ax)\n",
    "# The two tables name their year column differently, so label the shared axis explicitly.\n",
    "crime_both_ax.set_xlabel(\"year\")\n",
    "crime_both_ax.tick_params(axis=\"x\", labelrotation=90)\n",
    "crime_both_ax.legend()\n",
    "plt.show()"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Integrating violence crimes column into Game sales  \n",
    "- Note: sales are multiplied by 1000 because the dataset stores them in scaled-down units.\n",
    "- The resulting columns are plotted in the sections that follow."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Attach violent-crime counts to each game row by *year*. Plain column assignment aligns on\n",
    "# the row index, which would pair game row i with crime row i regardless of their years.\n",
    "# NOTE(review): crime rows sharing a report_year are summed here -- confirm that this is the\n",
    "# intended aggregate. Years with no crime data stay NaN (min_count=1).\n",
    "violent_by_year = crime_US.groupby(\"report_year\")[\"violent_crimes\"].sum(min_count=1)\n",
    "\n",
    "# Guarded so that re-running the cell does not multiply NA_Sales by 1000 a second time.\n",
    "# assign() builds a new frame instead of writing into the one shared with mining_hq.\n",
    "if \"Violent_US\" not in games_dur.columns:\n",
    "    games_dur = games_dur.assign(\n",
    "        Violent_US=pd.to_numeric(games_dur[\"Year\"], errors=\"coerce\").map(violent_by_year),\n",
    "        NA_Sales=games_dur[\"NA_Sales\"].multiply(1000),\n",
    "    )"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Plotting Sales against Crime with Relplot"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# relplot is figure-level and builds its own figure, so the rotation must be applied to the\n",
    "# grid's axes afterwards. Calling plt.xticks() first only rotated the ticks of a separate,\n",
    "# empty figure that was then displayed alongside the real plot.\n",
    "games_violence_US = sns.relplot(data=games_dur, x=\"NA_Sales\", y=\"Violent_US\")\n",
    "for ax in games_violence_US.axes.flat:\n",
    "    ax.tick_params(axis=\"x\", labelrotation=90)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Joined Plot and Histograms"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# jointplot is figure-level and creates its own figure, so rotate the joint axes' tick\n",
    "# labels after plotting. Calling plt.xticks() beforehand only affected a stray empty figure.\n",
    "games_crime_dur = sns.jointplot(data=games_dur, x=\"Year\", y=\"Violent_US\")\n",
    "games_crime_dur.ax_joint.tick_params(axis=\"x\", labelrotation=90)\n",
    "plt.show()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.9"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}