Curse Jupyter smh

This commit is contained in:
2023-03-31 13:43:33 +02:00
parent 83f5415ff0
commit fb062ac0e9
5 changed files with 953 additions and 465 deletions

View File

@@ -1,459 +0,0 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Import Modules\n",
"The dwarves!"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"from dwarves import digger, gunner, scout \n",
"import pandas as pd\n"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"# Pre-Processing\n",
"This section demonstrates how the acquired datasets were cleaned."
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## Game Datasets"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Rank Name Platform Year Genre Publisher \\\n",
"0 1.0 Wii Sports Wii 2006.0 Sports Nintendo \n",
"1 2.0 Super Mario Bros. NES 1985.0 Platform Nintendo \n",
"2 3.0 Mario Kart Wii Wii 2008.0 Racing Nintendo \n",
"3 4.0 Wii Sports Resort Wii 2009.0 Sports Nintendo \n",
"4 5.0 Pokemon Red/Pokemon Blue GB 1996.0 Role-Playing Nintendo \n",
"\n",
" NA_Sales EU_Sales JP_Sales Other_Sales Global_Sales Console Review \\\n",
"0 41.49 29.02 3.77 8.46 82.74 NaN NaN \n",
"1 29.08 3.58 6.81 0.77 40.24 NaN NaN \n",
"2 15.85 12.88 3.79 3.31 35.82 NaN NaN \n",
"3 15.75 11.01 3.28 2.96 33.00 NaN NaN \n",
"4 11.27 8.89 10.22 1.00 31.37 NaN NaN \n",
"\n",
" Score GameName \n",
"0 NaN NaN \n",
"1 NaN NaN \n",
"2 NaN NaN \n",
"3 NaN NaN \n",
"4 NaN NaN \n"
]
}
],
"source": [
"\n",
"games_review = pd.read_csv(\"datasets/videogames/Games.xls\")\n",
"games_sales = pd.read_csv(\"datasets/videogames/vgsales.csv\")\n",
"\n",
"games_review_phase1 = digger.slice_column(games_review, \"GameName\", \"Review\")\n",
"games_review_final = digger.slice_column(games_review, \"GameName\", \"(Import)\")\n",
"\n",
"games_merged_dat = digger.write_joined_df(games_sales, games_review_final)\n",
"\n",
"# Acquisition of Merged dataset\n",
"print(games_merged_dat.head(5))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\n",
"gammas = pd.read_csv(\"datasets/videogames/games_merged.csv\")\n",
"labels = [\"smol\", \"epik\", \"larg\"]\n",
"gammas = digger.slam_dunk(gammas, \"Critic_Score\", labels=labels)\n",
"gammas.to_csv(\"datasets/videogames/games_output.csv\", index=False)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## Crime Datasets"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\n",
"crime_CA = pd.read_excel(\"datasets/crime/clean_crime_canada_dataset.xlsx\")\n",
"\n",
"crime_US = pd.read_csv(\"datasets/crime/report.csv\")\n",
"\n",
"NA_col_list = [\n",
" \"JP_Sales\",\n",
" \"Other_Sales\",\n",
" \"Global_Sales\",\n",
" \"GameName\",\n",
" \"Review\",\n",
" \"Console\",\n",
" \"Score\",\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\n",
"print(crime_US.isnull())\n",
"print(crime_CA.isnull())\n",
"\n",
"year_interval = gunner.year_interval(crime_US, crime_CA, \"report_year\", \"year\")\n",
"\n",
"year_max = year_interval[0]\n",
"year_min = year_interval[1]\n",
"\n",
"crime_intersect = gunner.intersect_by_year(crime_US, crime_CA, \"report_year\", \"year\")\n",
"\n",
"crime_US_intersect = crime_intersect[0]\n",
"crime_CA_intersect = crime_intersect[1]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\n",
"games_merged_dat = gunner.drop_kick(NA_col_list, games_merged_dat)\n",
"games_merged_dat.to_csv(\"datasets/videogames/games_merged.csv\", index=False)\n",
"\n",
"sale_tri_split = gunner.trisect_by_year(games_merged_dat, \"Year\", year_interval)\n",
"\n",
"games_sales_split_pre = sale_tri_split[0]\n",
"games_sales_split_dur = sale_tri_split[1]\n",
"games_sales_split_pos = sale_tri_split[2]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\n",
"print(\"Acquired Datasets:\\n\")\n",
"print(sale_tri_split[0].head(5), sale_tri_split[1].head(5), sale_tri_split[2].head(5))\n",
"\n",
"print(\"Dataset Info:\\n\")\n",
"sale_tri_split[0].info()\n",
"sale_tri_split[1].info()\n",
"sale_tri_split[2].info()\n",
"\n",
"\n",
"print(\"Dataset Info:\\n\")\n",
"games_sales_split_pre.info()\n",
"games_sales_split_dur.info()\n",
"games_sales_split_pos.info()\n",
"\n",
"print(\"Yer forsaken Statistical Description:\\n\", games_sales_split_dur.describe())\n",
"\n",
"print(\n",
" games_sales_split_pre.head(5),\n",
" games_sales_split_dur.head(5),\n",
" games_sales_split_pos.head(5),\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Plotting\n",
"Engineer.py steps up to the job"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## Importing and setting up environment"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"import pandas as pd\n",
"import mining_hq\n",
"from numpy import count_nonzero\n",
"\n",
"games_pre = mining_hq.games_sales_split_pre\n",
"games_dur = mining_hq.games_sales_split_dur\n",
"games_pos = mining_hq.games_sales_split_pos\n",
"\n",
"crime_US = mining_hq.crime_US_intersect\n",
"crime_CA = mining_hq.crime_CA_intersect\n",
"\n",
"custom_params = {\"axes.spines.right\": False, \"axes.spines.top\": False}\n",
"\n",
"sns.set_theme(style = 'ticks', rc = custom_params)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## Game Sales pre 2000s"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### General Plot"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\n",
"plt.xticks(rotation = 90)\n",
"games_fig_pre = sns.histplot(data = games_pre, x = \"Year\", palette = sns.color_palette(\"flare\"), kde = True)\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### According to Genre"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\n",
"plt.xticks(rotation = 90)\n",
"games_fig2_pre = sns.histplot(data = games_pre, x = \"Year\", hue = \"Genre\", multiple = \"stack\", kde = True)\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Game Sales during 2000s"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### General Plot"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\n",
"plt.xticks(rotation = 90)\n",
"games_fig_dur = sns.histplot(data = games_dur, x = \"Year\", kde = True)\n",
"plt.show()\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### According to Genre"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\n",
"plt.xticks(rotation = 90)\n",
"games_fig2_dur = sns.histplot(data = games_dur, x = \"Year\", hue = \"Genre\", multiple = \"stack\", kde = True)\n",
"plt.show()"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## Crime in Canada and US"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\n",
"plt.xticks(rotation = 90)\n",
"crime_CA_fig = sns.histplot(data = crime_CA, x = \"year\")\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\n",
"plt.xticks(rotation = 90)\n",
"crime_US_fig = sns.histplot(data = crime_US, x = \"report_year\")\n",
"plt.show()"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### Plotting Crime in both CA and US together"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\n",
"plt.xticks(rotation = 90)\n",
"crime_US_fig = sns.histplot(data = crime_US, x = \"report_year\")\n",
"plt.show()"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## Integrating the violent-crimes column into game sales\n",
"- Note: sales are multiplied by 1000 because they are stored in reduced units by default.\n",
"- The combined data is plotted later in the notebook."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"games_dur['Violent_US'] = crime_US['violent_crimes']\n",
"games_dur['NA_Sales'] = games_dur['NA_Sales'].multiply(1000)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### Plotting Sales against Crime with Relplot"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"plt.xticks(rotation = 90)\n",
"games_violence_US = sns.relplot(data = games_dur, x = 'NA_Sales', y = 'Violent_US')\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Joint Plot and Histograms"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"plt.xticks(rotation = 90)\n",
"games_crime_dur = sns.jointplot(data = games_dur, x = \"Year\", y = 'Violent_US')\n",
"plt.show()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}

946
dwarves/Mining_HQ.ipynb Normal file

File diff suppressed because one or more lines are too long

View File

@@ -1,7 +1,7 @@
# Getting and combining data
import pandas as pd
import numpy as np
from dwarves import scout
import scout
from numpy import ndarray
from scipy.stats import binned_statistic

View File

@@ -53,8 +53,10 @@ games_dur['NA_Sales'] = games_dur['NA_Sales'].multiply(1000)
plt.xticks(rotation = 90)
games_violence_US = sns.relplot(data = games_dur, x = 'NA_Sales', y = 'Violent_US')
plt.close(1)
plt.show()
plt.xticks(rotation = 90)
games_crime_dur = sns.jointplot(data = games_dur, x = "Year", y = 'Violent_US')
plt.close(1)
plt.show()

View File

@@ -2,10 +2,9 @@
# Collects stuff from the rest of the scripts
import pandas as pd
import numpy as np
import seaborn as sns
# containment breach
import scipy as scp
import digger, gunner, scout
import gunner, digger, gunner, scout
# Instantiating globals to be used in other files
global games_merged_dat
@@ -32,8 +31,8 @@ crime_CA = pd.read_excel("datasets/crime/clean_crime_canada_dataset.xlsx")
crime_US = pd.read_csv("datasets/crime/report.csv")
print(crime_US.isnull())
print(crime_CA.isnull())
print(crime_US.isnull().count())
print(crime_CA.isnull().count())
year_interval = gunner.year_interval(crime_US, crime_CA, "report_year", "year")