Curse Jewpidor smh
This commit is contained in:
459
Mining_HQ.ipynb
459
Mining_HQ.ipynb
@@ -1,459 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Import Modules\n",
|
||||
"The dwarves!"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from dwarves import digger, gunner, scout \n",
|
||||
"import pandas as pd\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Pre-Processing\n",
|
||||
"This section demonstrates how the acquired datasets were cleaned."
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Game Datasets"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" Rank Name Platform Year Genre Publisher \\\n",
|
||||
"0 1.0 Wii Sports Wii 2006.0 Sports Nintendo \n",
|
||||
"1 2.0 Super Mario Bros. NES 1985.0 Platform Nintendo \n",
|
||||
"2 3.0 Mario Kart Wii Wii 2008.0 Racing Nintendo \n",
|
||||
"3 4.0 Wii Sports Resort Wii 2009.0 Sports Nintendo \n",
|
||||
"4 5.0 Pokemon Red/Pokemon Blue GB 1996.0 Role-Playing Nintendo \n",
|
||||
"\n",
|
||||
" NA_Sales EU_Sales JP_Sales Other_Sales Global_Sales Console Review \\\n",
|
||||
"0 41.49 29.02 3.77 8.46 82.74 NaN NaN \n",
|
||||
"1 29.08 3.58 6.81 0.77 40.24 NaN NaN \n",
|
||||
"2 15.85 12.88 3.79 3.31 35.82 NaN NaN \n",
|
||||
"3 15.75 11.01 3.28 2.96 33.00 NaN NaN \n",
|
||||
"4 11.27 8.89 10.22 1.00 31.37 NaN NaN \n",
|
||||
"\n",
|
||||
" Score GameName \n",
|
||||
"0 NaN NaN \n",
|
||||
"1 NaN NaN \n",
|
||||
"2 NaN NaN \n",
|
||||
"3 NaN NaN \n",
|
||||
"4 NaN NaN \n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"\n",
|
||||
"games_review = pd.read_csv(\"datasets/videogames/Games.xls\")\n",
|
||||
"games_sales = pd.read_csv(\"datasets/videogames/vgsales.csv\")\n",
|
||||
"\n",
|
||||
"games_review_phase1 = digger.slice_column(games_review, \"GameName\", \"Review\")\n",
|
||||
"games_review_final = digger.slice_column(games_review, \"GameName\", \"(Import)\")\n",
|
||||
"\n",
|
||||
"games_merged_dat = digger.write_joined_df(games_sales, games_review_final)\n",
|
||||
"\n",
|
||||
"# Acquisition of Merged dataset\n",
|
||||
"print(games_merged_dat.head(5))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"\n",
|
||||
"gammas = pd.read_csv(\"datasets/videogames/games_merged.csv\")\n",
|
||||
"labels = [\"smol\", \"epik\", \"larg\"]\n",
|
||||
"gammas = digger.slam_dunk(gammas, \"Critic_Score\", labels=labels)\n",
|
||||
"gammas.to_csv(\"datasets/videogames/games_output.csv\", index=False)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Crime Datasets"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"\n",
|
||||
"crime_CA = pd.read_excel(\"datasets/crime/clean_crime_canada_dataset.xlsx\")\n",
|
||||
"\n",
|
||||
"crime_US = pd.read_csv(\"datasets/crime/report.csv\")\n",
|
||||
"\n",
|
||||
"NA_col_list = [\n",
|
||||
" \"JP_Sales\",\n",
|
||||
" \"Other_Sales\",\n",
|
||||
" \"Global_Sales\",\n",
|
||||
" \"GameName\",\n",
|
||||
" \"Review\",\n",
|
||||
" \"Console\",\n",
|
||||
" \"Score\",\n",
|
||||
"]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"\n",
|
||||
"print(crime_US.isnull())\n",
|
||||
"print(crime_CA.isnull())\n",
|
||||
"\n",
|
||||
"year_interval = gunner.year_interval(crime_US, crime_CA, \"report_year\", \"year\")\n",
|
||||
"\n",
|
||||
"year_max = year_interval[0]\n",
|
||||
"year_min = year_interval[1]\n",
|
||||
"\n",
|
||||
"crime_intersect = gunner.intersect_by_year(crime_US, crime_CA, \"report_year\", \"year\")\n",
|
||||
"\n",
|
||||
"crime_US_intersect = crime_intersect[0]\n",
|
||||
"crime_CA_intersect = crime_intersect[1]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"\n",
|
||||
"games_merged_dat = gunner.drop_kick(NA_col_list, games_merged_dat)\n",
|
||||
"games_merged_dat.to_csv(\"datasets/videogames/games_merged.csv\", index=False)\n",
|
||||
"\n",
|
||||
"sale_tri_split = gunner.trisect_by_year(games_merged_dat, \"Year\", year_interval)\n",
|
||||
"\n",
|
||||
"games_sales_split_pre = sale_tri_split[0]\n",
|
||||
"games_sales_split_dur = sale_tri_split[1]\n",
|
||||
"games_sales_split_pos = sale_tri_split[2]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"\n",
|
||||
"print(\"Acquired Datasets:\\n\")\n",
|
||||
"print(sale_tri_split[0].head(5), sale_tri_split[1].head(5), sale_tri_split[2].head(5))\n",
|
||||
"\n",
|
||||
"print(\"Dataset Info:\\n\")\n",
|
||||
"sale_tri_split[0].info()\n",
|
||||
"sale_tri_split[1].info()\n",
|
||||
"sale_tri_split[2].info()\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"print(\"Dataset Info:\\n\")\n",
|
||||
"games_sales_split_pre.info()\n",
|
||||
"games_sales_split_dur.info()\n",
|
||||
"games_sales_split_pos.info()\n",
|
||||
"\n",
|
||||
"print(\"Yer forsaken Statistical Description:\\n\", games_sales_split_dur.describe())\n",
|
||||
"\n",
|
||||
"print(\n",
|
||||
" games_sales_split_pre.head(5),\n",
|
||||
" games_sales_split_dur.head(5),\n",
|
||||
" games_sales_split_pos.head(5),\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Plotting\n",
|
||||
"Engineer.py steps up to the job"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Importing and setting up environment"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"import seaborn as sns\n",
|
||||
"import pandas as pd\n",
|
||||
"import mining_hq\n",
|
||||
"from numpy import count_nonzero\n",
|
||||
"\n",
|
||||
"games_pre = mining_hq.games_sales_split_pre\n",
|
||||
"games_dur = mining_hq.games_sales_split_dur\n",
|
||||
"games_pos = mining_hq.games_sales_split_pos\n",
|
||||
"\n",
|
||||
"crime_US = mining_hq.crime_US_intersect\n",
|
||||
"crime_CA = mining_hq.crime_CA_intersect\n",
|
||||
"\n",
|
||||
"custom_params = {\"axes.spines.right\": False, \"axes.spines.top\": False}\n",
|
||||
"\n",
|
||||
"sns.set_theme(style = 'ticks', rc = custom_params)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Game Sales pre 2000s"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### General Plot"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"\n",
|
||||
"plt.xticks(rotation = 90)\n",
|
||||
"games_fig_pre = sns.histplot(data = games_pre, x = \"Year\", palette = sns.color_palette(\"flare\"), kde = True)\n",
|
||||
"plt.show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### According to Genre"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"\n",
|
||||
"plt.xticks(rotation = 90)\n",
|
||||
"games_fig2_pre = sns.histplot(data = games_pre, x = \"Year\", hue = \"Genre\", multiple = \"stack\", kde = True)\n",
|
||||
"plt.show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Game Sales during 2000s"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### General Plot"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"\n",
|
||||
"plt.xticks(rotation = 90)\n",
|
||||
"games_fig_dur = sns.histplot(data = games_dur, x = \"Year\", kde = True)\n",
|
||||
"plt.show()\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### According to Genre"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"\n",
|
||||
"plt.xticks(rotation = 90)\n",
|
||||
"games_fig2_dur = sns.histplot(data = games_dur, x = \"Year\", hue = \"Genre\", multiple = \"stack\", kde = True)\n",
|
||||
"plt.show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Crime in Canada and US"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"\n",
|
||||
"plt.xticks(rotation = 90)\n",
|
||||
"crime_CA_fig = sns.histplot(data = crime_CA, x = \"year\")\n",
|
||||
"plt.show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"\n",
|
||||
"plt.xticks(rotation = 90)\n",
|
||||
"crime_US_fig = sns.histplot(data = crime_US, x = \"report_year\")\n",
|
||||
"plt.show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Plotting Crime in both CA and US together"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"\n",
|
||||
"plt.xticks(rotation = 90)\n",
|
||||
"crime_US_fig = sns.histplot(data = crime_US, x = \"report_year\")\n",
|
||||
"plt.show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Integrating violence crimes column into Game sales \n",
|
||||
"- Note: `NA_Sales` is multiplied by 1000 because the sales figures are stored in a scaled-down unit by default\n",
|
||||
"- to plot later"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"games_dur['Violent_US'] = crime_US['violent_crimes']\n",
|
||||
"games_dur['NA_Sales'] = games_dur['NA_Sales'].multiply(1000)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Plotting Sales against Crime with Relplot"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"plt.xticks(rotation = 90)\n",
|
||||
"games_violence_US = sns.relplot(data = games_dur, x = 'NA_Sales', y = 'Violent_US')\n",
|
||||
"plt.show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Joint Plot and Histograms"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"plt.xticks(rotation = 90)\n",
|
||||
"games_crime_dur = sns.jointplot(data = games_dur, x = \"Year\", y = 'Violent_US')\n",
|
||||
"plt.show()"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.9"
|
||||
},
|
||||
"orig_nbformat": 4
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
946
dwarves/Mining_HQ.ipynb
Normal file
946
dwarves/Mining_HQ.ipynb
Normal file
File diff suppressed because one or more lines are too long
@@ -1,7 +1,7 @@
|
||||
# Getting and combining data
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from dwarves import scout
|
||||
import scout
|
||||
from numpy import ndarray
|
||||
from scipy.stats import binned_statistic
|
||||
|
||||
|
||||
@@ -53,8 +53,10 @@ games_dur['NA_Sales'] = games_dur['NA_Sales'].multiply(1000)
|
||||
|
||||
plt.xticks(rotation = 90)
|
||||
games_violence_US = sns.relplot(data = games_dur, x = 'NA_Sales', y = 'Violent_US')
|
||||
plt.close(1)
|
||||
plt.show()
|
||||
|
||||
plt.xticks(rotation = 90)
|
||||
games_crime_dur = sns.jointplot(data = games_dur, x = "Year", y = 'Violent_US')
|
||||
plt.close(1)
|
||||
plt.show()
|
||||
@@ -2,10 +2,9 @@
|
||||
# Collects stuff from the rest of the scripts
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import seaborn as sns
|
||||
# containment breach
|
||||
import scipy as scp
|
||||
import digger, gunner, scout
|
||||
import gunner, digger, gunner, scout
|
||||
|
||||
# Instantiating globals to be used in other files
|
||||
global games_merged_dat
|
||||
@@ -32,8 +31,8 @@ crime_CA = pd.read_excel("datasets/crime/clean_crime_canada_dataset.xlsx")
|
||||
|
||||
crime_US = pd.read_csv("datasets/crime/report.csv")
|
||||
|
||||
print(crime_US.isnull())
|
||||
print(crime_CA.isnull())
|
||||
print(crime_US.isnull().count())
|
||||
print(crime_CA.isnull().count())
|
||||
|
||||
year_interval = gunner.year_interval(crime_US, crime_CA, "report_year", "year")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user