Dw about it I forgot to pull :D (no explosions)
This commit is contained in:
@@ -5,19 +5,30 @@ import mining_hq
|
|||||||
from numpy import count_nonzero
|
from numpy import count_nonzero
|
||||||
|
|
||||||
sns.set()
|
sns.set()
|
||||||
plt.xticks(rotation = 90)
|
|
||||||
|
|
||||||
games_pre = mining_hq.games_sales_split_pre
|
games_pre = mining_hq.games_sales_split_pre
|
||||||
games_dur = mining_hq.games_sales_split_dur
|
games_dur = mining_hq.games_sales_split_dur
|
||||||
games_pos = mining_hq.games_sales_split_pos
|
games_pos = mining_hq.games_sales_split_pos
|
||||||
|
|
||||||
games_fig_pre = sns.barplot(data = games_pre, x = "Year", y = "NA_Sales", estimator = count_nonzero)
|
crime_US = mining_hq.crime_US_intersect
|
||||||
|
crime_CA = mining_hq.crime_CA_intersect
|
||||||
|
|
||||||
|
plt.xticks(rotation = 90)
|
||||||
|
games_fig_pre = sns.histplot(data = games_pre, x = "Year", palette = sns.color_palette("flare"), kde = True)
|
||||||
plt.show()
|
plt.show()
|
||||||
|
|
||||||
plt.xticks(rotation = 90)
|
plt.xticks(rotation = 90)
|
||||||
games_fig_dur = sns.barplot(data = games_dur, x = "Year", y = "NA_Sales", estimator = count_nonzero)
|
games_fig2_pre = sns.histplot(data = games_pre, x = "Year", hue = "Genre", multiple = "stack", shrink = 0.65)
|
||||||
plt.show()
|
plt.show()
|
||||||
|
|
||||||
plt.xticks(rotation = 90)
|
plt.xticks(rotation = 90)
|
||||||
games_fig_pos = sns.barplot(data = games_pos, x = "Year", y = "NA_Sales", estimator = count_nonzero)
|
games_fig_dur = sns.barplot(data = games_dur, x = "Year", y = "NA_Sales")
|
||||||
|
plt.show()
|
||||||
|
|
||||||
|
plt.xticks(rotation = 90)
|
||||||
|
games_fig_pos = sns.barplot(data = games_pos, x = "Year", y = "NA_Sales")
|
||||||
|
plt.show()
|
||||||
|
|
||||||
|
plt.xticks(rotation = 90)
|
||||||
|
crime_CA_fig = sns.barplot(data = crime_CA, x = "year", y = "incidents")
|
||||||
plt.show()
|
plt.show()
|
||||||
|
|||||||
@@ -4,8 +4,7 @@ import pandas as pd
|
|||||||
import scout
|
import scout
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import seaborn as sns
|
import seaborn as sns
|
||||||
import digger
|
import digger, gunner
|
||||||
import gunner
|
|
||||||
|
|
||||||
# Instantiating globals to be used in other files
|
# Instantiating globals to be used in other files
|
||||||
global games_merged_dat
|
global games_merged_dat
|
||||||
@@ -16,12 +15,16 @@ global games_sales_split_pos
|
|||||||
games_review = pd.read_csv("datasets/videogames/Games.xls")
|
games_review = pd.read_csv("datasets/videogames/Games.xls")
|
||||||
games_sales = pd.read_csv("datasets/videogames/vgsales-12-4-2019-short.csv")
|
games_sales = pd.read_csv("datasets/videogames/vgsales-12-4-2019-short.csv")
|
||||||
|
|
||||||
|
print(games_review.count())
|
||||||
|
print(games_sales.count())
|
||||||
|
|
||||||
games_review_phase1 = digger.slice_column(games_review, "GameName", "Review")
|
games_review_phase1 = digger.slice_column(games_review, "GameName", "Review")
|
||||||
games_review_final = digger.slice_column(games_review, "GameName", "(Import)")
|
games_review_final = digger.slice_column(games_review, "GameName", "(Import)")
|
||||||
|
|
||||||
games_merged_dat = digger.write_joined_df(games_sales, games_review_final)
|
games_merged_dat = digger.write_joined_df(games_sales, games_review_final)
|
||||||
|
|
||||||
# Acquisition of Merged dataset
|
# Acquisition of Merged dataset
|
||||||
|
print(games_merged_dat.count())
|
||||||
games_merged_dat.to_csv("datasets/videogames/games_merged.csv")
|
games_merged_dat.to_csv("datasets/videogames/games_merged.csv")
|
||||||
|
|
||||||
# Loading Crime Datasets
|
# Loading Crime Datasets
|
||||||
@@ -29,12 +32,19 @@ crime_CA = pd.read_excel("datasets/crime/clean_crime_canada_dataset.xlsx")
|
|||||||
|
|
||||||
crime_US = pd.read_csv("datasets/crime/report.csv")
|
crime_US = pd.read_csv("datasets/crime/report.csv")
|
||||||
|
|
||||||
|
print(crime_US.isnull())
|
||||||
|
print(crime_CA.isnull())
|
||||||
|
|
||||||
year_interval = gunner.year_interval(crime_US, crime_CA, "report_year", "year")
|
year_interval = gunner.year_interval(crime_US, crime_CA, "report_year", "year")
|
||||||
|
|
||||||
print(year_interval[0], year_interval[1])
|
year_max = year_interval[0]
|
||||||
|
year_min = year_interval[1]
|
||||||
|
|
||||||
crime_intersect = gunner.intersect_by_year(crime_US, crime_CA, "report_year", "year")
|
crime_intersect = gunner.intersect_by_year(crime_US, crime_CA, "report_year", "year")
|
||||||
|
|
||||||
|
crime_US_intersect = crime_intersect[0]
|
||||||
|
crime_CA_intersect = crime_intersect[1]
|
||||||
|
|
||||||
NA_col_list = [
|
NA_col_list = [
|
||||||
"PAL_Sales",
|
"PAL_Sales",
|
||||||
"JP_Sales",
|
"JP_Sales",
|
||||||
@@ -60,6 +70,10 @@ games_merged_dat = gunner.drop_kick(NA_col_list, games_merged_dat)
|
|||||||
|
|
||||||
sale_tri_split = gunner.trisect_by_year(games_merged_dat, "Year", year_interval)
|
sale_tri_split = gunner.trisect_by_year(games_merged_dat, "Year", year_interval)
|
||||||
|
|
||||||
|
games_sales_split_pre = sale_tri_split[0]
|
||||||
|
games_sales_split_dur = sale_tri_split[1]
|
||||||
|
games_sales_split_pos = sale_tri_split[2]
|
||||||
|
|
||||||
# Displaying Acquired Data
|
# Displaying Acquired Data
|
||||||
print("Acquired Datasets:\n")
|
print("Acquired Datasets:\n")
|
||||||
print(sale_tri_split[0].head(5), sale_tri_split[1].head(5), sale_tri_split[2].head(5))
|
print(sale_tri_split[0].head(5), sale_tri_split[1].head(5), sale_tri_split[2].head(5))
|
||||||
@@ -69,6 +83,26 @@ sale_tri_split[0].info()
|
|||||||
sale_tri_split[1].info()
|
sale_tri_split[1].info()
|
||||||
sale_tri_split[2].info()
|
sale_tri_split[2].info()
|
||||||
|
|
||||||
|
|
||||||
|
print("Dataset Info:\n")
|
||||||
|
games_sales_split_pre.info()
|
||||||
|
games_sales_split_dur.info()
|
||||||
|
games_sales_split_pos.info()
|
||||||
|
|
||||||
|
print(games_sales_split_dur.describe())
|
||||||
|
|
||||||
|
print(
|
||||||
|
games_sales_split_pre.head(5),
|
||||||
|
games_sales_split_dur.head(5),
|
||||||
|
games_sales_split_pos.head(5),
|
||||||
|
)
|
||||||
|
# Required to use binning for cleaning, idk
|
||||||
|
# https://towardsdatascience.com/data-preprocessing-with-python-pandas-part-5-binning-c5bd5fd1b950
|
||||||
|
|
||||||
|
# Also need to transform using Z-score (normal distr go brrrr lmao), or min-max
|
||||||
|
|
||||||
|
# Need similarity and dissimilarity, scipy time
|
||||||
|
|
||||||
# Load merged gammas
|
# Load merged gammas
|
||||||
|
|
||||||
gammas = pd.read_csv("datasets/videogames/games_merged.csv")
|
gammas = pd.read_csv("datasets/videogames/games_merged.csv")
|
||||||
|
|||||||
Reference in New Issue
Block a user