Dw about it I forgot to pull :D (no explosions)

This commit is contained in:
LinlyBoi
2023-03-29 22:26:14 +02:00
2 changed files with 52 additions and 7 deletions

View File

@@ -5,19 +5,30 @@ import mining_hq
from numpy import count_nonzero from numpy import count_nonzero
sns.set() sns.set()
plt.xticks(rotation = 90)
games_pre = mining_hq.games_sales_split_pre games_pre = mining_hq.games_sales_split_pre
games_dur = mining_hq.games_sales_split_dur games_dur = mining_hq.games_sales_split_dur
games_pos = mining_hq.games_sales_split_pos games_pos = mining_hq.games_sales_split_pos
games_fig_pre = sns.barplot(data = games_pre, x = "Year", y = "NA_Sales", estimator = count_nonzero) crime_US = mining_hq.crime_US_intersect
crime_CA = mining_hq.crime_CA_intersect
plt.xticks(rotation = 90)
games_fig_pre = sns.histplot(data = games_pre, x = "Year", palette = sns.color_palette("flare"), kde = True)
plt.show() plt.show()
plt.xticks(rotation = 90) plt.xticks(rotation = 90)
games_fig_dur = sns.barplot(data = games_dur, x = "Year", y = "NA_Sales", estimator = count_nonzero) games_fig2_pre = sns.histplot(data = games_pre, x = "Year", hue = "Genre", multiple = "stack", shrink = 0.65)
plt.show() plt.show()
plt.xticks(rotation = 90) plt.xticks(rotation = 90)
games_fig_pos = sns.barplot(data = games_pos, x = "Year", y = "NA_Sales", estimator = count_nonzero) games_fig_dur = sns.barplot(data = games_dur, x = "Year", y = "NA_Sales")
plt.show()
plt.xticks(rotation = 90)
games_fig_pos = sns.barplot(data = games_pos, x = "Year", y = "NA_Sales")
plt.show()
plt.xticks(rotation = 90)
crime_CA_fig = sns.barplot(data = crime_CA, x = "year", y = "incidents")
plt.show() plt.show()

View File

@@ -4,8 +4,7 @@ import pandas as pd
import scout import scout
import numpy as np import numpy as np
import seaborn as sns import seaborn as sns
import digger import digger, gunner
import gunner
# Instantiating globals to be used in other files # Instantiating globals to be used in other files
global games_merged_dat global games_merged_dat
@@ -16,12 +15,16 @@ global games_sales_split_pos
games_review = pd.read_csv("datasets/videogames/Games.xls") games_review = pd.read_csv("datasets/videogames/Games.xls")
games_sales = pd.read_csv("datasets/videogames/vgsales-12-4-2019-short.csv") games_sales = pd.read_csv("datasets/videogames/vgsales-12-4-2019-short.csv")
print(games_review.count())
print(games_sales.count())
games_review_phase1 = digger.slice_column(games_review, "GameName", "Review") games_review_phase1 = digger.slice_column(games_review, "GameName", "Review")
games_review_final = digger.slice_column(games_review, "GameName", "(Import)") games_review_final = digger.slice_column(games_review, "GameName", "(Import)")
games_merged_dat = digger.write_joined_df(games_sales, games_review_final) games_merged_dat = digger.write_joined_df(games_sales, games_review_final)
# Acquisition of Merged dataset # Acquisition of Merged dataset
print(games_merged_dat.count())
games_merged_dat.to_csv("datasets/videogames/games_merged.csv") games_merged_dat.to_csv("datasets/videogames/games_merged.csv")
# Loading Crime Datasets # Loading Crime Datasets
@@ -29,12 +32,19 @@ crime_CA = pd.read_excel("datasets/crime/clean_crime_canada_dataset.xlsx")
crime_US = pd.read_csv("datasets/crime/report.csv") crime_US = pd.read_csv("datasets/crime/report.csv")
print(crime_US.isnull())
print(crime_CA.isnull())
year_interval = gunner.year_interval(crime_US, crime_CA, "report_year", "year") year_interval = gunner.year_interval(crime_US, crime_CA, "report_year", "year")
print(year_interval[0], year_interval[1]) year_max = year_interval[0]
year_min = year_interval[1]
crime_intersect = gunner.intersect_by_year(crime_US, crime_CA, "report_year", "year") crime_intersect = gunner.intersect_by_year(crime_US, crime_CA, "report_year", "year")
crime_US_intersect = crime_intersect[0]
crime_CA_intersect = crime_intersect[1]
NA_col_list = [ NA_col_list = [
"PAL_Sales", "PAL_Sales",
"JP_Sales", "JP_Sales",
@@ -60,6 +70,10 @@ games_merged_dat = gunner.drop_kick(NA_col_list, games_merged_dat)
sale_tri_split = gunner.trisect_by_year(games_merged_dat, "Year", year_interval) sale_tri_split = gunner.trisect_by_year(games_merged_dat, "Year", year_interval)
games_sales_split_pre = sale_tri_split[0]
games_sales_split_dur = sale_tri_split[1]
games_sales_split_pos = sale_tri_split[2]
# Displaying Acquired Data # Displaying Acquired Data
print("Acquired Datasets:\n") print("Acquired Datasets:\n")
print(sale_tri_split[0].head(5), sale_tri_split[1].head(5), sale_tri_split[2].head(5)) print(sale_tri_split[0].head(5), sale_tri_split[1].head(5), sale_tri_split[2].head(5))
@@ -69,6 +83,26 @@ sale_tri_split[0].info()
sale_tri_split[1].info() sale_tri_split[1].info()
sale_tri_split[2].info() sale_tri_split[2].info()
print("Dataset Info:\n")
games_sales_split_pre.info()
games_sales_split_dur.info()
games_sales_split_pos.info()
print(games_sales_split_dur.describe())
print(
games_sales_split_pre.head(5),
games_sales_split_dur.head(5),
games_sales_split_pos.head(5),
)
# Required to use binning for cleaning, idk
# https://towardsdatascience.com/data-preprocessing-with-python-pandas-part-5-binning-c5bd5fd1b950
# Also need to transform the data using Z-score normalization (assuming a normal distribution) or min-max scaling
# Need similarity and dissimilarity measures; use scipy for these
# Load merged gammas # Load merged gammas
gammas = pd.read_csv("datasets/videogames/games_merged.csv") gammas = pd.read_csv("datasets/videogames/games_merged.csv")