Refactoring killed muh bebe
This commit is contained in:
@@ -3,34 +3,7 @@
|
||||
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import mining_hq
|
||||
|
||||
# Sharing the dataset variables
|
||||
# Games' data
|
||||
|
||||
# Loading Datasets
|
||||
games_merged = mining_hq.games_merged_dat
|
||||
|
||||
victim1 = pd.read_excel("datasets/crime/clean_crime_canada_dataset.xlsx")
|
||||
victim2 = pd.read_csv("datasets/crime/report.csv")
|
||||
|
||||
# Printing information regarding datasets
|
||||
print("Game Datasets' Info:\n")
|
||||
games_merged.info()
|
||||
|
||||
print("Crime Datasets' Info:\n")
|
||||
victim2.info()
|
||||
victim1.info()
|
||||
|
||||
|
||||
# Regarding the Games.xls dataset:
|
||||
# Coercing the non-numeric values will result in NaN
|
||||
# thus allowing easier removal through `.notnull()`
|
||||
|
||||
|
||||
# Regarding the vgsales-12-4-2019 dataset
|
||||
# Considering we will be using US (and probably CA) crime datasets
|
||||
# It wouldn't be that useful to have other columns regarding other regions
|
||||
def drop_kick(col_list, dataframe):
|
||||
return dataframe.drop(columns=col_list, axis=1)
|
||||
|
||||
@@ -38,8 +11,8 @@ def drop_kick(col_list, dataframe):
|
||||
# Getting the range of years which both datasets share
|
||||
def year_interval(victim1, victim2, col1, col2):
|
||||
return (
|
||||
min(victim2[col2].max(), victim1[col1].max()),
|
||||
max(victim2[col2].min(), victim1[col1].min()),
|
||||
min(victim2[col2].max(), victim1[col1].max()),
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -4,7 +4,7 @@ import pandas as pd
|
||||
import numpy as np
|
||||
import seaborn as sns
|
||||
import digger
|
||||
from tkinter.filedialog import askopenfilename
|
||||
import gunner
|
||||
|
||||
# Instantiating globals to be used in other files
|
||||
global games_merged_dat
|
||||
@@ -23,7 +23,16 @@ games_merged_dat = digger.write_joined_df(games_sales, games_review_final)
|
||||
# Acquisition of Merged dataset
|
||||
games_merged_dat.to_csv("datasets/videogames/games_merged.csv")
|
||||
|
||||
import gunner
|
||||
# Loading Crime Datasets
|
||||
crime_CA = pd.read_excel('datasets/crime/clean_crime_canada_dataset.xlsx')
|
||||
|
||||
crime_US = pd.read_csv('datasets/crime/report.csv')
|
||||
|
||||
year_interval = gunner.year_interval(crime_US, crime_CA, "report_year", "year")
|
||||
|
||||
print(year_interval[0], year_interval[1])
|
||||
|
||||
crime_intersect = gunner.intersect_by_year(crime_US, crime_CA, "report_year", "year")
|
||||
|
||||
NA_col_list = [
|
||||
"PAL_Sales",
|
||||
@@ -33,7 +42,6 @@ NA_col_list = [
|
||||
"User_Score",
|
||||
"GameName",
|
||||
"Review",
|
||||
"",
|
||||
]
|
||||
GLO_col_list = [
|
||||
"PAL_Sales",
|
||||
@@ -43,22 +51,21 @@ GLO_col_list = [
|
||||
"User_Score",
|
||||
"GameName",
|
||||
"Review",
|
||||
"",
|
||||
]
|
||||
|
||||
# Splitting crime datasets
|
||||
# Collecting Split-Up Datasets
|
||||
games_sales_split_pre = gunner.game_sales_NA_pre
|
||||
games_merged_dat = gunner.drop_kick(NA_col_list, games_merged_dat)
|
||||
|
||||
games_sales_split_dur = gunner.game_sales_NA_dur
|
||||
|
||||
games_sales_split_pos = gunner.game_sales_NA_pos
|
||||
sale_tri_split = gunner.trisect_by_year(games_merged_dat, 'Year', year_interval)
|
||||
|
||||
# Displaying Acquired Data
|
||||
print("Acquired Datasets:\n")
|
||||
games_sales_split_pre.head(5)
|
||||
games_sales_split_dur.head(5)
|
||||
games_sales_split_pos.head(5)
|
||||
print(sale_tri_split[0].head(5),
|
||||
sale_tri_split[1].head(5),
|
||||
sale_tri_split[2].head(5))
|
||||
|
||||
print("Dataset Info:\n")
|
||||
games_sales_split_pre.info()
|
||||
games_sales_split_dur.info()
|
||||
games_sales_split_pos.info()
|
||||
sale_tri_split[0].info()
|
||||
sale_tri_split[1].info()
|
||||
sale_tri_split[2].info()
|
||||
|
||||
Reference in New Issue
Block a user