Refactoring killed muh bebe
This commit is contained in:
@@ -3,34 +3,7 @@
|
|||||||
|
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import mining_hq
|
|
||||||
|
|
||||||
# Sharing the dataset variables
|
|
||||||
# Games' data
|
|
||||||
|
|
||||||
# Loading Datasets
|
|
||||||
games_merged = mining_hq.games_merged_dat
|
|
||||||
|
|
||||||
victim1 = pd.read_excel("datasets/crime/clean_crime_canada_dataset.xlsx")
|
|
||||||
victim2 = pd.read_csv("datasets/crime/report.csv")
|
|
||||||
|
|
||||||
# Printing information regarding datasets
|
|
||||||
print("Game Datasets' Info:\n")
|
|
||||||
games_merged.info()
|
|
||||||
|
|
||||||
print("Crime Datasets' Info:\n")
|
|
||||||
victim2.info()
|
|
||||||
victim1.info()
|
|
||||||
|
|
||||||
|
|
||||||
# Regarding the Games.xls dataset:
|
|
||||||
# Coercing the non-numeric values will result in NaN
|
|
||||||
# thus allowing easier removal through `.notnull()`
|
|
||||||
|
|
||||||
|
|
||||||
# Regarding the vgsales-12-4-2019 dataset
|
|
||||||
# Considering we will be using US (and probably CA) crime datasets
|
|
||||||
# It wouldn't be that useful to keep columns regarding other regions
|
|
||||||
def drop_kick(col_list, dataframe):
|
def drop_kick(col_list, dataframe):
|
||||||
return dataframe.drop(columns=col_list, axis=1)
|
return dataframe.drop(columns=col_list, axis=1)
|
||||||
|
|
||||||
@@ -38,8 +11,8 @@ def drop_kick(col_list, dataframe):
|
|||||||
# Getting the range of years which both datasets share
|
# Getting the range of years which both datasets share
|
||||||
def year_interval(victim1, victim2, col1, col2):
|
def year_interval(victim1, victim2, col1, col2):
|
||||||
return (
|
return (
|
||||||
min(victim2[col2].max(), victim1[col1].max()),
|
|
||||||
max(victim2[col2].min(), victim1[col1].min()),
|
max(victim2[col2].min(), victim1[col1].min()),
|
||||||
|
min(victim2[col2].max(), victim1[col1].max()),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ import pandas as pd
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
import seaborn as sns
|
import seaborn as sns
|
||||||
import digger
|
import digger
|
||||||
from tkinter.filedialog import askopenfilename
|
import gunner
|
||||||
|
|
||||||
# Instantiating globals to be used in other files
|
# Instantiating globals to be used in other files
|
||||||
global games_merged_dat
|
global games_merged_dat
|
||||||
@@ -23,7 +23,16 @@ games_merged_dat = digger.write_joined_df(games_sales, games_review_final)
|
|||||||
# Acquisition of Merged dataset
|
# Acquisition of Merged dataset
|
||||||
games_merged_dat.to_csv("datasets/videogames/games_merged.csv")
|
games_merged_dat.to_csv("datasets/videogames/games_merged.csv")
|
||||||
|
|
||||||
import gunner
|
# Loading Crime Datasets
|
||||||
|
crime_CA = pd.read_excel('datasets/crime/clean_crime_canada_dataset.xlsx')
|
||||||
|
|
||||||
|
crime_US = pd.read_csv('datasets/crime/report.csv')
|
||||||
|
|
||||||
|
year_interval = gunner.year_interval(crime_US, crime_CA, "report_year", "year")
|
||||||
|
|
||||||
|
print(year_interval[0], year_interval[1])
|
||||||
|
|
||||||
|
crime_intersect = gunner.intersect_by_year(crime_US, crime_CA, "report_year", "year")
|
||||||
|
|
||||||
NA_col_list = [
|
NA_col_list = [
|
||||||
"PAL_Sales",
|
"PAL_Sales",
|
||||||
@@ -33,7 +42,6 @@ NA_col_list = [
|
|||||||
"User_Score",
|
"User_Score",
|
||||||
"GameName",
|
"GameName",
|
||||||
"Review",
|
"Review",
|
||||||
"",
|
|
||||||
]
|
]
|
||||||
GLO_col_list = [
|
GLO_col_list = [
|
||||||
"PAL_Sales",
|
"PAL_Sales",
|
||||||
@@ -43,22 +51,21 @@ GLO_col_list = [
|
|||||||
"User_Score",
|
"User_Score",
|
||||||
"GameName",
|
"GameName",
|
||||||
"Review",
|
"Review",
|
||||||
"",
|
|
||||||
]
|
]
|
||||||
|
|
||||||
|
# Splitting crime datasets
|
||||||
# Collecting Split-Up Datasets
|
# Collecting Split-Up Datasets
|
||||||
games_sales_split_pre = gunner.game_sales_NA_pre
|
games_merged_dat = gunner.drop_kick(NA_col_list, games_merged_dat)
|
||||||
|
|
||||||
games_sales_split_dur = gunner.game_sales_NA_dur
|
sale_tri_split = gunner.trisect_by_year(games_merged_dat, 'Year', year_interval)
|
||||||
|
|
||||||
games_sales_split_pos = gunner.game_sales_NA_pos
|
|
||||||
|
|
||||||
# Displaying Acquired Data
|
# Displaying Acquired Data
|
||||||
print("Acquired Datasets:\n")
|
print("Acquired Datasets:\n")
|
||||||
games_sales_split_pre.head(5)
|
print(sale_tri_split[0].head(5),
|
||||||
games_sales_split_dur.head(5)
|
sale_tri_split[1].head(5),
|
||||||
games_sales_split_pos.head(5)
|
sale_tri_split[2].head(5))
|
||||||
|
|
||||||
print("Dataset Info:\n")
|
print("Dataset Info:\n")
|
||||||
games_sales_split_pre.info()
|
sale_tri_split[0].info()
|
||||||
games_sales_split_dur.info()
|
sale_tri_split[1].info()
|
||||||
games_sales_split_pos.info()
|
sale_tri_split[2].info()
|
||||||
|
|||||||
Reference in New Issue
Block a user