Refactoring killed muh bebe

This commit is contained in:
2023-03-28 14:07:50 +02:00
parent a7abb05567
commit fea7af62f6
2 changed files with 22 additions and 42 deletions

View File

@@ -3,34 +3,7 @@
import pandas as pd import pandas as pd
import numpy as np import numpy as np
import mining_hq
# Sharing the dataset variables
# Games' data
# Loading Datasets
games_merged = mining_hq.games_merged_dat
victim1 = pd.read_excel("datasets/crime/clean_crime_canada_dataset.xlsx")
victim2 = pd.read_csv("datasets/crime/report.csv")
# Printing information regarding datasets
print("Game Datasets' Info:\n")
games_merged.info()
print("Crime Datasets' Info:\n")
victim2.info()
victim1.info()
# Regarding the Games.xls dataset:
# Coercing the non-numeric values will result in NaN
# thus allowing easier removal through `.notnull()`
# Regarding the vgsales-12-4-2019 dataset:
# Since we will be using US (and probably CA) crime datasets,
# columns covering other regions wouldn't be that useful
def drop_kick(col_list, dataframe): def drop_kick(col_list, dataframe):
return dataframe.drop(columns=col_list, axis=1) return dataframe.drop(columns=col_list, axis=1)
@@ -38,8 +11,8 @@ def drop_kick(col_list, dataframe):
# Getting the range of years which both datasets share # Getting the range of years which both datasets share
def year_interval(victim1, victim2, col1, col2): def year_interval(victim1, victim2, col1, col2):
return ( return (
min(victim2[col2].max(), victim1[col1].max()),
max(victim2[col2].min(), victim1[col1].min()), max(victim2[col2].min(), victim1[col1].min()),
min(victim2[col2].max(), victim1[col1].max()),
) )

View File

@@ -4,7 +4,7 @@ import pandas as pd
import numpy as np import numpy as np
import seaborn as sns import seaborn as sns
import digger import digger
from tkinter.filedialog import askopenfilename import gunner
# Instantiating globals to be used in other files # Instantiating globals to be used in other files
global games_merged_dat global games_merged_dat
@@ -23,7 +23,16 @@ games_merged_dat = digger.write_joined_df(games_sales, games_review_final)
# Acquisition of Merged dataset # Acquisition of Merged dataset
games_merged_dat.to_csv("datasets/videogames/games_merged.csv") games_merged_dat.to_csv("datasets/videogames/games_merged.csv")
import gunner # Loading Crime Datasets
crime_CA = pd.read_excel('datasets/crime/clean_crime_canada_dataset.xlsx')
crime_US = pd.read_csv('datasets/crime/report.csv')
year_interval = gunner.year_interval(crime_US, crime_CA, "report_year", "year")
print(year_interval[0], year_interval[1])
crime_intersect = gunner.intersect_by_year(crime_US, crime_CA, "report_year", "year")
NA_col_list = [ NA_col_list = [
"PAL_Sales", "PAL_Sales",
@@ -33,7 +42,6 @@ NA_col_list = [
"User_Score", "User_Score",
"GameName", "GameName",
"Review", "Review",
"",
] ]
GLO_col_list = [ GLO_col_list = [
"PAL_Sales", "PAL_Sales",
@@ -43,22 +51,21 @@ GLO_col_list = [
"User_Score", "User_Score",
"GameName", "GameName",
"Review", "Review",
"",
] ]
# Splitting crime datasets
# Collecting Split-Up Datasets # Collecting Split-Up Datasets
games_sales_split_pre = gunner.game_sales_NA_pre games_merged_dat = gunner.drop_kick(NA_col_list, games_merged_dat)
games_sales_split_dur = gunner.game_sales_NA_dur sale_tri_split = gunner.trisect_by_year(games_merged_dat, 'Year', year_interval)
games_sales_split_pos = gunner.game_sales_NA_pos
# Displaying Acquired Data # Displaying Acquired Data
print("Acquired Datasets:\n") print("Acquired Datasets:\n")
games_sales_split_pre.head(5) print(sale_tri_split[0].head(5),
games_sales_split_dur.head(5) sale_tri_split[1].head(5),
games_sales_split_pos.head(5) sale_tri_split[2].head(5))
print("Dataset Info:\n") print("Dataset Info:\n")
games_sales_split_pre.info() sale_tri_split[0].info()
games_sales_split_dur.info() sale_tri_split[1].info()
games_sales_split_pos.info() sale_tri_split[2].info()