From fea7af62f69257378eb672edf245ad69d9b3b11b Mon Sep 17 00:00:00 2001 From: Supermjork Date: Tue, 28 Mar 2023 14:07:50 +0200 Subject: [PATCH] Refactoring killed muh bebe --- py_scripts/gunner.py | 29 +---------------------------- py_scripts/mining_hq.py | 35 +++++++++++++++++++++-------------- 2 files changed, 22 insertions(+), 42 deletions(-) diff --git a/py_scripts/gunner.py b/py_scripts/gunner.py index ad454ad..0f8e503 100644 --- a/py_scripts/gunner.py +++ b/py_scripts/gunner.py @@ -3,34 +3,7 @@ import pandas as pd import numpy as np -import mining_hq -# Sharing the dataset variables -# Games' data - -# Loading Datasets -games_merged = mining_hq.games_merged_dat - -victim1 = pd.read_excel("datasets/crime/clean_crime_canada_dataset.xlsx") -victim2 = pd.read_csv("datasets/crime/report.csv") - -# Printing information regarding datasets -print("Game Datasets' Info:\n") -games_merged.info() - -print("Crime Datasets' Info:\n") -victim2.info() -victim1.info() - - -# Regarding the Games.xls dataset: -# Coercing the non-numeric values will result in NaN -# thus allowing easier removal through `.notnull()` - - -# Regarding the vgsales-12-4-2019 dataset -# Considering we will be using a US (probs CA too) crime datasets -# It wouldn't be that useful to have other columns regarding other regions def drop_kick(col_list, dataframe): return dataframe.drop(columns=col_list, axis=1) @@ -38,8 +11,8 @@ def drop_kick(col_list, dataframe): # Getting the range of years which both datasets share def year_interval(victim1, victim2, col1, col2): return ( - min(victim2[col2].max(), victim1[col1].max()), max(victim2[col2].min(), victim1[col1].min()), + min(victim2[col2].max(), victim1[col1].max()), ) diff --git a/py_scripts/mining_hq.py b/py_scripts/mining_hq.py index 9a6854f..0c628de 100644 --- a/py_scripts/mining_hq.py +++ b/py_scripts/mining_hq.py @@ -4,7 +4,7 @@ import pandas as pd import numpy as np import seaborn as sns import digger -from tkinter.filedialog import askopenfilename +import gunner # Instantiating globals to be used in other files global games_merged_dat @@ -23,7 +23,16 @@ games_merged_dat = digger.write_joined_df(games_sales, games_review_final) # Acquisition of Merged dataset games_merged_dat.to_csv("datasets/videogames/games_merged.csv") -import gunner +# Loading Crime Datasets +crime_CA = pd.read_excel('datasets/crime/clean_crime_canada_dataset.xlsx') + +crime_US = pd.read_csv('datasets/crime/report.csv') + +year_interval = gunner.year_interval(crime_US, crime_CA, "report_year", "year") + +print(year_interval[0], year_interval[1]) + +crime_intersect = gunner.intersect_by_year(crime_US, crime_CA, "report_year", "year") NA_col_list = [ "PAL_Sales", @@ -33,7 +42,6 @@ NA_col_list = [ "User_Score", "GameName", "Review", - "", ] GLO_col_list = [ "PAL_Sales", @@ -43,22 +51,21 @@ GLO_col_list = [ "User_Score", "GameName", "Review", - "", ] + +# Splitting crime datasets # Collecting Split-Up Datasets -games_sales_split_pre = gunner.game_sales_NA_pre +games_merged_dat = gunner.drop_kick(NA_col_list, games_merged_dat) -games_sales_split_dur = gunner.game_sales_NA_dur - -games_sales_split_pos = gunner.game_sales_NA_pos +sale_tri_split = gunner.trisect_by_year(games_merged_dat, 'Year', year_interval) # Displaying Acquired Data print("Acquired Datasets:\n") -games_sales_split_pre.head(5) -games_sales_split_dur.head(5) -games_sales_split_pos.head(5) +print(sale_tri_split[0].head(5), +sale_tri_split[1].head(5), +sale_tri_split[2].head(5)) print("Dataset Info:\n") -games_sales_split_pre.info() -games_sales_split_dur.info() -games_sales_split_pos.info() +sale_tri_split[0].info() +sale_tri_split[1].info() +sale_tri_split[2].info()