From 28e1c182bad1757a36cbfe527ed851aadde07fcd Mon Sep 17 00:00:00 2001 From: Supermjork Date: Tue, 28 Mar 2023 13:13:07 +0200 Subject: [PATCH] Circular bad --- .gitignore | 1 + py_scripts/engineer.py | 8 ++++---- py_scripts/gunner.py | 11 ++++++----- py_scripts/mining_hq.py | 30 ++++++++++++++++++++++++++++-- 4 files changed, 39 insertions(+), 11 deletions(-) diff --git a/.gitignore b/.gitignore index 948496b..dc7b073 100644 --- a/.gitignore +++ b/.gitignore @@ -131,3 +131,4 @@ dmypy.json .vscode/ jupyter-notes/merged_games.csv datasets/videogames/vgsales.csv +datasets/videogames/games_merged.csv diff --git a/py_scripts/engineer.py b/py_scripts/engineer.py index 9c93815..ab22516 100644 --- a/py_scripts/engineer.py +++ b/py_scripts/engineer.py @@ -1,15 +1,15 @@ # Visualisations for Data import matplotlib.pyplot as plt import seaborn as sns -import gunner +import mining_hq from numpy import count_nonzero sns.set() plt.xticks(rotation = 90) -games_pre = gunner.game_sales_NA_pre -games_dur = gunner.game_sales_NA_dur -games_pos = gunner.game_sales_NA_pos +games_pre = mining_hq.games_sales_split_pre +games_dur = mining_hq.games_sales_split_dur +games_pos = mining_hq.games_sales_split_pos games_fig_pre = sns.barplot(data = games_pre, x = "Year", y = "NA_Sales", estimator = count_nonzero) plt.show() diff --git a/py_scripts/gunner.py b/py_scripts/gunner.py index 238f872..44b1773 100644 --- a/py_scripts/gunner.py +++ b/py_scripts/gunner.py @@ -3,6 +3,7 @@ import pandas as pd import numpy as np +import mining_hq # Sharing the dataset variables # Games' data @@ -22,7 +23,7 @@ global crime_US global crime_CA # Loading Datasets -games_merged = pd.read_csv('datasets/videogames/merged_games.csv') +games_merged = mining_hq.games_merged_dat crime_CA = pd.read_excel('datasets/crime/clean_crime_canada_dataset.xlsx') crime_US = pd.read_csv('datasets/crime/report.csv') @@ -54,14 +55,14 @@ games_merged.info() # Regarding the vgsales-12-4-2019 dataset # Considering we will be using a US (probs CA too) crime datasets # It wouldn't be that useful to have other columns regarding other regions -NA_col_list = ['PAL_Sales', 'JP_Sales', 'Other_Sales', 'Global_Sales'] -GLO_col_list = ['PAL_Sales', 'JP_Sales', 'Other_Sales', 'NA_Sales'] +NA_col_list = ['PAL_Sales', 'JP_Sales', 'Other_Sales', 'Global_Sales', 'User_Score', 'GameName', 'Review', ''] +GLO_col_list = ['PAL_Sales', 'JP_Sales', 'Other_Sales', 'NA_Sales', 'User_Score', 'GameName', 'Review', ''] game_sales_NA = games_merged.drop(columns = NA_col_list, axis = 1) game_sales_GLO = games_merged.drop(columns = GLO_col_list, axis = 1) -print(f"Game Sales for NA:\n{game_sales_NA.head(10)} \nWith minimum year being: {game_sales_NA['Year'].min()}") -print(f"Game Sales Globally:\n{game_sales_GLO.head(10)}\nWith minimum year being: {game_sales_GLO['Year'].min()}") +print(f"Game Sales for NA:\n{game_sales_NA.head(5)} \nWith minimum year being: {game_sales_NA['Year'].min()}") +print(f"Game Sales Globally:\n{game_sales_GLO.head(5)}\nWith minimum year being: {game_sales_GLO['Year'].min()}") # Getting the range of years which both datasets share crime_year_min = max(crime_US['report_year'].min(), crime_CA['year'].min()) diff --git a/py_scripts/mining_hq.py b/py_scripts/mining_hq.py index 10b4f12..540666e 100644 --- a/py_scripts/mining_hq.py +++ b/py_scripts/mining_hq.py @@ -4,14 +4,40 @@ import pandas as pd import numpy as np import seaborn as sns import digger - -import pandas as pd from tkinter.filedialog import askopenfilename +# Instantiating globals to be used in other files +global games_merged_dat +global games_sales_split_pre +global games_sales_split_dur +global games_sales_split_pos + games_review = pd.read_csv("datasets/videogames/Games.xls") games_sales = pd.read_csv("datasets/videogames/vgsales-12-4-2019-short.csv") games_review_phase1 = digger.slice_column(games_review, "GameName", "Review") games_review_final = digger.slice_column(games_review, "GameName", "(Import)") + games_merged_dat = digger.write_joined_df(games_sales, games_review_final) + +# Acquisition of Merged dataset games_merged_dat.to_csv("datasets/videogames/games_merged.csv") + +import gunner +# Collecting Split-Up Datasets +games_sales_split_pre = gunner.game_sales_NA_pre + +games_sales_split_dur = gunner.game_sales_NA_dur + +games_sales_split_pos = gunner.game_sales_NA_pos + +# Displaying Acquired Data +print("Acquired Datasets:\n") +games_sales_split_pre.head(5) +games_sales_split_dur.head(5) +games_sales_split_pos.head(5) + +print("Dataset Info:\n") +games_sales_split_pre.info() +games_sales_split_dur.info() +games_sales_split_pos.info()