Circular import is bad

2023-03-28 13:13:07 +02:00
parent ae9ca5eee8
commit 28e1c182ba
4 changed files with 39 additions and 11 deletions
--- a/py_scripts/gunner.py
+++ b/py_scripts/gunner.py
@@ -3,6 +3,7 @@

 import pandas as pd
 import numpy as np
+import mining_hq

 # Sharing the dataset variables
 # Games' data
@@ -22,7 +23,7 @@ global crime_US
 global crime_CA

 # Loading Datasets
-games_merged = pd.read_csv('datasets/videogames/merged_games.csv')
+games_merged = mining_hq.games_merged_dat

 crime_CA = pd.read_excel('datasets/crime/clean_crime_canada_dataset.xlsx')
 crime_US = pd.read_csv('datasets/crime/report.csv')
@@ -54,14 +55,14 @@ games_merged.info()
 # Regarding the vgsales-12-4-2019 dataset
 # Considering we will be using a US (probs CA too) crime datasets
 # It wouldn't be that useful to have other columns regarding other regions
-NA_col_list = ['PAL_Sales', 'JP_Sales', 'Other_Sales', 'Global_Sales']
-GLO_col_list = ['PAL_Sales', 'JP_Sales', 'Other_Sales', 'NA_Sales']
+NA_col_list = ['PAL_Sales', 'JP_Sales', 'Other_Sales', 'Global_Sales', 'User_Score', 'GameName', 'Review', '']
+GLO_col_list = ['PAL_Sales', 'JP_Sales', 'Other_Sales', 'NA_Sales', 'User_Score', 'GameName', 'Review', '']

 game_sales_NA = games_merged.drop(columns = NA_col_list, axis = 1)
 game_sales_GLO = games_merged.drop(columns = GLO_col_list, axis = 1)

-print(f"Game Sales for NA:\n{game_sales_NA.head(10)} \nWith minimum year being: {game_sales_NA['Year'].min()}")
-print(f"Game Sales Globally:\n{game_sales_GLO.head(10)}\nWith minimum year being: {game_sales_GLO['Year'].min()}")
+print(f"Game Sales for NA:\n{game_sales_NA.head(5)} \nWith minimum year being: {game_sales_NA['Year'].min()}")
+print(f"Game Sales Globally:\n{game_sales_GLO.head(5)}\nWith minimum year being: {game_sales_GLO['Year'].min()}")

 # Getting the range of years which both datasets share
 crime_year_min = max(crime_US['report_year'].min(), crime_CA['year'].min())