Circular bad

This commit is contained in:
2023-03-28 13:13:07 +02:00
parent ae9ca5eee8
commit 28e1c182ba
4 changed files with 39 additions and 11 deletions

View File

@@ -3,6 +3,7 @@
import pandas as pd
import numpy as np
import mining_hq
# Sharing the dataset variables
# Games' data
@@ -22,7 +23,7 @@ global crime_US
global crime_CA
# Loading Datasets
games_merged = pd.read_csv('datasets/videogames/merged_games.csv')
games_merged = mining_hq.games_merged_dat
crime_CA = pd.read_excel('datasets/crime/clean_crime_canada_dataset.xlsx')
crime_US = pd.read_csv('datasets/crime/report.csv')
@@ -54,14 +55,14 @@ games_merged.info()
# Regarding the vgsales-12-4-2019 dataset
# Since we will be using US (and probably CA) crime datasets,
# columns covering other regions would not be very useful.
NA_col_list = ['PAL_Sales', 'JP_Sales', 'Other_Sales', 'Global_Sales']
GLO_col_list = ['PAL_Sales', 'JP_Sales', 'Other_Sales', 'NA_Sales']
NA_col_list = ['PAL_Sales', 'JP_Sales', 'Other_Sales', 'Global_Sales', 'User_Score', 'GameName', 'Review', '']
GLO_col_list = ['PAL_Sales', 'JP_Sales', 'Other_Sales', 'NA_Sales', 'User_Score', 'GameName', 'Review', '']
game_sales_NA = games_merged.drop(columns = NA_col_list, axis = 1)
game_sales_GLO = games_merged.drop(columns = GLO_col_list, axis = 1)
print(f"Game Sales for NA:\n{game_sales_NA.head(10)} \nWith minimum year being: {game_sales_NA['Year'].min()}")
print(f"Game Sales Globally:\n{game_sales_GLO.head(10)}\nWith minimum year being: {game_sales_GLO['Year'].min()}")
print(f"Game Sales for NA:\n{game_sales_NA.head(5)} \nWith minimum year being: {game_sales_NA['Year'].min()}")
print(f"Game Sales Globally:\n{game_sales_GLO.head(5)}\nWith minimum year being: {game_sales_GLO['Year'].min()}")
# Getting the range of years which both datasets share
crime_year_min = max(crime_US['report_year'].min(), crime_CA['year'].min())