Circular bad

This commit is contained in:
2023-03-28 13:13:07 +02:00
parent ae9ca5eee8
commit 28e1c182ba
4 changed files with 39 additions and 11 deletions

1
.gitignore vendored
View File

@@ -131,3 +131,4 @@ dmypy.json
.vscode/ .vscode/
jupyter-notes/merged_games.csv jupyter-notes/merged_games.csv
datasets/videogames/vgsales.csv datasets/videogames/vgsales.csv
datasets/videogames/games_merged.csv

View File

@@ -1,15 +1,15 @@
# Visualisations for Data # Visualisations for Data
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
import seaborn as sns import seaborn as sns
import gunner import mining_hq
from numpy import count_nonzero from numpy import count_nonzero
sns.set() sns.set()
plt.xticks(rotation = 90) plt.xticks(rotation = 90)
games_pre = gunner.game_sales_NA_pre games_pre = mining_hq.games_sales_split_pre
games_dur = gunner.game_sales_NA_dur games_dur = mining_hq.games_sales_split_dur
games_pos = gunner.game_sales_NA_pos games_pos = mining_hq.games_sales_split_pos
games_fig_pre = sns.barplot(data = games_pre, x = "Year", y = "NA_Sales", estimator = count_nonzero) games_fig_pre = sns.barplot(data = games_pre, x = "Year", y = "NA_Sales", estimator = count_nonzero)
plt.show() plt.show()

View File

@@ -3,6 +3,7 @@
import pandas as pd import pandas as pd
import numpy as np import numpy as np
import mining_hq
# Sharing the dataset variables # Sharing the dataset variables
# Games' data # Games' data
@@ -22,7 +23,7 @@ global crime_US
global crime_CA global crime_CA
# Loading Datasets # Loading Datasets
games_merged = pd.read_csv('datasets/videogames/merged_games.csv') games_merged = mining_hq.games_merged_dat
crime_CA = pd.read_excel('datasets/crime/clean_crime_canada_dataset.xlsx') crime_CA = pd.read_excel('datasets/crime/clean_crime_canada_dataset.xlsx')
crime_US = pd.read_csv('datasets/crime/report.csv') crime_US = pd.read_csv('datasets/crime/report.csv')
@@ -54,14 +55,14 @@ games_merged.info()
# Regarding the vgsales-12-4-2019 dataset # Regarding the vgsales-12-4-2019 dataset
# Considering we will be using US (and probably CA) crime datasets # Considering we will be using US (and probably CA) crime datasets
# It wouldn't be that useful to have other columns regarding other regions # It wouldn't be that useful to have other columns regarding other regions
NA_col_list = ['PAL_Sales', 'JP_Sales', 'Other_Sales', 'Global_Sales'] NA_col_list = ['PAL_Sales', 'JP_Sales', 'Other_Sales', 'Global_Sales', 'User_Score', 'GameName', 'Review', '']
GLO_col_list = ['PAL_Sales', 'JP_Sales', 'Other_Sales', 'NA_Sales'] GLO_col_list = ['PAL_Sales', 'JP_Sales', 'Other_Sales', 'NA_Sales', 'User_Score', 'GameName', 'Review', '']
game_sales_NA = games_merged.drop(columns = NA_col_list, axis = 1) game_sales_NA = games_merged.drop(columns = NA_col_list, axis = 1)
game_sales_GLO = games_merged.drop(columns = GLO_col_list, axis = 1) game_sales_GLO = games_merged.drop(columns = GLO_col_list, axis = 1)
print(f"Game Sales for NA:\n{game_sales_NA.head(10)} \nWith minimum year being: {game_sales_NA['Year'].min()}") print(f"Game Sales for NA:\n{game_sales_NA.head(5)} \nWith minimum year being: {game_sales_NA['Year'].min()}")
print(f"Game Sales Globally:\n{game_sales_GLO.head(10)}\nWith minimum year being: {game_sales_GLO['Year'].min()}") print(f"Game Sales Globally:\n{game_sales_GLO.head(5)}\nWith minimum year being: {game_sales_GLO['Year'].min()}")
# Getting the range of years which both datasets share # Getting the range of years which both datasets share
crime_year_min = max(crime_US['report_year'].min(), crime_CA['year'].min()) crime_year_min = max(crime_US['report_year'].min(), crime_CA['year'].min())

View File

@@ -4,14 +4,40 @@ import pandas as pd
import numpy as np import numpy as np
import seaborn as sns import seaborn as sns
import digger import digger
import pandas as pd
from tkinter.filedialog import askopenfilename from tkinter.filedialog import askopenfilename
# Names shared with other files (note: `global` at module scope is a no-op; the names are bound by the assignments below)
global games_merged_dat
global games_sales_split_pre
global games_sales_split_dur
global games_sales_split_pos
games_review = pd.read_csv("datasets/videogames/Games.xls") games_review = pd.read_csv("datasets/videogames/Games.xls")
games_sales = pd.read_csv("datasets/videogames/vgsales-12-4-2019-short.csv") games_sales = pd.read_csv("datasets/videogames/vgsales-12-4-2019-short.csv")
games_review_phase1 = digger.slice_column(games_review, "GameName", "Review") games_review_phase1 = digger.slice_column(games_review, "GameName", "Review")
games_review_final = digger.slice_column(games_review, "GameName", "(Import)") games_review_final = digger.slice_column(games_review, "GameName", "(Import)")
games_merged_dat = digger.write_joined_df(games_sales, games_review_final) games_merged_dat = digger.write_joined_df(games_sales, games_review_final)
# Acquisition of Merged dataset
games_merged_dat.to_csv("datasets/videogames/games_merged.csv") games_merged_dat.to_csv("datasets/videogames/games_merged.csv")
import gunner
# Collecting Split-Up Datasets
games_sales_split_pre = gunner.game_sales_NA_pre
games_sales_split_dur = gunner.game_sales_NA_dur
games_sales_split_pos = gunner.game_sales_NA_pos
# Displaying Acquired Data
print("Acquired Datasets:\n")
games_sales_split_pre.head(5)
games_sales_split_dur.head(5)
games_sales_split_pos.head(5)
print("Dataset Info:\n")
games_sales_split_pre.info()
games_sales_split_dur.info()
games_sales_split_pos.info()