From 300ce67b60c7f078f23a42746904dfb9e765664e Mon Sep 17 00:00:00 2001 From: Supermjork Date: Tue, 28 Mar 2023 20:41:46 +0200 Subject: [PATCH] R visualisation better --- py_scripts/engineer.py | 19 +++++++++++++++---- py_scripts/mining_hq.py | 25 ++++++++++++------------- 2 files changed, 27 insertions(+), 17 deletions(-) diff --git a/py_scripts/engineer.py b/py_scripts/engineer.py index ab22516..f3c089d 100644 --- a/py_scripts/engineer.py +++ b/py_scripts/engineer.py @@ -5,19 +5,30 @@ import mining_hq from numpy import count_nonzero sns.set() -plt.xticks(rotation = 90) games_pre = mining_hq.games_sales_split_pre games_dur = mining_hq.games_sales_split_dur games_pos = mining_hq.games_sales_split_pos -games_fig_pre = sns.barplot(data = games_pre, x = "Year", y = "NA_Sales", estimator = count_nonzero) +crime_US = mining_hq.crime_US_intersect +crime_CA = mining_hq.crime_CA_intersect + +plt.xticks(rotation = 90) +games_fig_pre = sns.histplot(data = games_pre, x = "Year", palette = sns.color_palette("flare"), kde = True) plt.show() plt.xticks(rotation = 90) -games_fig_dur = sns.barplot(data = games_dur, x = "Year", y = "NA_Sales", estimator = count_nonzero) +games_fig2_pre = sns.histplot(data = games_pre, x = "Year", hue = "Genre", multiple = "stack", shrink = 0.65) plt.show() plt.xticks(rotation = 90) -games_fig_pos = sns.barplot(data = games_pos, x = "Year", y = "NA_Sales", estimator = count_nonzero) +games_fig_dur = sns.barplot(data = games_dur, x = "Year", y = "NA_Sales") +plt.show() + +plt.xticks(rotation = 90) +games_fig_pos = sns.barplot(data = games_pos, x = "Year", y = "NA_Sales") +plt.show() + +plt.xticks(rotation = 90) +crime_CA_fig = sns.barplot(data = crime_CA, x = "year", y = "incidents") plt.show() diff --git a/py_scripts/mining_hq.py b/py_scripts/mining_hq.py index c9a8baa..1e64b33 100644 --- a/py_scripts/mining_hq.py +++ b/py_scripts/mining_hq.py @@ -3,8 +3,7 @@ import pandas as pd import numpy as np import seaborn as sns -import digger -import gunner +import digger, gunner # Instantiating globals to be used in other files global games_merged_dat @@ -24,7 +23,7 @@ games_review_final = digger.slice_column(games_review, "GameName", "(Import)") games_merged_dat = digger.write_joined_df(games_sales, games_review_final) # Acquisition of Merged dataset -print(games_merged_dat.isnull()) +print(games_merged_dat.count()) games_merged_dat.to_csv("datasets/videogames/games_merged.csv") # Loading Crime Datasets @@ -70,22 +69,22 @@ games_merged_dat = gunner.drop_kick(NA_col_list, games_merged_dat) sale_tri_split = gunner.trisect_by_year(games_merged_dat, 'Year', year_interval) -game_sales_split_pre = sale_tri_split[0] -game_sales_split_dur = sale_tri_split[1] -game_sales_split_pos = sale_tri_split[2] +games_sales_split_pre = sale_tri_split[0] +games_sales_split_dur = sale_tri_split[1] +games_sales_split_pos = sale_tri_split[2] # Displaying Acquired Data print("Acquired Datasets:\n") -print(game_sales_split_pre.head(5), -game_sales_split_dur.head(5), -game_sales_split_pos.head(5)) +print(games_sales_split_pre.head(5), +games_sales_split_dur.head(5), +games_sales_split_pos.head(5)) print("Dataset Info:\n") -game_sales_split_pre.info() -game_sales_split_dur.info() -game_sales_split_pos.info() +games_sales_split_pre.info() +games_sales_split_dur.info() +games_sales_split_pos.info() -print(game_sales_split_dur.describe()) +print(games_sales_split_dur.describe()) # Required to use binning for cleaning, idk # https://towardsdatascience.com/data-preprocessing-with-python-pandas-part-5-binning-c5bd5fd1b950