R visualisation better

2023-03-28 20:41:46 +02:00
parent ca748eb57e
commit 300ce67b60
2 changed files with 27 additions and 17 deletions
--- a/py_scripts/engineer.py
+++ b/py_scripts/engineer.py
@@ -5,19 +5,30 @@ import mining_hq
 from numpy import count_nonzero

 sns.set()
-plt.xticks(rotation = 90)

 games_pre = mining_hq.games_sales_split_pre
 games_dur = mining_hq.games_sales_split_dur
 games_pos = mining_hq.games_sales_split_pos

-games_fig_pre = sns.barplot(data = games_pre, x = "Year", y = "NA_Sales", estimator = count_nonzero)
+crime_US = mining_hq.crime_US_intersect
+crime_CA = mining_hq.crime_CA_intersect
+
+plt.xticks(rotation = 90)
+games_fig_pre = sns.histplot(data = games_pre, x = "Year", palette = sns.color_palette("flare"), kde = True)
 plt.show()

 plt.xticks(rotation = 90)
-games_fig_dur = sns.barplot(data = games_dur, x = "Year", y = "NA_Sales", estimator = count_nonzero)
+games_fig2_pre = sns.histplot(data = games_pre, x = "Year", hue = "Genre", multiple = "stack", shrink = 0.65)
 plt.show()

 plt.xticks(rotation = 90)
-games_fig_pos = sns.barplot(data = games_pos, x = "Year", y = "NA_Sales", estimator = count_nonzero)
+games_fig_dur = sns.barplot(data = games_dur, x = "Year", y = "NA_Sales")
+plt.show()
+
+plt.xticks(rotation = 90)
+games_fig_pos = sns.barplot(data = games_pos, x = "Year", y = "NA_Sales")
+plt.show()
+
+plt.xticks(rotation = 90)
+crime_CA_fig = sns.barplot(data = crime_CA, x = "year", y = "incidents")
 plt.show()
--- a/py_scripts/mining_hq.py
+++ b/py_scripts/mining_hq.py
@@ -3,8 +3,7 @@
 import pandas as pd
 import numpy as np
 import seaborn as sns
-import digger
-import gunner
+import digger, gunner

 # Instantiating globals to be used in other files
 global games_merged_dat
@@ -24,7 +23,7 @@ games_review_final = digger.slice_column(games_review, "GameName", "(Import)")
 games_merged_dat = digger.write_joined_df(games_sales, games_review_final)

 # Acquisition of Merged dataset
-print(games_merged_dat.isnull())
+print(games_merged_dat.count())
 games_merged_dat.to_csv("datasets/videogames/games_merged.csv")

 # Loading Crime Datasets
@@ -70,22 +69,22 @@ games_merged_dat = gunner.drop_kick(NA_col_list, games_merged_dat)

 sale_tri_split = gunner.trisect_by_year(games_merged_dat, 'Year', year_interval)

-game_sales_split_pre = sale_tri_split[0]
-game_sales_split_dur = sale_tri_split[1]
-game_sales_split_pos = sale_tri_split[2]
+games_sales_split_pre = sale_tri_split[0]
+games_sales_split_dur = sale_tri_split[1]
+games_sales_split_pos = sale_tri_split[2]

 # Displaying Acquired Data
 print("Acquired Datasets:\n")
-print(game_sales_split_pre.head(5),
-game_sales_split_dur.head(5),
-game_sales_split_pos.head(5))
+print(games_sales_split_pre.head(5),
+games_sales_split_dur.head(5),
+games_sales_split_pos.head(5))

 print("Dataset Info:\n")
-game_sales_split_pre.info()
-game_sales_split_dur.info()
-game_sales_split_pos.info()
+games_sales_split_pre.info()
+games_sales_split_dur.info()
+games_sales_split_pos.info()

-print(game_sales_split_dur.describe())
+print(games_sales_split_dur.describe())

 # Required to use binning for cleaning, idk
 # https://towardsdatascience.com/data-preprocessing-with-python-pandas-part-5-binning-c5bd5fd1b950