From 86107d5bb696cae92e42d267915c381e23922c2c Mon Sep 17 00:00:00 2001 From: Supermjork Date: Thu, 30 Mar 2023 18:22:32 +0200 Subject: [PATCH] Yes --- datasets/videogames/vgsales.csv | 4 ++-- py_scripts/engineer.py | 8 ++++++-- py_scripts/mining_hq.py | 15 +++------------ 3 files changed, 11 insertions(+), 16 deletions(-) diff --git a/datasets/videogames/vgsales.csv b/datasets/videogames/vgsales.csv index 5e9c9f6..cc43295 100644 --- a/datasets/videogames/vgsales.csv +++ b/datasets/videogames/vgsales.csv @@ -11592,7 +11592,7 @@ Rank,Name,Platform,Year,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales,G 11592,Minecraft: Story Mode,PC,2015,Adventure,Mojang,0.02,0.05,0,0.01,0.08 11593,P.N.03 - Product Number Three,GC,2003,Shooter,Capcom,0.06,0.02,0,0,0.08 11594,Samurai Warriors 2,X360,2006,Action,Tecmo Koei,0.07,0.01,0,0.01,0.08 -11595,Boku no Natsuyasumi 3: Hokkoku Hen: Chiisana Boku no Dai Sougen??PS3,2007,Adventure,Sony Computer Entertainment,0,0,0.08,0,0.08, +11595,Boku no Natsuyasumi 3: Hokkoku Hen: Chiisana Boku no Dai Sougen??PS3,X360,2007,Sony Computer Entertainment,0,0,0.08,0,0.08, 11596,Ape Escape: Pumped & Primed,PS2,2004,Misc,Sony Computer Entertainment,0.04,0.03,0,0.01,0.08 11597,Alien Syndrome,PSP,2007,Role-Playing,Sega,0.07,0,0,0.01,0.08 11598,Monster Trucks DS,DS,2005,Racing,Majesco Entertainment,0.07,0,0,0.01,0.08 @@ -13537,7 +13537,7 @@ Rank,Name,Platform,Year,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales,G 13537,Top Gun,DS,2006,Simulation,505 Games,0.04,0,0,0,0.04 13538,Gem Smashers,3DS,2011,Platform,Crave Entertainment,0.04,0,0,0,0.04 13539,Split/Second,PC,2010,Racing,Disney Interactive Studios,0,0.04,0,0.01,0.04 -13540,B's-LOG Party??PSP,2010,Adventure,Idea Factory,0,0,0.04,0,0.04, +13540,B's-LOG Party??PSP,2010,2010,Idea Factory,0,0,0.04,0,0.04, 13541,King Arthur,GC,2004,Action,Konami Digital Entertainment,0.03,0.01,0,0,0.04 13542,Chicken Shoot,GBA,2005,Action,Zoo Digital Publishing,0.03,0.01,0,0,0.04 13543,Dai Senryaku VII: Modern Military Tactics,XB,2003,Strategy,Kool Kizz,0.03,0.01,0,0,0.04 diff --git a/py_scripts/engineer.py b/py_scripts/engineer.py index a6d3796..c9fca5b 100644 --- a/py_scripts/engineer.py +++ b/py_scripts/engineer.py @@ -4,8 +4,6 @@ import seaborn as sns import mining_hq from numpy import count_nonzero -sns.set() - games_pre = mining_hq.games_sales_split_pre games_dur = mining_hq.games_sales_split_dur games_pos = mining_hq.games_sales_split_pos @@ -13,6 +11,10 @@ games_pos = mining_hq.games_sales_split_pos crime_US = mining_hq.crime_US_intersect crime_CA = mining_hq.crime_CA_intersect +custom_params = {"axes.spines.right": False, "axes.spines.top": False} + +sns.set_theme(style = 'ticks', rc = custom_params) + plt.xticks(rotation = 90) games_fig_pre = sns.histplot(data = games_pre, x = "Year", palette = sns.color_palette("flare"), kde = True) plt.show() @@ -23,6 +25,8 @@ plt.show() plt.xticks(rotation = 90) games_fig_dur = sns.barplot(data = games_dur, x = "Year", y = "NA_Sales") +plt.xlabel("Years") +plt.ylabel("Sales in North America (Canada, USA)") plt.show() plt.xticks(rotation = 90) diff --git a/py_scripts/mining_hq.py b/py_scripts/mining_hq.py index 0cfcfb2..a21390d 100644 --- a/py_scripts/mining_hq.py +++ b/py_scripts/mining_hq.py @@ -78,15 +78,6 @@ games_sales_split_dur = sale_tri_split[1] games_sales_split_pos = sale_tri_split[2] # Displaying Acquired Data -print("Acquired Datasets:\n") -print(sale_tri_split[0].head(5), sale_tri_split[1].head(5), sale_tri_split[2].head(5)) - -print("Dataset Info:\n") -sale_tri_split[0].info() -sale_tri_split[1].info() -sale_tri_split[2].info() - - print("Dataset Info:\n") games_sales_split_pre.info() games_sales_split_dur.info() @@ -113,8 +104,8 @@ gammas = digger.slam_dunk(gammas, "Critic_Score", labels=labels) # Also need to transform using Z-score (normal distr go brrrr lmao), or min-max # ah, scheiße # nvm, done, kekW -gammas['Critic_Score'] = scout.scaling_zscore(gammas, 'Critic_Score') -print(gammas['Critic_Score'].head(10)) +gammas['Critic_Score_Norm'] = scout.scaling_zscore(gammas, 'Critic_Score') +print(gammas['Critic_Score_Norm'].head(10)) # Saving all into a file gammas.to_csv("output.csv", index=False) @@ -125,4 +116,4 @@ chosen_idx = np.random.choice(len(gammas), replace = False, size = 5) sample_rows = gammas.iloc[chosen_idx] print(sample_rows.head()) -scout.dissimilarity(sample_rows.select_dtypes(include = np.number)) +# scout.dissimilarity(sample_rows.select_dtypes(include = np.number))