Merge pull request #7 from LinlyBoi/splittermans

Splittermans
Visualisation 100
2023-05-15 23:23:09 +03:00 · 2023-05-15 23:15:31 +03:00 · 2023-05-15 22:42:50 +03:00 · 2023-05-15 22:38:02 +03:00 · 2023-05-15 22:29:21 +03:00 · 2023-05-15 21:49:51 +03:00
6 changed files with 435 additions and 83 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -140,3 +140,5 @@ output.csv
 output.xlsx
 .gitignore
 datasets/videogames/games_train.csv
 datasets/videogames/games_test.csv
--- a/dwarves/Mining_HQ.ipynb
+++ b/dwarves/Mining_HQ.ipynb
--- a/dwarves/digger.py
+++ b/dwarves/digger.py
@@ -29,6 +29,7 @@ def slam_dunk(dataset, column, labels):
    max_value = dataset[column].max()
    print("min: ", min_value, " max: ", max_value)
    bins = np.linspace(min_value, max_value, len(labels) + 1)
    bins
    dunked_column = "bin_" + column
    dataset[dunked_column] = pd.cut(
--- a/dwarves/engineer.py
+++ b/dwarves/engineer.py
@@ -2,7 +2,6 @@
 import matplotlib.pyplot as plt
 import seaborn as sns
 import pandas as pd
 import plotly.express as exp
 import mining_hq
 from numpy import count_nonzero
@@ -18,8 +17,7 @@ custom_params = {"axes.spines.right": False, "axes.spines.top": False}
 sns.set_theme(style = 'ticks', rc = custom_params)
 plt.xticks(rotation = 90)
-games_fig_pre = sns.histplot(data = games_pre, x = "Year", kde = True)
+games_fig_pre = sns.histplot(data = games_pre, x = "Year", palette = sns.color_palette("flare"), kde = True)
 games_fig_pre.set_title('Game Sales Pre-2000')
 plt.show()
 plt.xticks(rotation = 90)
@@ -62,7 +60,3 @@ plt.xticks(rotation = 90)
 games_crime_dur = sns.jointplot(data = games_dur, x = "Year", y = 'Violent_US')
 plt.close(1)
 plt.show()
 # Need to floor the years, shows trailing bars on the histogram :/
 test_fig = exp.histogram(games_dur, x = "Year")
 test_fig.show()
--- a/dwarves/mining_hq.py
+++ b/dwarves/mining_hq.py
@@ -2,9 +2,16 @@
 # Collects stuff from the rest of the scripts
 import pandas as pd
 import numpy as np
 # containment breach
 import scipy as scp
-import gunner, digger, gunner, scout
+from sklearn.model_selection import train_test_split
 from sklearn.cluster import KMeans
 from sklearn import metrics
 from sklearn.naive_bayes import GaussianNB
 import gunner
 import digger
 import scout
 # Instantiating globals to be used in other files
 global games_merged_dat
@@ -13,7 +20,9 @@ global games_sales_split_dur
 global games_sales_split_pos
 games_review = pd.read_csv("datasets/videogames/Games.xls")
-games_sales = scout.cure_depression(pd.read_csv("datasets/videogames/vgsales-12-4-2019-short.csv"))
+games_sales = scout.cure_depression(
    pd.read_csv("datasets/videogames/vgsales-12-4-2019-short.csv")
 )
 print(games_review.count())
 print(games_sales.count())
@@ -23,6 +32,7 @@ games_review_final = digger.slice_column(games_review, "GameName", "(Import)")
 games_merged_dat = digger.write_joined_df(games_sales, games_review_final)
 # Acquisition of Merged dataset
 print(games_merged_dat.count())
@@ -103,12 +113,12 @@ gammas = digger.slam_dunk(gammas, "Critic_Score", labels=labels)
 # Also need to transform using Z-score (normal distr go brrrr lmao), or min-max
 # ah, scheiße
 # nvm, done, kekW
-gammas['Critic_Score_Norm'] = scout.scaling_zscore(gammas, 'Critic_Score')
+gammas["Critic_Score_Norm"] = scout.scaling_zscore(gammas, "Critic_Score")
-print(gammas['Critic_Score_Norm'].head(10))
+print(gammas["Critic_Score_Norm"].head(10))
 # Saving all into a file
 gammas = gammas.dropna(how="any", axis=0)  # nuke them empty poopers
 gammas.to_csv("datasets/videogames/games_cleanish.csv", index=False)
 # Need similarity and dissimilarity, scipy time
 # Selecting 5 random rows
 chosen_idx = np.random.choice(len(gammas), replace=False, size=5)
--- a/dwarves/tre.png
+++ b/dwarves/tre.png
Author	SHA1	Message	Date
Mjørk	e6c8c70ad6	Merge pull request #7 from LinlyBoi/splittermans Splittermans	2023-05-15 23:23:09 +03:00
Supermjork	f59015aa81	Visualisation 100	2023-05-15 23:15:31 +03:00
LinlyBoi	413f7a8f1f	yeah you got this chief	2023-05-15 22:42:50 +03:00
Supermjork	9c09d5649a	Critik zkore naught n 1	2023-05-15 22:38:02 +03:00
LinlyBoi	8131670c57	dont fuck	2023-05-15 22:29:21 +03:00
Supermjork	1e552f6c6e	DUMDUM PYDOR 2: Electrid DAETh	2023-05-15 21:49:51 +03:00
Supermjork	5cf650e9dc	DUMDUM PYDOR	2023-05-15 21:16:35 +03:00
Supermjork	29d1e75817	Nuked Linly's splitting in mining_hq.py	2023-05-15 20:20:01 +03:00
LinlyBoi	64fc005bfc	yaya YEET	2023-05-15 19:48:38 +03:00
LinlyBoi	701c3c6a87	kmeans + naive based	2023-05-15 19:11:38 +03:00
LinlyBoi	65d268a902	migrated naive based stuff to notebook	2023-05-15 19:11:26 +03:00
LinlyBoi	957451ae33	python kept yelling at me	2023-05-15 16:33:40 +03:00
LinlyBoi	d01fa8ee1d	kmeans	2023-05-15 16:17:07 +03:00
LinlyBoi	4dffa3dc88	In de splittingng	2023-05-15 15:07:24 +03:00