Compare commits

14 Commits

Author SHA1 Message Date
Mjørk
e6c8c70ad6 Merge pull request #7 from LinlyBoi/splittermans
Splittermans
2023-05-15 23:23:09 +03:00
f59015aa81 Visualisation 100 2023-05-15 23:15:31 +03:00
LinlyBoi
413f7a8f1f yeah you got this chief 2023-05-15 22:42:50 +03:00
9c09d5649a Critik zkore naught n 1 2023-05-15 22:38:02 +03:00
LinlyBoi
8131670c57 dont fuck 2023-05-15 22:29:21 +03:00
1e552f6c6e DUMDUM PYDOR 2: Electrid DAETh 2023-05-15 21:49:51 +03:00
5cf650e9dc DUMDUM PYDOR 2023-05-15 21:16:35 +03:00
29d1e75817 Nuked Linly's splitting in mining_hq.py 2023-05-15 20:20:01 +03:00
LinlyBoi
64fc005bfc yaya YEET 2023-05-15 19:48:38 +03:00
LinlyBoi
701c3c6a87 kmeans + naive based 2023-05-15 19:11:38 +03:00
LinlyBoi
65d268a902 migrated naive based stuff to notebook 2023-05-15 19:11:26 +03:00
LinlyBoi
957451ae33 python kept yelling at me 2023-05-15 16:33:40 +03:00
LinlyBoi
d01fa8ee1d kmeans 2023-05-15 16:17:07 +03:00
LinlyBoi
4dffa3dc88 In de splittingng 2023-05-15 15:07:24 +03:00
6 changed files with 435 additions and 83 deletions

2
.gitignore vendored
View File

@@ -140,3 +140,5 @@ output.csv
output.xlsx output.xlsx
.gitignore .gitignore
datasets/videogames/games_train.csv
datasets/videogames/games_test.csv

File diff suppressed because one or more lines are too long

View File

@@ -29,6 +29,7 @@ def slam_dunk(dataset, column, labels):
max_value = dataset[column].max() max_value = dataset[column].max()
print("min: ", min_value, " max: ", max_value) print("min: ", min_value, " max: ", max_value)
bins = np.linspace(min_value, max_value, len(labels) + 1) bins = np.linspace(min_value, max_value, len(labels) + 1)
bins
dunked_column = "bin_" + column dunked_column = "bin_" + column
dataset[dunked_column] = pd.cut( dataset[dunked_column] = pd.cut(

View File

@@ -2,7 +2,6 @@
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
import seaborn as sns import seaborn as sns
import pandas as pd import pandas as pd
import plotly.express as exp
import mining_hq import mining_hq
from numpy import count_nonzero from numpy import count_nonzero
@@ -18,8 +17,7 @@ custom_params = {"axes.spines.right": False, "axes.spines.top": False}
sns.set_theme(style = 'ticks', rc = custom_params) sns.set_theme(style = 'ticks', rc = custom_params)
plt.xticks(rotation = 90) plt.xticks(rotation = 90)
games_fig_pre = sns.histplot(data = games_pre, x = "Year", kde = True) games_fig_pre = sns.histplot(data = games_pre, x = "Year", palette = sns.color_palette("flare"), kde = True)
games_fig_pre.set_title('Game Sales Pre-2000')
plt.show() plt.show()
plt.xticks(rotation = 90) plt.xticks(rotation = 90)
@@ -62,7 +60,3 @@ plt.xticks(rotation = 90)
games_crime_dur = sns.jointplot(data = games_dur, x = "Year", y = 'Violent_US') games_crime_dur = sns.jointplot(data = games_dur, x = "Year", y = 'Violent_US')
plt.close(1) plt.close(1)
plt.show() plt.show()
# Need to floor the years, shows trailing bars on the histogram :/
test_fig = exp.histogram(games_dur, x = "Year")
test_fig.show()

View File

@@ -2,9 +2,16 @@
# Collects stuff from the rest of the scripts # Collects stuff from the rest of the scripts
import pandas as pd import pandas as pd
import numpy as np import numpy as np
# containment breach # containment breach
import scipy as scp import scipy as scp
import gunner, digger, gunner, scout from sklearn.model_selection import train_test_split
from sklearn.cluster import KMeans
from sklearn import metrics
from sklearn.naive_bayes import GaussianNB
import gunner
import digger
import scout
# Instantiating globals to be used in other files # Instantiating globals to be used in other files
global games_merged_dat global games_merged_dat
@@ -13,7 +20,9 @@ global games_sales_split_dur
global games_sales_split_pos global games_sales_split_pos
games_review = pd.read_csv("datasets/videogames/Games.xls") games_review = pd.read_csv("datasets/videogames/Games.xls")
games_sales = scout.cure_depression(pd.read_csv("datasets/videogames/vgsales-12-4-2019-short.csv")) games_sales = scout.cure_depression(
pd.read_csv("datasets/videogames/vgsales-12-4-2019-short.csv")
)
print(games_review.count()) print(games_review.count())
print(games_sales.count()) print(games_sales.count())
@@ -23,6 +32,7 @@ games_review_final = digger.slice_column(games_review, "GameName", "(Import)")
games_merged_dat = digger.write_joined_df(games_sales, games_review_final) games_merged_dat = digger.write_joined_df(games_sales, games_review_final)
# Acquisition of Merged dataset # Acquisition of Merged dataset
print(games_merged_dat.count()) print(games_merged_dat.count())
@@ -103,12 +113,12 @@ gammas = digger.slam_dunk(gammas, "Critic_Score", labels=labels)
# Also need to transform using Z-score (normal distr go brrrr lmao), or min-max # Also need to transform using Z-score (normal distr go brrrr lmao), or min-max
# ah, scheiße # ah, scheiße
# nvm, done, kekW # nvm, done, kekW
gammas['Critic_Score_Norm'] = scout.scaling_zscore(gammas, 'Critic_Score') gammas["Critic_Score_Norm"] = scout.scaling_zscore(gammas, "Critic_Score")
print(gammas['Critic_Score_Norm'].head(10)) print(gammas["Critic_Score_Norm"].head(10))
# Saving all into a file # Saving all into a file
gammas = gammas.dropna(how="any", axis=0) # nuke them empty poopers
gammas.to_csv("datasets/videogames/games_cleanish.csv", index=False) gammas.to_csv("datasets/videogames/games_cleanish.csv", index=False)
# Need similarity and dissimialrity, scipy time # Need similarity and dissimialrity, scipy time
# Selecting 5 random rows # Selecting 5 random rows
chosen_idx = np.random.choice(len(gammas), replace=False, size=5) chosen_idx = np.random.choice(len(gammas), replace=False, size=5)

BIN
dwarves/tre.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 886 KiB