From 572c0175bf4130e8a07174aaaecf7fb283155c2e Mon Sep 17 00:00:00 2001 From: Supermjork Date: Thu, 30 Mar 2023 10:53:53 +0200 Subject: [PATCH] Derive bin edges from labels; add scaling helpers and count plots --- py_scripts/digger.py | 8 +++++--- py_scripts/engineer.py | 6 +++--- py_scripts/mining_hq.py | 19 ++++++++++--------- py_scripts/scout.py | 18 ++++++++++++++++-- 4 files changed, 34 insertions(+), 17 deletions(-) diff --git a/py_scripts/digger.py b/py_scripts/digger.py index f0e9f99..16b92a0 100644 --- a/py_scripts/digger.py +++ b/py_scripts/digger.py @@ -21,13 +21,15 @@ def write_joined_df(left, right, lsuf="new_key"): return merged -def slam_dunk(dataset, column, size, labels): +def slam_dunk(dataset, column, labels): min_value = dataset[column].min() max_value = dataset[column].max() - bins = np.linspace(min_value, max_value, size) + print("min: ", min_value, " max: ", max_value) + bins = np.linspace(min_value, max_value, len(labels) + 1) + bins dunked_column = "bin_" + column dataset[dunked_column] = pd.cut( dataset[column], bins=bins, labels=labels, include_lowest=True ) - return dataset[dunked_column] + return dataset diff --git a/py_scripts/engineer.py b/py_scripts/engineer.py index f3c089d..ceda338 100644 --- a/py_scripts/engineer.py +++ b/py_scripts/engineer.py @@ -22,13 +22,13 @@ games_fig2_pre = sns.histplot(data = games_pre, x = "Year", hue = "Genre", multi plt.show() plt.xticks(rotation = 90) -games_fig_dur = sns.barplot(data = games_dur, x = "Year", y = "NA_Sales") +games_fig_dur = sns.barplot(data = games_dur, x = "Year", y = "NA_Sales", estimator=count_nonzero) plt.show() plt.xticks(rotation = 90) -games_fig_pos = sns.barplot(data = games_pos, x = "Year", y = "NA_Sales") +games_fig_pos = sns.barplot(data = games_pos, x = "Year", y = "NA_Sales", estimator=count_nonzero) plt.show() plt.xticks(rotation = 90) -crime_CA_fig = sns.barplot(data = crime_CA, x = "year", y = "incidents") +crime_CA_fig = sns.barplot(data = crime_CA, x = "year", y = "incidents", estimator=count_nonzero) plt.show()
diff --git a/py_scripts/mining_hq.py b/py_scripts/mining_hq.py index b4ec933..4da0777 100644 --- a/py_scripts/mining_hq.py +++ b/py_scripts/mining_hq.py @@ -25,7 +25,6 @@ games_merged_dat = digger.write_joined_df(games_sales, games_review_final) # Acquisition of Merged dataset print(games_merged_dat.count()) -games_merged_dat.to_csv("datasets/videogames/games_merged.csv", index=False) # Loading Crime Datasets crime_CA = pd.read_excel("datasets/crime/clean_crime_canada_dataset.xlsx") @@ -50,23 +49,26 @@ NA_col_list = [ "JP_Sales", "Other_Sales", "Global_Sales", - "User_Score", "GameName", "Review", + "Console", + "Score", ] GLO_col_list = [ "PAL_Sales", "JP_Sales", "Other_Sales", "NA_Sales", - "User_Score", "GameName", "Review", + "Console", + "Score", ] # Splitting crime datasets # Collecting Split-Up Datasets games_merged_dat = gunner.drop_kick(NA_col_list, games_merged_dat) +games_merged_dat.to_csv("datasets/videogames/games_merged.csv", index=False) sale_tri_split = gunner.trisect_by_year(games_merged_dat, "Year", year_interval) @@ -89,7 +91,7 @@ games_sales_split_pre.info() games_sales_split_dur.info() games_sales_split_pos.info() -print(games_sales_split_dur.describe()) +print("Yer forsaken Statistical Description:\n", games_sales_split_dur.describe()) print( games_sales_split_pre.head(5), @@ -105,10 +107,9 @@ print( # Load merged gammas -gammas = pd.read_excel("datasets/videogames/merged_games.xlsx") +gammas = pd.read_csv("datasets/videogames/games_merged.csv") labels = ["smol", "epik", "larg"] -gammas["User_Score"] = digger.slam_dunk(gammas, "User_Score", 3, labels=labels) -gammas = gammas[gammas["Genre"].isna() == False] -gammas = scout.cure_depression(gammas) +gammas = digger.slam_dunk(gammas, "Critic_Score", labels=labels) +# gammas = gammas[gammas["Genre"].isna() == False] +# gammas = scout.cure_depression(gammas) gammas.to_csv("output.csv", index=False) -scout.regression_expression(gammas, "Global_Sales", 0) diff --git a/py_scripts/scout.py 
b/py_scripts/scout.py index 2ea4073..0d79cbb 100644 --- a/py_scripts/scout.py +++ b/py_scripts/scout.py @@ -1,8 +1,9 @@ # Regression/Prediction (Totally gonna do later trust bro) from sklearn.linear_model import LinearRegression - from sklearn.impute import SimpleImputer +from sklearn import preprocessing import numpy as np +import pandas as pd def cure_depression(dataset): @@ -16,6 +17,7 @@ def cure_depression(dataset): return dataset +# Fit a linear regression on the numeric columns and print the predicted `column` values. def regression_expression(dataset, column, missing_value): lr = LinearRegression() numeric = dataset.select_dtypes(include=np.number) @@ -32,9 +34,21 @@ def regression_expression(dataset, column, missing_value): y = traindf[column] traindf.drop(column, axis=1, inplace=True) lr.fit(traindf, y) - testdf.drop(column, axis=1, inplace=True) pred = lr.predict(testdf) # can't put this in data set directly because length no match # join testdf and traindf to form dataset perhaps?? testdf[column] = pred print(testdf.head(30)) + +# https://scikit-learn.org/stable/modules/preprocessing.html#preprocessing +# Reference for the scalers used below. +# NOTE: untested; the scaling helpers below were written without being run. +def scaling_zscore(datashitter, col): + scaler = preprocessing.StandardScaler().fit(datashitter[col]) + return scaler.transform(datashitter[col]) + +def scaling_range(datashitter, col): + nonnull = datashitter[col].isna() + minmax_scaler = preprocessing.MinMaxScaler() + trainer = minmax_scaler.fit_transform(datashitter[nonnull]) + return minmax_scaler.transform(datashitter[col])