I have depression

This commit is contained in:
2023-03-30 10:53:53 +02:00
parent cab64843da
commit 572c0175bf
4 changed files with 34 additions and 17 deletions

View File

@@ -21,13 +21,15 @@ def write_joined_df(left, right, lsuf="new_key"):
return merged return merged
def slam_dunk(dataset, column, size, labels): def slam_dunk(dataset, column, labels):
min_value = dataset[column].min() min_value = dataset[column].min()
max_value = dataset[column].max() max_value = dataset[column].max()
bins = np.linspace(min_value, max_value, size) print("min: ", min_value, " max: ", max_value)
bins = np.linspace(min_value, max_value, len(labels) + 1)
bins
dunked_column = "bin_" + column dunked_column = "bin_" + column
dataset[dunked_column] = pd.cut( dataset[dunked_column] = pd.cut(
dataset[column], bins=bins, labels=labels, include_lowest=True dataset[column], bins=bins, labels=labels, include_lowest=True
) )
return dataset[dunked_column] return dataset

View File

@@ -22,13 +22,13 @@ games_fig2_pre = sns.histplot(data = games_pre, x = "Year", hue = "Genre", multi
plt.show() plt.show()
plt.xticks(rotation = 90) plt.xticks(rotation = 90)
games_fig_dur = sns.barplot(data = games_dur, x = "Year", y = "NA_Sales") games_fig_dur = sns.barplot(data = games_dur, x = "Year", y = "NA_Sales", estimator=count_nonzero)
plt.show() plt.show()
plt.xticks(rotation = 90) plt.xticks(rotation = 90)
games_fig_pos = sns.barplot(data = games_pos, x = "Year", y = "NA_Sales") games_fig_pos = sns.barplot(data = games_pos, x = "Year", y = "NA_Sales", estimator=count_nonzero)
plt.show() plt.show()
plt.xticks(rotation = 90) plt.xticks(rotation = 90)
crime_CA_fig = sns.barplot(data = crime_CA, x = "year", y = "incidents") crime_CA_fig = sns.barplot(data = crime_CA, x = "year", y = "incidents", estimator=count_nonzero)
plt.show() plt.show()

View File

@@ -25,7 +25,6 @@ games_merged_dat = digger.write_joined_df(games_sales, games_review_final)
# Acquisition of Merged dataset # Acquisition of Merged dataset
print(games_merged_dat.count()) print(games_merged_dat.count())
games_merged_dat.to_csv("datasets/videogames/games_merged.csv", index=False)
# Loading Crime Datasets # Loading Crime Datasets
crime_CA = pd.read_excel("datasets/crime/clean_crime_canada_dataset.xlsx") crime_CA = pd.read_excel("datasets/crime/clean_crime_canada_dataset.xlsx")
@@ -50,23 +49,26 @@ NA_col_list = [
"JP_Sales", "JP_Sales",
"Other_Sales", "Other_Sales",
"Global_Sales", "Global_Sales",
"User_Score",
"GameName", "GameName",
"Review", "Review",
"Console",
"Score",
] ]
GLO_col_list = [ GLO_col_list = [
"PAL_Sales", "PAL_Sales",
"JP_Sales", "JP_Sales",
"Other_Sales", "Other_Sales",
"NA_Sales", "NA_Sales",
"User_Score",
"GameName", "GameName",
"Review", "Review",
"Console",
"Score",
] ]
# Splitting crime datasets # Splitting crime datasets
# Collecting Split-Up Datasets # Collecting Split-Up Datasets
games_merged_dat = gunner.drop_kick(NA_col_list, games_merged_dat) games_merged_dat = gunner.drop_kick(NA_col_list, games_merged_dat)
games_merged_dat.to_csv("datasets/videogames/games_merged.csv", index=False)
sale_tri_split = gunner.trisect_by_year(games_merged_dat, "Year", year_interval) sale_tri_split = gunner.trisect_by_year(games_merged_dat, "Year", year_interval)
@@ -89,7 +91,7 @@ games_sales_split_pre.info()
games_sales_split_dur.info() games_sales_split_dur.info()
games_sales_split_pos.info() games_sales_split_pos.info()
print(games_sales_split_dur.describe()) print("Yer forsaken Statistical Description:\n", games_sales_split_dur.describe())
print( print(
games_sales_split_pre.head(5), games_sales_split_pre.head(5),
@@ -105,10 +107,9 @@ print(
# Load merged gammas # Load merged gammas
gammas = pd.read_excel("datasets/videogames/merged_games.xlsx") gammas = pd.read_csv("datasets/videogames/games_merged.csv")
labels = ["smol", "epik", "larg"] labels = ["smol", "epik", "larg"]
gammas["User_Score"] = digger.slam_dunk(gammas, "User_Score", 3, labels=labels) gammas = digger.slam_dunk(gammas, "Critic_Score", labels=labels)
gammas = gammas[gammas["Genre"].isna() == False] # gammas = gammas[gammas["Genre"].isna() == False]
gammas = scout.cure_depression(gammas) # gammas = scout.cure_depression(gammas)
gammas.to_csv("output.csv", index=False) gammas.to_csv("output.csv", index=False)
scout.regression_expression(gammas, "Global_Sales", 0)

View File

@@ -1,8 +1,9 @@
# Regression/Prediction (Totally gonna do later trust bro) # Regression/Prediction (Totally gonna do later trust bro)
from sklearn.linear_model import LinearRegression from sklearn.linear_model import LinearRegression
from sklearn.impute import SimpleImputer from sklearn.impute import SimpleImputer
from sklearn import preprocessing
import numpy as np import numpy as np
import pandas as pd
def cure_depression(dataset): def cure_depression(dataset):
@@ -16,6 +17,7 @@ def cure_depression(dataset):
return dataset return dataset
# NOTE(review): regression_expression is still a work in progress (predictions are printed, not yet written back to the dataset).
def regression_expression(dataset, column, missing_value): def regression_expression(dataset, column, missing_value):
lr = LinearRegression() lr = LinearRegression()
numeric = dataset.select_dtypes(include=np.number) numeric = dataset.select_dtypes(include=np.number)
@@ -32,9 +34,21 @@ def regression_expression(dataset, column, missing_value):
y = traindf[column] y = traindf[column]
traindf.drop(column, axis=1, inplace=True) traindf.drop(column, axis=1, inplace=True)
lr.fit(traindf, y) lr.fit(traindf, y)
testdf.drop(column, axis=1, inplace=True)
pred = lr.predict(testdf) pred = lr.predict(testdf)
# can't put this in data set directly because length no match # can't put this in data set directly because length no match
# join testdf and traindf to form dataset perhaps?? # join testdf and traindf to form dataset perhaps??
testdf[column] = pred testdf[column] = pred
print(testdf.head(30)) print(testdf.head(30))
# Scaling helpers, based on the scikit-learn preprocessing guide:
# https://scikit-learn.org/stable/modules/preprocessing.html#preprocessing
# NOTE: these helpers were written from the guide without being run; treat them as untested.
def scaling_zscore(datashitter, col):
    """Standardise one or more columns to zero mean and unit variance.

    Args:
        datashitter: pandas DataFrame holding the data.
        col: a single column label, or a list of column labels.

    Returns:
        A 2-D NumPy array of shape ``(n_rows, n_selected_columns)`` with the
        z-scored values, as returned by ``StandardScaler.transform``.
    """
    selected = datashitter[col]
    # scikit-learn scalers only accept 2-D input (n_samples, n_features).
    # A single label yields a 1-D Series, which made the previous version
    # raise ValueError; promote it to a one-column DataFrame.
    if selected.ndim == 1:
        selected = selected.to_frame()
    scaler = preprocessing.StandardScaler().fit(selected)
    return scaler.transform(selected)
def scaling_range(datashitter, col):
    """Rescale one or more columns to the [0, 1] range (min-max scaling).

    Args:
        datashitter: pandas DataFrame holding the data.
        col: a single column label, or a list of column labels.

    Returns:
        A 2-D NumPy array of shape ``(n_rows, n_selected_columns)`` with the
        rescaled values, as returned by ``MinMaxScaler.transform``.
    """
    selected = datashitter[col]
    # scikit-learn scalers only accept 2-D input (n_samples, n_features);
    # promote a single-column Series to a one-column DataFrame.
    if selected.ndim == 1:
        selected = selected.to_frame()
    minmax_scaler = preprocessing.MinMaxScaler()
    # Fit and transform the SAME column(s). The previous version fitted on
    # whole rows selected by an inverted null mask and then transformed a
    # different, 1-D input, which could never succeed. MinMaxScaler is
    # documented to disregard NaN when fitting and keep it when transforming,
    # so missing values need no separate filtering here.
    return minmax_scaler.fit_transform(selected)