I have depression

This commit is contained in:
2023-03-30 10:53:53 +02:00
parent cab64843da
commit 572c0175bf
4 changed files with 34 additions and 17 deletions

View File

@@ -21,13 +21,15 @@ def write_joined_df(left, right, lsuf="new_key"):
return merged
def slam_dunk(dataset, column, labels):
    """Bin a numeric column into equal-width, labelled categories.

    The bin edges are spaced evenly between the column's minimum and
    maximum, with one bin per entry in ``labels``. The result is stored in
    a new column named ``"bin_" + column``.

    Args:
        dataset: DataFrame holding ``column``. It is modified in place.
        column: Name of the numeric column to bin.
        labels: One label per bin, ordered from the lowest bin to the highest.

    Returns:
        The same ``dataset`` object, with the ``"bin_" + column`` column added.
    """
    min_value = dataset[column].min()
    max_value = dataset[column].max()
    print("min: ", min_value, " max: ", max_value)

    # N labels need N + 1 edges.
    bins = np.linspace(min_value, max_value, len(labels) + 1)

    dunked_column = "bin_" + column
    # include_lowest=True keeps the minimum value inside the first bin
    # instead of leaving it unbinned.
    dataset[dunked_column] = pd.cut(
        dataset[column], bins=bins, labels=labels, include_lowest=True
    )
    return dataset

View File

@@ -22,13 +22,13 @@ games_fig2_pre = sns.histplot(data = games_pre, x = "Year", hue = "Genre", multi
plt.show()
plt.xticks(rotation = 90)
games_fig_dur = sns.barplot(data = games_dur, x = "Year", y = "NA_Sales")
games_fig_dur = sns.barplot(data = games_dur, x = "Year", y = "NA_Sales", estimator=count_nonzero)
plt.show()
plt.xticks(rotation = 90)
games_fig_pos = sns.barplot(data = games_pos, x = "Year", y = "NA_Sales")
games_fig_pos = sns.barplot(data = games_pos, x = "Year", y = "NA_Sales", estimator=count_nonzero)
plt.show()
plt.xticks(rotation = 90)
crime_CA_fig = sns.barplot(data = crime_CA, x = "year", y = "incidents")
crime_CA_fig = sns.barplot(data = crime_CA, x = "year", y = "incidents", estimator=count_nonzero)
plt.show()

View File

@@ -25,7 +25,6 @@ games_merged_dat = digger.write_joined_df(games_sales, games_review_final)
# Acquisition of Merged dataset
print(games_merged_dat.count())
games_merged_dat.to_csv("datasets/videogames/games_merged.csv", index=False)
# Loading Crime Datasets
crime_CA = pd.read_excel("datasets/crime/clean_crime_canada_dataset.xlsx")
@@ -50,23 +49,26 @@ NA_col_list = [
"JP_Sales",
"Other_Sales",
"Global_Sales",
"User_Score",
"GameName",
"Review",
"Console",
"Score",
]
GLO_col_list = [
"PAL_Sales",
"JP_Sales",
"Other_Sales",
"NA_Sales",
"User_Score",
"GameName",
"Review",
"Console",
"Score",
]
# Splitting crime datasets
# Collecting Split-Up Datasets
games_merged_dat = gunner.drop_kick(NA_col_list, games_merged_dat)
games_merged_dat.to_csv("datasets/videogames/games_merged.csv", index=False)
sale_tri_split = gunner.trisect_by_year(games_merged_dat, "Year", year_interval)
@@ -89,7 +91,7 @@ games_sales_split_pre.info()
games_sales_split_dur.info()
games_sales_split_pos.info()
print(games_sales_split_dur.describe())
print("Yer forsaken Statistical Description:\n", games_sales_split_dur.describe())
print(
games_sales_split_pre.head(5),
@@ -105,10 +107,9 @@ print(
# Load merged gammas
gammas = pd.read_excel("datasets/videogames/merged_games.xlsx")
gammas = pd.read_csv("datasets/videogames/games_merged.csv")
labels = ["smol", "epik", "larg"]
gammas["User_Score"] = digger.slam_dunk(gammas, "User_Score", 3, labels=labels)
gammas = gammas[gammas["Genre"].isna() == False]
gammas = scout.cure_depression(gammas)
gammas = digger.slam_dunk(gammas, "Critic_Score", labels=labels)
# gammas = gammas[gammas["Genre"].isna() == False]
# gammas = scout.cure_depression(gammas)
gammas.to_csv("output.csv", index=False)
scout.regression_expression(gammas, "Global_Sales", 0)

View File

@@ -1,8 +1,9 @@
# Regression/Prediction (TODO: implementation still in progress)
from sklearn.linear_model import LinearRegression
from sklearn.impute import SimpleImputer
from sklearn import preprocessing
import numpy as np
import pandas as pd
def cure_depression(dataset):
@@ -16,6 +17,7 @@ def cure_depression(dataset):
return dataset
# Linear-regression helper: fits on the numeric columns to predict `column`.
def regression_expression(dataset, column, missing_value):
lr = LinearRegression()
numeric = dataset.select_dtypes(include=np.number)
@@ -32,9 +34,21 @@ def regression_expression(dataset, column, missing_value):
y = traindf[column]
traindf.drop(column, axis=1, inplace=True)
lr.fit(traindf, y)
testdf.drop(column, axis=1, inplace=True)
pred = lr.predict(testdf)
# The predictions cannot be written into the dataset directly because the lengths do not match.
# TODO: join testdf and traindf back together to rebuild the full dataset?
testdf[column] = pred
print(testdf.head(30))
# Reference: https://scikit-learn.org/stable/modules/preprocessing.html#preprocessing
# NOTE: the scaling helpers below follow that guide but have not been tested yet.
def scaling_zscore(datashitter, col):
    """Standardise one or more columns with scikit-learn's ``StandardScaler``.

    Args:
        datashitter: DataFrame containing the data to scale.
        col: A single column label or a list of column labels.

    Returns:
        The array returned by ``StandardScaler.transform``: one row per input
        row and one column per selected column.
    """
    selected = datashitter[col]
    # scikit-learn scalers only accept 2-D input. A single label yields a
    # 1-D Series, so promote it to a one-column frame.
    if selected.ndim == 1:
        selected = selected.to_frame()
    scaler = preprocessing.StandardScaler().fit(selected)
    return scaler.transform(selected)
def scaling_range(datashitter, col):
    """Min-max scale one or more columns with scikit-learn's ``MinMaxScaler``.

    The scaler is fitted on the selected column(s) only, not on the whole
    frame, so the fitted feature count matches what is transformed.

    Args:
        datashitter: DataFrame containing the data to scale.
        col: A single column label or a list of column labels.

    Returns:
        The array returned by ``MinMaxScaler.transform``: one row per input
        row and one column per selected column.
    """
    selected = datashitter[col]
    # scikit-learn scalers only accept 2-D input. A single label yields a
    # 1-D Series, so promote it to a one-column frame.
    if selected.ndim == 1:
        selected = selected.to_frame()
    minmax_scaler = preprocessing.MinMaxScaler()
    # Per the scikit-learn docs, MinMaxScaler disregards NaNs in fit and
    # maintains them in transform, so no manual null mask is needed.
    minmax_scaler.fit(selected)
    return minmax_scaler.transform(selected)