I have depression
This commit is contained in:
@@ -21,13 +21,15 @@ def write_joined_df(left, right, lsuf="new_key"):
|
||||
return merged
|
||||
|
||||
|
||||
def slam_dunk(dataset, column, labels):
    """Bin a numeric column into equal-width, labelled categories.

    The observed range of ``column`` is split into ``len(labels)`` equal-width
    intervals and every value receives the label of the interval it falls in.
    The result is written to a new column named ``"bin_" + column``.

    Args:
        dataset: pandas DataFrame containing ``column``; modified in place.
        column: Name of the numeric column to bin.
        labels: One label per bin, ordered from the lowest to the highest
            interval.

    Returns:
        The same ``dataset`` object, with the ``"bin_" + column`` column added.
    """
    min_value = dataset[column].min()
    max_value = dataset[column].max()
    print("min: ", min_value, " max: ", max_value)

    # N labels need N + 1 edges; deriving the edge count from the labels keeps
    # the two in sync instead of relying on a separately supplied size.
    bins = np.linspace(min_value, max_value, len(labels) + 1)

    dunked_column = "bin_" + column
    # include_lowest=True closes the first interval on the left so the minimum
    # value itself lands in the first bin rather than becoming NaN.
    dataset[dunked_column] = pd.cut(
        dataset[column], bins=bins, labels=labels, include_lowest=True
    )
    return dataset
|
||||
|
||||
@@ -22,13 +22,13 @@ games_fig2_pre = sns.histplot(data = games_pre, x = "Year", hue = "Genre", multi
|
||||
plt.show()
|
||||
|
||||
plt.xticks(rotation = 90)
|
||||
games_fig_dur = sns.barplot(data = games_dur, x = "Year", y = "NA_Sales")
|
||||
games_fig_dur = sns.barplot(data = games_dur, x = "Year", y = "NA_Sales", estimator=count_nonzero)
|
||||
plt.show()
|
||||
|
||||
plt.xticks(rotation = 90)
|
||||
games_fig_pos = sns.barplot(data = games_pos, x = "Year", y = "NA_Sales")
|
||||
games_fig_pos = sns.barplot(data = games_pos, x = "Year", y = "NA_Sales", estimator=count_nonzero)
|
||||
plt.show()
|
||||
|
||||
plt.xticks(rotation = 90)
|
||||
crime_CA_fig = sns.barplot(data = crime_CA, x = "year", y = "incidents")
|
||||
crime_CA_fig = sns.barplot(data = crime_CA, x = "year", y = "incidents", estimator=count_nonzero)
|
||||
plt.show()
|
||||
|
||||
@@ -25,7 +25,6 @@ games_merged_dat = digger.write_joined_df(games_sales, games_review_final)
|
||||
|
||||
# Acquisition of Merged dataset
|
||||
print(games_merged_dat.count())
|
||||
games_merged_dat.to_csv("datasets/videogames/games_merged.csv", index=False)
|
||||
|
||||
# Loading Crime Datasets
|
||||
crime_CA = pd.read_excel("datasets/crime/clean_crime_canada_dataset.xlsx")
|
||||
@@ -50,23 +49,26 @@ NA_col_list = [
|
||||
"JP_Sales",
|
||||
"Other_Sales",
|
||||
"Global_Sales",
|
||||
"User_Score",
|
||||
"GameName",
|
||||
"Review",
|
||||
"Console",
|
||||
"Score",
|
||||
]
|
||||
GLO_col_list = [
|
||||
"PAL_Sales",
|
||||
"JP_Sales",
|
||||
"Other_Sales",
|
||||
"NA_Sales",
|
||||
"User_Score",
|
||||
"GameName",
|
||||
"Review",
|
||||
"Console",
|
||||
"Score",
|
||||
]
|
||||
|
||||
# Splitting crime datasets
|
||||
# Collecting Split-Up Datasets
|
||||
games_merged_dat = gunner.drop_kick(NA_col_list, games_merged_dat)
|
||||
games_merged_dat.to_csv("datasets/videogames/games_merged.csv", index=False)
|
||||
|
||||
sale_tri_split = gunner.trisect_by_year(games_merged_dat, "Year", year_interval)
|
||||
|
||||
@@ -89,7 +91,7 @@ games_sales_split_pre.info()
|
||||
games_sales_split_dur.info()
|
||||
games_sales_split_pos.info()
|
||||
|
||||
print(games_sales_split_dur.describe())
|
||||
print("Yer forsaken Statistical Description:\n", games_sales_split_dur.describe())
|
||||
|
||||
print(
|
||||
games_sales_split_pre.head(5),
|
||||
@@ -105,10 +107,9 @@ print(
|
||||
|
||||
# Load merged gammas
|
||||
|
||||
gammas = pd.read_excel("datasets/videogames/merged_games.xlsx")
|
||||
gammas = pd.read_csv("datasets/videogames/games_merged.csv")
|
||||
labels = ["smol", "epik", "larg"]
|
||||
gammas["User_Score"] = digger.slam_dunk(gammas, "User_Score", 3, labels=labels)
|
||||
gammas = gammas[gammas["Genre"].isna() == False]
|
||||
gammas = scout.cure_depression(gammas)
|
||||
gammas = digger.slam_dunk(gammas, "Critic_Score", labels=labels)
|
||||
# gammas = gammas[gammas["Genre"].isna() == False]
|
||||
# gammas = scout.cure_depression(gammas)
|
||||
gammas.to_csv("output.csv", index=False)
|
||||
scout.regression_expression(gammas, "Global_Sales", 0)
|
||||
|
||||
@@ -1,8 +1,9 @@
|
||||
# Regression/Prediction (TODO: not fully implemented yet)
|
||||
from sklearn.linear_model import LinearRegression
|
||||
|
||||
from sklearn.impute import SimpleImputer
|
||||
from sklearn import preprocessing
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
|
||||
def cure_depression(dataset):
|
||||
@@ -16,6 +17,7 @@ def cure_depression(dataset):
|
||||
return dataset
|
||||
|
||||
|
||||
# Linear-regression helper for predicting values of `column`.
|
||||
def regression_expression(dataset, column, missing_value):
|
||||
lr = LinearRegression()
|
||||
numeric = dataset.select_dtypes(include=np.number)
|
||||
@@ -32,9 +34,21 @@ def regression_expression(dataset, column, missing_value):
|
||||
y = traindf[column]
|
||||
traindf.drop(column, axis=1, inplace=True)
|
||||
lr.fit(traindf, y)
|
||||
testdf.drop(column, axis=1, inplace=True)
|
||||
pred = lr.predict(testdf)
|
||||
# Can't assign the predictions to the dataset directly because the lengths don't match.
|
||||
# TODO: perhaps join testdf and traindf back together to rebuild the full dataset?
|
||||
testdf[column] = pred
|
||||
print(testdf.head(30))
|
||||
|
||||
# https://scikit-learn.org/stable/modules/preprocessing.html#preprocessing
|
||||
# That helps ^
|
||||
# NOTE: the scaling helpers below should work, but they were implemented from the docs without being tested.
|
||||
def scaling_zscore(datashitter, col):
    """Standardize a column with scikit-learn's ``StandardScaler``.

    Args:
        datashitter: pandas DataFrame containing the data to scale.
        col: A single column label, or a list of column labels.

    Returns:
        The array produced by ``StandardScaler.fit_transform`` for the
        selected column(s).
    """
    # scikit-learn scalers expect 2-D input. Selecting with a list keeps a
    # single column as a one-column DataFrame instead of a 1-D Series, which
    # the scaler would reject.
    columns = col if isinstance(col, list) else [col]
    return preprocessing.StandardScaler().fit_transform(datashitter[columns])
|
||||
|
||||
def scaling_range(datashitter, col):
    """Rescale a column with scikit-learn's ``MinMaxScaler``.

    The scaler is fitted only on rows where the selected column(s) are not
    null, then applied to every row.

    Args:
        datashitter: pandas DataFrame containing the data to scale.
        col: A single column label, or a list of column labels.

    Returns:
        The array produced by ``MinMaxScaler.transform`` for the selected
        column(s).
    """
    # Select with a list so the scaler always receives 2-D input.
    columns = col if isinstance(col, list) else [col]
    column_frame = datashitter[columns]

    minmax_scaler = preprocessing.MinMaxScaler()
    # Fit on observed values only; the earlier mask used isna(), which picked
    # the *null* rows, and fitted on every column of the frame.
    minmax_scaler.fit(column_frame.dropna())
    return minmax_scaler.transform(column_frame)
|
||||
|
||||
Reference in New Issue
Block a user