I have depression
This commit is contained in:
@@ -21,13 +21,15 @@ def write_joined_df(left, right, lsuf="new_key"):
|
|||||||
return merged
|
return merged
|
||||||
|
|
||||||
|
|
||||||
def slam_dunk(dataset, column, size, labels):
|
def slam_dunk(dataset, column, labels):
|
||||||
min_value = dataset[column].min()
|
min_value = dataset[column].min()
|
||||||
max_value = dataset[column].max()
|
max_value = dataset[column].max()
|
||||||
bins = np.linspace(min_value, max_value, size)
|
print("min: ", min_value, " max: ", max_value)
|
||||||
|
bins = np.linspace(min_value, max_value, len(labels) + 1)
|
||||||
|
bins
|
||||||
|
|
||||||
dunked_column = "bin_" + column
|
dunked_column = "bin_" + column
|
||||||
dataset[dunked_column] = pd.cut(
|
dataset[dunked_column] = pd.cut(
|
||||||
dataset[column], bins=bins, labels=labels, include_lowest=True
|
dataset[column], bins=bins, labels=labels, include_lowest=True
|
||||||
)
|
)
|
||||||
return dataset[dunked_column]
|
return dataset
|
||||||
|
|||||||
@@ -22,13 +22,13 @@ games_fig2_pre = sns.histplot(data = games_pre, x = "Year", hue = "Genre", multi
|
|||||||
plt.show()
|
plt.show()
|
||||||
|
|
||||||
plt.xticks(rotation = 90)
|
plt.xticks(rotation = 90)
|
||||||
games_fig_dur = sns.barplot(data = games_dur, x = "Year", y = "NA_Sales")
|
games_fig_dur = sns.barplot(data = games_dur, x = "Year", y = "NA_Sales", estimator=count_nonzero)
|
||||||
plt.show()
|
plt.show()
|
||||||
|
|
||||||
plt.xticks(rotation = 90)
|
plt.xticks(rotation = 90)
|
||||||
games_fig_pos = sns.barplot(data = games_pos, x = "Year", y = "NA_Sales")
|
games_fig_pos = sns.barplot(data = games_pos, x = "Year", y = "NA_Sales", estimator=count_nonzero)
|
||||||
plt.show()
|
plt.show()
|
||||||
|
|
||||||
plt.xticks(rotation = 90)
|
plt.xticks(rotation = 90)
|
||||||
crime_CA_fig = sns.barplot(data = crime_CA, x = "year", y = "incidents")
|
crime_CA_fig = sns.barplot(data = crime_CA, x = "year", y = "incidents", estimator=count_nonzero)
|
||||||
plt.show()
|
plt.show()
|
||||||
|
|||||||
@@ -25,7 +25,6 @@ games_merged_dat = digger.write_joined_df(games_sales, games_review_final)
|
|||||||
|
|
||||||
# Acquisition of Merged dataset
|
# Acquisition of Merged dataset
|
||||||
print(games_merged_dat.count())
|
print(games_merged_dat.count())
|
||||||
games_merged_dat.to_csv("datasets/videogames/games_merged.csv", index=False)
|
|
||||||
|
|
||||||
# Loading Crime Datasets
|
# Loading Crime Datasets
|
||||||
crime_CA = pd.read_excel("datasets/crime/clean_crime_canada_dataset.xlsx")
|
crime_CA = pd.read_excel("datasets/crime/clean_crime_canada_dataset.xlsx")
|
||||||
@@ -50,23 +49,26 @@ NA_col_list = [
|
|||||||
"JP_Sales",
|
"JP_Sales",
|
||||||
"Other_Sales",
|
"Other_Sales",
|
||||||
"Global_Sales",
|
"Global_Sales",
|
||||||
"User_Score",
|
|
||||||
"GameName",
|
"GameName",
|
||||||
"Review",
|
"Review",
|
||||||
|
"Console",
|
||||||
|
"Score",
|
||||||
]
|
]
|
||||||
GLO_col_list = [
|
GLO_col_list = [
|
||||||
"PAL_Sales",
|
"PAL_Sales",
|
||||||
"JP_Sales",
|
"JP_Sales",
|
||||||
"Other_Sales",
|
"Other_Sales",
|
||||||
"NA_Sales",
|
"NA_Sales",
|
||||||
"User_Score",
|
|
||||||
"GameName",
|
"GameName",
|
||||||
"Review",
|
"Review",
|
||||||
|
"Console",
|
||||||
|
"Score",
|
||||||
]
|
]
|
||||||
|
|
||||||
# Splitting crime datasets
|
# Splitting crime datasets
|
||||||
# Collecting Split-Up Datasets
|
# Collecting Split-Up Datasets
|
||||||
games_merged_dat = gunner.drop_kick(NA_col_list, games_merged_dat)
|
games_merged_dat = gunner.drop_kick(NA_col_list, games_merged_dat)
|
||||||
|
games_merged_dat.to_csv("datasets/videogames/games_merged.csv", index=False)
|
||||||
|
|
||||||
sale_tri_split = gunner.trisect_by_year(games_merged_dat, "Year", year_interval)
|
sale_tri_split = gunner.trisect_by_year(games_merged_dat, "Year", year_interval)
|
||||||
|
|
||||||
@@ -89,7 +91,7 @@ games_sales_split_pre.info()
|
|||||||
games_sales_split_dur.info()
|
games_sales_split_dur.info()
|
||||||
games_sales_split_pos.info()
|
games_sales_split_pos.info()
|
||||||
|
|
||||||
print(games_sales_split_dur.describe())
|
print("Yer forsaken Statistical Description:\n", games_sales_split_dur.describe())
|
||||||
|
|
||||||
print(
|
print(
|
||||||
games_sales_split_pre.head(5),
|
games_sales_split_pre.head(5),
|
||||||
@@ -105,10 +107,9 @@ print(
|
|||||||
|
|
||||||
# Load merged gammas
|
# Load merged gammas
|
||||||
|
|
||||||
gammas = pd.read_excel("datasets/videogames/merged_games.xlsx")
|
gammas = pd.read_csv("datasets/videogames/games_merged.csv")
|
||||||
labels = ["smol", "epik", "larg"]
|
labels = ["smol", "epik", "larg"]
|
||||||
gammas["User_Score"] = digger.slam_dunk(gammas, "User_Score", 3, labels=labels)
|
gammas = digger.slam_dunk(gammas, "Critic_Score", labels=labels)
|
||||||
gammas = gammas[gammas["Genre"].isna() == False]
|
# gammas = gammas[gammas["Genre"].isna() == False]
|
||||||
gammas = scout.cure_depression(gammas)
|
# gammas = scout.cure_depression(gammas)
|
||||||
gammas.to_csv("output.csv", index=False)
|
gammas.to_csv("output.csv", index=False)
|
||||||
scout.regression_expression(gammas, "Global_Sales", 0)
|
|
||||||
|
|||||||
@@ -1,8 +1,9 @@
|
|||||||
# Regression/Prediction (Totally gonna do later trust bro)
|
# Regression/Prediction (Totally gonna do later trust bro)
|
||||||
from sklearn.linear_model import LinearRegression
|
from sklearn.linear_model import LinearRegression
|
||||||
|
|
||||||
from sklearn.impute import SimpleImputer
|
from sklearn.impute import SimpleImputer
|
||||||
|
from sklearn import preprocessing
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
|
||||||
def cure_depression(dataset):
|
def cure_depression(dataset):
|
||||||
@@ -16,6 +17,7 @@ def cure_depression(dataset):
|
|||||||
return dataset
|
return dataset
|
||||||
|
|
||||||
|
|
||||||
|
# NOTE(review): a hostile comment was removed here; it carried no technical information.
|
||||||
def regression_expression(dataset, column, missing_value):
|
def regression_expression(dataset, column, missing_value):
|
||||||
lr = LinearRegression()
|
lr = LinearRegression()
|
||||||
numeric = dataset.select_dtypes(include=np.number)
|
numeric = dataset.select_dtypes(include=np.number)
|
||||||
@@ -32,9 +34,21 @@ def regression_expression(dataset, column, missing_value):
|
|||||||
y = traindf[column]
|
y = traindf[column]
|
||||||
traindf.drop(column, axis=1, inplace=True)
|
traindf.drop(column, axis=1, inplace=True)
|
||||||
lr.fit(traindf, y)
|
lr.fit(traindf, y)
|
||||||
testdf.drop(column, axis=1, inplace=True)
|
|
||||||
pred = lr.predict(testdf)
|
pred = lr.predict(testdf)
|
||||||
# can't put this in data set directly because length no match
|
# can't put this in data set directly because length no match
|
||||||
# join testdf and traindf to form dataset perhaps??
|
# join testdf and traindf to form dataset perhaps??
|
||||||
testdf[column] = pred
|
testdf[column] = pred
|
||||||
print(testdf.head(30))
|
print(testdf.head(30))
|
||||||
|
|
||||||
|
# https://scikit-learn.org/stable/modules/preprocessing.html#preprocessing
|
||||||
|
# That helps ^
|
||||||
|
# This boi should work, idk i'm implementing blindly
|
||||||
|
def scaling_zscore(datashitter, col):
    """Standardise the selected column(s) with scikit-learn's StandardScaler.

    Args:
        datashitter: DataFrame holding the data to scale.
        col: A single column label or a list of column labels.

    Returns:
        The array produced by ``StandardScaler.transform`` for the selected
        column(s); one output column per selected input column.
    """
    selected = datashitter[col]
    # A single label yields a 1-D Series, but the scaler only accepts 2-D
    # input (samples x features), so promote it to a one-column frame.
    if selected.ndim == 1:
        selected = selected.to_frame()

    scaler = preprocessing.StandardScaler().fit(selected)
    return scaler.transform(selected)
|
||||||
|
|
||||||
|
def scaling_range(datashitter, col):
    """Rescale the selected column(s) with scikit-learn's MinMaxScaler.

    The scaler is fitted on exactly the data it later transforms, so the
    number of features always matches between ``fit`` and ``transform``.

    Args:
        datashitter: DataFrame holding the data to scale.
        col: A single column label or a list of column labels.

    Returns:
        The array produced by ``MinMaxScaler.transform`` for the selected
        column(s); one output column per selected input column.
    """
    selected = datashitter[col]
    # A single label yields a 1-D Series, but the scaler only accepts 2-D
    # input (samples x features), so promote it to a one-column frame.
    if selected.ndim == 1:
        selected = selected.to_frame()

    minmax_scaler = preprocessing.MinMaxScaler()
    # Per the scikit-learn docs, MinMaxScaler disregards NaN while fitting and
    # keeps NaN in the transformed output, so no manual null mask is needed.
    # NOTE(review): confirm the installed scikit-learn version has this
    # behaviour.
    minmax_scaler.fit(selected)
    return minmax_scaler.transform(selected)
|
||||||
|
|||||||
Reference in New Issue
Block a user