In de splitting

This commit is contained in:
LinlyBoi
2023-05-15 15:07:24 +03:00
parent 9cf11abef0
commit 4dffa3dc88
2 changed files with 17 additions and 5 deletions

2
.gitignore vendored
View File

@@ -140,3 +140,5 @@ output.csv
output.xlsx output.xlsx
.gitignore .gitignore
datasets/videogames/games_train.csv
datasets/videogames/games_test.csv

View File

@@ -2,8 +2,10 @@
# Collects stuff from the rest of the scripts # Collects stuff from the rest of the scripts
import pandas as pd import pandas as pd
import numpy as np import numpy as np
# containment breach # containment breach
import scipy as scp import scipy as scp
from sklearn.model_selection import train_test_split
import gunner, digger, gunner, scout import gunner, digger, gunner, scout
# Instantiating globals to be used in other files # Instantiating globals to be used in other files
@@ -13,7 +15,9 @@ global games_sales_split_dur
global games_sales_split_pos global games_sales_split_pos
games_review = pd.read_csv("datasets/videogames/Games.xls") games_review = pd.read_csv("datasets/videogames/Games.xls")
games_sales = scout.cure_depression(pd.read_csv("datasets/videogames/vgsales-12-4-2019-short.csv")) games_sales = scout.cure_depression(
pd.read_csv("datasets/videogames/vgsales-12-4-2019-short.csv")
)
print(games_review.count()) print(games_review.count())
print(games_sales.count()) print(games_sales.count())
@@ -23,6 +27,7 @@ games_review_final = digger.slice_column(games_review, "GameName", "(Import)")
games_merged_dat = digger.write_joined_df(games_sales, games_review_final) games_merged_dat = digger.write_joined_df(games_sales, games_review_final)
# Acquisition of Merged dataset # Acquisition of Merged dataset
print(games_merged_dat.count()) print(games_merged_dat.count())
@@ -103,11 +108,16 @@ gammas = digger.slam_dunk(gammas, "Critic_Score", labels=labels)
# Also need to transform using Z-score (normal distr go brrrr lmao), or min-max # Also need to transform using Z-score (normal distr go brrrr lmao), or min-max
# ah, scheiße # ah, scheiße
# nvm, done, kekW # nvm, done, kekW
gammas['Critic_Score_Norm'] = scout.scaling_zscore(gammas, 'Critic_Score') gammas["Critic_Score_Norm"] = scout.scaling_zscore(gammas, "Critic_Score")
print(gammas['Critic_Score_Norm'].head(10)) print(gammas["Critic_Score_Norm"].head(10))
# Saving all into a file # Saving all into a file
gammas = gammas.dropna(how="any", axis=0) # nuke them empty poopers
gammas.to_csv("datasets/videogames/games_cleanish.csv", index=False) gammas.to_csv("datasets/videogames/games_cleanish.csv", index=False)
# split the data set
gammas_train, gammas_test = train_test_split(gammas, test_size=0.20, random_state=69)
gammas_train.to_csv("datasets/videogames/games_train.csv", index=False)
gammas_test.to_csv("datasets/videogames/games_test.csv", index=False)
# Need similarity and dissimilarity, scipy time # Need similarity and dissimilarity, scipy time
# Selecting 5 random rows # Selecting 5 random rows