In de splitting
This commit is contained in:
2
.gitignore
vendored
2
.gitignore
vendored
@@ -140,3 +140,5 @@ output.csv
|
|||||||
output.xlsx
|
output.xlsx
|
||||||
|
|
||||||
.gitignore
|
.gitignore
|
||||||
|
datasets/videogames/games_train.csv
|
||||||
|
datasets/videogames/games_test.csv
|
||||||
|
|||||||
@@ -2,8 +2,10 @@
|
|||||||
# Collects stuff from the rest of the scripts
|
# Collects stuff from the rest of the scripts
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
# containment breach
|
# containment breach
|
||||||
import scipy as scp
|
import scipy as scp
|
||||||
|
from sklearn.model_selection import train_test_split
|
||||||
import gunner, digger, gunner, scout
|
import gunner, digger, gunner, scout
|
||||||
|
|
||||||
# Instantiating globals to be used in other files
|
# Instantiating globals to be used in other files
|
||||||
@@ -13,7 +15,9 @@ global games_sales_split_dur
|
|||||||
global games_sales_split_pos
|
global games_sales_split_pos
|
||||||
|
|
||||||
games_review = pd.read_csv("datasets/videogames/Games.xls")
|
games_review = pd.read_csv("datasets/videogames/Games.xls")
|
||||||
games_sales = scout.cure_depression(pd.read_csv("datasets/videogames/vgsales-12-4-2019-short.csv"))
|
games_sales = scout.cure_depression(
|
||||||
|
pd.read_csv("datasets/videogames/vgsales-12-4-2019-short.csv")
|
||||||
|
)
|
||||||
|
|
||||||
print(games_review.count())
|
print(games_review.count())
|
||||||
print(games_sales.count())
|
print(games_sales.count())
|
||||||
@@ -23,6 +27,7 @@ games_review_final = digger.slice_column(games_review, "GameName", "(Import)")
|
|||||||
|
|
||||||
games_merged_dat = digger.write_joined_df(games_sales, games_review_final)
|
games_merged_dat = digger.write_joined_df(games_sales, games_review_final)
|
||||||
|
|
||||||
|
|
||||||
# Acquisition of Merged dataset
|
# Acquisition of Merged dataset
|
||||||
print(games_merged_dat.count())
|
print(games_merged_dat.count())
|
||||||
|
|
||||||
@@ -103,11 +108,16 @@ gammas = digger.slam_dunk(gammas, "Critic_Score", labels=labels)
|
|||||||
# Also need to transform using Z-score (normal distr go brrrr lmao), or min-max
|
# Also need to transform using Z-score (normal distr go brrrr lmao), or min-max
|
||||||
# ah, scheiße
|
# ah, scheiße
|
||||||
# nvm, done, kekW
|
# nvm, done, kekW
|
||||||
gammas['Critic_Score_Norm'] = scout.scaling_zscore(gammas, 'Critic_Score')
|
gammas["Critic_Score_Norm"] = scout.scaling_zscore(gammas, "Critic_Score")
|
||||||
print(gammas['Critic_Score_Norm'].head(10))
|
print(gammas["Critic_Score_Norm"].head(10))
|
||||||
|
|
||||||
# Saving all into a file
|
# Saving all into a file
|
||||||
|
gammas = gammas.dropna(how="any", axis=0) # nuke them empty poopers
|
||||||
gammas.to_csv("datasets/videogames/games_cleanish.csv", index=False)
|
gammas.to_csv("datasets/videogames/games_cleanish.csv", index=False)
|
||||||
|
# split the data set
|
||||||
|
gammas_train, gammas_test = train_test_split(gammas, test_size=0.20, random_state=69)
|
||||||
|
gammas_train.to_csv("datasets/videogames/games_train.csv", index=False)
|
||||||
|
gammas_test.to_csv("datasets/videogames/games_test.csv", index=False)
|
||||||
|
|
||||||
# Need similarity and dissimilarity, scipy time
|
# Need similarity and dissimilarity, scipy time
|
||||||
# Selecting 5 random rows
|
# Selecting 5 random rows
|
||||||
|
|||||||
Reference in New Issue
Block a user