Compare commits
1 Commits
main
...
second-win
| Author | SHA1 | Date | |
|---|---|---|---|
| 1de191f3da |
2
.gitignore
vendored
2
.gitignore
vendored
@@ -140,5 +140,3 @@ output.csv
|
|||||||
output.xlsx
|
output.xlsx
|
||||||
|
|
||||||
.gitignore
|
.gitignore
|
||||||
datasets/videogames/games_train.csv
|
|
||||||
datasets/videogames/games_test.csv
|
|
||||||
|
|||||||
File diff suppressed because one or more lines are too long
@@ -29,7 +29,6 @@ def slam_dunk(dataset, column, labels):
|
|||||||
max_value = dataset[column].max()
|
max_value = dataset[column].max()
|
||||||
print("min: ", min_value, " max: ", max_value)
|
print("min: ", min_value, " max: ", max_value)
|
||||||
bins = np.linspace(min_value, max_value, len(labels) + 1)
|
bins = np.linspace(min_value, max_value, len(labels) + 1)
|
||||||
bins
|
|
||||||
|
|
||||||
dunked_column = "bin_" + column
|
dunked_column = "bin_" + column
|
||||||
dataset[dunked_column] = pd.cut(
|
dataset[dunked_column] = pd.cut(
|
||||||
|
|||||||
@@ -2,6 +2,7 @@
|
|||||||
import matplotlib.pyplot as plt
|
import matplotlib.pyplot as plt
|
||||||
import seaborn as sns
|
import seaborn as sns
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
import plotly.express as exp
|
||||||
import mining_hq
|
import mining_hq
|
||||||
from numpy import count_nonzero
|
from numpy import count_nonzero
|
||||||
|
|
||||||
@@ -17,7 +18,8 @@ custom_params = {"axes.spines.right": False, "axes.spines.top": False}
|
|||||||
sns.set_theme(style = 'ticks', rc = custom_params)
|
sns.set_theme(style = 'ticks', rc = custom_params)
|
||||||
|
|
||||||
plt.xticks(rotation = 90)
|
plt.xticks(rotation = 90)
|
||||||
games_fig_pre = sns.histplot(data = games_pre, x = "Year", palette = sns.color_palette("flare"), kde = True)
|
games_fig_pre = sns.histplot(data = games_pre, x = "Year", kde = True)
|
||||||
|
games_fig_pre.set_title('Game Sales Pre-2000')
|
||||||
plt.show()
|
plt.show()
|
||||||
|
|
||||||
plt.xticks(rotation = 90)
|
plt.xticks(rotation = 90)
|
||||||
@@ -60,3 +62,7 @@ plt.xticks(rotation = 90)
|
|||||||
games_crime_dur = sns.jointplot(data = games_dur, x = "Year", y = 'Violent_US')
|
games_crime_dur = sns.jointplot(data = games_dur, x = "Year", y = 'Violent_US')
|
||||||
plt.close(1)
|
plt.close(1)
|
||||||
plt.show()
|
plt.show()
|
||||||
|
|
||||||
|
# Need to floor the years, shows trailing bars on the histogram :/
|
||||||
|
test_fig = exp.histogram(games_dur, x = "Year")
|
||||||
|
test_fig.show()
|
||||||
|
|||||||
@@ -2,16 +2,9 @@
|
|||||||
# Collects stuff from the rest of the scripts
|
# Collects stuff from the rest of the scripts
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
# containment breach
|
# containment breach
|
||||||
import scipy as scp
|
import scipy as scp
|
||||||
from sklearn.model_selection import train_test_split
|
import gunner, digger, gunner, scout
|
||||||
from sklearn.cluster import KMeans
|
|
||||||
from sklearn import metrics
|
|
||||||
from sklearn.naive_bayes import GaussianNB
|
|
||||||
import gunner
|
|
||||||
import digger
|
|
||||||
import scout
|
|
||||||
|
|
||||||
# Instantiating globals to be used in other files
|
# Instantiating globals to be used in other files
|
||||||
global games_merged_dat
|
global games_merged_dat
|
||||||
@@ -20,9 +13,7 @@ global games_sales_split_dur
|
|||||||
global games_sales_split_pos
|
global games_sales_split_pos
|
||||||
|
|
||||||
games_review = pd.read_csv("datasets/videogames/Games.xls")
|
games_review = pd.read_csv("datasets/videogames/Games.xls")
|
||||||
games_sales = scout.cure_depression(
|
games_sales = scout.cure_depression(pd.read_csv("datasets/videogames/vgsales-12-4-2019-short.csv"))
|
||||||
pd.read_csv("datasets/videogames/vgsales-12-4-2019-short.csv")
|
|
||||||
)
|
|
||||||
|
|
||||||
print(games_review.count())
|
print(games_review.count())
|
||||||
print(games_sales.count())
|
print(games_sales.count())
|
||||||
@@ -32,7 +23,6 @@ games_review_final = digger.slice_column(games_review, "GameName", "(Import)")
|
|||||||
|
|
||||||
games_merged_dat = digger.write_joined_df(games_sales, games_review_final)
|
games_merged_dat = digger.write_joined_df(games_sales, games_review_final)
|
||||||
|
|
||||||
|
|
||||||
# Acquisition of Merged dataset
|
# Acquisition of Merged dataset
|
||||||
print(games_merged_dat.count())
|
print(games_merged_dat.count())
|
||||||
|
|
||||||
@@ -113,15 +103,15 @@ gammas = digger.slam_dunk(gammas, "Critic_Score", labels=labels)
|
|||||||
# Also need to transform using Z-score (normal distr go brrrr lmao), or min-max
|
# Also need to transform using Z-score (normal distr go brrrr lmao), or min-max
|
||||||
# ah, scheiße
|
# ah, scheiße
|
||||||
# nvm, done, kekW
|
# nvm, done, kekW
|
||||||
gammas["Critic_Score_Norm"] = scout.scaling_zscore(gammas, "Critic_Score")
|
gammas['Critic_Score_Norm'] = scout.scaling_zscore(gammas, 'Critic_Score')
|
||||||
print(gammas["Critic_Score_Norm"].head(10))
|
print(gammas['Critic_Score_Norm'].head(10))
|
||||||
|
|
||||||
# Saving all into a file
|
# Saving all into a file
|
||||||
gammas = gammas.dropna(how="any", axis=0) # nuke them empty poopers
|
|
||||||
gammas.to_csv("datasets/videogames/games_cleanish.csv", index=False)
|
gammas.to_csv("datasets/videogames/games_cleanish.csv", index=False)
|
||||||
|
|
||||||
# Need similarity and dissimialrity, scipy time
|
# Need similarity and dissimialrity, scipy time
|
||||||
# Selecting 5 random rows
|
# Selecting 5 random rows
|
||||||
chosen_idx = np.random.choice(len(gammas), replace=False, size=5)
|
chosen_idx = np.random.choice(len(gammas), replace = False, size = 5)
|
||||||
sample_rows = gammas.iloc[chosen_idx]
|
sample_rows = gammas.iloc[chosen_idx]
|
||||||
print(sample_rows.head())
|
print(sample_rows.head())
|
||||||
|
|
||||||
|
|||||||
BIN
dwarves/tre.png
BIN
dwarves/tre.png
Binary file not shown.
|
Before Width: | Height: | Size: 886 KiB |
Reference in New Issue
Block a user