We can now fill with means in the HQ (idk how we didn't get this sooner)

This commit is contained in:
LinlyBoi
2023-03-29 22:22:50 +02:00
parent 4b30cc55b1
commit 6a1189a405

View File

@@ -1,6 +1,7 @@
# Instantiating Main Python Script File # Instantiating Main Python Script File
# Collects stuff from the rest of the scripts # Collects stuff from the rest of the scripts
import pandas as pd import pandas as pd
import scout
import numpy as np import numpy as np
import seaborn as sns import seaborn as sns
import digger import digger
@@ -24,9 +25,9 @@ games_merged_dat = digger.write_joined_df(games_sales, games_review_final)
games_merged_dat.to_csv("datasets/videogames/games_merged.csv") games_merged_dat.to_csv("datasets/videogames/games_merged.csv")
# Loading Crime Datasets # Loading Crime Datasets
crime_CA = pd.read_excel('datasets/crime/clean_crime_canada_dataset.xlsx') crime_CA = pd.read_excel("datasets/crime/clean_crime_canada_dataset.xlsx")
crime_US = pd.read_csv('datasets/crime/report.csv') crime_US = pd.read_csv("datasets/crime/report.csv")
year_interval = gunner.year_interval(crime_US, crime_CA, "report_year", "year") year_interval = gunner.year_interval(crime_US, crime_CA, "report_year", "year")
@@ -57,15 +58,19 @@ GLO_col_list = [
# Collecting Split-Up Datasets # Collecting Split-Up Datasets
games_merged_dat = gunner.drop_kick(NA_col_list, games_merged_dat) games_merged_dat = gunner.drop_kick(NA_col_list, games_merged_dat)
sale_tri_split = gunner.trisect_by_year(games_merged_dat, 'Year', year_interval) sale_tri_split = gunner.trisect_by_year(games_merged_dat, "Year", year_interval)
# Displaying Acquired Data # Displaying Acquired Data
print("Acquired Datasets:\n") print("Acquired Datasets:\n")
print(sale_tri_split[0].head(5), print(sale_tri_split[0].head(5), sale_tri_split[1].head(5), sale_tri_split[2].head(5))
sale_tri_split[1].head(5),
sale_tri_split[2].head(5))
print("Dataset Info:\n") print("Dataset Info:\n")
sale_tri_split[0].info() sale_tri_split[0].info()
sale_tri_split[1].info() sale_tri_split[1].info()
sale_tri_split[2].info() sale_tri_split[2].info()
# Load merged gammas
gammas = pd.read_csv("datasets/videogames/games_merged.csv")
gammas["User_Score"] = scout.cure_depression(gammas, "User_Score")
print(gammas["User_Score"])