diff --git a/py_scripts/mining_hq.py b/py_scripts/mining_hq.py index 0c628de..c0d74fa 100644 --- a/py_scripts/mining_hq.py +++ b/py_scripts/mining_hq.py @@ -15,12 +15,16 @@ global games_sales_split_pos games_review = pd.read_csv("datasets/videogames/Games.xls") games_sales = pd.read_csv("datasets/videogames/vgsales-12-4-2019-short.csv") +print(games_review.isnull()) +print(games_sales.isnull()) + games_review_phase1 = digger.slice_column(games_review, "GameName", "Review") games_review_final = digger.slice_column(games_review, "GameName", "(Import)") games_merged_dat = digger.write_joined_df(games_sales, games_review_final) # Acquisition of Merged dataset +print(games_merged_dat.isnull()) games_merged_dat.to_csv("datasets/videogames/games_merged.csv") # Loading Crime Datasets @@ -28,12 +32,19 @@ crime_CA = pd.read_excel('datasets/crime/clean_crime_canada_dataset.xlsx') crime_US = pd.read_csv('datasets/crime/report.csv') +print(crime_US.isnull()) +print(crime_CA.isnull()) + year_interval = gunner.year_interval(crime_US, crime_CA, "report_year", "year") -print(year_interval[0], year_interval[1]) +year_max = year_interval[0] +year_min = year_interval[1] crime_intersect = gunner.intersect_by_year(crime_US, crime_CA, "report_year", "year") +crime_US_intersect = crime_intersect[0] +crime_CA_intersect = crime_intersect[1] + NA_col_list = [ "PAL_Sales", "JP_Sales", @@ -59,13 +70,19 @@ games_merged_dat = gunner.drop_kick(NA_col_list, games_merged_dat) sale_tri_split = gunner.trisect_by_year(games_merged_dat, 'Year', year_interval) +game_sales_split_pre = sale_tri_split[0] +game_sales_split_dur = sale_tri_split[1] +game_sales_split_pos = sale_tri_split[2] + # Displaying Acquired Data print("Acquired Datasets:\n") -print(sale_tri_split[0].head(5), -sale_tri_split[1].head(5), -sale_tri_split[2].head(5)) +print(game_sales_split_pre.head(5), +game_sales_split_dur.head(5), +game_sales_split_pos.head(5)) print("Dataset Info:\n") -sale_tri_split[0].info() -sale_tri_split[1].info() -sale_tri_split[2].info() +game_sales_split_pre.info() +game_sales_split_dur.info() +game_sales_split_pos.info() + +print(game_sales_split_dur.describe())