FIX MERGE IDOT
This commit is contained in:
@@ -135,7 +135,7 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"merged2.to_csv('merged_games.csv')"
|
"merged.to_csv('merged_games.csv')"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -2,33 +2,20 @@
|
|||||||
import pandas as pd
|
import pandas as pd
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
|
global games_merged_dat
|
||||||
|
|
||||||
# reading the data
|
# reading the data
|
||||||
# -> MAKE SURE OF THE DATA FRAME NAMES BEFORE YOU RUN IT
|
# -> MAKE SURE OF THE DATA FRAME NAMES BEFORE YOU RUN IT
|
||||||
|
|
||||||
|
games_dat = pd.read_csv("Games.xls")
|
||||||
|
games_sales_dat = pd.read_csv("vgsales-12-4-2019-short.csv")
|
||||||
|
|
||||||
df1 = pd.read_csv("output_6th_df.csv")
|
games_merged_dat = games_dat.merge(games_sales_dat, left_on="Name", right_on="Name", how="left")
|
||||||
df2 = pd.read_csv("vgsales-12-4-2019-short.csv")
|
print(games_merged_dat)
|
||||||
|
|
||||||
|
|
||||||
# ----------------------------------------------------------
|
|
||||||
|
|
||||||
|
|
||||||
# print(pf1.head)
|
|
||||||
# print(pf2.head)
|
|
||||||
|
|
||||||
# ---------------------------------------------------------
|
|
||||||
|
|
||||||
|
|
||||||
# merging
|
|
||||||
|
|
||||||
combined_df = df1.merge(df2, left_on="Name", right_on="Name", how="left")
|
|
||||||
print(combined_df)
|
|
||||||
combined_df.to_csv("output_final_df.csv")
|
|
||||||
df = combined_df
|
|
||||||
|
|
||||||
# ---------------------------------------------------------
|
|
||||||
|
|
||||||
|
games_merged_dat.to_csv("output_final_df.csv")
|
||||||
|
|
||||||
|
# Defining useful Functions to be used later
|
||||||
def slice_column(input_df, output_df, column, expression=" "):
|
def slice_column(input_df, output_df, column, expression=" "):
|
||||||
unclean = input_df[column].to_list()
|
unclean = input_df[column].to_list()
|
||||||
clean = list()
|
clean = list()
|
||||||
|
|||||||
@@ -22,37 +22,34 @@ global crime_US
|
|||||||
global crime_CA
|
global crime_CA
|
||||||
|
|
||||||
# Loading Datasets
|
# Loading Datasets
|
||||||
game_sales_dat = pd.read_csv('datasets/videogames/vgsales-12-4-2019-short.csv')
|
games_merged = pd.read_csv('datasets/videogames/merged_games.csv')
|
||||||
games_dat = pd.read_csv('datasets/videogames/Games.xls')
|
|
||||||
|
|
||||||
crime_CA = pd.read_excel('datasets/crime/clean_crime_canada_dataset.xlsx')
|
crime_CA = pd.read_excel('datasets/crime/clean_crime_canada_dataset.xlsx')
|
||||||
crime_US = pd.read_csv('datasets/crime/report.csv')
|
crime_US = pd.read_csv('datasets/crime/report.csv')
|
||||||
|
|
||||||
# Printing information regarding datasets
|
# Printing information regarding datasets
|
||||||
print("Game Datasets' Info:\n")
|
print("Game Datasets' Info:\n")
|
||||||
game_sales_dat.info()
|
games_merged.info()
|
||||||
games_dat.info()
|
|
||||||
|
|
||||||
print("Crime Datasets' Info:\n")
|
print("Crime Datasets' Info:\n")
|
||||||
crime_US.info()
|
crime_US.info()
|
||||||
crime_CA.info()
|
crime_CA.info()
|
||||||
|
|
||||||
# Printing First n values (index start: 0)
|
# Printing First n values (index start: 0)
|
||||||
print("Game Sale Data:\n", game_sales_dat.head(10))
|
print("Game Sale Data:\n", games_merged.head(5))
|
||||||
print("Game Scores:\n", games_dat.head(10))
|
|
||||||
|
|
||||||
print("US Crime Data:\n", crime_US.head(10))
|
print("US Crime Data:\n", crime_US.head(5))
|
||||||
print("CA Crime Data:\n", crime_CA.head(10))
|
print("CA Crime Data:\n", crime_CA.head(5))
|
||||||
|
|
||||||
# Regarding the Games.xls dataset:
|
# Regarding the Games.xls dataset:
|
||||||
# Coercing the non-numeric values will result in NaN
|
# Coercing the non-numeric values will result in NaN
|
||||||
# thus allowing easier removal through `.notnull()`
|
# thus allowing easier removal through `.notnull()`
|
||||||
games_dat['Score'] = pd.to_numeric(games_dat['Score'], errors = 'coerce')
|
games_merged['Score'] = pd.to_numeric(games_merged['Score'], errors = 'coerce')
|
||||||
|
|
||||||
games_dat = games_dat[games_dat['Score'].notnull()]
|
games_merged = games_merged[games_merged['Score'].notnull()]
|
||||||
|
|
||||||
print("Game Scores (Cleaned):\n", games_dat.head())
|
print("Game Scores (Cleaned):\n", games_merged.head())
|
||||||
games_dat.info()
|
games_merged.info()
|
||||||
|
|
||||||
# Regarding the vgsales-12-4-2019 dataset
|
# Regarding the vgsales-12-4-2019 dataset
|
||||||
# Considering we will be using US (and probably CA) crime datasets
|
# Considering we will be using US (and probably CA) crime datasets
|
||||||
@@ -60,8 +57,8 @@ games_dat.info()
|
|||||||
NA_col_list = ['PAL_Sales', 'JP_Sales', 'Other_Sales', 'Global_Sales']
|
NA_col_list = ['PAL_Sales', 'JP_Sales', 'Other_Sales', 'Global_Sales']
|
||||||
GLO_col_list = ['PAL_Sales', 'JP_Sales', 'Other_Sales', 'NA_Sales']
|
GLO_col_list = ['PAL_Sales', 'JP_Sales', 'Other_Sales', 'NA_Sales']
|
||||||
|
|
||||||
game_sales_NA = game_sales_dat.drop(columns = NA_col_list, axis = 1)
|
game_sales_NA = games_merged.drop(columns = NA_col_list, axis = 1)
|
||||||
game_sales_GLO = game_sales_dat.drop(columns = GLO_col_list, axis = 1)
|
game_sales_GLO = games_merged.drop(columns = GLO_col_list, axis = 1)
|
||||||
|
|
||||||
print(f"Game Sales for NA:\n{game_sales_NA.head(10)} \nWith minimum year being: {game_sales_NA['Year'].min()}")
|
print(f"Game Sales for NA:\n{game_sales_NA.head(10)} \nWith minimum year being: {game_sales_NA['Year'].min()}")
|
||||||
print(f"Game Sales Globally:\n{game_sales_GLO.head(10)}\nWith minimum year being: {game_sales_GLO['Year'].min()}")
|
print(f"Game Sales Globally:\n{game_sales_GLO.head(10)}\nWith minimum year being: {game_sales_GLO['Year'].min()}")
|
||||||
|
|||||||
Reference in New Issue
Block a user