diff --git a/datasets/crime/clean_crime_canada_dataset.xlsx b/datasets/crime/clean_crime_canada_dataset.xlsx new file mode 100644 index 0000000..4ca75ee Binary files /dev/null and b/datasets/crime/clean_crime_canada_dataset.xlsx differ diff --git a/py_scripts/digger.py b/py_scripts/digger.py index cc5e319..049a6bb 100644 --- a/py_scripts/digger.py +++ b/py_scripts/digger.py @@ -1 +1,3 @@ -# Getting and combining data \ No newline at end of file +# Getting and combining data +import pandas as pd +import numpy as np diff --git a/py_scripts/engineer.py b/py_scripts/engineer.py index 2187190..2570c46 100644 --- a/py_scripts/engineer.py +++ b/py_scripts/engineer.py @@ -1 +1,7 @@ -# Visualisations for Data \ No newline at end of file +# Visualisations for Data +import seaborn as sns +import gunner + +games_vis = gunner.game_sales_NA + +sns.relplot(data = games_vis, x = "Year", y = "NA_Sales") diff --git a/py_scripts/gunner.py b/py_scripts/gunner.py index 8842298..ac02f63 100644 --- a/py_scripts/gunner.py +++ b/py_scripts/gunner.py @@ -4,23 +4,45 @@ import pandas as pd import numpy as np +# Sharing the dataset variables +# Games' data +global games_dat +# Sales in NA +global game_sales_NA +# Sales Globally +global game_sales_GLO + # Loading Datasets game_sales2019_dat = pd.read_csv('datasets/videogames/vgsales-12-4-2019-short.csv') games_dat = pd.read_csv('datasets/videogames/Games.xls') # Printing information regarding datasets +print("Data Sets' Info:\n") game_sales2019_dat.info() games_dat.info() # Printing First n values (index start: 0) -print(game_sales2019_dat.head(10)) -print(games_dat.head(10)) +print("Game Sale Data:\n", game_sales2019_dat.head(10)) +print("Game Scores:\n", games_dat.head(10)) +# Regarding the Games.xls dataset: # Coercing the non-numeric values will result in NaN # thus allowing easier removal through `.notnull()` games_dat['Score'] = pd.to_numeric(games_dat['Score'], errors = 'coerce') games_dat = games_dat[games_dat['Score'].notnull()] 
+print("Game Scores (Cleaned):\n", games_dat.head()) games_dat.info() -print(games_dat.head()) + +# Regarding the vgsales-12-4-2019 dataset: +# Since we will be using US (and probably Canadian) crime datasets, +# sales columns for the other regions are not useful here, so they are dropped +NA_col_list = ['PAL_Sales', 'JP_Sales', 'Other_Sales', 'Global_Sales'] +GLO_col_list = ['PAL_Sales', 'JP_Sales', 'Other_Sales', 'NA_Sales'] + +game_sales_NA = game_sales2019_dat.drop(columns = NA_col_list, axis = 1) +game_sales_GLO = game_sales2019_dat.drop(columns = GLO_col_list, axis = 1) + +print("Game Sales for NA:\n", game_sales_NA.head(10)) +print("Game Sales Globally:\n", game_sales_GLO.head(10)) diff --git a/py_scripts/mining_hq.py b/py_scripts/mining_hq.py index 00134b0..a57989b 100644 --- a/py_scripts/mining_hq.py +++ b/py_scripts/mining_hq.py @@ -2,4 +2,4 @@ # Collects stuff from the rest of the scripts import pandas as pd import numpy as np -import seaborn as sns \ No newline at end of file +import seaborn as sns diff --git a/py_scripts/scout.py b/py_scripts/scout.py index 7f86c2b..621de4c 100644 --- a/py_scripts/scout.py +++ b/py_scripts/scout.py @@ -1 +1 @@ -# Regression/Prediction (Totally gonna do later trust bro) \ No newline at end of file +# Regression/Prediction (Totally gonna do later trust bro)