Circular bad
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -131,3 +131,4 @@ dmypy.json
|
|||||||
.vscode/
|
.vscode/
|
||||||
jupyter-notes/merged_games.csv
|
jupyter-notes/merged_games.csv
|
||||||
datasets/videogames/vgsales.csv
|
datasets/videogames/vgsales.csv
|
||||||
|
datasets/videogames/games_merged.csv
|
||||||
|
|||||||
@@ -1,15 +1,15 @@
|
|||||||
# Visualisations for Data
|
# Visualisations for Data
|
||||||
import matplotlib.pyplot as plt
|
import matplotlib.pyplot as plt
|
||||||
import seaborn as sns
|
import seaborn as sns
|
||||||
import gunner
|
import mining_hq
|
||||||
from numpy import count_nonzero
|
from numpy import count_nonzero
|
||||||
|
|
||||||
sns.set()
|
sns.set()
|
||||||
plt.xticks(rotation = 90)
|
plt.xticks(rotation = 90)
|
||||||
|
|
||||||
games_pre = gunner.game_sales_NA_pre
|
games_pre = mining_hq.games_sales_split_pre
|
||||||
games_dur = gunner.game_sales_NA_dur
|
games_dur = mining_hq.games_sales_split_dur
|
||||||
games_pos = gunner.game_sales_NA_pos
|
games_pos = mining_hq.games_sales_split_pos
|
||||||
|
|
||||||
games_fig_pre = sns.barplot(data = games_pre, x = "Year", y = "NA_Sales", estimator = count_nonzero)
|
games_fig_pre = sns.barplot(data = games_pre, x = "Year", y = "NA_Sales", estimator = count_nonzero)
|
||||||
plt.show()
|
plt.show()
|
||||||
|
|||||||
@@ -3,6 +3,7 @@
|
|||||||
|
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
import mining_hq
|
||||||
|
|
||||||
# Sharing the dataset variables
|
# Sharing the dataset variables
|
||||||
# Games' data
|
# Games' data
|
||||||
@@ -22,7 +23,7 @@ global crime_US
|
|||||||
global crime_CA
|
global crime_CA
|
||||||
|
|
||||||
# Loading Datasets
|
# Loading Datasets
|
||||||
games_merged = pd.read_csv('datasets/videogames/merged_games.csv')
|
games_merged = mining_hq.games_merged_dat
|
||||||
|
|
||||||
crime_CA = pd.read_excel('datasets/crime/clean_crime_canada_dataset.xlsx')
|
crime_CA = pd.read_excel('datasets/crime/clean_crime_canada_dataset.xlsx')
|
||||||
crime_US = pd.read_csv('datasets/crime/report.csv')
|
crime_US = pd.read_csv('datasets/crime/report.csv')
|
||||||
@@ -54,14 +55,14 @@ games_merged.info()
|
|||||||
# Regarding the vgsales-12-4-2019 dataset
|
# Regarding the vgsales-12-4-2019 dataset
|
||||||
# Considering we will be using US (and probably CA) crime datasets
|
# Considering we will be using US (and probably CA) crime datasets
|
||||||
# It wouldn't be that useful to have other columns regarding other regions
|
# It wouldn't be that useful to have other columns regarding other regions
|
||||||
NA_col_list = ['PAL_Sales', 'JP_Sales', 'Other_Sales', 'Global_Sales']
|
NA_col_list = ['PAL_Sales', 'JP_Sales', 'Other_Sales', 'Global_Sales', 'User_Score', 'GameName', 'Review', '']
|
||||||
GLO_col_list = ['PAL_Sales', 'JP_Sales', 'Other_Sales', 'NA_Sales']
|
GLO_col_list = ['PAL_Sales', 'JP_Sales', 'Other_Sales', 'NA_Sales', 'User_Score', 'GameName', 'Review', '']
|
||||||
|
|
||||||
game_sales_NA = games_merged.drop(columns = NA_col_list, axis = 1)
|
game_sales_NA = games_merged.drop(columns = NA_col_list, axis = 1)
|
||||||
game_sales_GLO = games_merged.drop(columns = GLO_col_list, axis = 1)
|
game_sales_GLO = games_merged.drop(columns = GLO_col_list, axis = 1)
|
||||||
|
|
||||||
print(f"Game Sales for NA:\n{game_sales_NA.head(10)} \nWith minimum year being: {game_sales_NA['Year'].min()}")
|
print(f"Game Sales for NA:\n{game_sales_NA.head(5)} \nWith minimum year being: {game_sales_NA['Year'].min()}")
|
||||||
print(f"Game Sales Globally:\n{game_sales_GLO.head(10)}\nWith minimum year being: {game_sales_GLO['Year'].min()}")
|
print(f"Game Sales Globally:\n{game_sales_GLO.head(5)}\nWith minimum year being: {game_sales_GLO['Year'].min()}")
|
||||||
|
|
||||||
# Getting the range of years which both datasets share
|
# Getting the range of years which both datasets share
|
||||||
crime_year_min = max(crime_US['report_year'].min(), crime_CA['year'].min())
|
crime_year_min = max(crime_US['report_year'].min(), crime_CA['year'].min())
|
||||||
|
|||||||
@@ -4,14 +4,40 @@ import pandas as pd
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
import seaborn as sns
|
import seaborn as sns
|
||||||
import digger
|
import digger
|
||||||
|
|
||||||
import pandas as pd
|
|
||||||
from tkinter.filedialog import askopenfilename
|
from tkinter.filedialog import askopenfilename
|
||||||
|
|
||||||
|
# Declaring the module-level names that other files read from this module
|
||||||
|
global games_merged_dat
|
||||||
|
global games_sales_split_pre
|
||||||
|
global games_sales_split_dur
|
||||||
|
global games_sales_split_pos
|
||||||
|
|
||||||
games_review = pd.read_csv("datasets/videogames/Games.xls")
|
games_review = pd.read_csv("datasets/videogames/Games.xls")
|
||||||
games_sales = pd.read_csv("datasets/videogames/vgsales-12-4-2019-short.csv")
|
games_sales = pd.read_csv("datasets/videogames/vgsales-12-4-2019-short.csv")
|
||||||
|
|
||||||
games_review_phase1 = digger.slice_column(games_review, "GameName", "Review")
|
games_review_phase1 = digger.slice_column(games_review, "GameName", "Review")
|
||||||
games_review_final = digger.slice_column(games_review, "GameName", "(Import)")
|
games_review_final = digger.slice_column(games_review, "GameName", "(Import)")
|
||||||
|
|
||||||
games_merged_dat = digger.write_joined_df(games_sales, games_review_final)
|
games_merged_dat = digger.write_joined_df(games_sales, games_review_final)
|
||||||
|
|
||||||
|
# Saving the merged dataset to CSV
|
||||||
games_merged_dat.to_csv("datasets/videogames/games_merged.csv")
|
games_merged_dat.to_csv("datasets/videogames/games_merged.csv")
|
||||||
|
|
||||||
|
import gunner
|
||||||
|
# Collecting Split-Up Datasets
|
||||||
|
games_sales_split_pre = gunner.game_sales_NA_pre
|
||||||
|
|
||||||
|
games_sales_split_dur = gunner.game_sales_NA_dur
|
||||||
|
|
||||||
|
games_sales_split_pos = gunner.game_sales_NA_pos
|
||||||
|
|
||||||
|
# Displaying Acquired Data
|
||||||
|
print("Acquired Datasets:\n")
|
||||||
|
games_sales_split_pre.head(5)
|
||||||
|
games_sales_split_dur.head(5)
|
||||||
|
games_sales_split_pos.head(5)
|
||||||
|
|
||||||
|
print("Dataset Info:\n")
|
||||||
|
games_sales_split_pre.info()
|
||||||
|
games_sales_split_dur.info()
|
||||||
|
games_sales_split_pos.info()
|
||||||
|
|||||||
Reference in New Issue
Block a user