Working whilst not at home is meh

This commit is contained in:
2023-03-26 12:12:39 +02:00
parent 15f54e64d4
commit d3686c8b0a
6 changed files with 37 additions and 7 deletions

Binary file not shown.

View File

@@ -1 +1,3 @@
# Getting and combining data # Getting and combining data
import pandas as pd
import numpy as np

View File

@@ -1 +1,7 @@
# Visualisations for Data # Visualisations for Data
import seaborn as sns
import gunner
games_vis = gunner.game_sales_NA
sns.relplot(data = games_vis, x = "Year", y = "NA_Sales")

View File

@@ -4,23 +4,45 @@
import pandas as pd import pandas as pd
import numpy as np import numpy as np
# Sharing the dataset variables
# Games' data
global games_dat
# Sales in NA
global game_sales_NA
# Sales Globally
global game_sales_GLO
# Loading Datasets # Loading Datasets
game_sales2019_dat = pd.read_csv('datasets/videogames/vgsales-12-4-2019-short.csv') game_sales2019_dat = pd.read_csv('datasets/videogames/vgsales-12-4-2019-short.csv')
games_dat = pd.read_csv('datasets/videogames/Games.xls') games_dat = pd.read_csv('datasets/videogames/Games.xls')
# Printing information regarding datasets # Printing information regarding datasets
print("Data Sets' Info:\n")
game_sales2019_dat.info() game_sales2019_dat.info()
games_dat.info() games_dat.info()
# Printing First n values (index start: 0) # Printing First n values (index start: 0)
print(game_sales2019_dat.head(10)) print("Game Sale Data:\n", game_sales2019_dat.head(10))
print(games_dat.head(10)) print("Game Scores:\n", games_dat.head(10))
# Regarding the Games.xls dataset:
# Coercing the non-numeric values will result in NaN # Coercing the non-numeric values will result in NaN
# thus allowing easier removal through `.notnull()` # thus allowing easier removal through `.notnull()`
games_dat['Score'] = pd.to_numeric(games_dat['Score'], errors = 'coerce') games_dat['Score'] = pd.to_numeric(games_dat['Score'], errors = 'coerce')
games_dat = games_dat[games_dat['Score'].notnull()] games_dat = games_dat[games_dat['Score'].notnull()]
print("Game Scores (Cleaned):\n", games_dat.head())
games_dat.info() games_dat.info()
print(games_dat.head())
# Regarding the vgsales-12-4-2019 dataset
# Since we will be using US (and probably CA) crime datasets,
# columns covering the other sales regions would not be very useful here
NA_col_list = ['PAL_Sales', 'JP_Sales', 'Other_Sales', 'Global_Sales']
GLO_col_list = ['PAL_Sales', 'JP_Sales', 'Other_Sales', 'NA_Sales']
game_sales_NA = game_sales2019_dat.drop(columns = NA_col_list, axis = 1)
game_sales_GLO = game_sales2019_dat.drop(columns = GLO_col_list, axis = 1)
print("Game Sales for NA:\n", game_sales_NA.head(10))
print("Game Sales Globally:\n", game_sales_GLO.head(10))

View File

@@ -2,4 +2,4 @@
# Collects stuff from the rest of the scripts # Collects stuff from the rest of the scripts
import pandas as pd import pandas as pd
import numpy as np import numpy as np
import seaborn as sns import seaborn as sns

View File

@@ -1 +1 @@
# Regression/Prediction (Totally gonna do later trust bro) # Regression/Prediction (Totally gonna do later trust bro)