From 15f54e64d497c904a46c3e8e5d80d7abf04c5915 Mon Sep 17 00:00:00 2001
From: Supermjork
Date: Fri, 24 Mar 2023 15:02:03 +0200
Subject: [PATCH] Some cleaning

---
Reviewer notes (this area is not part of the commit message and is skipped
when the patch is applied):

What the hunk does: replaces the bare `import pandas as pd` stub in
py_scripts/gunner.py with a flat script that
  1. loads two datasets with `pd.read_csv`,
  2. prints `.info()` and the first 10 rows of each,
  3. converts `games_dat['Score']` with `pd.to_numeric(..., errors = 'coerce')`
     so that non-numeric entries become NaN,
  4. keeps only the rows whose `Score` is not null, then prints `.info()`
     and `.head()` of the filtered frame.

NOTE(review): 'datasets/videogames/Games.xls' is read with `pd.read_csv`.
  That only works if the file is delimited text despite its extension;
  a real Excel workbook would need `pd.read_excel`. Please confirm.
NOTE(review): `numpy` is imported as `np` but nothing in this hunk uses it.
NOTE(review): both dataset paths are relative, so the script presumably
  has to be run from the directory that contains `datasets/`. Please confirm.
NOTE(review): the hunk assumes `Games.xls` has a `Score` column; this cannot
  be verified from the patch alone.
NOTE(review): the old file had no trailing newline; the new version ends
  with one.

 py_scripts/gunner.py | 25 ++++++++++++++++++++++++-
 1 file changed, 24 insertions(+), 1 deletion(-)

diff --git a/py_scripts/gunner.py b/py_scripts/gunner.py
index 31c564f..8842298 100644
--- a/py_scripts/gunner.py
+++ b/py_scripts/gunner.py
@@ -1,3 +1,26 @@
 # Cleaning of datasets
 # Somewhat main in the beninging
-import pandas as pd
\ No newline at end of file
+
+import pandas as pd
+import numpy as np
+
+# Loading Datasets
+game_sales2019_dat = pd.read_csv('datasets/videogames/vgsales-12-4-2019-short.csv')
+games_dat = pd.read_csv('datasets/videogames/Games.xls')
+
+# Printing information regarding datasets
+game_sales2019_dat.info()
+games_dat.info()
+
+# Printing First n values (index start: 0)
+print(game_sales2019_dat.head(10))
+print(games_dat.head(10))
+
+# Coercing the non-numeric values will result in NaN
+# thus allowing easier removal through `.notnull()`
+games_dat['Score'] = pd.to_numeric(games_dat['Score'], errors = 'coerce')
+
+games_dat = games_dat[games_dat['Score'].notnull()]
+
+games_dat.info()
+print(games_dat.head())