Refactoring killed muh bebe
This commit is contained in:
@@ -3,34 +3,7 @@
|
||||
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import mining_hq
|
||||
|
||||
# Sharing the dataset variables
|
||||
# Games' data
|
||||
|
||||
# Loading Datasets
|
||||
games_merged = mining_hq.games_merged_dat
|
||||
|
||||
victim1 = pd.read_excel("datasets/crime/clean_crime_canada_dataset.xlsx")
|
||||
victim2 = pd.read_csv("datasets/crime/report.csv")
|
||||
|
||||
# Printing information regarding datasets
|
||||
print("Game Datasets' Info:\n")
|
||||
games_merged.info()
|
||||
|
||||
print("Crime Datasets' Info:\n")
|
||||
victim2.info()
|
||||
victim1.info()
|
||||
|
||||
|
||||
# Regarding the Games.xls dataset:
|
||||
# Coercing the non-numeric values will result in NaN
|
||||
# thus allowing easier removal through `.notnull()`
|
||||
|
||||
|
||||
# Regarding the vgsales-12-4-2019 dataset
|
||||
# Considering we will be using a US (probs CA too) crime datasets
|
||||
# It wouldn't be that useful to have other columns regarding other regions
|
||||
def drop_kick(col_list, dataframe):
|
||||
return dataframe.drop(columns=col_list, axis=1)
|
||||
|
||||
@@ -38,8 +11,8 @@ def drop_kick(col_list, dataframe):
|
||||
# Getting the range of years which both datasets share
|
||||
def year_interval(victim1, victim2, col1, col2):
|
||||
return (
|
||||
min(victim2[col2].max(), victim1[col1].max()),
|
||||
max(victim2[col2].min(), victim1[col1].min()),
|
||||
min(victim2[col2].max(), victim1[col1].max()),
|
||||
)
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user