32 lines
1.1 KiB
Python
32 lines
1.1 KiB
Python
# Cleaning of datasets
|
|
# Somewhat main in the beninging
|
|
|
|
import pandas as pd
|
|
import numpy as np
|
|
|
|
def drop_kick(col_list, dataframe):
|
|
return dataframe.drop(columns=col_list, axis=1)
|
|
|
|
|
|
# Getting the range of years which both datasets share
|
|
def year_interval(victim1, victim2, col1, col2):
|
|
return (
|
|
max(victim2[col2].min(), victim1[col1].min()),
|
|
min(victim2[col2].max(), victim1[col1].max()),
|
|
)
|
|
|
|
|
|
def intersect_by_year(victim1, victim2, col1, col2):
|
|
interval = year_interval(victim1, victim2, col1, col2)
|
|
victim1 = victim1[(victim1[col1] >= interval[0]) & (victim1[col1] <= interval[1])]
|
|
victim2 = victim2[(victim2[col2] >= interval[0]) & (victim2[col2] <= interval[1])]
|
|
return (victim1, victim2)
|
|
|
|
|
|
# Updating the NA game dataset to fit with the time ranges
|
|
def trisect_by_year(victim1, col, interval):
|
|
victim1_pre = victim1[victim1[col] < interval[0]]
|
|
victim1_dur = victim1[(victim1[col] <= interval[1]) & (victim1[col] >= interval[0])]
|
|
victim1_pos = victim1[victim1[col] > interval[1]]
|
|
return (victim1_pre, victim1_dur, victim1_pos)
|