Committing binning

This commit is contained in:
2023-03-30 16:19:43 +02:00
parent 17b3e05b41
commit eb4861ecc9
3 changed files with 6 additions and 7 deletions

View File

@@ -1,6 +1,7 @@
# Getting and combining data
import pandas as pd
import numpy as np
import scout
from numpy import ndarray
from scipy.stats import binned_statistic
@@ -24,7 +25,6 @@ def write_joined_df(left, right, lsuf="new_key"):
def slam_dunk(dataset, column, labels):
dataset[column] = dataset[column].fillna(0, inplace = True)
min_value = dataset[column].min()
max_value = dataset[column].max()
print("min: ", min_value, " max: ", max_value)
@@ -36,14 +36,13 @@ def slam_dunk(dataset, column, labels):
dataset[column], bins=bins, labels=labels, include_lowest=True
)
# fill gaps by linear interpolation (backward direction, at most one consecutive NaN per gap)
dataset[column] = dataset[column].interpolate(method = "linear", limit_direction = "backward", limit = 1)
dataset = scout.cure_depression(dataset)
x_data = np.arange(0, len(dataset))
y_data = dataset[column]
x_bins, bin_edges, misc = binned_statistic(y_data, x_data, statistic='median', bins=len(labels))
bin_intervals = pd.IntervalIndex.from_arrays(bin_edges[:-1], bin_edges[1:])
dataset['PooShi'] = dataset[column].apply(lambda x: set_to_median(x, bin_intervals))
dataset['bin_value'] = dataset[column].apply(lambda x: set_to_median(x, bin_intervals))
return dataset