new folder who dis
This commit is contained in:
57
dwarves/digger.py
Normal file
57
dwarves/digger.py
Normal file
@@ -0,0 +1,57 @@
|
||||
# Getting and combining data
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from dwarves import scout
|
||||
from numpy import ndarray
|
||||
from scipy.stats import binned_statistic
|
||||
|
||||
|
||||
# Defining useful Functions to be used later
|
||||
def slice_column(input_df, column, expression=" "):
|
||||
unclean = input_df[column].to_list()
|
||||
clean = list()
|
||||
for record in unclean:
|
||||
record = record.split(expression)[0]
|
||||
clean.append(record)
|
||||
|
||||
input_df = input_df.drop(columns=[column])
|
||||
input_df[column] = clean
|
||||
return input_df
|
||||
|
||||
|
||||
def write_joined_df(left, right, lsuf="new_key"):
|
||||
merged = pd.concat([left, right], sort=False, ignore_index=True) # Good
|
||||
return merged
|
||||
|
||||
|
||||
def slam_dunk(dataset, column, labels):
|
||||
min_value = dataset[column].min()
|
||||
max_value = dataset[column].max()
|
||||
print("min: ", min_value, " max: ", max_value)
|
||||
bins = np.linspace(min_value, max_value, len(labels) + 1)
|
||||
bins
|
||||
|
||||
dunked_column = "bin_" + column
|
||||
dataset[dunked_column] = pd.cut(
|
||||
dataset[column], bins=bins, labels=labels, include_lowest=True
|
||||
)
|
||||
|
||||
dataset = scout.cure_depression(dataset)
|
||||
|
||||
x_data = np.arange(0, len(dataset))
|
||||
y_data = dataset[column]
|
||||
x_bins, bin_edges, misc = binned_statistic(
|
||||
y_data, x_data, statistic="median", bins=len(labels)
|
||||
)
|
||||
bin_intervals = pd.IntervalIndex.from_arrays(bin_edges[:-1], bin_edges[1:])
|
||||
dataset["bin_value"] = dataset[column].apply(
|
||||
lambda x: set_to_median(x, bin_intervals)
|
||||
)
|
||||
|
||||
return dataset
|
||||
|
||||
|
||||
def set_to_median(x, bin_intervals):
|
||||
for interval in bin_intervals:
|
||||
if x in interval:
|
||||
return interval.mid
|
||||
Reference in New Issue
Block a user