From d53c926d5df1b1a585555d40a820e49b13d84f1c Mon Sep 17 00:00:00 2001
From: Supermjork
Date: Fri, 31 Mar 2023 16:42:36 +0200
Subject: [PATCH] AIGHT DONE FR THIS TIME

---
Reviewer notes (commentary only; this region is not part of the diff):
- This patch was stored collapsed onto a single line. Line breaks, blank
  lines and 4-space indentation below were reconstructed from the hunk
  line counts and the diffstat. Runs of spaces inside the printed table
  strings could not be recovered -- TODO confirm against the repository.
- similarity() selects the numeric columns of row_arr, drops 'Rank', and
  prints a pairwise table of 1 - distance.cosine(row i, row j).
- The table headers in dissimilarity() and similarity() always list
  "Entry 1" .. "Entry 5", while the loops run over len(row_arr); the
  header matches the body only when row_arr has five rows.

 dwarves/mining_hq.py |  1 +
 dwarves/scout.py     | 14 +++++++++++++-
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/dwarves/mining_hq.py b/dwarves/mining_hq.py
index 8560250..59e3483 100644
--- a/dwarves/mining_hq.py
+++ b/dwarves/mining_hq.py
@@ -116,3 +116,4 @@ sample_rows = gammas.iloc[chosen_idx]
 print(sample_rows.head())
 
 scout.dissimilarity(sample_rows)
+scout.similarity(sample_rows)
diff --git a/dwarves/scout.py b/dwarves/scout.py
index 0d396e9..d84eba7 100644
--- a/dwarves/scout.py
+++ b/dwarves/scout.py
@@ -52,7 +52,7 @@ def dissimilarity(row_arr):
     row_arr = row_arr.select_dtypes(include = np.number)
     row_arr = row_arr.drop('Rank', axis = 1)
 
-    print(" | Entry 1 | Entry 2 | Entry 3 | Entry 4 | Entry 5 |")
+    print(" Dissim | Entry 1 | Entry 2 | Entry 3 | Entry 4 | Entry 5 |")
     for i in range(len(row_arr)):
         print("Entry " , i + 1, " | ", end = "")
         for j in range(len(row_arr)):
@@ -60,6 +60,18 @@ def dissimilarity(row_arr):
             print(" {:#.6g} |".format(eucDist), end = "")
         print("\n")
 
+def similarity(row_arr):
+    row_arr = row_arr.select_dtypes(include = np.number)
+    row_arr = row_arr.drop('Rank', axis = 1)
+
+    print("Similarity| Entry 1 | Entry 2 | Entry 3 | Entry 4 | Entry 5 |")
+    for i in range(len(row_arr)):
+        print("Entry ", i + 1, " | ", end = "")
+        for j in range(len(row_arr)):
+            sim = 1 - distance.cosine(row_arr.iloc[i], row_arr.iloc[j])
+            print(" {:#.6g} |".format(sim), end = "")
+        print("\n")
+
 def scaling_range(datashitter, col):
     nonnull = datashitter[col].isna()
     minmax_scaler = preprocessing.MinMaxScaler()