diff --git a/dwarves/mining_hq.py b/dwarves/mining_hq.py
index 2f0601a..5acd9a5 100644
--- a/dwarves/mining_hq.py
+++ b/dwarves/mining_hq.py
@@ -127,26 +127,3 @@ print(sample_rows.head())
 scout.dissimilarity(sample_rows)
 scout.similarity(sample_rows)
 
-# split the data set
-gammas_train, gammas_test = train_test_split(gammas, test_size=0.20, random_state=69)
-gammas_train.to_csv("datasets/videogames/games_train.csv", index=False)
-gammas_test.to_csv("datasets/videogames/games_test.csv", index=False)
-
-# kmeans pls
-gammas_train_kmeans = KMeans(n_clusters=10, random_state=420, n_init="auto").fit(
-    gammas_train[["Critic_Score", "User_Score", "Total_Shipped"]]
-)
-gammas_labels = gammas_train_kmeans.labels_
-
-silh_score = metrics.silhouette_score(
-    gammas_train[["Critic_Score", "User_Score", "Total_Shipped"]],
-    gammas_labels,
-    metric="euclidean",
-)
-print(silh_score)
-gammas_train["Kmean Labels"] = gammas_labels
-print(gammas_train.head())
-
-# Naive based
-gnb = GaussianNB()
-prediction = gnb.fit(gammas_train)