Nuked Linly's splitting in mining_hq.py
This commit is contained in:
@@ -127,26 +127,3 @@ print(sample_rows.head())
|
|||||||
|
|
||||||
# Call scout's dissimilarity and similarity routines on `sample_rows`; the
# return values are not stored.
# NOTE(review): each call is listed twice below, presumably because the diff
# view repeats unchanged context lines for the old and new sides — confirm
# the script itself calls each routine only once.
scout.dissimilarity(sample_rows)
|
scout.dissimilarity(sample_rows)
|
||||||
scout.similarity(sample_rows)
|
scout.similarity(sample_rows)
|
||||||
# Partition `gammas` into train/test frames: 20% of the rows go to the test
# set, and the fixed seed keeps the partition reproducible between runs.
gammas_train, gammas_test = train_test_split(
    gammas,
    test_size=0.20,
    random_state=69,
)

# Write both partitions to disk (train first, then test) without the index.
for _frame, _csv_path in (
    (gammas_train, "datasets/videogames/games_train.csv"),
    (gammas_test, "datasets/videogames/games_test.csv"),
):
    _frame.to_csv(_csv_path, index=False)
|
|
||||||
|
|
||||||
# K-means clustering of the training rows on three numeric columns.
# The column selection is taken once, before the label column is added, and
# reused for both fitting and scoring.
cluster_features = gammas_train[["Critic_Score", "User_Score", "Total_Shipped"]]

# `fit` returns the estimator itself, so `gammas_train_kmeans` is the fitted
# 10-cluster model.
kmeans_estimator = KMeans(n_clusters=10, random_state=420, n_init="auto")
gammas_train_kmeans = kmeans_estimator.fit(cluster_features)
gammas_labels = gammas_train_kmeans.labels_

# Silhouette score of the cluster assignment, using Euclidean distance.
silh_score = metrics.silhouette_score(
    cluster_features, gammas_labels, metric="euclidean"
)
print(silh_score)

# Attach each row's cluster label to the training frame and preview it.
gammas_train["Kmean Labels"] = gammas_labels
print(gammas_train.head())
|
|
||||||
|
|
||||||
# Gaussian Naive Bayes
gnb = GaussianNB()

# GaussianNB.fit takes a feature matrix X and a target vector y. The previous
# call passed only the whole training frame, which raises TypeError because
# `y` is missing (and would also have fed the label column in as a feature).
# NOTE(review): the intended target is not visible in this section. The
# K-means cluster label is the only label column created here, so it is used
# as the target — confirm this is the class the model should learn.
nb_features = gammas_train[["Critic_Score", "User_Score", "Total_Shipped"]]
nb_target = gammas_train["Kmean Labels"]

# `fit` returns the fitted estimator, so despite its name `prediction` holds
# the trained model rather than predicted values; the name is kept so any
# later use of it still resolves.
prediction = gnb.fit(nb_features, nb_target)
|
|
||||||
|
|||||||
Reference in New Issue
Block a user