Skip to content

Commit

Permalink
Do not quantize if bins >= number of unique values.
Browse files Browse the repository at this point in the history
  • Loading branch information
richardwu committed Jun 22, 2019
1 parent ba1cc4b commit 2303c31
Showing 1 changed file with 4 additions and 2 deletions.
6 changes: 4 additions & 2 deletions dataset/quantization.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,11 @@ def quantize_km(env, df_raw, num_attr_groups_bins):
# Matrix of possibly n-dimension values
X_attrs = df_group.values.astype(np.float)

n_clusters = min(bins, np.unique(X_attrs, axis=0).shape[0])
if bins >= np.unique(X_attrs, axis=0).shape[0]:
# No need to quantize: there are at least as many bins as unique values.
continue

km = KMeans(n_clusters=n_clusters)
km = KMeans(n_clusters=bins)
km.fit(X_attrs)

label_pred = km.labels_
Expand Down

0 comments on commit 2303c31

Please sign in to comment.