Skip to content

Commit

Permalink
Updates
Browse files Browse the repository at this point in the history
Re-organized clustering code arguments.
Added R-code for multiple testing.
  • Loading branch information
Malnammi committed Jan 28, 2024
1 parent 9eac413 commit ce2e746
Show file tree
Hide file tree
Showing 6 changed files with 22,466 additions and 4 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -114,3 +114,4 @@ analysis_notebooks_local/*
!datasets/sample_data
!datasets/sample_data/training_data/*.csv.gz
!datasets/sample_data/unlabeled_data/*.csv.gz
!analysis_notebooks/R-code/*
9 changes: 5 additions & 4 deletions active_learning_dd/utils/generate_bt_clustering.py
Original file line number Diff line number Diff line change
Expand Up @@ -380,6 +380,9 @@ def cluster_features(n_instances, n_features, dist_func, output_dir, tmp_dir,
nn_total_vector.flush()
cluster_leader_idx_vector.flush()

neighbor_matrix.flush()
nn_total_vector.flush()
cluster_leader_idx_vector.flush()
del cluster_assigment_vector
del cluster_leader_idx_vector
del neighbor_matrix
Expand Down Expand Up @@ -427,11 +430,9 @@ def cluster_features(n_instances, n_features, dist_func, output_dir, tmp_dir,
parser = argparse.ArgumentParser()
parser.add_argument('--csv_file_or_dir', action="store", dest="csv_file_or_dir", required=True)
parser.add_argument('--output_dir', action="store", dest="output_dir", required=True)
parser.add_argument('--feature_name', default='Morgan FP_2_1024', action="store",
dest="feature_name", required=False)
parser.add_argument('--feature_name', default='Morgan FP_2_1024', action="store", dest="feature_name", required=False)
parser.add_argument('--cutoff', type=float, default=0.2, action="store", dest="cutoff", required=False)
parser.add_argument('--dist_function', default='tanimoto_dissimilarity', action="store",
dest="dist_function", required=False)
parser.add_argument('--dist_function', default='tanimoto_dissimilarity', action="store", dest="dist_function", required=False)
parser.add_argument('--process_count', type=int, default=1, action="store", dest="process_count", required=False)
parser.add_argument('--process_batch_size', type=int, default=2**16, action="store", dest="process_batch_size", required=False)
parser.add_argument('--dissimilarity_memmap_filename', default=None, action="store", dest="dissimilarity_memmap_filename", required=False)
Expand Down
Loading

0 comments on commit ce2e746

Please sign in to comment.