diff --git a/callingcardstools/PeakCalling/yeast/call_peaks.py b/callingcardstools/PeakCalling/yeast/call_peaks.py index 9e2de33..8514634 100644 --- a/callingcardstools/PeakCalling/yeast/call_peaks.py +++ b/callingcardstools/PeakCalling/yeast/call_peaks.py @@ -106,8 +106,8 @@ def call_peaks( background_data_path: str, background_orig_chr_convention: str, chrmap_data_path: str, - deduplicate_experiment: bool = True, unified_chr_convention: str = "ucsc", + deduplicate_experiment: bool = True, ) -> pd.DataFrame: """ Call peaks for the given Calling Cards data. @@ -190,7 +190,9 @@ def call_peaks( ).set_index("name", drop=True) promoter_hops_df = ( - promoter_df.set_index("name", drop=True) + promoter_df + .drop("score", axis=1) + .set_index("name") .join( [experiment_hops_df, background_hops_df], how="left", @@ -209,6 +211,7 @@ def call_peaks( "experiment_total_hops": "int64", } ) + .reset_index() ) start_time = time.time() @@ -333,6 +336,13 @@ def parse_args( "something else, eg 'mitochondrial' or 'plasmid'.", required=True, ) + parser.add_argument( + "--unified_chr_convention", + type=str, + help="the chromosome naming convention to use in the output " "DataFrame.", + required=False, + default="ucsc", + ) parser.add_argument( "--deduplicate_experiment", action="store_true", @@ -341,13 +351,6 @@ def parse_args( "coordinate on different strands, only one of those records will be " "retained.", ) - parser.add_argument( - "--unified_chr_convention", - type=str, - help="the chromosome naming convention to use in the output " "DataFrame.", - required=False, - default="ucsc", - ) parser.add_argument( "--output_path", default="sig_results.csv", @@ -395,8 +398,8 @@ def main(args: argparse.Namespace) -> None: args.background_data_path, args.background_orig_chr_convention, args.chrmap_data_path, - args.deduplicate_experiment, args.unified_chr_convention, + args.deduplicate_experiment ) result_df.to_csv( diff --git a/callingcardstools/PeakCalling/yeast/enrichment.py b/callingcardstools/PeakCalling/yeast/enrichment.py index a81dd28..a608845 100644 --- a/callingcardstools/PeakCalling/yeast/enrichment.py +++ b/callingcardstools/PeakCalling/yeast/enrichment.py @@ -1,4 +1,5 @@ import logging +import warnings logger = logging.getLogger(__name__) @@ -25,6 +26,8 @@ def enrichment(total_background_hops: int, :return: The Calling Cards effect (enrichment) value. :rtype: float """ + warnings.warn("This function is deprecated and will be removed in a future release. " + "use the vectorized function instead") numerator = (experiment_hops / (total_experiment_hops + pseudocount)) denominator = (background_hops / (total_background_hops + pseudocount)) diff --git a/callingcardstools/PeakCalling/yeast/hypergeom_pval.py b/callingcardstools/PeakCalling/yeast/hypergeom_pval.py index bb04477..94df4e6 100644 --- a/callingcardstools/PeakCalling/yeast/hypergeom_pval.py +++ b/callingcardstools/PeakCalling/yeast/hypergeom_pval.py @@ -1,8 +1,11 @@ import logging +import warnings + from scipy.stats import hypergeom logger = logging.getLogger(__name__) + def hypergeom_pval(total_background_hops: int, total_experiment_hops: int, background_hops: int, @@ -21,6 +24,8 @@ def hypergeom_pval(total_background_hops: int, :return: The hypergeometric p-value. :rtype: float """ + warnings.warn("This function is deprecated and will be removed in a future release. " + "use the vectorized function instead") # check input if total_background_hops < 0 or not isinstance(total_background_hops, int): raise ValueError(('total_background_hops must ' diff --git a/callingcardstools/PeakCalling/yeast/poisson_pval.py b/callingcardstools/PeakCalling/yeast/poisson_pval.py index a5e8227..4b196c2 100644 --- a/callingcardstools/PeakCalling/yeast/poisson_pval.py +++ b/callingcardstools/PeakCalling/yeast/poisson_pval.py @@ -1,4 +1,5 @@ import logging +import warnings from scipy.stats import poisson @@ -27,6 +28,8 @@ def poisson_pval(total_background_hops: int, :return: The Poisson p-value. :rtype: float """ + warnings.warn("This function is deprecated and will be removed in a future release. " + "use the vectorized function instead") # check input if total_background_hops < 0 or not isinstance(total_background_hops, int): raise ValueError(('total_background_hops must ' diff --git a/docs/API/Analysis/yeast/chipexo_promoter_sig/chipexo_promoter_sig.md b/docs/API/Analysis/yeast/chipexo_promoter_sig/chipexo_promoter_sig.md new file mode 100644 index 0000000..28bef91 --- /dev/null +++ b/docs/API/Analysis/yeast/chipexo_promoter_sig/chipexo_promoter_sig.md @@ -0,0 +1,2 @@ +::: callingcardstools.Analysis.yeast.chipexo_promoter_sig.chipexo_promoter_sig + handler: python \ No newline at end of file diff --git a/docs/API/Analysis/yeast/chipexo_promoter_sig/main.md b/docs/API/Analysis/yeast/chipexo_promoter_sig/main.md new file mode 100644 index 0000000..ac96a8c --- /dev/null +++ b/docs/API/Analysis/yeast/chipexo_promoter_sig/main.md @@ -0,0 +1,2 @@ +::: callingcardstools.Analysis.yeast.chipexo_promoter_sig.main + handler: python \ No newline at end of file diff --git a/docs/API/Analysis/yeast/chipexo_promoter_sig/parse_args.md b/docs/API/Analysis/yeast/chipexo_promoter_sig/parse_args.md new file mode 100644 index 0000000..7e22a47 --- /dev/null +++ b/docs/API/Analysis/yeast/chipexo_promoter_sig/parse_args.md @@ -0,0 +1,2 @@ +::: callingcardstools.Analysis.yeast.chipexo_promoter_sig.parse_args + handler: python \ No newline at end of file diff --git a/docs/API/Analysis/yeast/chipexo_promoter_sig/read_in_chipexo_data.md b/docs/API/Analysis/yeast/chipexo_promoter_sig/read_in_chipexo_data.md new file mode 100644 index 0000000..d1a8dbd --- /dev/null +++ b/docs/API/Analysis/yeast/chipexo_promoter_sig/read_in_chipexo_data.md @@ -0,0 +1,2 @@ +::: callingcardstools.Analysis.yeast.chipexo_promoter_sig.read_in_chipexo_data + handler: python \ No newline at end of file diff --git a/docs/API/Analysis/yeast/rank_response/bin_by_binding_rank.md b/docs/API/Analysis/yeast/rank_response/bin_by_binding_rank.md new file mode 100644 index 0000000..2921861 --- /dev/null +++ b/docs/API/Analysis/yeast/rank_response/bin_by_binding_rank.md @@ -0,0 +1,2 @@ +::: callingcardstools.Analysis.yeast.rank_response.bin_by_binding_rank.bin_by_binding_rank + handler: python \ No newline at end of file diff --git a/docs/API/Analysis/yeast/rank_response/calculate_random_expectation.md b/docs/API/Analysis/yeast/rank_response/calculate_random_expectation.md new file mode 100644 index 0000000..fd2bcf0 --- /dev/null +++ b/docs/API/Analysis/yeast/rank_response/calculate_random_expectation.md @@ -0,0 +1,2 @@ +::: callingcardstools.Analysis.yeast.rank_response.calculate_random_expectation.calculate_random_expectation + handler: python \ No newline at end of file diff --git a/docs/API/Analysis/yeast/rank_response/compute_rank_response.md b/docs/API/Analysis/yeast/rank_response/compute_rank_response.md new file mode 100644 index 0000000..eed8072 --- /dev/null +++ b/docs/API/Analysis/yeast/rank_response/compute_rank_response.md @@ -0,0 +1,2 @@ +::: callingcardstools.Analysis.yeast.rank_response.compute_rank_response.compute_rank_response + handler: python \ No newline at end of file diff --git a/docs/API/Analysis/yeast/rank_response/create_partitions.md b/docs/API/Analysis/yeast/rank_response/create_partitions.md new file mode 100644 index 0000000..f6d720a --- /dev/null +++ b/docs/API/Analysis/yeast/rank_response/create_partitions.md @@ -0,0 +1,2 @@ +::: callingcardstools.Analysis.yeast.rank_response.create_partitions.create_partitions + handler: python \ No newline at end of file diff --git a/docs/API/Analysis/yeast/rank_response/create_rank_response_table.md b/docs/API/Analysis/yeast/rank_response/create_rank_response_table.md new file mode 100644 index 0000000..ee5dc94 --- /dev/null +++ b/docs/API/Analysis/yeast/rank_response/create_rank_response_table.md @@ -0,0 +1,2 @@ +::: callingcardstools.Analysis.yeast.rank_response.create_rank_response_table.create_rank_response_table + handler: python \ No newline at end of file diff --git a/docs/API/Analysis/yeast/rank_response/find_min_responsive/find_min_responsive.md b/docs/API/Analysis/yeast/rank_response/find_min_responsive/find_min_responsive.md new file mode 100644 index 0000000..c68a5f4 --- /dev/null +++ b/docs/API/Analysis/yeast/rank_response/find_min_responsive/find_min_responsive.md @@ -0,0 +1,2 @@ +::: callingcardstools.Analysis.yeast.rank_response.find_min_responsive.find_min_responsive + handler: python \ No newline at end of file diff --git a/docs/API/Analysis/yeast/rank_response/find_min_responsive/main.md b/docs/API/Analysis/yeast/rank_response/find_min_responsive/main.md new file mode 100644 index 0000000..7dfec26 --- /dev/null +++ b/docs/API/Analysis/yeast/rank_response/find_min_responsive/main.md @@ -0,0 +1,2 @@ +::: callingcardstools.Analysis.yeast.rank_response.find_min_responsive_main.main + handler: python \ No newline at end of file diff --git a/docs/API/Analysis/yeast/rank_response/find_min_responsive/parse_args.md b/docs/API/Analysis/yeast/rank_response/find_min_responsive/parse_args.md new file mode 100644 index 0000000..aff9f8d --- /dev/null +++ b/docs/API/Analysis/yeast/rank_response/find_min_responsive/parse_args.md @@ -0,0 +1,2 @@ +::: callingcardstools.Analysis.yeast.rank_response.find_min_responsive_main.parse_args + handler: python \ No newline at end of file diff --git a/docs/API/Analysis/yeast/rank_response/label_responsive_genes.md b/docs/API/Analysis/yeast/rank_response/label_responsive_genes.md new file mode 100644 index 0000000..d4ff3c0 --- /dev/null +++ b/docs/API/Analysis/yeast/rank_response/label_responsive_genes.md @@ -0,0 +1,2 @@ +::: callingcardstools.Analysis.yeast.rank_response.label_responsive_genes.label_responsive_genes + handler: python \ No newline at end of file diff --git a/docs/API/Analysis/yeast/rank_response/parse_binomtest_results.md b/docs/API/Analysis/yeast/rank_response/parse_binomtest_results.md new file mode 100644 index 0000000..0a5052b --- /dev/null +++ b/docs/API/Analysis/yeast/rank_response/parse_binomtest_results.md @@ -0,0 +1,2 @@ +::: callingcardstools.Analysis.yeast.rank_response.parse_binomtest_results.parse_binomtest_results + handler: python \ No newline at end of file diff --git a/docs/API/Analysis/yeast/rank_response/rank_response_main/main.md b/docs/API/Analysis/yeast/rank_response/rank_response_main/main.md new file mode 100644 index 0000000..90971d5 --- /dev/null +++ b/docs/API/Analysis/yeast/rank_response/rank_response_main/main.md @@ -0,0 +1,2 @@ +::: callingcardstools.Analysis.yeast.rank_response.rank_response_main.main + handler: python \ No newline at end of file diff --git a/docs/API/Analysis/yeast/rank_response/rank_response_main/parse_args.md b/docs/API/Analysis/yeast/rank_response/rank_response_main/parse_args.md new file mode 100644 index 0000000..8e7c44e --- /dev/null +++ b/docs/API/Analysis/yeast/rank_response/rank_response_main/parse_args.md @@ -0,0 +1,2 @@ +::: callingcardstools.Analysis.yeast.rank_response.rank_response_main.parse_args + handler: python \ No newline at end of file diff --git a/docs/API/Analysis/yeast/rank_response/rank_response_ratio_summarize.md b/docs/API/Analysis/yeast/rank_response/rank_response_ratio_summarize.md new file mode 100644 index 0000000..1dd3839 --- /dev/null +++ b/docs/API/Analysis/yeast/rank_response/rank_response_ratio_summarize.md @@ -0,0 +1,2 @@ +::: callingcardstools.Analysis.yeast.rank_response.rank_response_ratio_summarize.rank_response_ratio_summarize + handler: python \ No newline at end of file diff --git a/docs/API/Analysis/yeast/rank_response/read_in_data.md b/docs/API/Analysis/yeast/rank_response/read_in_data.md new file mode 100644 index 0000000..3c3cb50 --- /dev/null +++ b/docs/API/Analysis/yeast/rank_response/read_in_data.md @@ -0,0 +1,2 @@ +::: callingcardstools.Analysis.yeast.rank_response.read_in_data.read_in_data + handler: python \ No newline at end of file diff --git a/docs/API/Analysis/yeast/rank_response/set_none_str_to_none.md b/docs/API/Analysis/yeast/rank_response/set_none_str_to_none.md new file mode 100644 index 0000000..ee574d8 --- /dev/null +++ b/docs/API/Analysis/yeast/rank_response/set_none_str_to_none.md @@ -0,0 +1,2 @@ +::: callingcardstools.Analysis.yeast.rank_response.set_none_str_to_none.set_none_str_to_none + handler: python \ No newline at end of file diff --git a/docs/API/Analysis/yeast/rank_response/validate_config.md b/docs/API/Analysis/yeast/rank_response/validate_config.md new file mode 100644 index 0000000..5159f4d --- /dev/null +++ b/docs/API/Analysis/yeast/rank_response/validate_config.md @@ -0,0 +1,2 @@ +::: callingcardstools.Analysis.yeast.rank_response.validate_config.validate_config + handler: python \ No newline at end of file diff --git a/docs/API/PeakCalling/yeast/call_peaks/add_metrics.md b/docs/API/PeakCalling/yeast/call_peaks/add_metrics.md new file mode 100644 index 0000000..ee81ff4 --- /dev/null +++ b/docs/API/PeakCalling/yeast/call_peaks/add_metrics.md @@ -0,0 +1,2 @@ +::: callingcardstools.PeakCalling.yeast.call_peaks.add_metrics + handler: python \ No newline at end of file diff --git a/docs/API/PeakCalling/yeast/call_peaks/call_peaks.md b/docs/API/PeakCalling/yeast/call_peaks/call_peaks.md new file mode 100644 index 0000000..6bf8556 --- /dev/null +++ b/docs/API/PeakCalling/yeast/call_peaks/call_peaks.md @@ -0,0 +1,2 @@ +::: callingcardstools.PeakCalling.yeast.call_peaks.call_peaks + handler: python \ No newline at end of file diff --git a/docs/API/PeakCalling/yeast/call_peaks/count_hops.md b/docs/API/PeakCalling/yeast/call_peaks/count_hops.md new file mode 100644 index 0000000..76061c3 --- /dev/null +++ b/docs/API/PeakCalling/yeast/call_peaks/count_hops.md @@ -0,0 +1,2 @@ +::: callingcardstools.PeakCalling.yeast.call_peaks.count_hops + handler: python \ No newline at end of file diff --git a/docs/API/PeakCalling/yeast/call_peaks/main.md b/docs/API/PeakCalling/yeast/call_peaks/main.md new file mode 100644 index 0000000..fa664b4 --- /dev/null +++ b/docs/API/PeakCalling/yeast/call_peaks/main.md @@ -0,0 +1,2 @@ +::: callingcardstools.PeakCalling.yeast.call_peaks.main + handler: python \ No newline at end of file diff --git a/docs/API/PeakCalling/yeast/call_peaks/parse_args.md b/docs/API/PeakCalling/yeast/call_peaks/parse_args.md new file mode 100644 index 0000000..adb5a0b --- /dev/null +++ b/docs/API/PeakCalling/yeast/call_peaks/parse_args.md @@ -0,0 +1,2 @@ +::: callingcardstools.PeakCalling.yeast.call_peaks.parse_args + handler: python \ No newline at end of file diff --git a/docs/API/PeakCalling/yeast/enrichment_vectorized.md b/docs/API/PeakCalling/yeast/enrichment_vectorized.md new file mode 100644 index 0000000..c660f2e --- /dev/null +++ b/docs/API/PeakCalling/yeast/enrichment_vectorized.md @@ -0,0 +1,2 @@ +::: callingcardstools.PeakCalling.yeast.enrichment_vectorized.enrichment_vectorized + handler: python \ No newline at end of file diff --git a/docs/API/PeakCalling/yeast/hypergeom_pval_vectorized.md b/docs/API/PeakCalling/yeast/hypergeom_pval_vectorized.md new file mode 100644 index 0000000..b649ddc --- /dev/null +++ b/docs/API/PeakCalling/yeast/hypergeom_pval_vectorized.md @@ -0,0 +1,2 @@ +::: callingcardstools.PeakCalling.yeast.hypergeom_pval_vectorized.hypergeom_pval_vectorized + handler: python \ No newline at end of file diff --git a/docs/API/PeakCalling/yeast/poisson_pval_vectorized.md b/docs/API/PeakCalling/yeast/poisson_pval_vectorized.md new file mode 100644 index 0000000..d5e8022 --- /dev/null +++ b/docs/API/PeakCalling/yeast/poisson_pval_vectorized.md @@ -0,0 +1,2 @@ +::: callingcardstools.PeakCalling.yeast.poisson_pval_vectorized.poisson_pval_vectorized + handler: python \ No newline at end of file diff --git a/docs/API/PeakCalling/yeast/read_in_data/qbed_df_to_pyranges.md b/docs/API/PeakCalling/yeast/read_in_data/qbed_df_to_pyranges.md new file mode 100644 index 0000000..d50ad1f --- /dev/null +++ b/docs/API/PeakCalling/yeast/read_in_data/qbed_df_to_pyranges.md @@ -0,0 +1,2 @@ +::: callingcardstools.PeakCalling.yeast.read_in_data.qbed_df_to_pyranges + handler: python \ No newline at end of file diff --git a/docs/API/PeakCalling/yeast/read_in_data/read_in_background_data.md b/docs/API/PeakCalling/yeast/read_in_data/read_in_background_data.md new file mode 100644 index 0000000..e28e977 --- /dev/null +++ b/docs/API/PeakCalling/yeast/read_in_data/read_in_background_data.md @@ -0,0 +1,2 @@ +::: callingcardstools.PeakCalling.yeast.read_in_data.read_in_background_data + handler: python \ No newline at end of file diff --git a/docs/API/PeakCalling/yeast/read_in_data/read_in_chrmap.md b/docs/API/PeakCalling/yeast/read_in_data/read_in_chrmap.md new file mode 100644 index 0000000..7215f28 --- /dev/null +++ b/docs/API/PeakCalling/yeast/read_in_data/read_in_chrmap.md @@ -0,0 +1,3 @@ +::: callingcardstools.PeakCalling.yeast.read_in_data.read_in_chrmap + + handler: python \ No newline at end of file diff --git a/docs/API/PeakCalling/yeast/read_in_data/read_in_experiment_data.md b/docs/API/PeakCalling/yeast/read_in_data/read_in_experiment_data.md new file mode 100644 index 0000000..175a07b --- /dev/null +++ b/docs/API/PeakCalling/yeast/read_in_data/read_in_experiment_data.md @@ -0,0 +1,2 @@ +::: callingcardstools.PeakCalling.yeast.read_in_data.read_in_experiment_data + handler: python \ No newline at end of file diff --git a/docs/API/PeakCalling/yeast/read_in_data/read_in_promoter_data.md b/docs/API/PeakCalling/yeast/read_in_data/read_in_promoter_data.md new file mode 100644 index 0000000..54634f2 --- /dev/null +++ b/docs/API/PeakCalling/yeast/read_in_data/read_in_promoter_data.md @@ -0,0 +1,2 @@ +::: callingcardstools.PeakCalling.yeast.read_in_data.read_in_promoter_data + handler: python \ No newline at end of file diff --git a/docs/API/PeakCalling/yeast/read_in_data/relabel_chr_column.md b/docs/API/PeakCalling/yeast/read_in_data/relabel_chr_column.md new file mode 100644 index 0000000..bc41813 --- /dev/null +++ b/docs/API/PeakCalling/yeast/read_in_data/relabel_chr_column.md @@ -0,0 +1,2 @@ +::: callingcardstools.PeakCalling.yeast.read_in_data.relabel_chr_column + handler: python \ No newline at end of file diff --git a/docs/home/changelog.md b/docs/home/changelog.md index e966331..9d35082 100644 --- a/docs/home/changelog.md +++ b/docs/home/changelog.md @@ -4,6 +4,13 @@ ### Changes +- Needed to keep `name` in the output of PeakCalling.yeast.call_peaks +- adding Analysis and PeakCalling modules to the documentation API section + +## Version 1.5.0 + +### Changes + - overhaul of the PeakCalling/yeast module to address memory usage. adding pyranges as a depedency as a result. removed `consider_strand` and added a argument to deduplicate the experiment qbeds based on diff --git a/mkdocs.yml b/mkdocs.yml index bd953b3..8f8209f 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -37,11 +37,52 @@ nav: - SummaryParser: 'API/Alignment/SummaryParser.md' - mammals: - Qbed: 'API/Alignment/mammals/Qbed.md' + - Analysis: + - yeast: + - rank_response: + - bin_by_binding_rank: 'API/Analysis/yeast/rank_response/bin_by_binding_rank.md' + - calculate_random_expectation: 'API/Analysis/yeast/rank_response/calculate_random_expectation.md' + - compute_rank_response: 'API/Analysis/yeast/rank_response/compute_rank_response.md' + - create_partitions: 'API/Analysis/yeast/rank_response/create_partitions.md' + - create_rank_response_table: 'API/Analysis/yeast/rank_response/create_rank_response_table.md' + - label_responsive_genes: 'API/Analysis/yeast/rank_response/label_responsive_genes.md' + - parse_binomtest_results: 'API/Analysis/yeast/rank_response/parse_binomtest_results.md' + - rank_response_ratio_summarize: 'API/Analysis/yeast/rank_response/rank_response_ratio_summarize.md' + - read_in_data: 'API/Analysis/yeast/rank_response/read_in_data.md' + - set_none_str_to_none: 'API/Analysis/yeast/rank_response/set_none_str_to_none.md' + - validate_config: 'API/Analysis/yeast/rank_response/validate_config.md' + - find_min_responsive: + - find_min_responsive: 'API/Analysis/yeast/rank_response/find_min_responsive/find_min_responsive.md' + - parse_args: 'API/Analysis/yeast/rank_response/find_min_responsive/parse_args.md' + - main: 'API/Analysis/yeast/rank_response/find_min_responsive/main.md' + - chipexo_promoter_sig: + - read_in_chipexo_data: 'API/Analysis/yeast/chipexo_promoter_sig/read_in_chipexo_data.md' + - chipexo_promoter_sig: 'API/Analysis/yeast/chipexo_promoter_sig/chipexo_promoter_sig.md' + - parse_args: 'API/Analysis/yeast/chipexo_promoter_sig/parse_args.md' + - main: 'API/Analysis/yeast/chipexo_promoter_sig/main.md' - BarcodeParser: - mammals: - BarcodeCounterQc: 'API/BarcodeParser/mammals/BarcodeCounterQc.md' - yeast: - BarcodeCounterQc: 'API/BarcodeParser/yeast/BarcodeCounterQc.md' + - PeakCalling: + - yeast: + - read_in_data: + - qbed_df_to_pyranges: "API/PeakCalling/yeast/read_in_data/qbed_df_to_pyranges.md" + - read_in_background_data: "API/PeakCalling/yeast/read_in_data/read_in_background_data.md" + - read_in_chrmap: "API/PeakCalling/yeast/read_in_data/read_in_chrmap.md" + - read_in_experiment_data: "API/PeakCalling/yeast/read_in_data/read_in_experiment_data.md" + - read_in_promoter_data: "API/PeakCalling/yeast/read_in_data/read_in_promoter_data.md" + - relabel_chr_column: "API/PeakCalling/yeast/read_in_data/relabel_chr_column.md" + - enrichment_vectorized: "API/PeakCalling/yeast/enrichment_vectorized.md" + - hypergeom_pval_vectorized: "API/PeakCalling/yeast/hypergeom_pval_vectorized.md" + - poisson_pval_vectorized: "API/PeakCalling/yeast/poisson_pval_vectorized.md" + - call_peaks: + - count_hops: 'API/PeakCalling/yeast/call_peaks/count_hops.md' + - add_metrics: 'API/PeakCalling/yeast/call_peaks/add_metrics.md' + - call_peaks: 'API/PeakCalling/yeast/call_peaks/call_peaks.md' + - parse_args: 'API/PeakCalling/yeast/call_peaks/parse_args.md' + - main: 'API/PeakCalling/yeast/call_peaks/main.md' - QC: - create_status_coder: 'API/QC/create_status_coder.md' - StatusFlags: 'API/QC/StatusFlags.md' diff --git a/pyproject.toml b/pyproject.toml index 02bedb8..6e0378f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "callingCardsTools" -version = "1.5.0" +version = "1.5.1" description = "A collection of objects and functions to work with calling cards sequencing tools" authors = ["chase mateusiak "] license = "MIT" diff --git a/tests/PeakCalling/yeast/test_call_peaks.py b/tests/PeakCalling/yeast/test_call_peaks.py index 1816cf4..c52cd30 100644 --- a/tests/PeakCalling/yeast/test_call_peaks.py +++ b/tests/PeakCalling/yeast/test_call_peaks.py @@ -95,7 +95,7 @@ def test_count_hops(): ) result_stranded = ( - promoter_df.set_index("name", drop=True) + promoter_df.set_index("name") .join( [ experiment_result_stranded.set_index("name", drop=True), @@ -107,6 +107,19 @@ def test_count_hops(): .fillna(0) ) + assert ( + result_stranded.columns + == [ + "name", + "chr", + "start", + "end", + "strand", + "experiment_hops", + "background_hops", + ] + ).all() + pd.testing.assert_frame_equal( result_stranded, expected_result_stranded, check_dtype=False ) @@ -182,6 +195,24 @@ def test_with_data(tmpdir): experiment_df = pd.read_csv(args.experiment_data_path, sep="\t") background_df = pd.read_csv(args.background_data_path, sep="\t") + assert ( + output_df.columns + == [ + "name", + "chr", + "start", + "end", + "strand", + "experiment_hops", + "background_hops", + "background_total_hops", + "experiment_total_hops", + "callingcards_enrichment", + "poisson_pval", + "hypergeometric_pval", + ] + ).all() + # check that the deduplication worked as expected assert (output_df["experiment_total_hops"] != experiment_df.shape[0]).all() assert ( @@ -198,7 +229,7 @@ def test_with_data(tmpdir): ] # do the same with the background_df background_df_subset = background_df[ - (background_df["chr"] == 'chrII') + (background_df["chr"] == "chrII") & (background_df["start"] >= 36350) & (background_df["start"] <= 37050) ] @@ -212,8 +243,8 @@ def test_with_data(tmpdir): & (output_df["end"] == 37050) ] - assert (output_df['background_total_hops'] == background_df.shape[0]).all() - assert (output_df_subset['background_hops'] == background_df_subset.shape[0]).all() + assert (output_df["background_total_hops"] == background_df.shape[0]).all() + assert (output_df_subset["background_hops"] == background_df_subset.shape[0]).all() assert ( output_df_subset["experiment_hops"] == experiment_df_subset.drop_duplicates(subset=["chr", "start", "end"]).shape[0] @@ -330,9 +361,9 @@ def test_combine_replicates(tmpdir): df3 = pd.read_csv(output_path3) combined_df = df1.merge( - df2, on=["chr", "start", "end", "score", "strand"], suffixes=("_1", "_2") + df2, on=["name", "chr", "start", "end", "strand"], suffixes=("_1", "_2") ) - combined_df = combined_df.merge(df3, on=["chr", "start", "end", "score", "strand"]) + combined_df = combined_df.merge(df3, on=["name", "chr", "start", "end", "strand"]) assert ( combined_df["experiment_hops_1"] + combined_df["experiment_hops_2"]