Skip to content

Commit

Permalink
bumping to version 1.4.0
Browse files Browse the repository at this point in the history
  • Loading branch information
cmatKhan committed Dec 14, 2023
1 parent b3ae372 commit 34c5b95
Show file tree
Hide file tree
Showing 14 changed files with 88 additions and 3,977 deletions.
2 changes: 0 additions & 2 deletions callingcardstools/Analysis/yeast/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +0,0 @@
from callingcardstools.Analysis.yeast.chipexo_promoter_sig \
import chipexo_promoter_sig
10 changes: 6 additions & 4 deletions callingcardstools/Analysis/yeast/chipexo_promoter_sig.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
import logging
import argparse
import logging
import os

import pandas as pd

from callingcardstools.PeakCalling.yeast import (read_in_chrmap,
relabel_chr_column,
read_in_promoter_data)
read_in_promoter_data,
relabel_chr_column)

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -187,7 +189,7 @@ def parse_args(
help='Set this flag to gzip the output file.'
)

return parser
return subparser


def main(args: argparse.Namespace) -> None:
Expand Down
9 changes: 5 additions & 4 deletions callingcardstools/Analysis/yeast/rank_response/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,15 @@
from .compute_rank_response import compute_rank_response
from .create_partitions import create_partitions
from .create_rank_response_table import create_rank_response_table
from .find_min_responsive_main import parse_args as find_min_responsive_parse_args # noqa
from .find_min_responsive_main import main as find_min_responsive_main
from .find_min_responsive import find_min_responsive
from .find_min_responsive_main import main as find_min_responsive_main
from .find_min_responsive_main import \
parse_args as find_min_responsive_parse_args
from .label_responsive_genes import label_responsive_genes
from .parse_binomtest_results import parse_binomtest_results
from .rank_response_main import parse_args as rank_response_parse_args
from .rank_response_main import main as rank_response_main
from .rank_response_main import parse_args as rank_response_parse_args
from .rank_response_ratio_summarize import rank_response_ratio_summarize
from .read_in_data import read_in_data
from .set_none_str_to_none import set_none_str_to_none
from .validate_config import validate_config
from .validate_config import validate_config
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import logging
import argparse
import json
from .validate_config import validate_config
import logging

from .create_rank_response_table import create_rank_response_table
from .validate_config import validate_config

logger = logging.getLogger(__name__)

Expand Down
40 changes: 25 additions & 15 deletions callingcardstools/PeakCalling/yeast/call_peaks.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,22 +18,23 @@
.. author:: Chase Mateusiak
.. date:: 2023-11-23
"""
import argparse
import logging
import time
import os
import argparse
import time

import pandas as pd
from callingcardstools.PeakCalling.yeast import \
(read_in_chrmap,
read_in_experiment_data,
read_in_promoter_data,
read_in_background_data)

from callingcardstools.PeakCalling.yeast import (read_in_background_data,
read_in_chrmap,
read_in_experiment_data,
read_in_promoter_data)
from callingcardstools.PeakCalling.yeast.enrichment_vectorized import \
enrichment_vectorized
from callingcardstools.PeakCalling.yeast.poisson_pval_vectorized import \
poisson_pval_vectorized
from callingcardstools.PeakCalling.yeast.hypergeom_pval_vectorized import \
hypergeom_pval_vectorized
from callingcardstools.PeakCalling.yeast.poisson_pval_vectorized import \
poisson_pval_vectorized

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -72,6 +73,7 @@ def count_hops(promoter_df: pd.DataFrame,
... 'chr': ['chr1', 'chr1', 'chr1', 'chr1', 'chr1'],
... 'start': [150, 250, 350, 450, 550],
... 'end': [200, 300, 400, 500, 600],
... 'depth': [1, 1, 1, 1, 1],
... 'strand': ['+', '-', '+', '-', '+']
... })
>>> count_hops(promoter_df, qbed_df, 'hops', True)
Expand All @@ -82,10 +84,18 @@ def count_hops(promoter_df: pd.DataFrame,
3 chr1 400 500 - 1
4 chr1 500 600 + 1
"""

query_str = '(start <= qbed_start <= end) and strand == qbed_strand' \
if consider_strand \
else 'start <= qbed_start <= end'
if consider_strand:
query_str = '(start <= qbed_start <= end) and strand == qbed_strand'
else:
# if consider_strand is false, then combine rows with the same
# coordinates but different strand values and sum the depth. Set the
# strand to "*" for all rows
qbed_df = qbed_df\
.groupby(['chr', 'start', 'end'])\
.agg({'depth': 'sum'})\
.reset_index()\
.assign(strand='*')
query_str = 'start <= qbed_start <= end'

return promoter_df\
.merge(qbed_df.rename(columns={'start': 'qbed_start',
Expand Down Expand Up @@ -184,7 +194,7 @@ def call_peaks(
.fillna(0)\
.assign(background_total_hops=background_total_hops,
experiment_total_hops=experiment_total_hops)

promoter_hops_df['background_hops'] = \
promoter_hops_df['background_hops'].astype('int64')

Expand Down Expand Up @@ -360,7 +370,7 @@ def main(args: argparse.Namespace) -> None:
if not os.path.isfile(file):
raise FileNotFoundError('The following path '
f'does not exist: {file}')

result_df = call_peaks(
args.experiment_data_path,
args.experiment_orig_chr_convention,
Expand Down
29 changes: 15 additions & 14 deletions callingcardstools/PeakCalling/yeast/read_in_data.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import os

import pandas as pd


Expand Down Expand Up @@ -199,8 +200,8 @@ def read_in_experiment_data(experiment_data_path: str,
... 'curr_chr_name_convention',
... 'new_chr_name_convention',
... chrmap_df)
>>> list(experiment_df.columns) == ['chr', 'start', 'end', 'strand',
... 'depth']
>>> list(experiment_df.columns) == ['chr', 'start', 'end', 'depth',
... 'strand']
True
>>> experiment_total_hops
1
Expand All @@ -218,7 +219,7 @@ def read_in_experiment_data(experiment_data_path: str,
sep='\t',
compression='gzip' if gzipped else None,
nrows=0)
if header.columns.tolist() != ['chr', 'start', 'end', 'strand', 'depth']:
if header.columns.tolist() != ['chr', 'start', 'end', 'depth', 'strand']:
header = None
else:
header = 0
Expand All @@ -228,12 +229,12 @@ def read_in_experiment_data(experiment_data_path: str,
sep='\t',
header=header,
names=['chr', 'start', 'end',
'strand', 'depth'],
'depth', 'strand'],
dtype={'chr': str,
'start': int,
'end': int,
'strand': str,
'depth': int},
'depth': int,
'strand': str},
compression='gzip' if gzipped else None)
except ValueError as e:
raise ValueError('experiment_data_path must be a qbed file '
Expand Down Expand Up @@ -393,8 +394,8 @@ def read_in_background_data(background_data_path: str,
... 'curr_chr_name_convention',
... 'new_chr_name_convention',
... chrmap_df)
>>> list(background_df.columns) == ['chr', 'start', 'end', 'strand',
... 'depth']
>>> list(background_df.columns) == ['chr', 'start', 'end', 'depth',
... 'strand']
True
>>> background_total_hops
1
Expand All @@ -409,7 +410,7 @@ def read_in_background_data(background_data_path: str,
gzipped = str(background_data_path).endswith('.gz')
# check if data has column headers
header = pd.read_csv(background_data_path, sep='\t', nrows=0)
if header.columns.tolist() != ['chr', 'start', 'end', 'strand', 'depth']:
if header.columns.tolist() != ['chr', 'start', 'end', 'depth', 'strand']:
header = None
else:
header = 0
Expand All @@ -420,17 +421,17 @@ def read_in_background_data(background_data_path: str,
sep='\t',
header=header,
names=['chr', 'start', 'end',
'strand', 'depth'],
'depth', 'strand'],
dtype={'chr': str,
'start': int,
'end': int,
'strand': str,
'depth': 'int64'},
'depth': 'int64',
'strand': str},
compression='gzip' if gzipped else None)
except ValueError as e:
raise ValueError('background_data_path must be a qbed file '
'with columns `chr`, `start`, `end`, `strand`, '
'and `depth`') from e
'with columns `chr`, `start`, `end`, `depth`, '
'and `strand`') from e

# relabel chr column
background_df = relabel_chr_column(background_df,
Expand Down
13 changes: 11 additions & 2 deletions callingcardstools/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
from .Alignment.mammals import process_alignments as process_mammals_bam
from .Alignment.yeast import legacy_makeccf
from .Alignment.yeast import process_alignments as process_yeast_bam
from .Analysis.yeast import rank_response as yeast_rank_response
from .Analysis.yeast import chipexo_promoter_sig as yeast_chipexo_promoter_sig
from .Analysis.yeast.rank_response import rank_response_parse_args
from .BarcodeParser.yeast import barcode_table_to_json
from .BarcodeParser.yeast import combine_qc as yeast_combine_qc
from .PeakCalling.yeast import call_peaks as yeast_call_peaks
Expand Down Expand Up @@ -59,6 +60,9 @@ def parse_args() -> Callable[[list], argparse.Namespace]:

'yeast_call_peaks': 'Call peaks on yeast data',

'yeast_chipexo_sig_promoter': 'call significant promoters from '
'chipexo data from yeastepigenome.org',

'yeast_find_min_responsive': 'Given a set of yeast expression data '
'and thresholds on the effects and/or pvalues, find the minimum '
'number of responsive genes in the data set given',
Expand Down Expand Up @@ -139,8 +143,13 @@ def parse_args() -> Callable[[list], argparse.Namespace]:
subparsers,
script_descriptions['yeast_call_peaks'],
common_args)

subparsers = yeast_chipexo_promoter_sig.parse_args(
subparsers,
script_descriptions['yeast_chipexo_sig_promoter'],
common_args)

subparsers = yeast_rank_response.rank_response_parse_args(
subparsers = rank_response_parse_args(
subparsers,
script_descriptions['yeast_rank_response'],
common_args)
Expand Down
8 changes: 8 additions & 0 deletions docs/home/changelog.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
# Change Log

## Version 1.4.0

### Additions

For yeast, the `yeast_call_peaks` `consider_strand` functionality has been
changed to collapse read counts at the same coordinate on the forward and
reverse strands, in addition to ignoring the strand with regard to the promoter.

## Version 1.3.0

### Additions
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "callingCardsTools"
version = "1.3.0"
version = "1.4.0"
description = "A collection of objects and functions to work with calling cards sequencing tools"
authors = ["chase mateusiak <[email protected]>"]
license = "MIT"
Expand Down
Loading

0 comments on commit 34c5b95

Please sign in to comment.