Skip to content

Commit

Permalink
bumping to version 1.4.0
Browse files Browse the repository at this point in the history
  • Loading branch information
cmatKhan committed Dec 14, 2023
1 parent b3ae372 commit 34c5b95
Show file tree
Hide file tree
Showing 14 changed files with 88 additions and 3,977 deletions.
2 changes: 0 additions & 2 deletions callingcardstools/Analysis/yeast/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +0,0 @@
from callingcardstools.Analysis.yeast.chipexo_promoter_sig \
import chipexo_promoter_sig
10 changes: 6 additions & 4 deletions callingcardstools/Analysis/yeast/chipexo_promoter_sig.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
import logging
import argparse
import logging
import os

import pandas as pd

from callingcardstools.PeakCalling.yeast import (read_in_chrmap,
relabel_chr_column,
read_in_promoter_data)
read_in_promoter_data,
relabel_chr_column)

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -187,7 +189,7 @@ def parse_args(
help='Set this flag to gzip the output file.'
)

return parser
return subparser


def main(args: argparse.Namespace) -> None:
Expand Down
9 changes: 5 additions & 4 deletions callingcardstools/Analysis/yeast/rank_response/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,15 @@
from .compute_rank_response import compute_rank_response
from .create_partitions import create_partitions
from .create_rank_response_table import create_rank_response_table
from .find_min_responsive_main import parse_args as find_min_responsive_parse_args # noqa
from .find_min_responsive_main import main as find_min_responsive_main
from .find_min_responsive import find_min_responsive
from .find_min_responsive_main import main as find_min_responsive_main
from .find_min_responsive_main import \
parse_args as find_min_responsive_parse_args
from .label_responsive_genes import label_responsive_genes
from .parse_binomtest_results import parse_binomtest_results
from .rank_response_main import parse_args as rank_response_parse_args
from .rank_response_main import main as rank_response_main
from .rank_response_main import parse_args as rank_response_parse_args
from .rank_response_ratio_summarize import rank_response_ratio_summarize
from .read_in_data import read_in_data
from .set_none_str_to_none import set_none_str_to_none
from .validate_config import validate_config
from .validate_config import validate_config
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import logging
import argparse
import json
from .validate_config import validate_config
import logging

from .create_rank_response_table import create_rank_response_table
from .validate_config import validate_config

logger = logging.getLogger(__name__)

Expand Down
40 changes: 25 additions & 15 deletions callingcardstools/PeakCalling/yeast/call_peaks.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,22 +18,23 @@
.. author:: Chase Mateusiak
.. date:: 2023-11-23
"""
import argparse
import logging
import time
import os
import argparse
import time

import pandas as pd
from callingcardstools.PeakCalling.yeast import \
(read_in_chrmap,
read_in_experiment_data,
read_in_promoter_data,
read_in_background_data)

from callingcardstools.PeakCalling.yeast import (read_in_background_data,
read_in_chrmap,
read_in_experiment_data,
read_in_promoter_data)
from callingcardstools.PeakCalling.yeast.enrichment_vectorized import \
enrichment_vectorized
from callingcardstools.PeakCalling.yeast.poisson_pval_vectorized import \
poisson_pval_vectorized
from callingcardstools.PeakCalling.yeast.hypergeom_pval_vectorized import \
hypergeom_pval_vectorized
from callingcardstools.PeakCalling.yeast.poisson_pval_vectorized import \
poisson_pval_vectorized

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -72,6 +73,7 @@ def count_hops(promoter_df: pd.DataFrame,
... 'chr': ['chr1', 'chr1', 'chr1', 'chr1', 'chr1'],
... 'start': [150, 250, 350, 450, 550],
... 'end': [200, 300, 400, 500, 600],
... 'depth': [1, 1, 1, 1, 1],
... 'strand': ['+', '-', '+', '-', '+']
... })
>>> count_hops(promoter_df, qbed_df, 'hops', True)
Expand All @@ -82,10 +84,18 @@ def count_hops(promoter_df: pd.DataFrame,
3 chr1 400 500 - 1
4 chr1 500 600 + 1
"""

query_str = '(start <= qbed_start <= end) and strand == qbed_strand' \
if consider_strand \
else 'start <= qbed_start <= end'
if consider_strand:
query_str = '(start <= qbed_start <= end) and strand == qbed_strand'
else:
# if consider_strand is false, then combine rows with the same
# coordinates but different strand values and sum the depth. Set the
# strand to "*" for all rows
qbed_df = qbed_df\
.groupby(['chr', 'start', 'end'])\
.agg({'depth': 'sum'})\
.reset_index()\
.assign(strand='*')
query_str = 'start <= qbed_start <= end'

return promoter_df\
.merge(qbed_df.rename(columns={'start': 'qbed_start',
Expand Down Expand Up @@ -184,7 +194,7 @@ def call_peaks(
.fillna(0)\
.assign(background_total_hops=background_total_hops,
experiment_total_hops=experiment_total_hops)

promoter_hops_df['background_hops'] = \
promoter_hops_df['background_hops'].astype('int64')

Expand Down Expand Up @@ -360,7 +370,7 @@ def main(args: argparse.Namespace) -> None:
if not os.path.isfile(file):
raise FileNotFoundError('The following path '
f'does not exist: {file}')

result_df = call_peaks(
args.experiment_data_path,
args.experiment_orig_chr_convention,
Expand Down
29 changes: 15 additions & 14 deletions callingcardstools/PeakCalling/yeast/read_in_data.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import os

import pandas as pd


Expand Down Expand Up @@ -199,8 +200,8 @@ def read_in_experiment_data(experiment_data_path: str,
... 'curr_chr_name_convention',
... 'new_chr_name_convention',
... chrmap_df)
>>> list(experiment_df.columns) == ['chr', 'start', 'end', 'strand',
... 'depth']
>>> list(experiment_df.columns) == ['chr', 'start', 'end', 'depth',
... 'strand']
True
>>> experiment_total_hops
1
Expand All @@ -218,7 +219,7 @@ def read_in_experiment_data(experiment_data_path: str,
sep='\t',
compression='gzip' if gzipped else None,
nrows=0)
if header.columns.tolist() != ['chr', 'start', 'end', 'strand', 'depth']:
if header.columns.tolist() != ['chr', 'start', 'end', 'depth', 'strand']:
header = None
else:
header = 0
Expand All @@ -228,12 +229,12 @@ def read_in_experiment_data(experiment_data_path: str,
sep='\t',
header=header,
names=['chr', 'start', 'end',
'strand', 'depth'],
'depth', 'strand'],
dtype={'chr': str,
'start': int,
'end': int,
'strand': str,
'depth': int},
'depth': int,
'strand': str},
compression='gzip' if gzipped else None)
except ValueError as e:
raise ValueError('experiment_data_path must be a qbed file '
Expand Down Expand Up @@ -393,8 +394,8 @@ def read_in_background_data(background_data_path: str,
... 'curr_chr_name_convention',
... 'new_chr_name_convention',
... chrmap_df)
>>> list(background_df.columns) == ['chr', 'start', 'end', 'strand',
... 'depth']
>>> list(background_df.columns) == ['chr', 'start', 'end', 'depth',
... 'strand']
True
>>> background_total_hops
1
Expand All @@ -409,7 +410,7 @@ def read_in_background_data(background_data_path: str,
gzipped = str(background_data_path).endswith('.gz')
# check if data has column headers
header = pd.read_csv(background_data_path, sep='\t', nrows=0)
if header.columns.tolist() != ['chr', 'start', 'end', 'strand', 'depth']:
if header.columns.tolist() != ['chr', 'start', 'end', 'depth', 'strand']:
header = None
else:
header = 0
Expand All @@ -420,17 +421,17 @@ def read_in_background_data(background_data_path: str,
sep='\t',
header=header,
names=['chr', 'start', 'end',
'strand', 'depth'],
'depth', 'strand'],
dtype={'chr': str,
'start': int,
'end': int,
'strand': str,
'depth': 'int64'},
'depth': 'int64',
'strand': str},
compression='gzip' if gzipped else None)
except ValueError as e:
raise ValueError('background_data_path must be a qbed file '
'with columns `chr`, `start`, `end`, `strand`, '
'and `depth`') from e
'with columns `chr`, `start`, `end`, `depth`, '
'and `strand`') from e

# relabel chr column
background_df = relabel_chr_column(background_df,
Expand Down
13 changes: 11 additions & 2 deletions callingcardstools/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
from .Alignment.mammals import process_alignments as process_mammals_bam
from .Alignment.yeast import legacy_makeccf
from .Alignment.yeast import process_alignments as process_yeast_bam
from .Analysis.yeast import rank_response as yeast_rank_response
from .Analysis.yeast import chipexo_promoter_sig as yeast_chipexo_promoter_sig
from .Analysis.yeast.rank_response import rank_response_parse_args
from .BarcodeParser.yeast import barcode_table_to_json
from .BarcodeParser.yeast import combine_qc as yeast_combine_qc
from .PeakCalling.yeast import call_peaks as yeast_call_peaks
Expand Down Expand Up @@ -59,6 +60,9 @@ def parse_args() -> Callable[[list], argparse.Namespace]:

'yeast_call_peaks': 'Call peaks on yeast data',

'yeast_chipexo_sig_promoter': 'call significant promoters from '
'chipexo data from yeastepigenome.org',

'yeast_find_min_responsive': 'Given a set of yeast expression data '
'and thresholds on the effects and/or pvalues, find the minimum '
'number of responsive genes in the data set given',
Expand Down Expand Up @@ -139,8 +143,13 @@ def parse_args() -> Callable[[list], argparse.Namespace]:
subparsers,
script_descriptions['yeast_call_peaks'],
common_args)

subparsers = yeast_chipexo_promoter_sig.parse_args(
subparsers,
script_descriptions['yeast_chipexo_sig_promoter'],
common_args)

subparsers = yeast_rank_response.rank_response_parse_args(
subparsers = rank_response_parse_args(
subparsers,
script_descriptions['yeast_rank_response'],
common_args)
Expand Down
8 changes: 8 additions & 0 deletions docs/home/changelog.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
# Change Log

## Version 1.4.0

### Additions

For yeast, the `yeast_call_peaks` `consider_strand` functionality has been
changed to collapse read counts at the same coordinate on the forward and
reverse strands, in addition to ignoring the strand with regard to the promoter.

## Version 1.3.0

### Additions
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "callingCardsTools"
version = "1.3.0"
version = "1.4.0"
description = "A collection of objects and functions to work with calling cards sequencing tools"
authors = ["chase mateusiak <[email protected]>"]
license = "MIT"
Expand Down
Loading

0 comments on commit 34c5b95

Please sign in to comment.