|
2 | 2 | import helpers
|
3 | 3 |
|
4 | 4 | import os, shutil
|
| 5 | + |
5 | 6 | module_dir = os.path.abspath(os.path.dirname(__file__))
|
6 | 7 |
|
7 | 8 | default_params_str = """{
|
|
37 | 38 | }
|
38 | 39 | """
|
39 | 40 |
|
| 41 | + |
def get_params():
    """
    Return the params dictionary that holds all the configuration
    information of the program.

    The dictionary is loaded from 'inmembrane.config' in the current
    working directory. If that file is not found, a default
    'inmembrane.config' is first written there from
    'default_params_str' and then loaded. The config file should be
    edited if the binaries are not available on the path, or have
    different names.
    """
    from helpers import log_stderr

    config = os.path.join(os.getcwd(), 'inmembrane.config')
    if not os.path.isfile(config):
        log_stderr("# Couldn't find inmembrane.config file")
        log_stderr("# So, will generate a default config " + config)
        with open(config, 'w') as fh:
            fh.write(default_params_str)
    else:
        log_stderr("# Loading existing inmembrane.config")

    with open(config) as fh:
        config_text = fh.read()

    # NOTE(review): eval() runs whatever Python expression the config file
    # contains, so the file must come from a trusted source. If the config
    # is always a plain literal, ast.literal_eval would be a safer loader;
    # confirm against the contents of default_params_str before switching.
    params = eval(config_text)
    return params
| 66 | + |
64 | 67 |
|
def init_output_dir(params):
    """
    Creates a directory for all output files and makes it the current
    working directory. Copies the input sequences into it as
    'input.fasta', together with the 'inmembrane.config' found in the
    current working directory.

    'params' is modified in place:
      - 'out_dir' defaults to the FASTA path with its extension removed
      - 'csv' defaults to that same stem plus '.csv' and is made absolute
      - 'citations' is set to the absolute path of 'citations.txt'
        inside the output directory
      - 'fasta' is replaced by 'input.fasta', which is relative to the
        new working directory
    """
    from helpers import dict_get

    # Path of the input file without its extension; used as the default
    # for both the output directory and the csv file name.
    # NOTE(review): if params['fasta'] has no extension, this stem equals
    # the FASTA path itself, so os.makedirs below fails because a file of
    # that name already exists.
    fasta_stem = os.path.splitext(params['fasta'])[0]

    if dict_get(params, 'out_dir'):
        base_dir = params['out_dir']
    else:
        base_dir = fasta_stem
        params['out_dir'] = base_dir
    if not os.path.isdir(base_dir):
        os.makedirs(base_dir)

    # Output paths are made absolute now, because the working directory
    # changes at the end of this function.
    if not dict_get(params, 'csv'):
        params['csv'] = fasta_stem + '.csv'
    params['csv'] = os.path.abspath(params['csv'])

    params['citations'] = os.path.abspath(
        os.path.join(params['out_dir'], 'citations.txt'))

    fasta = "input.fasta"
    shutil.copy(params['fasta'], os.path.join(base_dir, fasta))
    params['fasta'] = fasta

    shutil.copy(os.path.join(os.getcwd(), 'inmembrane.config'), base_dir)

    os.chdir(base_dir)
95 | 98 |
|
96 | 99 |
|
def import_protocol_python(params):
    """
    Builds the python statement that loads the desired protocol module.

    The protocol is named by the string params['protocol']. The
    returned statement imports it under the internal variable name
    'protocol' and is meant to be processed by the 'exec' function.

    Raises IOError if '<module_dir>/protocols/<protocol>.py' does not
    exist.
    """
    protocol_name = params['protocol']
    protocol_py = os.path.join(module_dir, 'protocols',
                               protocol_name + '.py')
    if not os.path.isfile(protocol_py):
        # protocol_py is already the full path, so report it as-is.
        raise IOError("Couldn't find protocol file " + protocol_py)
    # NOTE(review): the protocol name is interpolated into code that the
    # caller passes to exec, so it must come from a trusted config.
    return 'import inmembrane.protocols.%s as protocol' % (protocol_name)
109 | 113 |
|
110 | 114 |
|
def process(params):
    """
    Main program loop. Triggers the 'protocol' found in the params
    to annotate all proteins given the list of annotations needed by
    'protocol'. Then outputs to screen, a .csv file and a citations
    file.

    Returns the 'proteins' object produced by create_proteins_dict,
    after the plugins and the protocol have processed it.
    """
    import codecs
    import textwrap
    from helpers import create_proteins_dict, log_stdout, log_stderr

    # Will load all plugins in the plugins/ directory. The star-import is
    # executed into its own namespace because 'from ... import *' inside
    # a function body is a SyntaxError on Python 3.
    plugin_namespace = {}
    exec('from inmembrane.plugins import *', plugin_namespace)

    # initializations
    # The generated import statement binds the name 'protocol'. It is
    # executed into an explicit namespace because exec cannot create
    # function locals on Python 3.
    protocol_namespace = {}
    exec(import_protocol_python(params), protocol_namespace)
    protocol = protocol_namespace['protocol']

    init_output_dir(params)
    seqids, proteins = create_proteins_dict(params['fasta'])

    # TODO: ideally this loop needs to be run within the protocol,
    #       since for some protocols not all plugins
    #       will be run for every sequence, conditional
    #       on the outcome of a previous analysis
    #       eg. protocol.run(params, proteins)

    # annotates with external binaries as found in plugins
    for plugin_str in protocol.get_annotations(params):
        # NOTE(review): eval of the protocol-supplied name; plugin names
        # are looked up first, module globals second.
        plugin = eval(plugin_str, globals(), plugin_namespace)
        plugin.annotate(params, proteins)

    # do protocol analysis on the results of the annotations
    for seqid in seqids:
        protein = proteins[seqid]
        protocol.post_process_protein(params, protein)
        log_stdout(protocol.protein_output_line(seqid, proteins))

    # print a summary table of classifications to stderr
    log_stderr(protocol.summary_table(params, proteins))

    # always write to biologist-friendly csv file
    with open(params['csv'], 'w') as f:
        for seqid in seqids:
            f.write(protocol.protein_csv_line(seqid, proteins))
    log_stderr("\n")
    log_stderr("Output written to %s" % (params['csv']))

    # TODO: citations for specific HMMs (PFAM etc ?)

    # write citations to a file and gracefully deal with plugins
    # without a (complete) citation defined
    programs_used = []
    with codecs.open(params['citations'], mode='w', encoding='utf-8') as f:
        for program in protocol.get_annotations(params):
            plugin = eval(program, globals(), plugin_namespace)

            # Look up both fields before writing anything, so that a
            # partial citation never leaves a half-written entry.
            try:
                citation_name = plugin.citation['name']
            except (AttributeError, KeyError):
                citation_name = None
            try:
                citation_ref = plugin.citation['ref']
            except (AttributeError, KeyError):
                citation_ref = None

            if citation_name is not None and citation_ref is not None:
                f.write(citation_name + ":\n")
                f.write(textwrap.fill(citation_ref))
            else:
                f.write("%s - no citation provided." % program)
            f.write("\n\n")

            if citation_name is not None:
                programs_used.append(citation_name)
            else:
                programs_used.append(program)

    log_stderr("\n")
    log_stderr("This run used %s." % (", ".join(programs_used)))
    log_stderr("References have been written to %s \n"
               "# - please cite as appropriate." %
               (params['citations']))

    return proteins
0 commit comments