Package csb :: Package apps :: Module hhsearch
[frames | no frames]

Source Code for Module csb.apps.hhsearch

  1  """ 
  2  Python bindings for the HHsearch program. Capable of executing multiple 
  3  HHsearch jobs in parallel. 
  4  """ 
  5   
  6  import multiprocessing as mp 
  7   
  8  import csb.apps 
  9  import csb.io 
 10  import csb.bio.io 
class ExitCodes(csb.apps.ExitCodes):
    """
    Process exit codes used by the HHsearch application, added on top of
    those inherited from csb.apps.ExitCodes.
    """

    # Reported by HHsearchApp.main for IOError and csb.io.InvalidCommandError.
    IO_ERROR = 2
    # Not referenced by the code visible in this module.
    INVALID_DATA = 3
    # Reported by HHsearchApp.main when csb.io.ProcessError is raised.
    EXT_TOOL_FAILURE = 4
18
class AppRunner(csb.apps.AppRunner):
    """
    Runner which binds the HHsearch application class to its command line
    definition.
    """

    @property
    def target(self):
        """The application class started by this runner (HHsearchApp)."""
        return HHsearchApp

    def command_line(self):
        """
        Build the argument handler describing the command line interface.

        The module docstring is used as the program description and the
        default for the 'cpu' option is the CPU count of this machine.

        @return: the configured csb.apps.ArgHandler instance
        """
        handler = csb.apps.ArgHandler(self.program, __doc__)
        default_cpu = mp.cpu_count()

        handler.add_scalar_option(
            'binary', 'b', str, 'full path to the HHsearch binary ',
            default='hhsearch')
        handler.add_scalar_option(
            'cpu', 'c', int, 'maximum degree of parallelism',
            default=default_cpu)
        handler.add_scalar_option(
            'database', 'd', str, 'the subject (database) HMM file',
            required=True)
        handler.add_array_argument('query', str, 'query HMM file(s)')

        return handler
37
class HHsearchApp(csb.apps.Application):
    """
    Command line application: runs HHsearch for every query file given on
    the command line and logs a table of the resulting hits.
    """

    def main(self):
        """
        Run the search(es) and log the hits for each query.

        A single query is executed in-process and hands the whole 'cpu'
        budget to HHsearch itself; several queries are distributed over a
        pool of at most 'cpu' worker processes instead. Failures are mapped
        to an exit code through HHsearchApp.exit.
        """
        header = '\nRank Hit Prob St End Qst Qend'
        ruler = '-------------------------------------'
        row = '{0.rank:3}. {0.id:5} {0.probability:5.3f} {0.start:3} {0.end:3} {0.qstart:3} {0.qend:3}'

        query_files = list(self.args.query)
        searcher = HHsearch(self.args.binary, self.args.database)

        try:
            if len(query_files) != 1:
                # Many queries: one worker process per query, up to 'cpu'.
                tasks = [HHTask(path) for path in query_files]
                results = searcher.runmany(tasks, workers=self.args.cpu)
            else:
                # One query: let the HHsearch binary use all requested CPUs.
                searcher.cpu = self.args.cpu
                task = HHTask(query_files[0])
                results = [searcher.run(task)]

        except IOError as io_error:
            HHsearchApp.exit(str(io_error), ExitCodes.IO_ERROR)

        except csb.io.InvalidCommandError as bad_command:
            text = '{0!s}: {0.program}'.format(bad_command)
            HHsearchApp.exit(text, ExitCodes.IO_ERROR)

        except csb.io.ProcessError as failure:
            text = 'Bad exit code from HHsearch: #{0.code}.\nSTDERR: {0.stderr}\nSTDOUT: {0.stdout}'.format(failure.context)
            HHsearchApp.exit(text, ExitCodes.EXT_TOOL_FAILURE)

        self.log(header)
        self.log(ruler)

        for finished in results:
            self.log('\n\n# QUERY:{0}\n'.format(finished.queryfile))

            if not finished.result:
                continue

            for hit in finished.result:
                self.log(row.format(hit))
74
class Context(object):
    """
    Pairs a query with the result produced for it.

    The query is fixed at construction time and read-only afterwards; the
    result starts out as None and may be assigned later.
    """

    def __init__(self, query):
        """
        @param query: the query payload to carry
        """
        self.__query = query
        self.__result = None

    @property
    def query(self):
        """The query given at construction time (read-only)."""
        return self.__query

    @property
    def result(self):
        """The result attached to this context, or None if not set yet."""
        return self.__result

    @result.setter
    def result(self, value):
        self.__result = value
93
class HHTask(Context):
    """
    A context whose query is the full text content of a file on disk.
    """

    def __init__(self, queryfile):
        """
        Read the query file and initialize the context with its content.

        @param queryfile: path to the query file
        @raise IOError: if the file cannot be opened or read
        """
        self.queryfile = queryfile

        # Use a context manager so the handle is closed deterministically
        # instead of being left to the garbage collector.
        with open(queryfile) as stream:
            query = stream.read()

        super(HHTask, self).__init__(query)
103
104 105 -def _task(args):
106 107 try: 108 binary, db, cpu, context = args 109 return HHsearch(binary, db, cpu=cpu).run(context) 110 except (KeyboardInterrupt, SystemExit): 111 return
112
class SecStructureScoring(object):
    """
    Integer constants naming secondary structure scoring modes.

    NOTE(review): presumably these are the values accepted by the HHsearch
    'ssm' option (see HHsearch.ss) -- confirm against the HHsearch manual.
    """

    OFF = 0
    AFTER = 1
    DURING = 2
    AFTER_PREDICTED = 3
    DURING_PREDICTED = 4
120
class HHsearch(object):
    """
    Wrapper around the HHsearch executable.

    Command line switches are kept in an internal dictionary and exposed
    as properties; a switch whose value is None or an empty string is left
    out of the generated command line.
    """

    class Options(object):
        """Names of the HHsearch command line switches managed here."""

        CPU = 'cpu'
        SS = 'ssm'
        MACT = 'mact'
        MAX_HITS = 'Z'
        MAX_ALI = 'B'
        MAX_E = 'E'
        MIN_P = 'p'

    def __init__(self, binary, db, cpu=None):
        """
        @param binary: name of, or path to, the HHsearch executable
        @param db: the database passed to HHsearch with -d
        @param cpu: value for the -cpu switch; None omits the switch
        """
        self._program = binary
        self._db = db
        self._opt = {}
        self._parser = csb.bio.io.HHOutputParser()

        self.cpu = cpu
        self.ss = None
        self.mac_threshold = None
        self.max_hits = None
        self.max_alignments = None
        self.max_evalue = None
        self.min_probability = None

    @property
    def program(self):
        """The HHsearch executable to invoke."""
        return self._program

    @program.setter
    def program(self, value):
        self._program = value

    @property
    def db(self):
        """The database passed to HHsearch with -d."""
        return self._db

    @db.setter
    def db(self, value):
        self._db = value

    @property
    def parser(self):
        """The parser applied to the HHsearch output file."""
        return self._parser

    @parser.setter
    def parser(self, value):
        self._parser = value

    @property
    def cpu(self):
        """Value of the -cpu switch, or None if unset."""
        return self._get(HHsearch.Options.CPU)

    @cpu.setter
    def cpu(self, value):
        self._opt[HHsearch.Options.CPU] = value

    @property
    def ss(self):
        """Value of the -ssm switch, or None if unset."""
        return self._get(HHsearch.Options.SS)

    @ss.setter
    def ss(self, value):
        self._opt[HHsearch.Options.SS] = value

    @property
    def mac_threshold(self):
        """Value of the -mact switch, or None if unset."""
        return self._get(HHsearch.Options.MACT)

    @mac_threshold.setter
    def mac_threshold(self, value):
        self._opt[HHsearch.Options.MACT] = value

    @property
    def max_hits(self):
        """Value of the -Z switch, or None if unset."""
        return self._get(HHsearch.Options.MAX_HITS)

    @max_hits.setter
    def max_hits(self, value):
        self._opt[HHsearch.Options.MAX_HITS] = value

    @property
    def max_alignments(self):
        """Value of the -B switch, or None if unset."""
        return self._get(HHsearch.Options.MAX_ALI)

    @max_alignments.setter
    def max_alignments(self, value):
        self._opt[HHsearch.Options.MAX_ALI] = value

    @property
    def max_evalue(self):
        """Value of the -E switch, or None if unset."""
        return self._get(HHsearch.Options.MAX_E)

    @max_evalue.setter
    def max_evalue(self, value):
        self._opt[HHsearch.Options.MAX_E] = value

    @property
    def min_probability(self):
        """Value of the -p switch, or None if unset."""
        return self._get(HHsearch.Options.MIN_P)

    @min_probability.setter
    def min_probability(self, value):
        self._opt[HHsearch.Options.MIN_P] = value

    def _get(self, option):
        """Return the stored value of a switch, or None if never set."""
        return self._opt.get(option)

    def _options(self):
        """
        Render all set switches as a command line fragment.

        Switches holding None or '' are skipped. A boolean switch is
        rendered as a bare flag ('-name') when True and omitted when
        False; any other value is rendered as '-name value'.

        @return: the switches joined by single spaces ('' if none are set)
        """
        options = []

        for option, value in self._opt.items():
            if value is None or value == '':
                continue

            if isinstance(value, bool):
                # A False flag must not appear on the command line at all.
                if value:
                    options.append('-{0}'.format(option))
            else:
                options.append('-{0} {1}'.format(option, value))

        return ' '.join(options)

    def run(self, context):
        """
        Run HHsearch for a single context.

        The query is written to a temporary file, HHsearch writes its
        report to a second temporary file, and the parsed report is stored
        in context.result.

        @param context: object providing 'query' and accepting 'result'
        @return: the same context, with its result populated
        """
        with csb.io.TempFile() as q:

            q.write(context.query)
            # Make sure the query is on disk before the child process reads it.
            q.flush()

            with csb.io.TempFile() as o:

                # NOTE(review): paths are interpolated unquoted; a program or
                # database path containing spaces or shell metacharacters may
                # break the command -- confirm how Shell.runstrict runs it.
                cmd = '{0.program} -i {1} -d {0.db} -o {2} {3}'.format(self, q.name, o.name, self._options())
                csb.io.Shell.runstrict(cmd)

                context.result = self.parser.parse_file(o.name)
                return context

    def runmany(self, contexts, workers=mp.cpu_count(), cpu=1):
        """
        Run HHsearch for several contexts in a pool of worker processes.

        Each worker builds its own HHsearch instance from this object's
        program and db only; switches other than cpu that were set on this
        instance are not forwarded to the workers.

        @param contexts: sized collection of contexts to process
        @param workers: maximum number of worker processes; the default is
                        the CPU count, evaluated once when the class is
                        defined. Never more workers than contexts are used.
        @param cpu: value of the -cpu switch for each HHsearch process
        @return: list of values returned by the workers, in input order;
                 an entry is None if its worker was interrupted. Empty if
                 there are no contexts or the run was interrupted.
        """
        # mp.Pool rejects a size of zero, which is what the clamping below
        # would produce for an empty input; nothing to do in that case.
        if len(contexts) == 0:
            return []

        if workers > len(contexts):
            workers = len(contexts)

        results = []
        taskargs = [(self.program, self.db, cpu, c) for c in contexts]

        pool = mp.Pool(workers)

        try:
            results.extend(pool.map(_task, taskargs))
        except KeyboardInterrupt:
            # Deliberate best effort: return what we have on Ctrl+C.
            pass
        finally:
            pool.terminate()

        return results
274
def main():
    """Create the application runner and start it."""
    runner = AppRunner()
    runner.run()


if __name__ == '__main__':
    main()