1 """
2 Measure the precision and coverage of a fragment library stored in Rosetta
3 NNmake format.
4 """
5
6 import os
7 import multiprocessing
8 import matplotlib
9
10 import csb.apps
11
12 import csb.bio.io.wwpdb as wwpdb
13 import csb.bio.structure as structure
14 import csb.bio.fragments
15 import csb.bio.fragments.rosetta as rosetta
16 import csb.core
17 import csb.io.plots
24
26
27 @property
30
32
33 cpu = multiprocessing.cpu_count()
34 cmd = csb.apps.ArgHandler(self.program, __doc__)
35
36 cmd.add_scalar_option('pdb', 'p', str, 'the PDB database (a directory containing all PDB files)', required=True)
37 cmd.add_scalar_option('native', 'n', str, 'native structure of the target (PDB file)', required=True)
38 cmd.add_scalar_option('chain', 'c', str, 'chain identifier (if not specified, the first chain)', default=None)
39
40 cmd.add_scalar_option('top', 't', int, 'read top N fragments per position', default=25)
41 cmd.add_scalar_option('cpu', 'C', int, 'maximum degree of parallelism', default=cpu)
42 cmd.add_scalar_option('rmsd', 'r', float, 'RMSD cutoff for precision and coverage', default=1.5)
43 cmd.add_scalar_option('output', 'o', str, 'output directory', default='.')
44
45 cmd.add_boolean_option('save-structures', 's', 'create a PDB file for each fragment, superimposed over the native', default=False)
46
47 cmd.add_positional_argument('library', str, 'Fragment library file in Rosetta NNmake format')
48
49 return cmd
50
52
54
55 if not os.path.isdir(self.args.output):
56 PrecisionApp.exit('Output directory does not exist', code=ExitCodes.INVALID_DATA, usage=True)
57
58 for file in [self.args.native, self.args.library]:
59 if not os.path.isfile(file):
60 PrecisionApp.exit('File not found: {0}'.format(file), code=ExitCodes.INVALID_DATA, usage=True)
61
62 self.log('\nLoading {0.library} (top {0.top} per position)... '.format(self.args), ending='')
63
64 try:
65 library = rosetta.RosettaFragmentMap.read(self.args.library, top=self.args.top)
66 self.log('{0} fragments'.format(len(library)))
67
68 try:
69 native = wwpdb.RegularStructureParser(self.args.native).parse_structure()
70 native.accession = native.accession[:4]
71 if self.args.chain:
72 native = native.chains[self.args.chain]
73 else:
74 native = native.first_chain
75 except (structure.Broken3DStructureError, wwpdb.PDBParseError) as se:
76 raise ArgumentError("Can't parse native structure: " + str(se))
77
78 except (csb.core.ItemNotFoundError) as ce:
79 raise ArgumentError("Chain {0!s} not found in native structure".format(ce))
80
81 self.log('Superimposing fragments...\n')
82 si = LibrarySuperimposer(native, library, self.args.pdb, self.args.output,
83 save=self.args.save_structures, cutoff=self.args.rmsd)
84 matches = si.run(self.args.cpu)
85
86 info = si.plot(matches)
87 self.log(' RMSD Cutoff {0:>6.2f} A'.format(self.args.rmsd))
88 self.log(' AVG Precision {0.precision:>6.2f} %'.format(info))
89 self.log(' Coverage {0.coverage:>6.2f} %'.format(info))
90 self.log(' RW Precision {0.figure}'.format(si))
91
92
93
94 except ArgumentIOError as ioe:
95 PrecisionApp.exit(str(ioe), code=ExitCodes.INVALID_DATA)
96
97 except ArgumentError as ae:
98 PrecisionApp.exit(str(ae), code=ExitCodes.INVALID_DATA)
99
100 self.log('\nDone.')
101
104
107
109
110 - def __init__(self, precision, coverage):
113
115 return '{0.precision:6.2f} {0.coverage:6.2f}'.format(self)
116
119
121
122 - def __init__(self, native, library, pdb, output, save=False, cutoff=1.5):
145
147 try:
148 if self._out and not self._out.closed:
149 self._out.close()
150 except:
151 pass
152
153 @property
156
157 @property
160
161 - def run(self, cpu=1):
189
190 - def plot(self, matches):
191
192 residues = range(1, self._native.length + 1)
193 precision = []
194 precision2 = []
195 background = []
196 covered = set()
197
198 all = {}
199 positive = {}
200
201 for rank in residues:
202 all[rank] = 0
203 positive[rank] = 0
204
205 for match in matches:
206 for rank in range(match.qstart, match.qend + 1):
207 all[rank] += 1
208
209 if match.rmsd <= self._cutoff:
210 positive[rank] += 1
211 covered.add(rank)
212
213 assert sorted(all.keys()) == sorted(positive.keys()) == residues
214
215 for rank in residues:
216 if all[rank] == 0:
217 precision2.append(0)
218 background.append(0)
219 else:
220 p = positive[rank] * 100.0 / all[rank]
221 precision.append(p)
222 precision2.append(p)
223 background.append(100)
224
225 coverage = len(covered) * 100.0 / len(residues)
226 if len(precision) > 0:
227 avg_precision = sum(precision) / len(precision)
228 else:
229 avg_precision = 0
230
231 with csb.io.plots.Chart() as chart:
232
233 chart.plot.bar(residues, background, color='#f5f5f5', linewidth=None, edgecolor='#f5f5f5')
234 chart.plot.bar(residues, precision2, color='#5ba9da', linewidth=None, edgecolor='#5ba9da')
235
236 chart.plot.set_title(self._native.entry_id)
237 chart.plot.set_xlabel('Residue')
238 chart.plot.set_xlim(1, len(residues))
239 chart.plot.set_ylabel('Precision, %')
240 chart.plot.set_ylim(0, 100)
241
242 xaxis = chart.plot.axes.xaxis
243 yaxis = chart.plot.axes.yaxis
244
245 xaxis.set_major_locator(matplotlib.ticker.IndexLocator(10, 0))
246 xaxis.tick_bottom()
247 yaxis.tick_left()
248 for t in xaxis.get_major_ticks():
249 t.tick1On = False
250 t.tick2On = False
251 for t in xaxis.get_ticklabels():
252 t.set_fontsize(16)
253 for t in yaxis.get_ticklabels():
254 t.set_fontsize(16)
255
256 chart.plot.spines["right"].set_visible(False)
257 chart.plot.spines["top"].set_visible(False)
258
259 try:
260 chart.width = 15
261 chart.height = 5.5
262
263 chart.save(self._figure, chart.formats.PDF)
264
265 except IOError as io:
266 raise ArgumentIOError("Can't save figure: " + str(io))
267
268 return GlobalInfo(avg_precision, coverage)
269
270 -def rmsd(target, source, fragments, pdb, save=None):
271
272 matches = []
273
274 try:
275 src_file = wwpdb.find(source, [pdb])
276 if src_file is None:
277 raise IOError("Can't find structure {0} in {1}".format(source, pdb))
278
279 src_structure = wwpdb.RegularStructureParser(src_file).parse_structure()
280
281 except (IOError, structure.Broken3DStructureError, wwpdb.PDBParseError) as ex:
282 matches.append('Error parsing {0:5}: {1!s}'.format(source, ex))
283 return matches
284
285 for fn, fragment in enumerate(fragments):
286 try:
287 if fragment.chain not in ('_', '', None):
288 src_chain = src_structure.chains[fragment.chain]
289 else:
290 src_chain = src_structure.first_chain
291
292 try:
293 query = target.subregion(fragment.qstart, fragment.qend)
294 subject = src_chain.subregion(fragment.start, fragment.end, clone=True)
295
296 except IndexError:
297 matches.append('Fragment {1.source_id:>5} {0.start:>4}-{0.end:>4} is out of range'.format(fragment))
298 continue
299
300 si = query.align(subject)
301 match = csb.bio.fragments.FragmentMatch(fragment.id, qstart=fragment.qstart, qend=fragment.qend,
302 probability=None, rmsd=si.rmsd, tm_score=None, qlength=target.length)
303 matches.append(match)
304
305 if save:
306 dummy = structure.Structure(subject.entry_id)
307 dummy.chains.append(subject)
308 filename = '{0.qstart:}-{0.qend}-{0.source_id}.{1.entry_id}{2}.frag'.format(fragment, query, fn or '')
309 dummy.to_pdb(os.path.join(save, filename))
310
311 except (structure.Broken3DStructureError, IOError) as ex:
312 matches.append("Can't superimpose fragment {0}: {1!s}".format(fragment.id, ex))
313 continue
314
315 return matches
316
320
321
322 if __name__ == '__main__':
323 main()
324