1 """
2 Rosetta fragment libraries.
3
4 This module defines the L{RosettaFragmentMap} object, which describes a
5 fragment library in Rosetta NNmake format. L{RosettaFragmentMap} has a
6 static factory method for building a library from a fragment file:
7
8 >>> RosettaFragmentMap.read('fragments.txt')
9 <RosettaFragmentMap>
10
11 @note: Consider extracting L{RosettaFragmentMap.read} as a Rosetta
12 fragment parser which naturally belongs to csb.bio.io.
13 """
14
15 from csb.bio.structure import TorsionAnglesCollection, TorsionAngles
16 from csb.core import AbstractContainer
20 """
21 Container struct for a single rosetta fragment residue.
22
23 @param rank: residue position (in the source chain, 1-based)
24 @type rank: int
25 @param aa: amino acid
26 @type aa: str
27 @param ss: secondary structure
28 @type ss: str
29 @param torsion: torsion angles
30 @type torsion: L{csb.bio.structure.TorsionAngles}
31 """
32
33 - def __init__(self, rank, aa, ss, torsion, calpha=[]):
40
41 @property
44
45 @property
48
49 @property
52
54 """
55 @return: a deep copy of the struct
56 @rtype: L{ResidueInfo}
57 """
58 return ResidueInfo(self.rank, self.aa, self.ss, self.torsion.copy(), self.calpha)
59
61 """
62 Represents a single Rosetta fragment match.
63
64 @param source_id: entry ID of the source PDB chain (in accnC format)
65 @type source_id: str
66 @param qstart: start position in target (rank)
67 @type qstart: int
68 @param qend: end position in target (rank)
69 @type qend: int
70 @param start: start position in C{source} (rank)
71 @type start: int
72 @param end: end position in C{source} (rank)
73 @type end: int
74 @param score: score of the fragment
75 @type score: float
76 @param residues: fragment residue structs
77 @type residues: iterable of L{ResidueInfo}
78 """
79
80 - def __init__(self, source_id, qstart, qend, start, end, score, residues):
94
122
126
128 return iter(self._residues)
129
131 return len(self._residues)
132
134
135 out = []
136
137 for residue in self.residues:
138 line = ' {0.accession:4} {0.chain:1} {1.rank:>5} {1.aa:1} {1.ss:1} {1.phi:>8.3f} {1.psi:>8.3f} {1.omega:>8.3f} {0.score:>8.3f}'
139 out.append(line.format(self, residue))
140
141 return '\n'.join(out)
142
143 @staticmethod
145 """
146 Factory method: build a rosetta fragment from an assignment object.
147
148 @param assignment: source assignment
149 @type assignment: L{Assignment}
150
151 @rtype: L{RosettaFragment}
152 """
153 residues = []
154 a = assignment
155
156 for rank, aa, torsion, calpha in zip(range(a.start, a.end + 1), a.sequence, a.torsion, a.backbone):
157 residues.append(ResidueInfo(rank, aa, 'L', torsion, calpha))
158
159 return RosettaFragment(a.source_id, a.qstart, a.qend, a.start, a.end, 1 - (a.probability or 0.0), residues)
160
161 @property
164
165 @property
167 return self._source_id
168
169 @property
172
173 @property
176
177 @property
179 return '{0.source_id}:{0.start}-{0.end}'.format(self)
180
181 @property
184
185 @property
188
189 @property
192
193 @property
196
197 @property
200
201 @property
203 return tuple(self._residues)
204
205 @property
208
210 """
211 Rosetta fragment file formatter.
212
213 @param output: destination stream
214 @type output: file
215 """
216
219
220 @property
223
225 """
226 Write a new assignment origin.
227
228 @param qstart: target position
229 @type qstart: float
230 @param frags: fragments starting at that position (only their count is written)
231 @type frags: sized collection (anything supporting C{len})
232 """
233 self.output.write(' position: {0:>12} neighbors: {1:>12}\n\n'.format(qstart, len(frags)))
234
245
247 """
248 Write a new fragment residue.
249 @type fragment: L{RosettaFragment}
250 @type residue: L{ResidueInfo}
251 """
252 line = ' {0.accession:4} {0.chain:1} {1.rank:>5} {1.aa:1} {1.ss:1} {1.phi:>8.3f} {1.psi:>8.3f} {1.omega:>8.3f} {0.score:>8.3f}'
253 self.output.write(line.format(fragment, residue))
254
271
273 """
274 Represents a Rosetta fragment library.
275
276 @param fragments: library fragments
277 @type fragments: iterable of L{RosettaFragment}
278 @param length: target sequence's length. If not defined, the qend of the
279 last fragment will be used instead.
280 @type length: int
281 """
282
def __init__(self, fragments, length=None):
    """
    Build the library from C{fragments}, then settle the target length.

    @param fragments: library fragments; each one is handed to C{append}
    @type fragments: iterable of L{RosettaFragment}
    @param length: target sequence's length. When omitted, the largest
                   registered fragment end (C{_maxend}) is used instead.
    @type length: int
    """
    # Bookkeeping state is initialised up front, before any fragment is appended
    self._length = None
    self._fragments = []
    self._sources = set()
    self._starts = set()
    self._ends = set()
    self._unconf = set()

    for fragment in fragments:
        self.append(fragment)

    if length is None:
        self._length = self._maxend
    else:
        # An explicit length must reach at least the furthest fragment end
        assert length >= self._maxend
        self._length = int(length)
301
302 @property
304 return max(self._ends or [0])
305
318
320 return len(self._fragments)
321
322 @property
324 return self._fragments
325
326 @property
328 return tuple(sorted(self._unconf))
329
330 @property
333
334 @property
336 return tuple(self._sources)
337
338 @property
340 return tuple(sorted(self._starts))
341
343 """
344 @return: a tuple of all fragments, extracted from the specified C{source}.
345
346 @param accession: source entry ID
347 @type accession: str
348 """
349 return tuple(f for f in self._fragments if f.accession == accession)
350
352 """
353 @return: a tuple of all fragments, starting at the specified target position.
354
355 @param qrank: fragment origin (in target, rank)
356 @type qrank: int
357 """
358 return tuple(f for f in self._fragments if f.qstart == qrank)
359
def at(self, qrank):
    """
    Collect every fragment whose target span contains C{qrank}.

    @param qrank: position in target, rank
    @type qrank: int

    @return: all fragments with C{qstart <= qrank <= qend}, in the order
             they are stored in the library
    @rtype: tuple
    """
    covering = []

    for fragment in self._fragments:
        if fragment.qstart <= qrank <= fragment.qend:
            covering.append(fragment)

    return tuple(covering)
368
370 """
371 Mark the specified position in the target as a low-confidence one.
372
373 @param rank: position in target
374 @type rank: int
375 """
376 if not 1 <= rank <= self._length:
377 raise ValueError(rank)
378
379 self._unconf.add(rank)
380
382 """
383 Append C{fragment} to the library, if the fragment is anchored
384 around a low-confidence position.
385
386 @type fragment: L{RosettaFragment}
387 """
388 if not self._unconf:
389 raise ValueError('no unconfident regions to complement')
390
391 f = fragment
392 for rank in self._unconf:
393 if f.qstart < rank < f.qend:
394 if (rank - f.qstart + 1) > 0.4 * (f.qend - f.qstart + 1):
395 self.append(f)
396 break
397
def sort(self, field='score', reverse=False):
    """
    Sort the library's fragments in place by one of their attributes.

    @param field: name of the fragment attribute to order by
    @type field: str
    @param reverse: if True, sort in descending order
    @type reverse: bool
    """

    def sort_key(fragment):
        return getattr(fragment, field)

    self._fragments.sort(key=sort_key, reverse=reverse)
404
423
@staticmethod
def read(file, top=None):
    """
    Read a standard fragment file.

    The file is processed line by line:
      - a line starting with C{' position:'} sets the current target
        start position and resets the per-position fragment counter;
      - a blank line terminates the fragment being read;
      - any other line is split on whitespace and parsed as one residue:
        accession, chain, rank, amino acid, secondary structure,
        phi, psi, omega, score.

    A fragment still open when the file ends (no trailing blank line)
    is stored as well instead of being dropped.

    @param file: file name
    @type file: str
    @param top: if defined, read only C{top} fragments per start position
                (default=all)
    @type top: int or None

    @return: parsed fragment library
    @rtype: L{RosettaFragmentMap}
    """

    def ang(a):
        # Shift an angle lying outside [-180, 180] by one full turn
        a = float(a)
        if a < -180:
            return 360 + a
        elif a > 180:
            return -360 + a
        else:
            return a

    frags = []

    qstart, qend, start, end = 0, 0, 0, 0
    source_id = ''
    score = None
    count = 0
    residues = []

    in_frag = False

    # The context manager guarantees the file handle is released,
    # even if a malformed line raises while parsing
    with open(file) as stream:
        for line in stream:

            if line.startswith(' position:'):
                qstart = int(line.split()[1])
                count = 0

            elif not line.strip():
                if in_frag:
                    count += 1
                    # a falsy top (None or 0) means "keep everything"
                    if not top or count <= top:
                        frags.append(RosettaFragment(source_id, qstart, qend, start, end, score, residues))
                    in_frag = False
                    source_id = ''
                    start = 0
                    end = 0
                    score = None
                    residues = []

            else:
                fields = line.split()
                if not in_frag:
                    # first residue of a new fragment
                    start = int(fields[2])
                    qend = qstart
                    in_frag = True
                else:
                    qend += 1

                # these are refreshed on every residue, so after the last
                # one they hold the fragment's final values
                end = int(fields[2])
                source_id = fields[0].lower() + fields[1]
                score = float(fields[8])
                rank = int(fields[2])
                aa, ss = fields[3:5]
                phi, psi, omega = map(ang, fields[5:8])
                residues.append(ResidueInfo(rank, aa, ss, TorsionAngles(phi, psi, omega)))

    # Flush a fragment left open by a missing trailing blank line
    if in_frag:
        count += 1
        if not top or count <= top:
            frags.append(RosettaFragment(source_id, qstart, qend, start, end, score, residues))

    return RosettaFragmentMap(frags)
493