Package csb :: Package bio :: Package fragments :: Module rosetta
[frames] | no frames]

Source Code for Module csb.bio.fragments.rosetta

  1  """ 
  2  Rosetta fragment libraries. 
  3   
  4  This module defines the L{RosettaFragmentMap} object, which describes a 
  5  fragment library in Rosetta NNmake format. L{RosettaFragmentMap} has a  
  6  static factory method for building a library from a fragment file: 
  7       
  8      >>> RosettaFragmentMap.read('fragments.txt') 
  9      <RosettaFragmentMap> 
 10       
 11  @note: Consider extracting L{RosettaFragmentMap.read} as a Rosetta 
 12         fragment parser which naturally belongs to csb.bio.io.       
 13  """ 
 14   
 15  from csb.bio.structure import TorsionAnglesCollection, TorsionAngles 
 16  from csb.core import AbstractContainer 
class ResidueInfo(object):
    """
    Container struct for a single rosetta fragment residue.

    @param rank: residue position (in the source chain, 1-based)
    @type rank: int
    @param aa: amino acid
    @type aa: str
    @param ss: secondary structure
    @type ss: str
    @param torsion: torsion angles
    @type torsion: L{csb.bio.structure.TorsionAngles}
    @param calpha: optional C-alpha coordinates of the residue; stored as a
                   tuple (empty when not provided)
    @type calpha: iterable
    """

    def __init__(self, rank, aa, ss, torsion, calpha=()):
        # The default is an immutable empty tuple rather than a shared
        # mutable list; the stored value is a tuple either way.

        self.rank = rank
        self.aa = aa
        self.ss = ss
        self.torsion = torsion
        self.calpha = tuple(calpha)

    @property
    def phi(self):
        """
        Phi angle of C{torsion}; 0.0 when the stored value is falsy
        (for example None).
        """
        return self.torsion.phi or 0.

    @property
    def psi(self):
        """
        Psi angle of C{torsion}; 0.0 when the stored value is falsy
        (for example None).
        """
        return self.torsion.psi or 0.

    @property
    def omega(self):
        """
        Omega angle of C{torsion}; 0.0 when the stored value is falsy
        (for example None).
        """
        return self.torsion.omega or 0.

    def copy(self):
        """
        @return: a copy of the struct; the torsion object is duplicated
                 through its own C{copy()} method
        @rtype: L{ResidueInfo}
        """
        return ResidueInfo(self.rank, self.aa, self.ss, self.torsion.copy(), self.calpha)
59
class RosettaFragment(object):
    """
    A single fragment match of a Rosetta fragment library.

    The target span (C{qstart}..C{qend}), the source span
    (C{start}..C{end}) and the number of C{residues} must all have the
    same length, and C{source_id} must be exactly 5 characters long.

    @param source_id: entry ID of the source PDB chain (in accnC format)
    @type source_id: str
    @param qstart: start position in target (rank)
    @type qstart: int
    @param qend: end position in target (rank)
    @type qend: int
    @param start: start position in C{source} (rank)
    @type start: int
    @param end: end position in C{source} (rank)
    @type end: int
    @param score: score of the fragment
    @type score: float
    @param residues: fragment residue structs
    @type residues: iterable of L{ResidueInfo}

    @raise ValueError: when the span lengths and the residue count
                       disagree, or when C{source_id} is not 5 characters
    """

    def __init__(self, source_id, qstart, qend, start, end, score, residues):

        target_span = qend - qstart + 1
        source_span = end - start + 1

        if not (target_span == source_span == len(residues)):
            raise ValueError()
        if len(source_id) != 5:
            raise ValueError(source_id)

        self._source_id = str(source_id)
        self._qstart = int(qstart)
        self._qend = int(qend)
        self._start = int(start)
        self._end = int(end)
        self._score = float(score)
        self._residues = list(residues)

    def subregion(self, qstart, qend):
        """
        Cut out the part of this fragment that covers C{qstart}..C{qend}.

        @param qstart: start position in target
        @type qstart: int
        @param qend: end position in target
        @type qend: int

        @return: a new fragment built from copies of the covered residues
        @rtype: L{RosettaFragment}

        @raise ValueError: when the requested region is not inside the
                           target span of this fragment
        """

        if not self.qstart <= qstart <= qend <= self.qend:
            raise ValueError('Invalid subregion')

        # distance of the requested region from the fragment's first residue
        offset = qstart - self.qstart
        assert 0 <= offset
        span = qend - qstart + 1

        # shift the source coordinates by the same amounts as the target ones
        new_start = self.start + offset
        new_end = self.end + (qend - self.qend)

        window = self.residues[offset:offset + span]
        copies = [member.copy() for member in window]
        assert len(copies) == span

        return RosettaFragment(self.source_id, qstart, qend, new_start, new_end, self.score, copies)

    def __lt__(self, other):
        # fragments order by score; a lower score means a better fragment
        return self.score < other.score

    def __iter__(self):
        return iter(self._residues)

    def __len__(self):
        return len(self._residues)

    def __str__(self):
        # one formatted line per residue, joined by newlines
        template = ' {0.accession:4} {0.chain:1} {1.rank:>5} {1.aa:1} {1.ss:1} {1.phi:>8.3f} {1.psi:>8.3f} {1.omega:>8.3f} {0.score:>8.3f}'
        return '\n'.join(template.format(self, residue) for residue in self.residues)

    @staticmethod
    def from_object(assignment):
        """
        Factory method: build a rosetta fragment from an assignment object.

        Every residue gets the secondary structure code 'L'; the fragment
        score is C{1 - probability} (a missing probability counts as 0.0).

        @param assignment: source assignment
        @type assignment: L{Assignment}

        @rtype: L{RosettaFragment}
        """
        a = assignment
        ranks = range(a.start, a.end + 1)

        residues = [
            ResidueInfo(rank, aa, 'L', torsion, calpha)
            for rank, aa, torsion, calpha in zip(ranks, a.sequence, a.torsion, a.backbone)
        ]

        return RosettaFragment(a.source_id, a.qstart, a.qend, a.start, a.end,
                               1 - (a.probability or 0.0), residues)

    @property
    def length(self):
        """Number of residues in the fragment."""
        return len(self)

    @property
    def source_id(self):
        """Entry ID of the source chain (5 characters)."""
        return self._source_id

    @property
    def accession(self):
        """First 4 characters of C{source_id}."""
        return self.source_id[:4]

    @property
    def chain(self):
        """Everything after the first 4 characters of C{source_id}."""
        return self.source_id[4:]

    @property
    def id(self):
        """Fragment identifier of the form C{source_id:start-end}."""
        return '{0.source_id}:{0.start}-{0.end}'.format(self)

    @property
    def qstart(self):
        """Start position in target."""
        return self._qstart

    @property
    def qend(self):
        """End position in target."""
        return self._qend

    @property
    def start(self):
        """Start position in source."""
        return self._start

    @property
    def end(self):
        """End position in source."""
        return self._end

    @property
    def score(self):
        """Fragment score as a float."""
        return self._score

    @property
    def residues(self):
        """Residue structs of the fragment, as a tuple."""
        return tuple(self._residues)

    @property
    def torsion(self):
        """Torsion angles of all residues, wrapped in a L{TorsionAnglesCollection}."""
        return TorsionAnglesCollection([r.torsion for r in self._residues], start=0)
208
class OutputBuilder(object):
    """
    Formatter which writes Rosetta fragment file records to a stream.

    @param output: destination stream; only its C{write} method is used
    @type output: file
    """

    def __init__(self, output):
        self._out = output

    @property
    def output(self):
        """The destination stream passed to the constructor."""
        return self._out

    def add_position(self, qstart, frags):
        """
        Write the header of a new assignment origin, followed by an
        empty line.

        @param qstart: target position
        @type qstart: int
        @param frags: the fragments starting at that position; only their
                      number is written
        @type frags: sized collection of L{RosettaFragment}
        """
        header = ' position: {0:>12} neighbors: {1:>12}\n\n'.format(qstart, len(frags))
        self.output.write(header)

    def add_fragment(self, fragment):
        """
        Write a new fragment: one line per residue, then an empty line.

        @type fragment: L{RosettaFragment}
        """
        for member in fragment.residues:
            # add_residue does not terminate the line itself, so that
            # subclasses can append extra columns before the line break
            self.add_residue(fragment, member)
            self.output.write('\n')

        self.output.write('\n')

    def add_residue(self, fragment, residue):
        """
        Write a single fragment residue, without a trailing line break.

        @type fragment: L{RosettaFragment}
        @type residue: L{ResidueInfo}
        """
        template = ' {0.accession:4} {0.chain:1} {1.rank:>5} {1.aa:1} {1.ss:1} {1.phi:>8.3f} {1.psi:>8.3f} {1.omega:>8.3f} {0.score:>8.3f}'
        record = template.format(fragment, residue)
        self.output.write(record)
254
class ExtendedOutputBuilder(OutputBuilder):
    """
    Builder for non-standard fragment files, in which every residue line
    ends with the CA coordinates of that residue.
    """

    def add_residue(self, fragment, residue):
        """
        Write the standard residue record, then append three coordinate
        columns. Residues without CA coordinates get zeros.

        @type fragment: L{RosettaFragment}
        @type residue: L{ResidueInfo}
        """

        super(ExtendedOutputBuilder, self).add_residue(fragment, residue)

        coords = residue.calpha if residue.calpha else [0, 0, 0]

        self.output.write(' {0:>7.3f} {1:>7.3f} {2:>7.3f}'.format(*coords))
271
class RosettaFragmentMap(AbstractContainer):
    """
    Represents a Rosetta fragment library.

    @param fragments: library fragments
    @type fragments: iterable of L{RosettaFragment}
    @param length: target sequence's length. If not defined, the qend of the
                   last fragment will be used instead.
    @type length: int
    """

    def __init__(self, fragments, length=None):

        self._fragments = []

        self._unconf = set()
        self._sources = set()
        self._starts = set()
        self._ends = set()
        # Stays None while the initial fragments are appended, so that
        # append() performs no range check during construction.
        self._length = None

        for f in fragments:
            self.append(f)

        if length is not None:
            assert length >= self._maxend
            self._length = int(length)
        else:
            self._length = self._maxend

    @property
    def _maxend(self):
        # largest qend seen so far; 0 for an empty library
        return max(self._ends or [0])

    def append(self, fragment):
        """
        Append a new L{RosettaFragment}

        @type fragment: L{RosettaFragment}

        @raise ValueError: when the library length is set (non-zero) and
                           the fragment ends beyond it
        """

        if self._length and fragment.qend > self._length:
            raise ValueError('fragment out of range')

        self._fragments.append(fragment)
        self._sources.add(fragment.accession)
        self._starts.add(fragment.qstart)
        self._ends.add(fragment.qend)

    def __len__(self):
        return len(self._fragments)

    @property
    def _children(self):
        # NOTE(review): presumably the hook L{AbstractContainer} relies on
        # to reach the contained items -- confirm against csb.core.
        return self._fragments

    @property
    def unconfident_positions(self):
        """Sorted tuple of all positions marked as low-confidence."""
        return tuple(sorted(self._unconf))

    @property
    def size(self):
        """Number of fragments in the library."""
        return len(self)

    @property
    def sources(self):
        """Tuple of the distinct accessions of all fragments (unordered)."""
        return tuple(self._sources)

    @property
    def start_positions(self):
        """Sorted tuple of the distinct target start positions."""
        return tuple(sorted(self._starts))

    def fromsource(self, accession):
        """
        @return: a tuple of all fragments, extracted from the specified C{source}.

        @param accession: source entry ID
        @type accession: str
        """
        return tuple(f for f in self._fragments if f.accession == accession)

    def starting_at(self, qrank):
        """
        @return: a tuple of all fragments, starting at the specified target position.

        @param qrank: fragment origin (in target, rank)
        @type qrank: int
        """
        return tuple(f for f in self._fragments if f.qstart == qrank)

    def at(self, qrank):
        """
        @return: a tuple of all fragments, covering the specified position.

        @param qrank: position in target, rank
        @type qrank: int
        """
        return tuple(f for f in self._fragments if f.qstart <= qrank <= f.qend)

    def mark_unconfident(self, rank):
        """
        Mark the specified position in the target as a low-confidence one.

        @param rank: position in target
        @type rank: int

        @raise ValueError: when C{rank} is outside 1..library length
        """
        if not 1 <= rank <= self._length:
            raise ValueError(rank)

        self._unconf.add(rank)

    def complement(self, fragment):
        """
        Append C{fragment} to the library, if the fragment is anchored
        around a low-confidence position.

        The fragment is appended (once) when some low-confidence rank lies
        strictly inside it and more than 40% of the fragment's length is
        located at or before that rank.

        @type fragment: L{RosettaFragment}

        @raise ValueError: when no position has been marked as unconfident
        """
        if not self._unconf:
            raise ValueError('no unconfident regions to complement')

        f = fragment
        for rank in self._unconf:
            if f.qstart < rank < f.qend:
                if (rank - f.qstart + 1) > 0.4 * (f.qend - f.qstart + 1):
                    self.append(f)
                    # one matching position is enough; never append twice
                    break

    def sort(self, field='score', reverse=False):
        """
        Sort the fragments in the library, in place.

        @param field: name of the fragment attribute to sort by
        @type field: str
        @param reverse: sort in descending order
        @type reverse: bool
        """

        self._fragments.sort(key=lambda i: getattr(i, field), reverse=reverse)

    def dump(self, file, builder=OutputBuilder):
        """
        Write the library to a Rosetta fragment file.

        @param file: destination file name
        @type file: str
        @param builder: formatter class; instantiated with the open
                        output stream
        @type builder: type (L{OutputBuilder} or a subclass)
        """

        with open(file, 'w') as out:
            writer = builder(out)

            for qstart in self.start_positions:

                frags = self.starting_at(qstart)
                writer.add_position(qstart, frags)

                for fragment in frags:
                    writer.add_fragment(fragment)

    @staticmethod
    def read(file, top=None):
        """
        Read a standard fragment file.

        @param file: file name
        @type file: str
        @param top: if defined, read only C{top} fragments per start position
                    (default=all)
        @type top: int or None

        @return: parsed fragment library
        @rtype: L{RosettaFragmentMap}
        """
        # This is the format (rosetta_fragments/nnmake/makeoutput.f):
        # source chain rank residue ss phi psi omega score dme dme_f best_nn_ss_type dipolar+noe 'P' position 'F' fragment#

        def ang(a):
            # values beyond +/-180 are shifted by one full turn (360)
            a = float(a)
            if a < -180:
                return 360 + a
            elif a > 180:
                return -360 + a
            else:
                return a

        frags = []

        qstart, qend, start, end = 0, 0, 0, 0
        source_id = ''
        score = None
        count = 0
        residues = []

        in_frag = False

        # The context manager guarantees that the file handle is closed,
        # also when a malformed line raises in the middle of parsing.
        with open(file) as stream:

            for line in stream:

                if line.startswith(' position:'):
                    # a new assignment origin; restart the per-position counter
                    qstart = int(line.split()[1])
                    count = 0

                elif not line.strip():
                    # an empty line terminates the fragment being read, if any
                    if in_frag:
                        count += 1
                        if not top or count <= top:
                            frags.append(RosettaFragment(source_id, qstart, qend, start, end, score, residues))
                        in_frag = False
                        source_id = ''
                        start = 0
                        end = 0
                        score = None
                        residues = []

                else:
                    fields = line.split()
                    if not in_frag:
                        # first residue line of a fragment
                        start = int(fields[2])
                        qend = qstart
                        in_frag = True
                    else:
                        qend += 1

                    end = int(fields[2])
                    source_id = fields[0].lower() + fields[1]
                    score = float(fields[8])
                    rank = int(fields[2])
                    aa, ss = fields[3:5]
                    phi, psi, omega = map(ang, fields[5:8])
                    residues.append(ResidueInfo(rank, aa, ss, TorsionAngles(phi, psi, omega)))

        # NOTE: a last fragment which is not followed by an empty line is
        # not added to the library.
        return RosettaFragmentMap(frags)
493