csb.bio.fragments.rosetta

1 """ 2 Rosetta fragment libraries. 3 4 This module defines the L{RosettaFragmentMap} objects, which describes a 5 fragment library in Rosetta NNmake format. L{RosettaFragmentMap} has a 6 static factory method for building a library from a fragment file: 7 8 >>> RosettaFragmentMap.read('fragments.txt') 9 <RosettaFragmentMap> 10 11 @note: Consider extracting L{RosettaFragmentMap.read} as a Rosetta 12 fragment parser which naturally belongs to csb.bio.io. 13 """ 14 15 from csb.bio.structure import TorsionAnglesCollection, TorsionAngles 16 from csb.core import AbstractContainer

class ResidueInfo(object):
    """
    Container struct for a single rosetta fragment residue.

    @param rank: residue position (in the source chain, 1-based)
    @type rank: int
    @param aa: amino acid
    @type aa: str
    @param ss: secondary structure
    @type ss: str
    @param torsion: torsion angles
    @type torsion: L{csb.bio.structure.TorsionAngles}
    @param calpha: optional coordinates attached to the residue; copied
                   into a tuple (empty tuple when omitted)
    @type calpha: iterable
    """

    # The default is an immutable empty tuple rather than a shared list
    # object; the value is copied into a tuple either way.
    def __init__(self, rank, aa, ss, torsion, calpha=()):

        self.rank = rank
        self.aa = aa
        self.ss = ss
        self.torsion = torsion
        self.calpha = tuple(calpha)

    @property
    def phi(self):
        """Phi angle, or 0.0 when the torsion value is missing (or zero)."""
        return self.torsion.phi or 0.

    @property
    def psi(self):
        """Psi angle, or 0.0 when the torsion value is missing (or zero)."""
        return self.torsion.psi or 0.

    @property
    def omega(self):
        """Omega angle, or 0.0 when the torsion value is missing (or zero)."""
        return self.torsion.omega or 0.

    def copy(self):
        """
        @return: a deep copy of the struct
        @rtype: L{ResidueInfo}
        """
        # The torsion object is duplicated through its own copy(); calpha is
        # an immutable tuple and can be passed along as is.
        return ResidueInfo(self.rank, self.aa, self.ss, self.torsion.copy(), self.calpha)

59

class RosettaFragment(object):
    """
    Represents a single Rosetta fragment match.

    @param source_id: entry ID of the source PDB chain (in accnC format)
    @type source_id: str
    @param qstart: start position in target (rank)
    @type qstart: int
    @param qend: end position in target (rank)
    @type qend: int
    @param start: start position in C{source} (rank)
    @type start: int
    @param end: end position in C{source} (rank)
    @type end: int
    @param score: score of the fragment
    @type score: float
    @param residues: fragment residue structs
    @type residues: iterable of L{ResidueInfo}

    @raise ValueError: if the target span, the source span and the number
                       of residues disagree, or if C{source_id} is not
                       exactly 5 characters long
    """

    def __init__(self, source_id, qstart, qend, start, end, score, residues):

        # Normalize before validating: C{residues} is documented as an
        # arbitrary iterable, so it must be materialized before len() is
        # taken (generators and iterators have no length).
        source_id = str(source_id)
        qstart, qend = int(qstart), int(qend)
        start, end = int(start), int(end)
        residues = list(residues)

        if not (qend - qstart + 1) == (end - start + 1) == len(residues):
            raise ValueError(
                'Inconsistent fragment: target {0}-{1}, source {2}-{3}, {4} residues'.format(
                    qstart, qend, start, end, len(residues)))
        if not len(source_id) == 5:
            raise ValueError(source_id)

        self._source_id = source_id
        self._qstart = qstart
        self._qend = qend
        self._start = start
        self._end = end
        self._score = float(score)
        self._residues = residues

    def subregion(self, qstart, qend):
        """
        Extract a subregion from the fragment.

        @param qstart: start position in target
        @type qstart: int
        @param qend: end position in target
        @type qend: int

        @return: a new fragment (deep copy)
        @rtype: L{RosettaFragment}

        @raise ValueError: if the requested region is not contained in
                           this fragment
        """

        if not self.qstart <= qstart <= qend <= self.qend:
            raise ValueError('Invalid subregion')

        # Shift the source coordinates by the same offsets as the target ones
        start = qstart - self.qstart + self.start
        end = qend - self.qend + self.end

        diff = qstart - self.qstart
        size = qend - qstart + 1
        assert 0 <= diff

        residues = [r.copy() for r in self.residues[diff: diff + size]]
        assert len(residues) == size

        return RosettaFragment(self.source_id, qstart, qend, start, end, self.score, residues)

    def __lt__(self, other):
        # lower score means a better fragment
        return self.score < other.score

    def __iter__(self):
        return iter(self._residues)

    def __len__(self):
        return len(self._residues)

    def __str__(self):
        # One formatted line per residue, joined by newlines
        line = ' {0.accession:4} {0.chain:1} {1.rank:>5} {1.aa:1} {1.ss:1} {1.phi:>8.3f} {1.psi:>8.3f} {1.omega:>8.3f} {0.score:>8.3f}'
        return '\n'.join(line.format(self, residue) for residue in self.residues)

    @staticmethod
    def from_object(assignment):
        """
        Factory method: build a rosetta fragment from an assignment object.

        @param assignment: source assignment
        @type assignment: L{Assignment}

        @rtype: L{RosettaFragment}
        """
        a = assignment
        residues = []

        # Every residue gets the secondary structure code 'L'
        for rank, aa, torsion, calpha in zip(range(a.start, a.end + 1), a.sequence, a.torsion, a.backbone):
            residues.append(ResidueInfo(rank, aa, 'L', torsion, calpha))

        # The score is the complement of the assignment probability;
        # a missing probability counts as 0
        score = 1 - (a.probability or 0.0)

        return RosettaFragment(a.source_id, a.qstart, a.qend, a.start, a.end, score, residues)

    @property
    def length(self):
        """Number of residues in the fragment."""
        return len(self)

    @property
    def source_id(self):
        """Full 5-character source entry ID."""
        return self._source_id

    @property
    def accession(self):
        """First 4 characters of the source ID."""
        return self.source_id[:4]

    @property
    def chain(self):
        """Everything after the first 4 characters of the source ID."""
        return self.source_id[4:]

    @property
    def id(self):
        """Identifier in 'source_id:start-end' form."""
        return '{0.source_id}:{0.start}-{0.end}'.format(self)

    @property
    def qstart(self):
        """Start position in target."""
        return self._qstart

    @property
    def qend(self):
        """End position in target."""
        return self._qend

    @property
    def start(self):
        """Start position in source."""
        return self._start

    @property
    def end(self):
        """End position in source."""
        return self._end

    @property
    def score(self):
        """Fragment score (lower compares as smaller, see __lt__)."""
        return self._score

    @property
    def residues(self):
        """Read-only (tuple) snapshot of the residue structs."""
        return tuple(self._residues)

    @property
    def torsion(self):
        """Torsion angles of all residues, wrapped in a TorsionAnglesCollection."""
        return TorsionAnglesCollection([r.torsion for r in self._residues], start=0)

208

class OutputBuilder(object):
    """
    Rosetta fragment file formatter.

    @param output: destination stream
    @type output: file
    """

    def __init__(self, output):
        self._out = output

    @property
    def output(self):
        """The destination stream given at construction time."""
        return self._out

    def add_position(self, qstart, frags):
        """
        Write a new assignment origin.

        @param qstart: target position
        @type qstart: int
        @param frags: the fragments starting at that position; only their
                      count is written
        @type frags: sized collection
        """
        header = ' position: {0:>12} neighbors: {1:>12}\n\n'.format(qstart, len(frags))
        self.output.write(header)

    def add_fragment(self, fragment):
        """
        Write a new fragment: one line per residue, followed by an
        empty line.

        @type fragment: L{RosettaFragment}
        """
        for item in fragment.residues:
            # add_residue() emits the columns only; the line break is ours
            self.add_residue(fragment, item)
            self.output.write('\n')

        # blank separator line after the last residue
        self.output.write('\n')

    def add_residue(self, fragment, residue):
        """
        Write a new fragment residue (without a trailing line break).

        @type fragment: L{RosettaFragment}
        @type residue: L{ResidueInfo}
        """
        template = (' {0.accession:4} {0.chain:1} {1.rank:>5} {1.aa:1} {1.ss:1}'
                    ' {1.phi:>8.3f} {1.psi:>8.3f} {1.omega:>8.3f} {0.score:>8.3f}')
        self.output.write(template.format(fragment, residue))

254

class ExtendedOutputBuilder(OutputBuilder):
    """
    Builds non-standard fragment files, which contain the CA coordinates of
    each residue at the end of each line.
    """

    def add_residue(self, fragment, residue):
        """
        Write the standard residue columns, then append three coordinate
        columns taken from C{residue.calpha}.

        @type fragment: L{RosettaFragment}
        @type residue: L{ResidueInfo}
        """
        # standard columns are produced by the base formatter
        super(ExtendedOutputBuilder, self).add_residue(fragment, residue)

        # residues without coordinates are written as 0, 0, 0
        coords = residue.calpha or [0, 0, 0]
        suffix = ' {0:>7.3f} {1:>7.3f} {2:>7.3f}'.format(*coords)
        self.output.write(suffix)

271

class RosettaFragmentMap(AbstractContainer):
    """
    Represents a Rosetta fragment library.

    @param fragments: library fragments
    @type fragments: iterable of L{RosettaFragment}
    @param length: target sequence's length. If not defined, the qend of the
                   last fragment will be used instead.
    @type length: int
    """

    def __init__(self, fragments, length=None):

        self._fragments = []

        self._unconf = set()
        self._sources = set()
        self._starts = set()
        self._ends = set()
        # Stays None while the initial fragments are loaded, so that
        # append() performs no range check during construction
        self._length = None

        for f in fragments:
            self.append(f)

        if length is not None:
            assert length >= self._maxend
            self._length = int(length)
        else:
            self._length = self._maxend

    @property
    def _maxend(self):
        # Largest qend seen so far; 0 for an empty library
        return max(self._ends or [0])

    def append(self, fragment):
        """
        Append a new L{RosettaFragment}

        @raise ValueError: if the library length is known (non-zero) and
                           the fragment ends beyond it
        """
        # NOTE(review): a library created empty keeps a length of 0, which
        # disables this check for all later appends -- confirm this is intended.
        if self._length and fragment.qend > self._length:
            raise ValueError('fragment out of range')

        self._fragments.append(fragment)
        self._sources.add(fragment.accession)
        self._starts.add(fragment.qstart)
        self._ends.add(fragment.qend)

    def __len__(self):
        return len(self._fragments)

    @property
    def _children(self):
        # presumably the hook AbstractContainer uses to reach the items -- TODO confirm
        return self._fragments

    @property
    def unconfident_positions(self):
        """Sorted tuple of the positions marked as low-confidence."""
        return tuple(sorted(self._unconf))

    @property
    def size(self):
        """Number of fragments in the library."""
        return len(self)

    @property
    def sources(self):
        """Tuple of the distinct source accessions (unordered)."""
        return tuple(self._sources)

    @property
    def start_positions(self):
        """Sorted tuple of the distinct fragment start positions in target."""
        return tuple(sorted(self._starts))

    def fromsource(self, accession):
        """
        @return: a tuple of all fragments, extracted from the specified C{source}.

        @param accession: source entry ID
        @type accession: str
        """
        return tuple(f for f in self._fragments if f.accession == accession)

    def starting_at(self, qrank):
        """
        @return: a tuple of all fragments, starting at the specified target position.

        @param qrank: fragment origin (in target, rank)
        @type qrank: int
        """
        return tuple(f for f in self._fragments if f.qstart == qrank)

    def at(self, qrank):
        """
        @return: a tuple of all fragments, covering the specified position.

        @param qrank: position in target, rank
        @type qrank: int
        """
        return tuple(f for f in self._fragments if f.qstart <= qrank <= f.qend)

    def mark_unconfident(self, rank):
        """
        Mark the specified position in the target as a low-confidence one.

        @param rank: position in target
        @type rank: int

        @raise ValueError: if C{rank} is outside 1..length
        """
        if not 1 <= rank <= self._length:
            raise ValueError(rank)

        self._unconf.add(rank)

    def complement(self, fragment):
        """
        Append C{fragment} to the library, if the fragment is anchored
        around a low-confidence position.

        A fragment qualifies when some low-confidence position lies strictly
        inside it and the stretch from the fragment start up to and including
        that position is longer than 40% of the fragment.

        @type fragment: L{RosettaFragment}

        @raise ValueError: if no position has been marked as low-confidence
        """
        if not self._unconf:
            raise ValueError('no unconfident regions to complement')

        f = fragment
        for rank in self._unconf:
            if f.qstart < rank < f.qend:
                if (rank - f.qstart + 1) > 0.4 * (f.qend - f.qstart + 1):
                    self.append(f)
                    # one qualifying position is enough; append only once
                    break

    def sort(self, field='score', reverse=False):
        """
        Sort the fragments in the library (in place).

        @param field: name of the fragment attribute to sort by
        @type field: str
        @param reverse: if True, sort in descending order
        @type reverse: bool
        """
        self._fragments.sort(key=lambda i: getattr(i, field), reverse=reverse)

    def dump(self, file, builder=OutputBuilder):
        """
        Write the library to a Rosetta fragment file.

        @param file: destination file name
        @type file: str
        @param builder: formatter class, instantiated with the open stream
        @type builder: type (L{OutputBuilder} or compatible)
        """

        with open(file, 'w') as out:
            formatter = builder(out)

            for qstart in self.start_positions:

                frags = self.starting_at(qstart)
                formatter.add_position(qstart, frags)

                for fragment in frags:
                    formatter.add_fragment(fragment)

    @staticmethod
    def read(file, top=None):
        """
        Read a standard fragment file.

        @param file: file name
        @type file: str
        @param top: if defined, read only C{top} fragments per start position
                    (default=all)
        @type top: int or None

        @return: parsed fragment library
        @rtype: L{RosettaFragmentMap}
        """
        # This is the format (rosetta_fragments/nnmake/makeoutput.f):
        # source chain rank residue ss phi psi omega score dme dme_f best_nn_ss_type dipolar+noe 'P' position 'F' fragment#

        def ang(a):
            # Wrap an angle that lies just outside [-180, 180] back into range
            a = float(a)
            if a < -180:
                return 360 + a
            elif a > 180:
                return -360 + a
            else:
                return a

        frags = []

        qstart, qend, start, end = 0, 0, 0, 0
        source_id = ''
        score = None
        count = 0
        residues = []

        in_frag = False

        # The context manager guarantees the handle is closed, also on errors
        with open(file) as stream:

            for line in stream:

                if line.startswith(' position:'):
                    # new origin: restart the per-position fragment counter
                    qstart = int(line.split()[1])
                    count = 0

                elif not line.strip():
                    # a blank line terminates the fragment being read, if any
                    if in_frag:
                        count += 1
                        if not top or count <= top:
                            frags.append(RosettaFragment(source_id, qstart, qend, start, end, score, residues))
                    in_frag = False
                    source_id = ''
                    start = 0
                    end = 0
                    score = None
                    residues = []

                else:
                    fields = line.split()
                    if not in_frag:
                        # first residue line of a fragment
                        start = int(fields[2])
                        qend = qstart
                        in_frag = True
                    else:
                        qend += 1

                    end = int(fields[2])
                    source_id = fields[0].lower() + fields[1]
                    score = float(fields[8])
                    rank = int(fields[2])
                    aa, ss = fields[3:5]
                    phi, psi, omega = map(ang, fields[5:8])
                    residues.append(ResidueInfo(rank, aa, ss, TorsionAngles(phi, psi, omega)))

        # A file that does not end with a blank line leaves the last fragment
        # pending; emit it instead of silently dropping it.
        if in_frag:
            count += 1
            if not top or count <= top:
                frags.append(RosettaFragment(source_id, qstart, qend, start, end, score, residues))

        return RosettaFragmentMap(frags)

493

Source Code for Module csb.bio.fragments.rosetta