Package csb :: Package bio :: Package fragments
[frames] | no frames]

Source Code for Package csb.bio.fragments

   1  """ 
   2  APIs for working with protein structure fragments and libraries. 
   3   
   4  This package contains the nuts and bolts of HHfrag. Everything here revolves 
   5  around the L{Target} class, which describes a protein structure prediction 
   6  target. One typically assigns fragments (L{Assignment}s) to the target and then 
   7  builds a fragment library with L{RosettaFragsetFactory}. 
   8  """ 
   9   
  10  import os 
  11  import numpy 
  12   
  13  import csb.io 
  14  import csb.core 
  15  import csb.bio.utils 
  16  import csb.bio.structure 
  17  import csb.bio.sequence 
  18   
  19  from csb.bio.structure import SecondaryStructure 
  20  from csb.bio.io.wwpdb import FileSystemStructureProvider, StructureParser 
21 22 23 -class FragmentTypes(object):
24 25 HHfrag = 'TH' 26 Rosetta = 'NN' 27 ISites = 'IS' 28 StaticHH = 'HH'
29
30 -class Metrics(object):
31 32 RMSD = 'rmsd_to' 33 NORMALIZED_RMSD = 'nrmsd_to' 34 MDA = 'mda_to'
35 36 RANDOM_RMSD = { 5: 1.8749005857255376, 6: 2.4314283686276261, 7: 2.9021135267789608, 8: 3.2477716200172715, 9: 3.5469606556031708, 10: 3.8295465524456329, 37 11: 4.1343107114131783, 12: 4.3761697929053014, 13: 4.6707299668248394, 14: 4.9379016881069733, 15: 5.1809028645084911, 16: 5.4146957142595662, 38 17: 5.7135948448156988, 18: 5.9597935432566782, 19: 6.1337340535741962, 20: 6.3962825155503271, 21: 6.6107937773415166, 22: 6.8099096274123401, 39 23: 7.0435583846849639, 24: 7.2160956482560970, 25: 7.4547896324594962, 26: 7.6431870072434211, 27: 7.8727812194173836, 28: 8.0727393298443637, 40 29: 8.2551450998965326, 30: 8.4413583511786587, 31: 8.5958719774122052, 32: 8.7730435506242408, 33: 8.9970648837941649, 34: 9.1566521405105163, 41 35: 9.2828620878454728, 36: 9.4525824357923405, 37: 9.6322126445253300, 38: 9.7851684750961176, 39: 9.9891454649821476, 40: 10.124373939352028, 42 41: 10.284348528344765, 42: 10.390457305096271, 43: 10.565792044674239, 44: 10.676532740033737, 45: 10.789537132283652, 46: 11.004475543757550, 43 47: 11.064541647783571, 48: 11.231219875286985, 49: 11.319222637391441, 50: 11.485478165340824, 51: 11.607522494435521, 52: 11.700268836069840, 44 53: 11.831245255954073, 54: 11.918975893263905 }
45 46 -class FragmentMatch(object):
47 """ 48 Base class, representing a match between a fragment and its target. 49 """ 50
51 - def __init__(self, id, qstart, qend, probability, rmsd, tm_score, qlength):
52 53 self._id = id 54 self._qstart = qstart 55 self._qend = qend 56 self._probability = probability 57 self._rmsd = rmsd 58 self._tm_score = tm_score 59 self._qlength = qlength
60 61 @property
62 - def id(self):
63 return self._id
64 65 @property
66 - def qstart(self):
67 return self._qstart
68 69 @property
70 - def qend(self):
71 return self._qend
72 73 @property
74 - def qlength(self):
75 return self._qlength
76 77 @property
78 - def rmsd(self):
79 return self._rmsd
80 81 @property
82 - def tm_score(self):
83 return self._tm_score
84 85 @property
86 - def probability(self):
87 return self._probability
88 89 @property
90 - def length(self):
91 return self.qend - self.qstart + 1
92 93 @property
94 - def source_id(self):
95 raise NotImplementedError()
96 97 @property
98 - def start(self):
99 raise NotImplementedError()
100 101 @property
102 - def end(self):
103 raise NotImplementedError()
104
105 106 -class TorsionAnglePredictor(object):
107 """ 108 Fragment-based phi/psi angles predictor. 109 110 @param target: target protein, containing fragment assignments 111 @type target: L{Target} 112 @param threshold: RMSD distance threshold for L{FragmentCluster}-based filtering 113 @type threshold: float 114 @param extend: pick alternative, longer cluster reps, if possible 115 @type extend: bool 116 @param init: populate all L{FragmentCluster}s on instantiation. If False, this step 117 will be performed on demand (the first time C{predictor.compute()} is invoked) 118 119 @note: if C{init} is False, the first call to C{predictor.compute()} might take a long 120 time. Subsequent calls will be very fast. 121 """ 122
123 - def __init__(self, target, threshold=1.5, extend=False, init=False):
124 125 if not isinstance(target, Target): 126 raise TypeError(target) 127 if target.matches.length == 0: 128 raise ValueError('This target has no fragment assignments') 129 130 self._target = target 131 self._threshold = float(threshold) 132 self._extend = bool(extend) 133 134 self._initialized = False 135 self._reps = {} 136 self._clusters = {} 137 138 if init: 139 self.init()
140 141 @property
142 - def target(self):
143 return self._target
144 145 @property
146 - def threshold(self):
147 return self._threshold
148 149 @property
150 - def extend(self):
151 return self._extend
152
153 - def init(self):
154 """ 155 Compute and cache all L{FragmentCluster}s. 156 """ 157 158 self._reps = {} 159 self._clusters = {} 160 161 for residue in self.target.residues: 162 cluster = self._filter(residue) 163 164 if cluster is not None: 165 rep = cluster.centroid() 166 if rep.has_alternative: 167 rep.exchange() 168 169 self._reps[residue.native.rank] = rep 170 self._clusters[residue.native.rank] = cluster.items 171 172 self._initialized = True
173
174 - def _filter(self, residue):
175 176 try: 177 nodes = [] 178 for ai in residue.assignments: 179 node = ClusterNode.create(ai.fragment) 180 nodes.append(node) 181 182 cluster = FragmentCluster(nodes, threshold=self.threshold) 183 cluster.shrink(minitems=0) 184 185 return cluster 186 187 except (ClusterExhaustedError, ClusterDivergingError): 188 return None
189
190 - def _residue(self, rank):
191 192 for r in self._target.residues: 193 if r.native.rank == rank: 194 return r 195 196 raise ValueError('Rank {0} is out of range'.format(rank))
197
198 - def compute_single(self, rank):
199 """ 200 Extract torsion angles from the L{ClusterRep} at residue C{#rank}. 201 202 @param rank: target residue rank 203 @type rank: int 204 205 @rtype: L{TorsionPredictionInfo} 206 """ 207 208 residue = self._residue(rank) 209 rep = residue.filter(threshold=self.threshold, extend=self.extend) 210 211 if rep is None: 212 return None 213 214 else: 215 fragment = rep.centroid 216 torsion = fragment.torsion_at(rank, rank)[0] 217 ss = fragment.sec_structure_at(rank, rank)[0] 218 219 return TorsionPredictionInfo(rank, rep.confidence, torsion, ss, primary=True)
220
221 - def compute(self, rank):
222 """ 223 Extract torsion angles from all L{ClusterRep}s, covering residue C{#rank}. 224 225 @param rank: target residue rank 226 @type rank: int 227 228 @return: L{TorsionPredictionInfo} instances, sorted by confidence 229 @rtype: tuple of L{TorsionPredictionInfo} 230 """ 231 232 if not self._initialized: 233 self.init() 234 235 prediction = [] 236 237 for rep in self._reps.values(): 238 239 if rep.centroid.qstart <= rank <= rep.centroid.qend: 240 241 fragment = rep.centroid 242 torsion = fragment.torsion_at(rank, rank)[0] 243 ss = fragment.sec_structure_at(rank, rank)[0] 244 info = TorsionPredictionInfo(rank, rep.confidence, torsion, ss) 245 246 if rep is self._reps.get(rank, None): 247 info.primary = True 248 249 prediction.append(info) 250 251 prediction.sort(reverse=True) 252 return tuple(prediction)
253
254 - def flat_torsion_map(self):
255 """ 256 Filter the current fragment map and create a new, completely flat, 257 non-overlapping map built from centroids, assigned iteratively by 258 decreasing confidence. Centroids with lower confidence which overlap 259 with previously assigned centroids will be trimmed to fill existing 260 gaps only. 261 262 @return: L{TorsionPredictionInfo} instances, one for each target residue 263 @rtype: tuple of L{TorsionPredictionInfo} 264 """ 265 266 if not self._initialized: 267 self.init() 268 269 prediction = [] 270 slots = set(range(1, self.target.length + 1)) 271 272 reps = list(self._reps.values()) 273 reps.sort(key=lambda i: i.confidence, reverse=True) 274 275 for rep in reps: 276 277 for rank in range(rep.centroid.qstart, rep.centroid.qend + 1): 278 if rank in slots: 279 torsion = rep.centroid.torsion_at(rank, rank)[0] 280 ss = rep.centroid.sec_structure_at(rank, rank)[0] 281 info = TorsionPredictionInfo(rank, rep.confidence, torsion, ss, primary=True) 282 283 prediction.append(info) 284 slots.remove(rank) 285 286 for rank in slots: 287 prediction.append(TorsionPredictionInfo(rank, 0, None)) 288 289 prediction.sort(key=lambda i: i.rank) 290 return tuple(prediction)
291
292 - def get_angles(self, rank):
293 """ 294 Extract all torsion angles coming from all fragments, which had survived 295 the filtering and cover residue C{#rank}. 296 297 @param rank: target residue rank 298 @type rank: int 299 300 @return: all L{TorsionAngles} for a cluster at the specified residue 301 @rtype: tuple of L{TorsionAngles} 302 """ 303 304 if not self._initialized: 305 self.init() 306 if rank not in self._clusters: 307 return tuple() 308 309 angles = [] 310 311 for node in self._clusters[rank]: 312 fragment = node.fragment 313 torsion = fragment.torsion_at(rank, rank)[0] 314 angles.append(torsion) 315 316 return tuple(angles)
317
318 319 -class TorsionPredictionInfo(object):
320 """ 321 Struct container for a single torsion angle prediction. 322 323 @param rank: target residue rank 324 @type rank: int 325 @param confidence: confidence of prediction 326 @type confidence: float 327 @param torsion: assigned phi/psi/omega angles 328 @type torsion: L{TorsionAngles} 329 @param dssp: assigned secondary structure 330 @type dssp: L{SecondaryStructureElement} 331 @param primary: if True, designates that the assigned angles are extracted 332 from the L{ClusterRep} at residue C{#rank}; otherwise: the 333 angles are coming from another, overlapping L{ClusterRep} 334 335 """ 336
337 - def __init__(self, rank, confidence, torsion, dssp, primary=False):
338 339 self.rank = rank 340 self.confidence = confidence 341 self.torsion = torsion 342 self.primary = primary 343 self.dssp = dssp
344
345 - def as_tuple(self):
346 """ 347 @return: convert this prediction to a tuple: (confidence, phi, psi, omega) 348 @rtype: tuple 349 """ 350 return tuple([self.confidence, self.torsion.phi, self.torsion.psi, self.torsion.omega])
351
352 - def __str__(self):
353 return '<TorsionPredictionInfo: {0.confidence:6.3f} at #{0.rank}>'.format(self)
354
355 - def __lt__(self, other):
356 return self.confidence < other.confidence
357
358 359 -class AssignmentFactory(object):
360
361 - def target(self, *a, **k):
362 return Target(*a, **k)
363
364 - def residue(self, *a, **k):
365 return TargetResidue(*a, **k)
366
367 - def assignment(self, *a, **k):
368 return Assignment(*a, **k)
369
370 -class ChemShiftAssignmentFactory(object):
371
372 - def target(self, *a, **k):
373 return ChemShiftTarget(*a, **k)
374
375 - def residue(self, *a, **k):
376 return ChemShiftTargetResidue(*a, **k)
377
378 - def assignment(self, *a, **k):
379 return ChemShiftAssignment(*a, **k)
380
381 -class Target(csb.core.AbstractNIContainer):
382 """ 383 Represents a protein structure prediction target. 384 385 @param id: target sequence ID, in PDB accnC format 386 @type id: str 387 @param length: total target sequence length 388 @type length: int 389 @param residues: a list, containing target's residues. See also 390 L{Target.from_sequence} 391 @type residues: iterable of L{csb.bio.structure.ProteinResidue}s 392 """ 393
394 - def __init__(self, id, length, residues, overlap=None, factory=AssignmentFactory()):
395 396 self._id = id 397 self._accession = id[:-1] 398 self._chain_id = id[-1] 399 self._length = length 400 self._overlap = overlap 401 self._factory = factory 402 403 self._assignments = csb.core.ReadOnlyCollectionContainer(type=Assignment) 404 self._errors = csb.core.CollectionContainer() 405 406 resi = [factory.residue(native) for native in residues] 407 self._residues = csb.core.ReadOnlyCollectionContainer(items=resi, 408 type=TargetResidue, start_index=1)
409 410 @staticmethod
411 - def from_sequence(id, sequence):
412 """ 413 Factory, which builds L{Target} objects from a bare sequence. 414 415 @param sequence: target's sequence 416 @type sequence: L{csb.bio.sequence.AbstractSequence}, str or iterable 417 418 @rtype: L{Target} 419 """ 420 421 if isinstance(sequence, csb.bio.sequence.Sequence): 422 sequence = sequence.sequence 423 424 residues = [] 425 426 for rn, aa in enumerate(sequence, start=1): 427 residue = csb.bio.structure.ProteinResidue(rank=rn, type=aa) 428 residues.append(residue) 429 430 return Target(id, len(residues), residues)
431 432 @staticmethod
433 - def from_profile(hmm):
434 """ 435 Factory, which builds L{Target} objects from an HMM profile. 436 437 @param hmm: target's HMM 438 @type hmm: L{csb.bio.hmm.ProfileHMM} 439 440 @rtype: L{Target} 441 """ 442 443 residues = [ r.clone() for r in hmm.residues ] 444 return Target(hmm.id, hmm.layers.length, residues)
445 446 @staticmethod
447 - def deserialize(pickle):
448 449 with open(pickle) as stream: 450 return csb.io.Pickle.load(stream)
451 452 @property
453 - def _children(self):
454 return self._residues
455 456 @property
457 - def errors(self):
458 return self._errors
459 460 @property
461 - def id(self):
462 return self._id
463 464 @property
465 - def accession(self):
466 return self._accession
467 468 @property
469 - def chain_id(self):
470 return self._chain_id
471 472 @property
473 - def max_overlap(self):
474 return self._overlap
475 476 @property
477 - def length(self):
478 return self._length
479 480 @property
481 - def sequence(self):
482 return ''.join(str(r.native.type) for r in self)
483 484 @property
485 - def matches(self):
486 return self._assignments
487 488 @property
489 - def residues(self):
490 return self._residues
491
492 - def assign(self, fragment):
493 """ 494 Add a new fragment match. 495 @param fragment: fragment to assign 496 @type fragment: L{Assignment} 497 """ 498 499 if not 1 <= fragment.qstart <= fragment.qend <= len(self._residues): 500 raise ValueError("Fragment out of range") 501 502 self._assignments._append_item(fragment) 503 504 for rank in range(fragment.qstart, fragment.qend + 1): 505 ai = ResidueAssignmentInfo(fragment, rank) 506 self._residues[rank].assign(ai)
507
508 - def assignall(self, fragments):
509 """ 510 Assign a bunch of fragments at once. 511 @type fragments: iterable of L{Assignment}s 512 """ 513 for frag in fragments: 514 self.assign(frag)
515
516 - def filter(self, threshold=1.5, extend=False):
517 """ 518 Filter the current fragment map using a L{FragmentCluster}. 519 520 @param threshold: cluster RMSD threshold (see L{FragmentCluster}) 521 @type threshold: float 522 @param extend: pick extended alternatives where possible (default=False) 523 @type extend: bool 524 525 @return: a new target, containing only cluster centroids/reps 526 @rtype: L{Target} 527 """ 528 529 target = self.clone() 530 531 for residue in self.residues: 532 rep = residue.filter(threshold=threshold, extend=extend) 533 534 if rep is not None: 535 target.assign(rep.centroid) 536 537 return target
538
539 - def clone(self):
540 """ 541 @return: a deep copy of the target 542 @rtype: L{Target} 543 """ 544 residues = [r.native for r in self.residues] 545 target = self._factory.target(self.id, self.length, residues, 546 overlap=self._overlap) 547 return target
548
549 -class ChemShiftTarget(Target):
550
551 - def __init__(self, id, length, residues, overlap=None):
555
556 - def assign(self, fragment):
557 558 if not 1 <= fragment.qstart <= fragment.qend <= len(self._residues): 559 raise ValueError("Fragment out of range") 560 561 self._assignments._append_item(fragment) 562 563 rank = fragment.qstart 564 ai = ResidueAssignmentInfo(fragment, rank) 565 self._residues[rank].assign(ai)
566
567 - def clone(self):
568 return self._factory.target(self.id, self.length, 569 [r.native for r in self.residues], 570 overlap=self._overlap)
571
572 -class TargetResidue(object):
573 """ 574 Wrapper around L{Target}'s native residues. Decorates them with additional, 575 fragment-related methods. 576 577 @type native_residue: L{csb.bio.structure.ProteinResidue} 578 """ 579
580 - def __init__(self, native_residue):
581 582 self._type = native_residue.type 583 self._native = native_residue.clone() 584 self._assignments = csb.core.ReadOnlyCollectionContainer(type=ResidueAssignmentInfo)
585 586 @property
587 - def type(self):
588 return self._type
589 590 @property
591 - def native(self):
592 return self._native
593 594 @property
595 - def assignments(self):
596 return self._assignments
597
598 - def assign(self, assignment_info):
599 self._assignments._append_item(assignment_info)
600
601 - def filter(self, method=Metrics.RMSD, threshold=1.5, extend=False):
602 """ 603 Filter all fragments, covering this position in the L{Target} using a 604 L{FragmentCluster}. 605 606 @param method: one of the L{Metrics} members (default=L{Metrics.RMSD}) 607 @type method: str 608 @param threshold: cluster RMSD threshold (see L{FragmentCluster}) 609 @type threshold: float 610 @param extend: pick extended alternative where possible (default=False) 611 @type extend: bool 612 613 @return: cluster's representative (if converged) or None 614 @rtype: L{ClusterRep} or None 615 """ 616 617 try: 618 nodes = [] 619 for ai in self.assignments: 620 node = ClusterNode.create(ai.fragment, method, extend) 621 nodes.append(node) 622 623 cluster = FragmentCluster(nodes, threshold=threshold) 624 625 center = cluster.shrink(minitems=0) 626 if center.has_alternative: 627 center.exchange() 628 629 return center 630 631 except (ClusterExhaustedError, ClusterDivergingError): 632 return None
633
634 - def closest(self):
635 """ 636 @return: the fragment with the lowest RMSD at this position in the L{Target} 637 @rtype: L{Assignment} 638 """ 639 640 best = None 641 642 for ai in self.assignments: 643 a = ai.fragment 644 if a.length < FragmentCluster.MIN_LENGTH: 645 continue 646 if best is None or a.rmsd < best.rmsd: 647 best = a 648 elif a.rmsd == best.rmsd and a.length > best.length: 649 best = a 650 651 return best
652
653 - def longest(self):
654 """ 655 @return: the longest fragment, covering the current position 656 @rtype: L{Assignment} 657 """ 658 best = None 659 660 for q in self.assignments: 661 if best is None or (q.fragment.length > best.length): 662 best = q.fragment 663 664 return best
665
666 - def precision(self, threshold=1.5):
667 """ 668 @return: the residue-wise precision of the fragment library at the 669 current position (percentage). 670 671 @param threshold: true-positive RMSD cutoff (default=1.5) 672 @type threshold: float 673 @rtype: float 674 """ 675 676 if self.assignments.length < 1: 677 return None 678 else: 679 positive = [a for a in self.assignments if a.fragment.rmsd <= threshold] 680 pos = len(positive) * 100.0 / self.assignments.length 681 682 return pos
683
684 -class ChemShiftTargetResidue(TargetResidue):
685
686 - def closest(self):
687 688 best = None 689 690 for ai in self.assignments: 691 a = ai.fragment 692 693 if a.score < ChemShiftAssignment.BIT_SCORE_THRESHOLD * a.window: 694 continue 695 696 if best is None or a.score > best.score: 697 best = a 698 elif a.score == best.score and a.length > best.length: 699 best = a 700 701 return best
702
703 704 -class ResidueAssignmentInfo(object):
705
706 - def __init__(self, assignment, rank):
707 708 if not assignment.qstart <= rank <= assignment.qend: 709 raise ValueError('Rank {0} is not matched by this assignment') 710 711 self._assignment = assignment 712 self._rank = rank 713 self._relrank = rank - assignment.qstart
714 715 @property
716 - def c_alpha(self):
717 return self._assignment.backbone[self._relrank]
718 719 @property
720 - def fragment(self):
721 return self._assignment
722
723 -class Assignment(FragmentMatch):
724 """ 725 Represents a match between a fragment and its target. 726 727 @param source: source structure (must have torsion angles precomputed) 728 @type source: L{csb.bio.structure.Chain} 729 @param start: start position in C{source} (rank) 730 @type start: int 731 @param end: end position in C{source} (rank) 732 @type end: int 733 @param id: fragment ID 734 @type id: str 735 @param qstart: start position in target (rank) 736 @type qstart: int 737 @param qend: end position in target (rank) 738 @type qend: int 739 @param probability: probability of assignment 740 @type probability: float 741 @param rmsd: RMSD of the fragment, compared to target's native structure 742 @type rmsd: float 743 """ 744
745 - def __init__(self, source, start, end, qstart, qend, id=None, probability=None, rmsd=None, 746 tm_score=None, score=None, neff=None, internal_id=None):
747 748 assert source.has_torsion 749 sub = source.subregion(start, end, clone=True) 750 try: 751 calpha = [r.atoms['CA'].vector.copy() for r in sub.residues] 752 except csb.core.ItemNotFoundError: 753 raise csb.bio.structure.Broken3DStructureError() 754 torsion = [r.torsion.copy() for r in sub.residues] 755 756 self._calpha = csb.core.ReadOnlyCollectionContainer(items=calpha, type=numpy.ndarray) 757 self._torsion = torsion 758 self._sequence = sub.sequence 759 760 self._source_id = source.accession[:4] + source.id 761 self._start = start 762 self._end = end 763 764 self._score = score 765 self._neff = neff 766 self._ss = None 767 768 self.internal_id = internal_id 769 770 if id is None: 771 id = "{0}:{1}-{2}".format(self.source_id, self.start, self.end) 772 773 super(Assignment, self).__init__(id, qstart, qend, probability, rmsd, tm_score, None) 774 775 self._ss = SecondaryStructure('-' * self.length)
776 777 @staticmethod
778 - def from_fragment(fragment, provider):
779 """ 780 Create a new L{Assignment} given a source rosetta fragment. 781 782 @param fragment: rosetta fragment 783 @type fragment: L{RosettaFragment} 784 @param provider: PDB database provider 785 @type provider: L{StructureProvider} 786 787 @rtype: L{Assignment} 788 """ 789 try: 790 structure = provider.get(fragment.accession) 791 except KeyError: 792 structure = provider.get(fragment.source_id) 793 source = structure.chains[fragment.chain] 794 source.compute_torsion() 795 796 id = "{0}:{1}-{2}".format(fragment.source_id, fragment.start, fragment.end) 797 798 return Assignment(source, fragment.start, fragment.end, 799 fragment.qstart, fragment.qend, id, 0, 0)
800 801 @property
802 - def backbone(self):
803 return self._calpha
804 805 @property
806 - def sequence(self):
807 return self._sequence
808 809 @property
810 - def torsion(self):
811 return self._torsion
812 813 @property
814 - def source_id(self):
815 return self._source_id
816 817 @property
818 - def start(self):
819 return self._start
820 821 @property
822 - def end(self):
823 return self._end
824 825 @property
826 - def score(self):
827 return self._score
828 829 @property
830 - def neff(self):
831 return self._neff
832 833 @property
834 - def secondary_structure(self):
835 return self._ss
836 @secondary_structure.setter
837 - def secondary_structure(self, value):
838 839 if isinstance(value, csb.core.string): 840 value = csb.bio.structure.SecondaryStructure(value) 841 if len(str(value)) != self.length:#(value.end - value.start + 1) != self.length: 842 raise ValueError("Invalid secondary structure length", len(str(value)), self.length ) 843 844 self._ss = value
845
846 - def transform(self, rotation, translation):
847 """ 848 Apply rotation/translation to fragment's coordinates in place 849 """ 850 851 for ca in self.backbone: 852 newca = numpy.dot(ca, numpy.transpose(rotation)) + translation 853 for i in range(3): 854 ca[i] = newca[i]
855
856 - def _check_range(self, qstart, qend):
857 858 if not (self.qstart <= qstart <= qend <= self.qend): 859 raise ValueError('Region {0}..{1} is out of range {2.qstart}..{2.qend}'.format(qstart, qend, self))
860
861 - def anchored_around(self, rank):
862 """ 863 @return: True if the fragment is centered around position=C{rank} 864 @rtype: bool 865 """ 866 867 if self.qstart < rank < self.qend: 868 if (rank - self.qstart + 1) > 0.4 * (self.qend - self.qstart + 1): 869 return True 870 871 return False
872
873 - def backbone_at(self, qstart, qend):
874 """ 875 @return: the CA coordinates of the fragment at the specified subregion. 876 @rtype: list 877 """ 878 879 self._check_range(qstart, qend) 880 881 relstart = qstart - self.qstart 882 relend = qend - self.qstart + 1 883 884 return self.backbone[relstart : relend]
885
886 - def torsion_at(self, qstart, qend):
887 """ 888 @return: torsion angles of the fragment at the specified subregion 889 @rtype: list 890 """ 891 self._check_range(qstart, qend) 892 893 relstart = qstart - self.qstart 894 relend = qend - self.qstart + 1 895 896 return self.torsion[relstart : relend]
897
898 - def sec_structure_at(self, qstart, qend):
899 """ 900 @return: secondary structure of the fragment at the specified subregion 901 @rtype: list 902 """ 903 904 self._check_range(qstart, qend) 905 start = qstart - self.qstart + 1 906 end = qend - self.qstart + 1 907 908 return self.secondary_structure.scan(start, end, loose=True, cut=True)
909
910 - def overlap(self, other):
911 """ 912 @type other: L{Assignment} 913 @return: target positions, covered by both C{self} and C{other} 914 @rtype: set of int 915 """ 916 917 qranks = set(range(self.qstart, self.qend + 1)) 918 sranks = set(range(other.qstart, other.qend + 1)) 919 920 return qranks.intersection(sranks)
921
922 - def rmsd_to(self, other, min_overlap=5):
923 """ 924 @return: the CA RMSD between C{self} and C{other}. 925 926 @param other: another fragment 927 @type other: L{Assignment} 928 @param min_overlap: require at least that number of overlapping residues 929 (return None if not satisfied) 930 @type min_overlap: int 931 932 @rtype: float 933 """ 934 if self is other: 935 return 0 936 937 common = self.overlap(other) 938 939 if len(common) >= min_overlap: 940 941 qstart, qend = min(common), max(common) 942 943 q = self.backbone_at(qstart, qend) 944 s = other.backbone_at(qstart, qend) 945 946 if len(q) > 0 and len(s) > 0: 947 return csb.bio.utils.rmsd(numpy.array(q), numpy.array(s)) 948 949 return None
950
951 - def nrmsd_to(self, other, min_overlap=5):
952 """ 953 @return: the normalized CA RMSD between C{self} and C{other}. 954 955 @param other: another fragment 956 @type other: L{Assignment} 957 @param min_overlap: require at least that number of overlapping residues 958 (return None if not satisfied) 959 @type min_overlap: int 960 961 @rtype: float 962 """ 963 964 common = self.overlap(other) 965 966 if len(common) >= min_overlap: 967 968 qstart, qend = min(common), max(common) 969 970 q = self.backbone_at(qstart, qend) 971 s = other.backbone_at(qstart, qend) 972 973 if len(q) > 0 and len(s) > 0: 974 return csb.bio.utils.rmsd(q, s) / RANDOM_RMSD[ len(common) ] 975 976 return None
977
978 - def mda_to(self, other, min_overlap=5):
979 """ 980 @return: the MDA (maximum deviation in torsion angles) between 981 C{self} and C{other}. 982 983 @param other: another fragment 984 @type other: L{Assignment} 985 @param min_overlap: require at least that number of overlapping residues 986 (return None if not satisfied) 987 @type min_overlap: int 988 989 @rtype: float 990 """ 991 common = self.overlap(other) 992 993 if len(common) >= min_overlap: 994 995 qstart, qend = min(common), max(common) 996 997 q = self.torsion_at(qstart, qend) 998 s = other.torsion_at(qstart, qend) 999 1000 if len(q) > 0 and len(s) > 0: 1001 1002 maxphi = max(numpy.abs(i.phi - j.phi) for i, j in zip(q, s)[1:]) # phi: 2 .. L 1003 maxpsi = max(numpy.abs(i.psi - j.psi) for i, j in zip(q, s)[:-1]) # psi: 1 .. L-1 1004 1005 return max(maxphi, maxpsi) 1006 1007 return None
1008
1009 1010 -class ChemShiftAssignment(Assignment):
1011 1012 BIT_SCORE_THRESHOLD = 1.1 1013
1014 - def __init__(self, source, start, end, qstart, qend, window, score, rmsd):
1015 1016 self._window = window 1017 1018 super(ChemShiftAssignment, self).__init__( 1019 source, start, end, qstart, qend, id=None, probability=1.0, 1020 rmsd=rmsd, tm_score=None, score=score, neff=None, internal_id=None)
1021 1022 @property
1023 - def window(self):
1024 return self._window
1025
1026 -class ClusterExhaustedError(ValueError):
1027 pass
1028
1029 -class ClusterEmptyError(ClusterExhaustedError):
1030 pass
1031
1032 -class ClusterDivergingError(RuntimeError):
1033 pass
1034
1035 -class FragmentCluster(object):
1036 """ 1037 Provides clustering/filtering of the fragments, covering a common residue 1038 in the target. Clustering is done via iterative shrinking of the cluster. 1039 At each iteration, node rejection (deletion) is attempted for each node. The 1040 node rejection, causing the most significant drop in the average pairwise 1041 distance (RMSD) in the cluster, is retained. This procedure is repeated 1042 until: 1) the average pairwise RMSD drops below the C{threshold} (converged), 1043 2) the cluster gets exhausted or 3) node rejection no longer 1044 causes a drop in the average distance (not converging). 1045 1046 @param items: cluster members 1047 @type items: iterable of L{ClusterNode}s 1048 @param threshold: RMSD threshold; continue shrinking until the mean distance 1049 drops below this value (default=1.5) 1050 @type threshold: float 1051 @param connectedness: when calculating centroids, consider only nodes 1052 connected to at least c% of all surviving vertices 1053 (default=0.5) 1054 @type connectedness: float 1055 """ 1056 1057 MIN_LENGTH = 6 1058
1059 - def __init__(self, items, threshold=1.5, connectedness=0.5):
1060 1061 items = set(i for i in items if i.fragment.length >= FragmentCluster.MIN_LENGTH) 1062 1063 self._matrix = {} 1064 self._threshold = float(threshold) 1065 self._connectedness = float(connectedness) 1066 self._weight = 0 1067 self._edges = 0 1068 1069 visited = set() 1070 1071 for i in items: 1072 self._matrix.setdefault(i, {}) 1073 1074 for j in items: 1075 self._matrix.setdefault(j, {}) 1076 1077 if (j, i) not in visited: 1078 visited.add((i, j)) 1079 distance = i.distance(j) 1080 1081 if distance is not None: 1082 self._matrix[i][j] = distance 1083 self._matrix[j][i] = distance 1084 i.weight += distance 1085 j.weight += distance 1086 1087 self._weight += distance 1088 self._edges += 1 1089 1090 self._items = set(self._matrix.keys()) 1091 1092 if len(self._items) < 1: 1093 raise ClusterEmptyError() 1094 1095 self._initcount = self.count
1096 1097 @property
1098 - def count(self):
1099 return len(self._items)
1100 1101 @property
1102 - def items(self):
1103 return tuple(self._items)
1104 1105 @property
1106 - def fragments(self):
1107 return tuple(i.fragment for i in self._items)
1108 1109 @property
1110 - def threshold(self):
1111 return self._threshold
1112 @threshold.setter
1113 - def threshold(self, value):
1114 self._threshold = float(value)
1115 1116 @property
1117 - def connectedness(self):
1118 return self._connectedness
1119
1120 - def _distances(self, skip=None):
1121 1122 d = [] 1123 1124 for i in self._matrix: 1125 if skip is i: 1126 continue 1127 1128 for j in self._matrix[i]: 1129 if skip is not j: 1130 d.append(self._matrix[i][j]) 1131 1132 return d
1133
1134 - def _distance(self, i, j):
1135 1136 if j in self._matrix[i]: 1137 return self._matrix[i][j] 1138 else: 1139 return None
1140
1141 - def mean(self, skip=None):
1142 """ 1143 @return: the current mean distance in the cluster 1144 @rtype: float 1145 """ 1146 if self._edges == 0: 1147 raise ClusterExhaustedError() 1148 1149 if not skip: 1150 return float(self._weight) / self._edges 1151 1152 else: 1153 weight = self._weight - skip.weight 1154 edges = self._edges - len(self._matrix[skip]) 1155 1156 if edges < 1: 1157 return 0 1158 else: 1159 return float(weight) / edges
1160
1161 - def centroid(self):
1162 """ 1163 @return: the current representative fragment 1164 @rtype: L{ClusterRep} 1165 1166 @note: the cluster rep is the node with the lowest average distance 1167 to all other nodes. If a fixed fragment exists, structurally similar 1168 to the rep, but longer, this fragment may be suggested as an alternative 1169 (see also L{ClusterRep}). 1170 """ 1171 1172 alt = None 1173 cen = None 1174 avg = None 1175 1176 for i in self._matrix: 1177 edges = len(self._matrix[i]) or (1.0 / self.count) 1178 curravg = float(i.weight) / edges 1179 conn = len(self._matrix[i]) / float(self.count) 1180 1181 if avg is None or (curravg < avg and conn >= self.connectedness): 1182 avg = curravg 1183 cen = i 1184 elif curravg == avg: 1185 if i.fragment.length > cen.fragment.length: 1186 cen = i 1187 1188 d = self._distances() 1189 mean = numpy.mean(d) 1190 cons = sum(1.0 for i in d if i <= self.threshold) / len(d) 1191 1192 for i in self._matrix: 1193 if i is not cen and i.fixed and i.fragment.length > cen.fragment.length: 1194 distance = self._distance(i, cen) 1195 if distance is not None and distance < 0.5 * self.threshold: 1196 if alt is None or alt.fragment.length < i.fragment.length: 1197 alt = i 1198 1199 return ClusterRep(cen, mean, cons, len(self._matrix[cen]), alternative=alt, 1200 rejections=(self._initcount - self.count))
1201
1202 - def reject(self, item):
1203 """ 1204 Remove C{item} from the cluster. 1205 1206 @type item: L{ClusterNode} 1207 @raise ClusterExhaustedError: if this is the last remaining item 1208 """ 1209 if self.count == 1: 1210 raise ClusterExhaustedError() 1211 1212 assert not item.fixed 1213 1214 for i in self._matrix: 1215 if item in self._matrix[i]: 1216 distance = self._matrix[i][item] 1217 self._weight -= distance 1218 i.weight -= distance 1219 1220 del self._matrix[i][item] 1221 self._edges -= 1 1222 1223 del self._matrix[item] 1224 self._items.remove(item)
1225
1226 - def shrinkone(self):
1227 """ 1228 Shrink the cluster by a single node. 1229 1230 @return: True on successful shrink, False otherwise (e.g. if 1231 already converged) 1232 @rtype: bool 1233 @raise ClusterExhaustedError: if exhausted 1234 @raise ClusterDivergingError: if not converging 1235 """ 1236 1237 mean = self.mean() 1238 if mean <= self.threshold or self.count == 1: 1239 return False # already shrunk enough 1240 1241 m = {} 1242 1243 for i in self._matrix: 1244 if not i.fixed: 1245 newmean = self.mean(skip=i) 1246 m[newmean] = i 1247 1248 if len(m) == 0: # only fixed items remaining 1249 raise ClusterExhaustedError() 1250 1251 newmean = min(m) 1252 1253 if newmean > mean: 1254 raise ClusterDivergingError() # can't converge, usually when fixed items are too far away from the average 1255 elif newmean < mean: 1256 junk = m[newmean] 1257 self.reject(junk) 1258 return True # successful shrink 1259 else: 1260 return False # converged
1261
1262 - def shrink(self, minitems=2):
1263 """ 1264 Start automatic shrinking. 1265 1266 @param minitems: absolute minimum of the number of nodes in the cluster 1267 @type minitems: int 1268 1269 @return: cluster's representative: the node with the lowest average 1270 distance to all other nodes in the cluster 1271 @rtype: L{ClusterRep} 1272 1273 @raise ClusterExhaustedError: if C{self.count} < C{minitems} and 1274 still not converged 1275 """ 1276 1277 if self.count > minitems: 1278 1279 while self.shrinkone(): 1280 if self.count <= minitems: 1281 raise ClusterExhaustedError() 1282 else: 1283 raise ClusterExhaustedError() 1284 1285 return self.centroid()
1286
1287 -class ClusterNode(object):
1288 """ 1289 Cluster node. 1290 1291 @param fragment: fragment 1292 @type fragment: L{Assignment} 1293 @param distance: distance metric (a L{Metrics} member, default is RMSD) 1294 @type distance: str 1295 @param fixed: mark this node as fixed (cannot be rejected) 1296 @type fixed: bool 1297 """ 1298 1299 FIXED = 0.7 1300 1301 @staticmethod
1302 - def create(fragment, method=Metrics.RMSD, extend=False):
1303 """ 1304 Create a new L{ClusterNode} given a specified C{Assignment}. If this 1305 assignment is a high probability match, define it as a fixed fragment. 1306 1307 @rtype: L{ClusterNode} 1308 """ 1309 if fragment.probability > ClusterNode.FIXED and fragment.length >= FragmentCluster.MIN_LENGTH: 1310 return ClusterNode(fragment, distance=method, fixed=extend) 1311 else: 1312 return ClusterNode(fragment, distance=method, fixed=False)
1313
1314 - def __init__(self, fragment, distance=Metrics.RMSD, fixed=False):
1315 1316 if fixed and fragment.length < FragmentCluster.MIN_LENGTH: 1317 raise ValueError("Can't fix a short fragment") 1318 1319 self.fragment = fragment 1320 self.fixed = bool(fixed) 1321 self.weight = 0 1322 1323 self._distance = getattr(self.fragment, distance)
1324
1325 - def distance(self, other):
1326 """ 1327 @return: the distance between self and another node 1328 @type other: L{ClusterNode} 1329 @rtype: float 1330 """ 1331 return self._distance(other.fragment)
1332
1333 -class ClusterRep(object):
1334 """ 1335 Cluster's representative (centroid) node. This object carries the 1336 result of shrinking itself. 1337 1338 @param centroid: rep node 1339 @type centroid: L{ClusterNode} 1340 @param mean: current mean distance in the cluster 1341 @type mean: float 1342 @param consistency: percentage of pairwise distances below the RMSD C{threshold} 1343 @type consistency: float 1344 @param count: current number of nodes in the cluster 1345 @type count: int 1346 @param rejections: total number of rejections 1347 @type rejections: int 1348 @param alternative: suggested cluster rep alternative (e.g. structurally 1349 similar to the centroid, but longer) 1350 @type param: 1351 """ 1352
1353 - def __init__(self, centroid, mean, consistency, count, rejections=0, alternative=None):
1354 1355 if isinstance(centroid, ClusterNode): 1356 centroid = centroid.fragment 1357 if isinstance(alternative, ClusterNode): 1358 alternative = alternative.fragment 1359 1360 self._centroid = centroid 1361 self._alternative = alternative 1362 self._mean = mean 1363 self._consistency = consistency 1364 self._count = count 1365 self._rejections = rejections
1366 1367 @property
1368 - def confidence(self):
1369 """ 1370 Confidence of assignment: log10(count) * consistency 1371 """ 1372 if self.count <= 0 or self.count is None or self.consistency is None: 1373 return 0 1374 else: 1375 return numpy.log10(self.count) * self.consistency
1376 1377 @property
1378 - def centroid(self):
1379 return self._centroid
1380 1381 @property
1382 - def alternative(self):
1383 return self._alternative
1384 1385 @property
1386 - def has_alternative(self):
1387 return self._alternative is not None
1388 1389 @property
1390 - def mean(self):
1391 return self._mean
1392 1393 @property
1394 - def consistency(self):
1395 return self._consistency
1396 1397 @property
1398 - def count(self):
1399 return self._count
1400 1401 @property
1402 - def rejections(self):
1403 return self._rejections
1404
1405 - def exchange(self):
1406 """ 1407 If an alternative is available, swap the centroid and the alternative. 1408 """ 1409 1410 if self._alternative is not None: 1411 1412 centroid = self._centroid 1413 self._centroid = self._alternative 1414 self._alternative = centroid
1415
1416 1417 -class ResidueEventInfo(object):
1418
1419 - def __init__(self, residue, confidence=0, count=0, confident=True, gap=False, rep=None):
1420 1421 self.residue = residue 1422 self.confidence = confidence 1423 self.confident = confident 1424 self.gap = gap 1425 self.count = count 1426 self.rep = rep
1427 1428 @property
1429 - def rank(self):
1430 return self.residue.rank
1431 1432 @property
1433 - def type(self):
1434 return self.residue.type
1435 1436 @property
1437 - def torsion(self):
1438 if self.rep: 1439 return self.rep.torsion_at(self.rank, self.rank)[0] 1440 else: 1441 return None
1442
1443 1444 -class RosettaFragsetFactory(object):
1445 """ 1446 Simplifies the construction of fragment libraries. 1447 """ 1448
1449 - def __init__(self):
1450 import csb.bio.fragments.rosetta as rosetta 1451 self.rosetta = rosetta
1452
1453 - def make_fragset(self, target):
1454 """ 1455 Build a fragment library given a L{Target} and its L{Assignment}s. 1456 1457 @param target: target protein 1458 @type target: L{Target} 1459 1460 @rtype: L{RosettaFragmentMap} 1461 """ 1462 1463 frag_factory = self.rosetta.RosettaFragment 1464 fragments = list(map(frag_factory.from_object, target.matches)) 1465 #fragments = [ frag_factory.from_object(f) for f in target.matches if f.length >= 6 ] 1466 fragments.sort() 1467 1468 return self.rosetta.RosettaFragmentMap(fragments, target.length)
1469
1470 - def make_chopped(self, fragments, window):
1471 """ 1472 Build a fixed-length fragment library from a list of 1473 variable-length L{Assignment}s. 1474 1475 @param fragments: source fragments 1476 @type fragments: iterable of L{RosettaFragment}s 1477 @param window: fixed-length fragment size (for classic Rosetta: choose 9) 1478 @type window: int 1479 1480 @return: fixed-length fragment library 1481 @rtype: L{RosettaFragmentMap} 1482 """ 1483 1484 frags = [] 1485 1486 for f in fragments: 1487 for qs in range(f.qstart, f.qend - window + 1): 1488 frags.append(f.subregion(qs, qs + window - 1)) 1489 1490 return self.rosetta.RosettaFragmentMap(frags)
1491
1492 - def make_combined(self, target, filling, threshold=0.5, callback=None):
1493 """ 1494 Complement C{target}'s assignments with C{filling} (e.g. rosetta fragments). 1495 The regions to be complemented are determined by calculating the confidence 1496 at each residue (by filtering). 1497 1498 1499 @param target: target protein 1500 @type target: L{Target} 1501 @param filling: additional fragments to place in the low-conf regions 1502 @type filling: L{RosettaFragmentMap} or iterable of L{RosettaFragment} 1503 @param threshold: confidence threshold 1504 @type threshold: float 1505 1506 @return: complemented fragment library 1507 @rtype: L{RosettaFragmentMap} 1508 """ 1509 1510 fragmap = self.make_fragset(target) 1511 covered = set() 1512 1513 for r in target.residues: 1514 1515 if r.assignments.length == 0: 1516 if callback: 1517 callback(ResidueEventInfo(r.native, gap=True)) 1518 continue 1519 1520 cluster = r.filter() 1521 if cluster is None: 1522 if callback: 1523 callback(ResidueEventInfo(r.native, 0, 0, confident=False)) 1524 continue 1525 1526 if cluster.confidence >= threshold: 1527 covered.add(r.native.rank) 1528 confident = True 1529 else: 1530 confident = False 1531 1532 if callback: 1533 callback(ResidueEventInfo(r.native, cluster.confidence, cluster.count, confident)) 1534 1535 for r in target.residues: 1536 if r.native.rank not in covered: # true for gaps and low-conf residues 1537 fragmap.mark_unconfident(r.native.rank) 1538 1539 for frag in filling: 1540 fragmap.complement(frag) 1541 1542 return fragmap
1543
1544 - def make_filtered(self, target, extend=False, callback=None):
1545 """ 1546 Builed a filtered fragment library (by clustering), containing only 1547 representative fragments (cluster centroids). 1548 1549 @param target: target protein 1550 @type target: L{Target} 1551 @param extend: if True, pick alternative reps if available 1552 @type extend: bool 1553 1554 @return: filtered fragment library 1555 @rtype: L{RosettaFragmentMap} 1556 """ 1557 1558 fragments = [] 1559 1560 for r in target.residues: 1561 if r.assignments.length == 0: 1562 if callback: 1563 callback(ResidueEventInfo(r.native, gap=True)) 1564 continue 1565 1566 cluster = r.filter(extend=extend) 1567 if cluster is None: 1568 if callback: 1569 callback(ResidueEventInfo(r.native, 0, 0, confident=False)) 1570 continue 1571 1572 if extend and cluster.has_alternative: 1573 best = cluster.alternative 1574 else: 1575 best = cluster.centroid 1576 1577 fragment = self.rosetta.RosettaFragment.from_object(best) 1578 fragments.append(fragment) 1579 if callback: 1580 callback(ResidueEventInfo(r.native, cluster.confidence, cluster.count, rep=cluster.centroid)) 1581 1582 fragments.sort() 1583 return self.rosetta.RosettaFragmentMap(fragments, target.length)
1584
1585 - def mix(self, *fragsets):
1586 """ 1587 Mix fragments from multiple libraries. 1588 1589 @type fragsets: L{RosettaFragmentMap} 1590 @return: mixed fragment library 1591 @rtype: L{RosettaFragmentMap} 1592 """ 1593 1594 fragments = [] 1595 length = 0 1596 1597 for fragset in fragsets: 1598 if fragset._length > length: 1599 length = fragset._length 1600 1601 for fragment in fragset: 1602 fragments.append(fragment) 1603 1604 return self.rosetta.RosettaFragmentMap(fragments, length)
1605
1606 1607 -class BenchmarkAdapter(object):
1608
1609 - class Connection(object):
1610 1611 FACTORY = None 1612 DSN = None 1613
1614 - def __init__(self, factory=None, dsn=None):
1615 1616 self.factory = factory or self.__class__.FACTORY 1617 self.cs = dsn or self.__class__.DSN 1618 self.connection = None 1619 self.cursor = None
1620
1621 - def __enter__(self):
1622 1623 self.connection = self.factory(self.cs) 1624 try: 1625 self.cursor = self.connection.cursor() 1626 except: 1627 self.connection.close() 1628 raise 1629 return self
1630
1631 - def __exit__(self, *args):
1632 try: 1633 if not self.cursor.closed: 1634 self.cursor.close() 1635 finally: 1636 if not self.connection.closed: 1637 self.connection.close()
1638
1639 - def __init__(self, pdb_paths, connection_string=None, factory=AssignmentFactory()):
1640 1641 self._connection = None 1642 1643 self._parser = StructureParser 1644 self._pdb = FileSystemStructureProvider(pdb_paths) 1645 self._factory = factory 1646 1647 try: 1648 import psycopg2.extras 1649 except ImportError: 1650 raise RuntimeError('Please install the psycopg2 module first') 1651 1652 if connection_string is None: 1653 connection_string = self.connection_string() 1654 1655 BenchmarkAdapter.Connection.FACTORY = psycopg2.extras.DictConnection 1656 BenchmarkAdapter.Connection.DSN = connection_string
1657 1658 @staticmethod
1659 - def connection_string(database='FragmentBenchmarks', host='', username='', password=''):
1660 1661 fields = ['dbname={0}'.format(database)] 1662 1663 if host: 1664 fields.append('host={0}'.format(host)) 1665 if username: 1666 fields.append('user={0}'.format(username)) 1667 fields.append('password={0}'.format(password)) 1668 1669 return ' '.join(fields)
1670
1671 - def targets(self, benchmark_id):
1672 1673 with BenchmarkAdapter.Connection() as db: 1674 1675 db.cursor.callproc('reporting."GetTargets"', (benchmark_id,)) 1676 return db.cursor.fetchall()
1677
1678 - def target_details(self, target_id):
1679 1680 with BenchmarkAdapter.Connection() as db: 1681 1682 db.cursor.callproc('reporting."GetTargetDetails"', (target_id,)) 1683 return db.cursor.fetchall()
1684
1685 - def assignments(self, target_id, type):
1686 1687 with BenchmarkAdapter.Connection() as db: 1688 1689 db.cursor.callproc('reporting."GetAssignments"', (target_id, type)) 1690 return db.cursor.fetchall()
1691
1692 - def assignments_sec_structure(self, target_id, type):
1693 1694 with BenchmarkAdapter.Connection() as db: 1695 1696 db.cursor.callproc('reporting."GetTargetSecStructureAssignments2"', (target_id, type)) 1697 return db.cursor.fetchall()
1698
1699 - def scores(self, benchmark_id, type):
1700 1701 with BenchmarkAdapter.Connection() as db: 1702 1703 db.cursor.callproc('reporting."GetScores"', (benchmark_id, type)) 1704 return db.cursor.fetchall()
1705
1706 - def centroids(self, benchmark_id):
1707 1708 with BenchmarkAdapter.Connection() as db: 1709 1710 db.cursor.callproc('reporting."GetCentroids"', (benchmark_id,)) 1711 return db.cursor.fetchall()
1712
1713 - def structure(self, accession, chain=None):
1714 1715 pdbfile = self._pdb.find(accession) 1716 1717 if not pdbfile and chain: 1718 pdbfile = self._pdb.find(accession + chain) 1719 1720 if not pdbfile: 1721 raise IOError('{0} not found here: {1}'.format(accession, self._pdb)) 1722 1723 return self._parser(pdbfile).parse_structure()
1724
1725 - def prediction(self, target_id, type, ss=False):
1726 1727 info = self.target_details(target_id) 1728 if not info: 1729 raise ValueError('No such Target ID in the database: {0}'.format(target_id)) 1730 row = info[0] 1731 1732 id = row["Accession"] 1733 length = float(row["Length"]) 1734 overlap = float(row["MaxOverlap"]) / (length or 1.) 1735 1736 native = self.structure(id[:4], id[4]).chains[id[4]] 1737 target = self._factory.target(id, length, native.residues, overlap) 1738 1739 source = None 1740 1741 for row in self.assignments(target_id, type): 1742 1743 src_accession = row['Source'][:4] 1744 src_chain = row['Source'][4] 1745 1746 if source is None or source.accession != src_accession: 1747 try: 1748 source = self.structure(src_accession, src_chain) 1749 except (IOError, ValueError) as ex: 1750 target.errors.append(ex) 1751 continue 1752 1753 if src_chain == '_': 1754 frag_chain = source.first_chain 1755 else: 1756 frag_chain = source.chains[src_chain] 1757 if not frag_chain.has_torsion: 1758 frag_chain.compute_torsion() 1759 1760 fragment = self._factory.assignment( 1761 source=frag_chain, 1762 start=row['SourceStart'], 1763 end=row['SourceEnd'], 1764 id=row['FragmentName'], 1765 qstart=row['Start'], 1766 qend=row['End'], 1767 probability=row['Probability'], 1768 score=row['Score'], 1769 neff=row['Neff'], 1770 rmsd=row['RMSD'], 1771 tm_score=row['TMScore'], 1772 internal_id=row['InternalID']) 1773 1774 target.assign(fragment) 1775 1776 if ss: 1777 self._attach_sec_structure(target, target_id, type) 1778 1779 return target
1780
1781 - def _attach_sec_structure(self, target, target_id, type):
1782 1783 ss = {} 1784 1785 for row in self.assignments_sec_structure(target_id, type): 1786 frag_id, state = row["AssignmentID"], row["DSSP"] 1787 if row[frag_id] not in ss: 1788 ss[frag_id] = [] 1789 1790 ss[frag_id].append(state) 1791 1792 for a in target.matches: 1793 if a.internal_id in ss: 1794 dssp = ''.join(ss[a.internal_id]) 1795 a.secondary_structure = dssp
1796