| Home | Trees | Indices | Help |
|
|---|
|
|
1 """
2 APIs for working with protein structure fragments and libraries.
3
4 This package contains the nuts and bolts of HHfrag. Everything here revolves
5 around the L{Target} class, which describes a protein structure prediction
6 target. One typically assigns fragments (L{Assignment}s) to the target and then
7 builds a fragment library with L{RosettaFragsetFactory}.
8 """
9
10 import os
11 import numpy
12
13 import csb.io
14 import csb.core
15 import csb.bio.utils
16 import csb.bio.structure
17 import csb.bio.sequence
18
19 from csb.bio.structure import SecondaryStructure
20 from csb.bio.io.wwpdb import FileSystemStructureProvider, StructureParser
29
35
36 RANDOM_RMSD = { 5: 1.8749005857255376, 6: 2.4314283686276261, 7: 2.9021135267789608, 8: 3.2477716200172715, 9: 3.5469606556031708, 10: 3.8295465524456329,
37 11: 4.1343107114131783, 12: 4.3761697929053014, 13: 4.6707299668248394, 14: 4.9379016881069733, 15: 5.1809028645084911, 16: 5.4146957142595662,
38 17: 5.7135948448156988, 18: 5.9597935432566782, 19: 6.1337340535741962, 20: 6.3962825155503271, 21: 6.6107937773415166, 22: 6.8099096274123401,
39 23: 7.0435583846849639, 24: 7.2160956482560970, 25: 7.4547896324594962, 26: 7.6431870072434211, 27: 7.8727812194173836, 28: 8.0727393298443637,
40 29: 8.2551450998965326, 30: 8.4413583511786587, 31: 8.5958719774122052, 32: 8.7730435506242408, 33: 8.9970648837941649, 34: 9.1566521405105163,
41 35: 9.2828620878454728, 36: 9.4525824357923405, 37: 9.6322126445253300, 38: 9.7851684750961176, 39: 9.9891454649821476, 40: 10.124373939352028,
42 41: 10.284348528344765, 42: 10.390457305096271, 43: 10.565792044674239, 44: 10.676532740033737, 45: 10.789537132283652, 46: 11.004475543757550,
43 47: 11.064541647783571, 48: 11.231219875286985, 49: 11.319222637391441, 50: 11.485478165340824, 51: 11.607522494435521, 52: 11.700268836069840,
44 53: 11.831245255954073, 54: 11.918975893263905 }
47 """
48 Base class, representing a match between a fragment and its target.
49 """
50
52
53 self._id = id
54 self._qstart = qstart
55 self._qend = qend
56 self._probability = probability
57 self._rmsd = rmsd
58 self._tm_score = tm_score
59 self._qlength = qlength
60
61 @property
64
65 @property
68
69 @property
72
73 @property
76
77 @property
80
81 @property
84
85 @property
88
89 @property
92
93 @property
96
97 @property
100
101 @property
104
107 """
108 Fragment-based phi/psi angles predictor.
109
110 @param target: target protein, containing fragment assignments
111 @type target: L{Target}
112 @param threshold: RMSD distance threshold for L{FragmentCluster}-based filtering
113 @type threshold: float
114 @param extend: pick alternative, longer cluster reps, if possible
115 @type extend: bool
116 @param init: populate all L{FragmentCluster}s on instantiation. If False, this step
117 will be performed on demand (the first time C{predictor.compute()} is invoked)
118
119 @note: if C{init} is False, the first call to C{predictor.compute()} might take a long
120 time. Subsequent calls will be very fast.
121 """
122
124
125 if not isinstance(target, Target):
126 raise TypeError(target)
127 if target.matches.length == 0:
128 raise ValueError('This target has no fragment assignments')
129
130 self._target = target
131 self._threshold = float(threshold)
132 self._extend = bool(extend)
133
134 self._initialized = False
135 self._reps = {}
136 self._clusters = {}
137
138 if init:
139 self.init()
140
141 @property
144
145 @property
148
149 @property
152
154 """
155 Compute and cache all L{FragmentCluster}s.
156 """
157
158 self._reps = {}
159 self._clusters = {}
160
161 for residue in self.target.residues:
162 cluster = self._filter(residue)
163
164 if cluster is not None:
165 rep = cluster.centroid()
166 if rep.has_alternative:
167 rep.exchange()
168
169 self._reps[residue.native.rank] = rep
170 self._clusters[residue.native.rank] = cluster.items
171
172 self._initialized = True
173
175
176 try:
177 nodes = []
178 for ai in residue.assignments:
179 node = ClusterNode.create(ai.fragment)
180 nodes.append(node)
181
182 cluster = FragmentCluster(nodes, threshold=self.threshold)
183 cluster.shrink(minitems=0)
184
185 return cluster
186
187 except (ClusterExhaustedError, ClusterDivergingError):
188 return None
189
191
192 for r in self._target.residues:
193 if r.native.rank == rank:
194 return r
195
196 raise ValueError('Rank {0} is out of range'.format(rank))
197
199 """
200 Extract torsion angles from the L{ClusterRep} at residue C{#rank}.
201
202 @param rank: target residue rank
203 @type rank: int
204
205 @rtype: L{TorsionPredictionInfo}
206 """
207
208 residue = self._residue(rank)
209 rep = residue.filter(threshold=self.threshold, extend=self.extend)
210
211 if rep is None:
212 return None
213
214 else:
215 fragment = rep.centroid
216 torsion = fragment.torsion_at(rank, rank)[0]
217 ss = fragment.sec_structure_at(rank, rank)[0]
218
219 return TorsionPredictionInfo(rank, rep.confidence, torsion, ss, primary=True)
220
222 """
223 Extract torsion angles from all L{ClusterRep}s, covering residue C{#rank}.
224
225 @param rank: target residue rank
226 @type rank: int
227
228 @return: L{TorsionPredictionInfo} instances, sorted by confidence
229 @rtype: tuple of L{TorsionPredictionInfo}
230 """
231
232 if not self._initialized:
233 self.init()
234
235 prediction = []
236
237 for rep in self._reps.values():
238
239 if rep.centroid.qstart <= rank <= rep.centroid.qend:
240
241 fragment = rep.centroid
242 torsion = fragment.torsion_at(rank, rank)[0]
243 ss = fragment.sec_structure_at(rank, rank)[0]
244 info = TorsionPredictionInfo(rank, rep.confidence, torsion, ss)
245
246 if rep is self._reps.get(rank, None):
247 info.primary = True
248
249 prediction.append(info)
250
251 prediction.sort(reverse=True)
252 return tuple(prediction)
253
255 """
256 Filter the current fragment map and create a new, completely flat,
257 non-overlapping map built from centroids, assigned iteratively by
258 decreasing confidence. Centroids with lower confidence which overlap
259 with previously assigned centroids will be trimmed to fill existing
260 gaps only.
261
262 @return: L{TorsionPredictionInfo} instances, one for each target residue
263 @rtype: tuple of L{TorsionPredictionInfo}
264 """
265
266 if not self._initialized:
267 self.init()
268
269 prediction = []
270 slots = set(range(1, self.target.length + 1))
271
272 reps = list(self._reps.values())
273 reps.sort(key=lambda i: i.confidence, reverse=True)
274
275 for rep in reps:
276
277 for rank in range(rep.centroid.qstart, rep.centroid.qend + 1):
278 if rank in slots:
279 torsion = rep.centroid.torsion_at(rank, rank)[0]
280 ss = rep.centroid.sec_structure_at(rank, rank)[0]
281 info = TorsionPredictionInfo(rank, rep.confidence, torsion, ss, primary=True)
282
283 prediction.append(info)
284 slots.remove(rank)
285
286 for rank in slots:
287 prediction.append(TorsionPredictionInfo(rank, 0, None))
288
289 prediction.sort(key=lambda i: i.rank)
290 return tuple(prediction)
291
293 """
294 Extract all torsion angles coming from all fragments, which had survived
295 the filtering and cover residue C{#rank}.
296
297 @param rank: target residue rank
298 @type rank: int
299
300 @return: all L{TorsionAngles} for a cluster at the specified residue
301 @rtype: tuple of L{TorsionAngles}
302 """
303
304 if not self._initialized:
305 self.init()
306 if rank not in self._clusters:
307 return tuple()
308
309 angles = []
310
311 for node in self._clusters[rank]:
312 fragment = node.fragment
313 torsion = fragment.torsion_at(rank, rank)[0]
314 angles.append(torsion)
315
316 return tuple(angles)
317
320 """
321 Struct container for a single torsion angle prediction.
322
323 @param rank: target residue rank
324 @type rank: int
325 @param confidence: confidence of prediction
326 @type confidence: float
327 @param torsion: assigned phi/psi/omega angles
328 @type torsion: L{TorsionAngles}
329 @param dssp: assigned secondary structure
330 @type dssp: L{SecondaryStructureElement}
331 @param primary: if True, designates that the assigned angles are extracted
332 from the L{ClusterRep} at residue C{#rank}; otherwise: the
333 angles are coming from another, overlapping L{ClusterRep}
334
335 """
336
338
339 self.rank = rank
340 self.confidence = confidence
341 self.torsion = torsion
342 self.primary = primary
343 self.dssp = dssp
344
346 """
347 @return: convert this prediction to a tuple: (confidence, phi, psi, omega)
348 @rtype: tuple
349 """
350 return tuple([self.confidence, self.torsion.phi, self.torsion.psi, self.torsion.omega])
351
353 return '<TorsionPredictionInfo: {0.confidence:6.3f} at #{0.rank}>'.format(self)
354
357
369
380
382 """
383 Represents a protein structure prediction target.
384
385 @param id: target sequence ID, in PDB accnC format
386 @type id: str
387 @param length: total target sequence length
388 @type length: int
389 @param residues: a list, containing target's residues. See also
390 L{Target.from_sequence}
391 @type residues: iterable of L{csb.bio.structure.ProteinResidue}s
392 """
393
395
396 self._id = id
397 self._accession = id[:-1]
398 self._chain_id = id[-1]
399 self._length = length
400 self._overlap = overlap
401 self._factory = factory
402
403 self._assignments = csb.core.ReadOnlyCollectionContainer(type=Assignment)
404 self._errors = csb.core.CollectionContainer()
405
406 resi = [factory.residue(native) for native in residues]
407 self._residues = csb.core.ReadOnlyCollectionContainer(items=resi,
408 type=TargetResidue, start_index=1)
409
410 @staticmethod
412 """
413 Factory, which builds L{Target} objects from a bare sequence.
414
415 @param sequence: target's sequence
416 @type sequence: L{csb.bio.sequence.AbstractSequence}, str or iterable
417
418 @rtype: L{Target}
419 """
420
421 if isinstance(sequence, csb.bio.sequence.Sequence):
422 sequence = sequence.sequence
423
424 residues = []
425
426 for rn, aa in enumerate(sequence, start=1):
427 residue = csb.bio.structure.ProteinResidue(rank=rn, type=aa)
428 residues.append(residue)
429
430 return Target(id, len(residues), residues)
431
432 @staticmethod
434 """
435 Factory, which builds L{Target} objects from an HMM profile.
436
437 @param hmm: target's HMM
438 @type hmm: L{csb.bio.hmm.ProfileHMM}
439
440 @rtype: L{Target}
441 """
442
443 residues = [ r.clone() for r in hmm.residues ]
444 return Target(hmm.id, hmm.layers.length, residues)
445
446 @staticmethod
451
452 @property
455
456 @property
459
460 @property
463
464 @property
467
468 @property
471
472 @property
475
476 @property
479
480 @property
483
484 @property
487
488 @property
491
493 """
494 Add a new fragment match.
495 @param fragment: fragment to assign
496 @type fragment: L{Assignment}
497 """
498
499 if not 1 <= fragment.qstart <= fragment.qend <= len(self._residues):
500 raise ValueError("Fragment out of range")
501
502 self._assignments._append_item(fragment)
503
504 for rank in range(fragment.qstart, fragment.qend + 1):
505 ai = ResidueAssignmentInfo(fragment, rank)
506 self._residues[rank].assign(ai)
507
509 """
510 Assign a bunch of fragments at once.
511 @type fragments: iterable of L{Assignment}s
512 """
513 for frag in fragments:
514 self.assign(frag)
515
517 """
518 Filter the current fragment map using a L{FragmentCluster}.
519
520 @param threshold: cluster RMSD threshold (see L{FragmentCluster})
521 @type threshold: float
522 @param extend: pick extended alternatives where possible (default=False)
523 @type extend: bool
524
525 @return: a new target, containing only cluster centroids/reps
526 @rtype: L{Target}
527 """
528
529 target = self.clone()
530
531 for residue in self.residues:
532 rep = residue.filter(threshold=threshold, extend=extend)
533
534 if rep is not None:
535 target.assign(rep.centroid)
536
537 return target
538
548
550
552
553 super(ChemShiftTarget, self).__init__(id, length, residues, overlap=overlap,
554 factory=ChemShiftAssignmentFactory())
555
557
558 if not 1 <= fragment.qstart <= fragment.qend <= len(self._residues):
559 raise ValueError("Fragment out of range")
560
561 self._assignments._append_item(fragment)
562
563 rank = fragment.qstart
564 ai = ResidueAssignmentInfo(fragment, rank)
565 self._residues[rank].assign(ai)
566
571
573 """
574 Wrapper around L{Target}'s native residues. Decorates them with additional,
575 fragment-related methods.
576
577 @type native_residue: L{csb.bio.structure.ProteinResidue}
578 """
579
581
582 self._type = native_residue.type
583 self._native = native_residue.clone()
584 self._assignments = csb.core.ReadOnlyCollectionContainer(type=ResidueAssignmentInfo)
585
586 @property
589
590 @property
593
594 @property
597
600
602 """
603 Filter all fragments, covering this position in the L{Target} using a
604 L{FragmentCluster}.
605
606 @param method: one of the L{Metrics} members (default=L{Metrics.RMSD})
607 @type method: str
608 @param threshold: cluster RMSD threshold (see L{FragmentCluster})
609 @type threshold: float
610 @param extend: pick extended alternative where possible (default=False)
611 @type extend: bool
612
613 @return: cluster's representative (if converged) or None
614 @rtype: L{ClusterRep} or None
615 """
616
617 try:
618 nodes = []
619 for ai in self.assignments:
620 node = ClusterNode.create(ai.fragment, method, extend)
621 nodes.append(node)
622
623 cluster = FragmentCluster(nodes, threshold=threshold)
624
625 center = cluster.shrink(minitems=0)
626 if center.has_alternative:
627 center.exchange()
628
629 return center
630
631 except (ClusterExhaustedError, ClusterDivergingError):
632 return None
633
635 """
636 @return: the fragment with the lowest RMSD at this position in the L{Target}
637 @rtype: L{Assignment}
638 """
639
640 best = None
641
642 for ai in self.assignments:
643 a = ai.fragment
644 if a.length < FragmentCluster.MIN_LENGTH:
645 continue
646 if best is None or a.rmsd < best.rmsd:
647 best = a
648 elif a.rmsd == best.rmsd and a.length > best.length:
649 best = a
650
651 return best
652
654 """
655 @return: the longest fragment, covering the current position
656 @rtype: L{Assignment}
657 """
658 best = None
659
660 for q in self.assignments:
661 if best is None or (q.fragment.length > best.length):
662 best = q.fragment
663
664 return best
665
667 """
668 @return: the residue-wise precision of the fragment library at the
669 current position (percentage).
670
671 @param threshold: true-positive RMSD cutoff (default=1.5)
672 @type threshold: float
673 @rtype: float
674 """
675
676 if self.assignments.length < 1:
677 return None
678 else:
679 positive = [a for a in self.assignments if a.fragment.rmsd <= threshold]
680 pos = len(positive) * 100.0 / self.assignments.length
681
682 return pos
683
685
687
688 best = None
689
690 for ai in self.assignments:
691 a = ai.fragment
692
693 if a.score < ChemShiftAssignment.BIT_SCORE_THRESHOLD * a.window:
694 continue
695
696 if best is None or a.score > best.score:
697 best = a
698 elif a.score == best.score and a.length > best.length:
699 best = a
700
701 return best
702
705
707
708 if not assignment.qstart <= rank <= assignment.qend:
709 raise ValueError('Rank {0} is not matched by this assignment')
710
711 self._assignment = assignment
712 self._rank = rank
713 self._relrank = rank - assignment.qstart
714
715 @property
717 return self._assignment.backbone[self._relrank]
718
719 @property
722
724 """
725 Represents a match between a fragment and its target.
726
727 @param source: source structure (must have torsion angles precomputed)
728 @type source: L{csb.bio.structure.Chain}
729 @param start: start position in C{source} (rank)
730 @type start: int
731 @param end: end position in C{source} (rank)
732 @type end: int
733 @param id: fragment ID
734 @type id: str
735 @param qstart: start position in target (rank)
736 @type qstart: int
737 @param qend: end position in target (rank)
738 @type qend: int
739 @param probability: probability of assignment
740 @type probability: float
741 @param rmsd: RMSD of the fragment, compared to target's native structure
742 @type rmsd: float
743 """
744
745 - def __init__(self, source, start, end, qstart, qend, id=None, probability=None, rmsd=None,
746 tm_score=None, score=None, neff=None, internal_id=None):
747
748 assert source.has_torsion
749 sub = source.subregion(start, end, clone=True)
750 try:
751 calpha = [r.atoms['CA'].vector.copy() for r in sub.residues]
752 except csb.core.ItemNotFoundError:
753 raise csb.bio.structure.Broken3DStructureError()
754 torsion = [r.torsion.copy() for r in sub.residues]
755
756 self._calpha = csb.core.ReadOnlyCollectionContainer(items=calpha, type=numpy.ndarray)
757 self._torsion = torsion
758 self._sequence = sub.sequence
759
760 self._source_id = source.accession[:4] + source.id
761 self._start = start
762 self._end = end
763
764 self._score = score
765 self._neff = neff
766 self._ss = None
767
768 self.internal_id = internal_id
769
770 if id is None:
771 id = "{0}:{1}-{2}".format(self.source_id, self.start, self.end)
772
773 super(Assignment, self).__init__(id, qstart, qend, probability, rmsd, tm_score, None)
774
775 self._ss = SecondaryStructure('-' * self.length)
776
777 @staticmethod
779 """
780 Create a new L{Assignment} given a source rosetta fragment.
781
782 @param fragment: rosetta fragment
783 @type fragment: L{RosettaFragment}
784 @param provider: PDB database provider
785 @type provider: L{StructureProvider}
786
787 @rtype: L{Assignment}
788 """
789 try:
790 structure = provider.get(fragment.accession)
791 except KeyError:
792 structure = provider.get(fragment.source_id)
793 source = structure.chains[fragment.chain]
794 source.compute_torsion()
795
796 id = "{0}:{1}-{2}".format(fragment.source_id, fragment.start, fragment.end)
797
798 return Assignment(source, fragment.start, fragment.end,
799 fragment.qstart, fragment.qend, id, 0, 0)
800
801 @property
804
805 @property
808
809 @property
812
813 @property
816
817 @property
820
821 @property
824
825 @property
828
829 @property
832
833 @property
836 @secondary_structure.setter
838
839 if isinstance(value, csb.core.string):
840 value = csb.bio.structure.SecondaryStructure(value)
841 if len(str(value)) != self.length:#(value.end - value.start + 1) != self.length:
842 raise ValueError("Invalid secondary structure length", len(str(value)), self.length )
843
844 self._ss = value
845
847 """
848 Apply rotation/translation to fragment's coordinates in place
849 """
850
851 for ca in self.backbone:
852 newca = numpy.dot(ca, numpy.transpose(rotation)) + translation
853 for i in range(3):
854 ca[i] = newca[i]
855
857
858 if not (self.qstart <= qstart <= qend <= self.qend):
859 raise ValueError('Region {0}..{1} is out of range {2.qstart}..{2.qend}'.format(qstart, qend, self))
860
862 """
863 @return: True if the fragment is centered around position=C{rank}
864 @rtype: bool
865 """
866
867 if self.qstart < rank < self.qend:
868 if (rank - self.qstart + 1) > 0.4 * (self.qend - self.qstart + 1):
869 return True
870
871 return False
872
874 """
875 @return: the CA coordinates of the fragment at the specified subregion.
876 @rtype: list
877 """
878
879 self._check_range(qstart, qend)
880
881 relstart = qstart - self.qstart
882 relend = qend - self.qstart + 1
883
884 return self.backbone[relstart : relend]
885
887 """
888 @return: torsion angles of the fragment at the specified subregion
889 @rtype: list
890 """
891 self._check_range(qstart, qend)
892
893 relstart = qstart - self.qstart
894 relend = qend - self.qstart + 1
895
896 return self.torsion[relstart : relend]
897
899 """
900 @return: secondary structure of the fragment at the specified subregion
901 @rtype: list
902 """
903
904 self._check_range(qstart, qend)
905 start = qstart - self.qstart + 1
906 end = qend - self.qstart + 1
907
908 return self.secondary_structure.scan(start, end, loose=True, cut=True)
909
911 """
912 @type other: L{Assignment}
913 @return: target positions, covered by both C{self} and C{other}
914 @rtype: set of int
915 """
916
917 qranks = set(range(self.qstart, self.qend + 1))
918 sranks = set(range(other.qstart, other.qend + 1))
919
920 return qranks.intersection(sranks)
921
923 """
924 @return: the CA RMSD between C{self} and C{other}.
925
926 @param other: another fragment
927 @type other: L{Assignment}
928 @param min_overlap: require at least that number of overlapping residues
929 (return None if not satisfied)
930 @type min_overlap: int
931
932 @rtype: float
933 """
934 if self is other:
935 return 0
936
937 common = self.overlap(other)
938
939 if len(common) >= min_overlap:
940
941 qstart, qend = min(common), max(common)
942
943 q = self.backbone_at(qstart, qend)
944 s = other.backbone_at(qstart, qend)
945
946 if len(q) > 0 and len(s) > 0:
947 return csb.bio.utils.rmsd(numpy.array(q), numpy.array(s))
948
949 return None
950
952 """
953 @return: the normalized CA RMSD between C{self} and C{other}.
954
955 @param other: another fragment
956 @type other: L{Assignment}
957 @param min_overlap: require at least that number of overlapping residues
958 (return None if not satisfied)
959 @type min_overlap: int
960
961 @rtype: float
962 """
963
964 common = self.overlap(other)
965
966 if len(common) >= min_overlap:
967
968 qstart, qend = min(common), max(common)
969
970 q = self.backbone_at(qstart, qend)
971 s = other.backbone_at(qstart, qend)
972
973 if len(q) > 0 and len(s) > 0:
974 return csb.bio.utils.rmsd(q, s) / RANDOM_RMSD[ len(common) ]
975
976 return None
977
979 """
980 @return: the MDA (maximum deviation in torsion angles) between
981 C{self} and C{other}.
982
983 @param other: another fragment
984 @type other: L{Assignment}
985 @param min_overlap: require at least that number of overlapping residues
986 (return None if not satisfied)
987 @type min_overlap: int
988
989 @rtype: float
990 """
991 common = self.overlap(other)
992
993 if len(common) >= min_overlap:
994
995 qstart, qend = min(common), max(common)
996
997 q = self.torsion_at(qstart, qend)
998 s = other.torsion_at(qstart, qend)
999
1000 if len(q) > 0 and len(s) > 0:
1001
1002 maxphi = max(numpy.abs(i.phi - j.phi) for i, j in zip(q, s)[1:]) # phi: 2 .. L
1003 maxpsi = max(numpy.abs(i.psi - j.psi) for i, j in zip(q, s)[:-1]) # psi: 1 .. L-1
1004
1005 return max(maxphi, maxpsi)
1006
1007 return None
1008
1011
1012 BIT_SCORE_THRESHOLD = 1.1
1013
1015
1016 self._window = window
1017
1018 super(ChemShiftAssignment, self).__init__(
1019 source, start, end, qstart, qend, id=None, probability=1.0,
1020 rmsd=rmsd, tm_score=None, score=score, neff=None, internal_id=None)
1021
1022 @property
1025
1028
1031
1034
1036 """
1037 Provides clustering/filtering of the fragments, covering a common residue
1038 in the target. Clustering is done via iterative shrinking of the cluster.
1039 At each iteration, node rejection (deletion) is attempted for each node. The
1040 node rejection, causing the most significant drop in the average pairwise
1041 distance (RMSD) in the cluster, is retained. This procedure is repeated
1042 until: 1) the average pairwise RMSD drops below the C{threshold} (converged),
1043 2) the cluster gets exhausted or 3) node rejection no longer
1044 causes a drop in the average distance (not converging).
1045
1046 @param items: cluster members
1047 @type items: iterable of L{ClusterNode}s
1048 @param threshold: RMSD threshold; continue shrinking until the mean distance
1049 drops below this value (default=1.5)
1050 @type threshold: float
1051 @param connectedness: when calculating centroids, consider only nodes
1052 connected to at least c% of all surviving vertices
1053 (default=0.5)
1054 @type connectedness: float
1055 """
1056
1057 MIN_LENGTH = 6
1058
1060
1061 items = set(i for i in items if i.fragment.length >= FragmentCluster.MIN_LENGTH)
1062
1063 self._matrix = {}
1064 self._threshold = float(threshold)
1065 self._connectedness = float(connectedness)
1066 self._weight = 0
1067 self._edges = 0
1068
1069 visited = set()
1070
1071 for i in items:
1072 self._matrix.setdefault(i, {})
1073
1074 for j in items:
1075 self._matrix.setdefault(j, {})
1076
1077 if (j, i) not in visited:
1078 visited.add((i, j))
1079 distance = i.distance(j)
1080
1081 if distance is not None:
1082 self._matrix[i][j] = distance
1083 self._matrix[j][i] = distance
1084 i.weight += distance
1085 j.weight += distance
1086
1087 self._weight += distance
1088 self._edges += 1
1089
1090 self._items = set(self._matrix.keys())
1091
1092 if len(self._items) < 1:
1093 raise ClusterEmptyError()
1094
1095 self._initcount = self.count
1096
1097 @property
1100
1101 @property
1104
1105 @property
1107 return tuple(i.fragment for i in self._items)
1108
1109 @property
1112 @threshold.setter
1114 self._threshold = float(value)
1115
1116 @property
1119
1121
1122 d = []
1123
1124 for i in self._matrix:
1125 if skip is i:
1126 continue
1127
1128 for j in self._matrix[i]:
1129 if skip is not j:
1130 d.append(self._matrix[i][j])
1131
1132 return d
1133
1140
1142 """
1143 @return: the current mean distance in the cluster
1144 @rtype: float
1145 """
1146 if self._edges == 0:
1147 raise ClusterExhaustedError()
1148
1149 if not skip:
1150 return float(self._weight) / self._edges
1151
1152 else:
1153 weight = self._weight - skip.weight
1154 edges = self._edges - len(self._matrix[skip])
1155
1156 if edges < 1:
1157 return 0
1158 else:
1159 return float(weight) / edges
1160
1162 """
1163 @return: the current representative fragment
1164 @rtype: L{ClusterRep}
1165
1166 @note: the cluster rep is the node with the lowest average distance
1167 to all other nodes. If a fixed fragment exists, structurally similar
1168 to the rep, but longer, this fragment may be suggested as an alternative
1169 (see also L{ClusterRep}).
1170 """
1171
1172 alt = None
1173 cen = None
1174 avg = None
1175
1176 for i in self._matrix:
1177 edges = len(self._matrix[i]) or (1.0 / self.count)
1178 curravg = float(i.weight) / edges
1179 conn = len(self._matrix[i]) / float(self.count)
1180
1181 if avg is None or (curravg < avg and conn >= self.connectedness):
1182 avg = curravg
1183 cen = i
1184 elif curravg == avg:
1185 if i.fragment.length > cen.fragment.length:
1186 cen = i
1187
1188 d = self._distances()
1189 mean = numpy.mean(d)
1190 cons = sum(1.0 for i in d if i <= self.threshold) / len(d)
1191
1192 for i in self._matrix:
1193 if i is not cen and i.fixed and i.fragment.length > cen.fragment.length:
1194 distance = self._distance(i, cen)
1195 if distance is not None and distance < 0.5 * self.threshold:
1196 if alt is None or alt.fragment.length < i.fragment.length:
1197 alt = i
1198
1199 return ClusterRep(cen, mean, cons, len(self._matrix[cen]), alternative=alt,
1200 rejections=(self._initcount - self.count))
1201
1203 """
1204 Remove C{item} from the cluster.
1205
1206 @type item: L{ClusterNode}
1207 @raise ClusterExhaustedError: if this is the last remaining item
1208 """
1209 if self.count == 1:
1210 raise ClusterExhaustedError()
1211
1212 assert not item.fixed
1213
1214 for i in self._matrix:
1215 if item in self._matrix[i]:
1216 distance = self._matrix[i][item]
1217 self._weight -= distance
1218 i.weight -= distance
1219
1220 del self._matrix[i][item]
1221 self._edges -= 1
1222
1223 del self._matrix[item]
1224 self._items.remove(item)
1225
1227 """
1228 Shrink the cluster by a single node.
1229
1230 @return: True on successful shrink, False otherwise (e.g. if
1231 already converged)
1232 @rtype: bool
1233 @raise ClusterExhaustedError: if exhausted
1234 @raise ClusterDivergingError: if not converging
1235 """
1236
1237 mean = self.mean()
1238 if mean <= self.threshold or self.count == 1:
1239 return False # already shrunk enough
1240
1241 m = {}
1242
1243 for i in self._matrix:
1244 if not i.fixed:
1245 newmean = self.mean(skip=i)
1246 m[newmean] = i
1247
1248 if len(m) == 0: # only fixed items remaining
1249 raise ClusterExhaustedError()
1250
1251 newmean = min(m)
1252
1253 if newmean > mean:
1254 raise ClusterDivergingError() # can't converge, usually when fixed items are too far away from the average
1255 elif newmean < mean:
1256 junk = m[newmean]
1257 self.reject(junk)
1258 return True # successful shrink
1259 else:
1260 return False # converged
1261
1263 """
1264 Start automatic shrinking.
1265
1266 @param minitems: absolute minimum of the number of nodes in the cluster
1267 @type minitems: int
1268
1269 @return: cluster's representative: the node with the lowest average
1270 distance to all other nodes in the cluster
1271 @rtype: L{ClusterRep}
1272
1273 @raise ClusterExhaustedError: if C{self.count} < C{minitems} and
1274 still not converged
1275 """
1276
1277 if self.count > minitems:
1278
1279 while self.shrinkone():
1280 if self.count <= minitems:
1281 raise ClusterExhaustedError()
1282 else:
1283 raise ClusterExhaustedError()
1284
1285 return self.centroid()
1286
1288 """
1289 Cluster node.
1290
1291 @param fragment: fragment
1292 @type fragment: L{Assignment}
1293 @param distance: distance metric (a L{Metrics} member, default is RMSD)
1294 @type distance: str
1295 @param fixed: mark this node as fixed (cannot be rejected)
1296 @type fixed: bool
1297 """
1298
1299 FIXED = 0.7
1300
1301 @staticmethod
1303 """
1304 Create a new L{ClusterNode} given a specified C{Assignment}. If this
1305 assignment is a high probability match, define it as a fixed fragment.
1306
1307 @rtype: L{ClusterNode}
1308 """
1309 if fragment.probability > ClusterNode.FIXED and fragment.length >= FragmentCluster.MIN_LENGTH:
1310 return ClusterNode(fragment, distance=method, fixed=extend)
1311 else:
1312 return ClusterNode(fragment, distance=method, fixed=False)
1313
1315
1316 if fixed and fragment.length < FragmentCluster.MIN_LENGTH:
1317 raise ValueError("Can't fix a short fragment")
1318
1319 self.fragment = fragment
1320 self.fixed = bool(fixed)
1321 self.weight = 0
1322
1323 self._distance = getattr(self.fragment, distance)
1324
1326 """
1327 @return: the distance between self and another node
1328 @type other: L{ClusterNode}
1329 @rtype: float
1330 """
1331 return self._distance(other.fragment)
1332
1334 """
1335 Cluster's representative (centroid) node. This object carries the
1336 result of shrinking itself.
1337
1338 @param centroid: rep node
1339 @type centroid: L{ClusterNode}
1340 @param mean: current mean distance in the cluster
1341 @type mean: float
1342 @param consistency: percentage of pairwise distances below the RMSD C{threshold}
1343 @type consistency: float
1344 @param count: current number of nodes in the cluster
1345 @type count: int
1346 @param rejections: total number of rejections
1347 @type rejections: int
1348 @param alternative: suggested cluster rep alternative (e.g. structurally
1349 similar to the centroid, but longer)
1350 @type param:
1351 """
1352
1354
1355 if isinstance(centroid, ClusterNode):
1356 centroid = centroid.fragment
1357 if isinstance(alternative, ClusterNode):
1358 alternative = alternative.fragment
1359
1360 self._centroid = centroid
1361 self._alternative = alternative
1362 self._mean = mean
1363 self._consistency = consistency
1364 self._count = count
1365 self._rejections = rejections
1366
1367 @property
1369 """
1370 Confidence of assignment: log10(count) * consistency
1371 """
1372 if self.count <= 0 or self.count is None or self.consistency is None:
1373 return 0
1374 else:
1375 return numpy.log10(self.count) * self.consistency
1376
1377 @property
1380
1381 @property
1384
1385 @property
1388
1389 @property
1392
1393 @property
1396
1397 @property
1400
1401 @property
1404
1415
1418
1420
1421 self.residue = residue
1422 self.confidence = confidence
1423 self.confident = confident
1424 self.gap = gap
1425 self.count = count
1426 self.rep = rep
1427
1428 @property
1431
1432 @property
1435
1436 @property
1438 if self.rep:
1439 return self.rep.torsion_at(self.rank, self.rank)[0]
1440 else:
1441 return None
1442
1445 """
1446 Simplifies the construction of fragment libraries.
1447 """
1448
1452
1454 """
1455 Build a fragment library given a L{Target} and its L{Assignment}s.
1456
1457 @param target: target protein
1458 @type target: L{Target}
1459
1460 @rtype: L{RosettaFragmentMap}
1461 """
1462
1463 frag_factory = self.rosetta.RosettaFragment
1464 fragments = list(map(frag_factory.from_object, target.matches))
1465 #fragments = [ frag_factory.from_object(f) for f in target.matches if f.length >= 6 ]
1466 fragments.sort()
1467
1468 return self.rosetta.RosettaFragmentMap(fragments, target.length)
1469
1471 """
1472 Build a fixed-length fragment library from a list of
1473 variable-length L{Assignment}s.
1474
1475 @param fragments: source fragments
1476 @type fragments: iterable of L{RosettaFragment}s
1477 @param window: fixed-length fragment size (for classic Rosetta: choose 9)
1478 @type window: int
1479
1480 @return: fixed-length fragment library
1481 @rtype: L{RosettaFragmentMap}
1482 """
1483
1484 frags = []
1485
1486 for f in fragments:
1487 for qs in range(f.qstart, f.qend - window + 1):
1488 frags.append(f.subregion(qs, qs + window - 1))
1489
1490 return self.rosetta.RosettaFragmentMap(frags)
1491
1493 """
1494 Complement C{target}'s assignments with C{filling} (e.g. rosetta fragments).
1495 The regions to be complemented are determined by calculating the confidence
1496 at each residue (by filtering).
1497
1498
1499 @param target: target protein
1500 @type target: L{Target}
1501 @param filling: additional fragments to place in the low-conf regions
1502 @type filling: L{RosettaFragmentMap} or iterable of L{RosettaFragment}
1503 @param threshold: confidence threshold
1504 @type threshold: float
1505
1506 @return: complemented fragment library
1507 @rtype: L{RosettaFragmentMap}
1508 """
1509
1510 fragmap = self.make_fragset(target)
1511 covered = set()
1512
1513 for r in target.residues:
1514
1515 if r.assignments.length == 0:
1516 if callback:
1517 callback(ResidueEventInfo(r.native, gap=True))
1518 continue
1519
1520 cluster = r.filter()
1521 if cluster is None:
1522 if callback:
1523 callback(ResidueEventInfo(r.native, 0, 0, confident=False))
1524 continue
1525
1526 if cluster.confidence >= threshold:
1527 covered.add(r.native.rank)
1528 confident = True
1529 else:
1530 confident = False
1531
1532 if callback:
1533 callback(ResidueEventInfo(r.native, cluster.confidence, cluster.count, confident))
1534
1535 for r in target.residues:
1536 if r.native.rank not in covered: # true for gaps and low-conf residues
1537 fragmap.mark_unconfident(r.native.rank)
1538
1539 for frag in filling:
1540 fragmap.complement(frag)
1541
1542 return fragmap
1543
1545 """
1546 Builed a filtered fragment library (by clustering), containing only
1547 representative fragments (cluster centroids).
1548
1549 @param target: target protein
1550 @type target: L{Target}
1551 @param extend: if True, pick alternative reps if available
1552 @type extend: bool
1553
1554 @return: filtered fragment library
1555 @rtype: L{RosettaFragmentMap}
1556 """
1557
1558 fragments = []
1559
1560 for r in target.residues:
1561 if r.assignments.length == 0:
1562 if callback:
1563 callback(ResidueEventInfo(r.native, gap=True))
1564 continue
1565
1566 cluster = r.filter(extend=extend)
1567 if cluster is None:
1568 if callback:
1569 callback(ResidueEventInfo(r.native, 0, 0, confident=False))
1570 continue
1571
1572 if extend and cluster.has_alternative:
1573 best = cluster.alternative
1574 else:
1575 best = cluster.centroid
1576
1577 fragment = self.rosetta.RosettaFragment.from_object(best)
1578 fragments.append(fragment)
1579 if callback:
1580 callback(ResidueEventInfo(r.native, cluster.confidence, cluster.count, rep=cluster.centroid))
1581
1582 fragments.sort()
1583 return self.rosetta.RosettaFragmentMap(fragments, target.length)
1584
1586 """
1587 Mix fragments from multiple libraries.
1588
1589 @type fragsets: L{RosettaFragmentMap}
1590 @return: mixed fragment library
1591 @rtype: L{RosettaFragmentMap}
1592 """
1593
1594 fragments = []
1595 length = 0
1596
1597 for fragset in fragsets:
1598 if fragset._length > length:
1599 length = fragset._length
1600
1601 for fragment in fragset:
1602 fragments.append(fragment)
1603
1604 return self.rosetta.RosettaFragmentMap(fragments, length)
1605
1608
1610
1611 FACTORY = None
1612 DSN = None
1613
1615
1616 self.factory = factory or self.__class__.FACTORY
1617 self.cs = dsn or self.__class__.DSN
1618 self.connection = None
1619 self.cursor = None
1620
1622
1623 self.connection = self.factory(self.cs)
1624 try:
1625 self.cursor = self.connection.cursor()
1626 except:
1627 self.connection.close()
1628 raise
1629 return self
1630
1638
1640
1641 self._connection = None
1642
1643 self._parser = StructureParser
1644 self._pdb = FileSystemStructureProvider(pdb_paths)
1645 self._factory = factory
1646
1647 try:
1648 import psycopg2.extras
1649 except ImportError:
1650 raise RuntimeError('Please install the psycopg2 module first')
1651
1652 if connection_string is None:
1653 connection_string = self.connection_string()
1654
1655 BenchmarkAdapter.Connection.FACTORY = psycopg2.extras.DictConnection
1656 BenchmarkAdapter.Connection.DSN = connection_string
1657
1658 @staticmethod
1660
1661 fields = ['dbname={0}'.format(database)]
1662
1663 if host:
1664 fields.append('host={0}'.format(host))
1665 if username:
1666 fields.append('user={0}'.format(username))
1667 fields.append('password={0}'.format(password))
1668
1669 return ' '.join(fields)
1670
1672
1673 with BenchmarkAdapter.Connection() as db:
1674
1675 db.cursor.callproc('reporting."GetTargets"', (benchmark_id,))
1676 return db.cursor.fetchall()
1677
1679
1680 with BenchmarkAdapter.Connection() as db:
1681
1682 db.cursor.callproc('reporting."GetTargetDetails"', (target_id,))
1683 return db.cursor.fetchall()
1684
1686
1687 with BenchmarkAdapter.Connection() as db:
1688
1689 db.cursor.callproc('reporting."GetAssignments"', (target_id, type))
1690 return db.cursor.fetchall()
1691
1693
1694 with BenchmarkAdapter.Connection() as db:
1695
1696 db.cursor.callproc('reporting."GetTargetSecStructureAssignments2"', (target_id, type))
1697 return db.cursor.fetchall()
1698
1700
1701 with BenchmarkAdapter.Connection() as db:
1702
1703 db.cursor.callproc('reporting."GetScores"', (benchmark_id, type))
1704 return db.cursor.fetchall()
1705
1707
1708 with BenchmarkAdapter.Connection() as db:
1709
1710 db.cursor.callproc('reporting."GetCentroids"', (benchmark_id,))
1711 return db.cursor.fetchall()
1712
1714
1715 pdbfile = self._pdb.find(accession)
1716
1717 if not pdbfile and chain:
1718 pdbfile = self._pdb.find(accession + chain)
1719
1720 if not pdbfile:
1721 raise IOError('{0} not found here: {1}'.format(accession, self._pdb))
1722
1723 return self._parser(pdbfile).parse_structure()
1724
1726
1727 info = self.target_details(target_id)
1728 if not info:
1729 raise ValueError('No such Target ID in the database: {0}'.format(target_id))
1730 row = info[0]
1731
1732 id = row["Accession"]
1733 length = float(row["Length"])
1734 overlap = float(row["MaxOverlap"]) / (length or 1.)
1735
1736 native = self.structure(id[:4], id[4]).chains[id[4]]
1737 target = self._factory.target(id, length, native.residues, overlap)
1738
1739 source = None
1740
1741 for row in self.assignments(target_id, type):
1742
1743 src_accession = row['Source'][:4]
1744 src_chain = row['Source'][4]
1745
1746 if source is None or source.accession != src_accession:
1747 try:
1748 source = self.structure(src_accession, src_chain)
1749 except (IOError, ValueError) as ex:
1750 target.errors.append(ex)
1751 continue
1752
1753 if src_chain == '_':
1754 frag_chain = source.first_chain
1755 else:
1756 frag_chain = source.chains[src_chain]
1757 if not frag_chain.has_torsion:
1758 frag_chain.compute_torsion()
1759
1760 fragment = self._factory.assignment(
1761 source=frag_chain,
1762 start=row['SourceStart'],
1763 end=row['SourceEnd'],
1764 id=row['FragmentName'],
1765 qstart=row['Start'],
1766 qend=row['End'],
1767 probability=row['Probability'],
1768 score=row['Score'],
1769 neff=row['Neff'],
1770 rmsd=row['RMSD'],
1771 tm_score=row['TMScore'],
1772 internal_id=row['InternalID'])
1773
1774 target.assign(fragment)
1775
1776 if ss:
1777 self._attach_sec_structure(target, target_id, type)
1778
1779 return target
1780
1782
1783 ss = {}
1784
1785 for row in self.assignments_sec_structure(target_id, type):
1786 frag_id, state = row["AssignmentID"], row["DSSP"]
1787 if row[frag_id] not in ss:
1788 ss[frag_id] = []
1789
1790 ss[frag_id].append(state)
1791
1792 for a in target.matches:
1793 if a.internal_id in ss:
1794 dssp = ''.join(ss[a.internal_id])
1795 a.secondary_structure = dssp
1796
| Home | Trees | Indices | Help |
|
|---|
| Generated by Epydoc 3.0.1 on Tue Jul 4 20:19:09 2017 | http://epydoc.sourceforge.net |