1 """
2 APIs for working with protein structure fragments and libraries.
3
4 This package contains the nuts and bolts of HHfrag. Everything here revolves
5 around the L{Target} class, which describes a protein structure prediction
6 target. One typically assigns fragments (L{Assignment}s) to the target and then
7 builds a fragment library with L{RosettaFragsetFactory}.
8 """
9
10 import os
11 import numpy
12
13 import csb.io
14 import csb.core
15 import csb.bio.utils
16 import csb.bio.structure
17 import csb.bio.sequence
18
19 from csb.bio.structure import SecondaryStructure
20 from csb.bio.io.wwpdb import FileSystemStructureProvider, StructureParser
29
35
36 RANDOM_RMSD = { 5: 1.8749005857255376, 6: 2.4314283686276261, 7: 2.9021135267789608, 8: 3.2477716200172715, 9: 3.5469606556031708, 10: 3.8295465524456329,
37 11: 4.1343107114131783, 12: 4.3761697929053014, 13: 4.6707299668248394, 14: 4.9379016881069733, 15: 5.1809028645084911, 16: 5.4146957142595662,
38 17: 5.7135948448156988, 18: 5.9597935432566782, 19: 6.1337340535741962, 20: 6.3962825155503271, 21: 6.6107937773415166, 22: 6.8099096274123401,
39 23: 7.0435583846849639, 24: 7.2160956482560970, 25: 7.4547896324594962, 26: 7.6431870072434211, 27: 7.8727812194173836, 28: 8.0727393298443637,
40 29: 8.2551450998965326, 30: 8.4413583511786587, 31: 8.5958719774122052, 32: 8.7730435506242408, 33: 8.9970648837941649, 34: 9.1566521405105163,
41 35: 9.2828620878454728, 36: 9.4525824357923405, 37: 9.6322126445253300, 38: 9.7851684750961176, 39: 9.9891454649821476, 40: 10.124373939352028,
42 41: 10.284348528344765, 42: 10.390457305096271, 43: 10.565792044674239, 44: 10.676532740033737, 45: 10.789537132283652, 46: 11.004475543757550,
43 47: 11.064541647783571, 48: 11.231219875286985, 49: 11.319222637391441, 50: 11.485478165340824, 51: 11.607522494435521, 52: 11.700268836069840,
44 53: 11.831245255954073, 54: 11.918975893263905 }
47 """
48 Base class, representing a match between a fragment and its target.
49 """
50
51 - def __init__(self, id, qstart, qend, probability, rmsd, tm_score, qlength):
60
61 @property
64
65 @property
68
69 @property
72
73 @property
76
77 @property
80
81 @property
84
85 @property
87 return self._probability
88
89 @property
92
93 @property
95 raise NotImplementedError()
96
97 @property
99 raise NotImplementedError()
100
101 @property
103 raise NotImplementedError()
104
107 """
108 Fragment-based phi/psi angles predictor.
109
110 @param target: target protein, containing fragment assignments
111 @type target: L{Target}
112 @param threshold: RMSD distance threshold for L{FragmentCluster}-based filtering
113 @type threshold: float
114 @param extend: pick alternative, longer cluster reps, if possible
115 @type extend: bool
116 @param init: populate all L{FragmentCluster}s on instantiation. If False, this step
117 will be performed on demand (the first time C{predictor.compute()} is invoked)
118
119 @note: if C{init} is False, the first call to C{predictor.compute()} might take a long
120 time. Subsequent calls will be very fast.
121 """
122
123 - def __init__(self, target, threshold=1.5, extend=False, init=False):
124
125 if not isinstance(target, Target):
126 raise TypeError(target)
127 if target.matches.length == 0:
128 raise ValueError('This target has no fragment assignments')
129
130 self._target = target
131 self._threshold = float(threshold)
132 self._extend = bool(extend)
133
134 self._initialized = False
135 self._reps = {}
136 self._clusters = {}
137
138 if init:
139 self.init()
140
141 @property
144
145 @property
147 return self._threshold
148
149 @property
152
173
189
197
220
253
255 """
256 Filter the current fragment map and create a new, completely flat,
257 non-overlapping map built from centroids, assigned iteratively by
258 decreasing confidence. Centroids with lower confidence which overlap
259 with previously assigned centroids will be trimmed to fill existing
260 gaps only.
261
262 @return: L{TorsionPredictionInfo} instances, one for each target residue
263 @rtype: tuple of L{TorsionPredictionInfo}
264 """
265
266 if not self._initialized:
267 self.init()
268
269 prediction = []
270 slots = set(range(1, self.target.length + 1))
271
272 reps = list(self._reps.values())
273 reps.sort(key=lambda i: i.confidence, reverse=True)
274
275 for rep in reps:
276
277 for rank in range(rep.centroid.qstart, rep.centroid.qend + 1):
278 if rank in slots:
279 torsion = rep.centroid.torsion_at(rank, rank)[0]
280 ss = rep.centroid.sec_structure_at(rank, rank)[0]
281 info = TorsionPredictionInfo(rank, rep.confidence, torsion, ss, primary=True)
282
283 prediction.append(info)
284 slots.remove(rank)
285
286 for rank in slots:
287 prediction.append(TorsionPredictionInfo(rank, 0, None))
288
289 prediction.sort(key=lambda i: i.rank)
290 return tuple(prediction)
291
293 """
294 Extract all torsion angles coming from all fragments, which had survived
295 the filtering and cover residue C{#rank}.
296
297 @param rank: target residue rank
298 @type rank: int
299
300 @return: all L{TorsionAngles} for a cluster at the specified residue
301 @rtype: tuple of L{TorsionAngles}
302 """
303
304 if not self._initialized:
305 self.init()
306 if rank not in self._clusters:
307 return tuple()
308
309 angles = []
310
311 for node in self._clusters[rank]:
312 fragment = node.fragment
313 torsion = fragment.torsion_at(rank, rank)[0]
314 angles.append(torsion)
315
316 return tuple(angles)
317
320 """
321 Struct container for a single torsion angle prediction.
322
323 @param rank: target residue rank
324 @type rank: int
325 @param confidence: confidence of prediction
326 @type confidence: float
327 @param torsion: assigned phi/psi/omega angles
328 @type torsion: L{TorsionAngles}
329 @param dssp: assigned secondary structure
330 @type dssp: L{SecondaryStructureElement}
331 @param primary: if True, designates that the assigned angles are extracted
332 from the L{ClusterRep} at residue C{#rank}; otherwise: the
333 angles are coming from another, overlapping L{ClusterRep}
334
335 """
336
337 - def __init__(self, rank, confidence, torsion, dssp, primary=False):
344
351
353 return '<TorsionPredictionInfo: {0.confidence:6.3f} at #{0.rank}>'.format(self)
354
357
369
380
381 -class Target(csb.core.AbstractNIContainer):
382 """
383 Represents a protein structure prediction target.
384
385 @param id: target sequence ID, in PDB accnC format
386 @type id: str
387 @param length: total target sequence length
388 @type length: int
389 @param residues: a list, containing target's residues. See also
390 L{Target.from_sequence}
391 @type residues: iterable of L{csb.bio.structure.ProteinResidue}s
392 """
393
409
410 @staticmethod
431
432 @staticmethod
445
446 @staticmethod
451
452 @property
454 return self._residues
455
456 @property
459
460 @property
463
464 @property
466 return self._accession
467
468 @property
470 return self._chain_id
471
472 @property
475
476 @property
479
480 @property
483
484 @property
486 return self._assignments
487
488 @property
490 return self._residues
491
507
509 """
510 Assign a bunch of fragments at once.
511 @type fragments: iterable of L{Assignment}s
512 """
513 for frag in fragments:
514 self.assign(frag)
515
516 - def filter(self, threshold=1.5, extend=False):
517 """
518 Filter the current fragment map using a L{FragmentCluster}.
519
520 @param threshold: cluster RMSD threshold (see L{FragmentCluster})
521 @type threshold: float
522 @param extend: pick extended alternatives where possible (default=False)
523 @type extend: bool
524
525 @return: a new target, containing only cluster centroids/reps
526 @rtype: L{Target}
527 """
528
529 target = self.clone()
530
531 for residue in self.residues:
532 rep = residue.filter(threshold=threshold, extend=extend)
533
534 if rep is not None:
535 target.assign(rep.centroid)
536
537 return target
538
548
550
551 - def __init__(self, id, length, residues, overlap=None):
555
566
571
573 """
574 Wrapper around L{Target}'s native residues. Decorates them with additional,
575 fragment-related methods.
576
577 @type native_residue: L{csb.bio.structure.ProteinResidue}
578 """
579
585
586 @property
589
590 @property
593
594 @property
596 return self._assignments
597
598 - def assign(self, assignment_info):
599 self._assignments._append_item(assignment_info)
600
602 """
603 Filter all fragments, covering this position in the L{Target} using a
604 L{FragmentCluster}.
605
606 @param method: one of the L{Metrics} members (default=L{Metrics.RMSD})
607 @type method: str
608 @param threshold: cluster RMSD threshold (see L{FragmentCluster})
609 @type threshold: float
610 @param extend: pick extended alternative where possible (default=False)
611 @type extend: bool
612
613 @return: cluster's representative (if converged) or None
614 @rtype: L{ClusterRep} or None
615 """
616
617 try:
618 nodes = []
619 for ai in self.assignments:
620 node = ClusterNode.create(ai.fragment, method, extend)
621 nodes.append(node)
622
623 cluster = FragmentCluster(nodes, threshold=threshold)
624
625 center = cluster.shrink(minitems=0)
626 if center.has_alternative:
627 center.exchange()
628
629 return center
630
631 except (ClusterExhaustedError, ClusterDivergingError):
632 return None
633
652
654 """
655 @return: the longest fragment, covering the current position
656 @rtype: L{Assignment}
657 """
658 best = None
659
660 for q in self.assignments:
661 if best is None or (q.fragment.length > best.length):
662 best = q.fragment
663
664 return best
665
667 """
668 @return: the residue-wise precision of the fragment library at the
669 current position (percentage).
670
671 @param threshold: true-positive RMSD cutoff (default=1.5)
672 @type threshold: float
673 @rtype: float
674 """
675
676 if self.assignments.length < 1:
677 return None
678 else:
679 positive = [a for a in self.assignments if a.fragment.rmsd <= threshold]
680 pos = len(positive) * 100.0 / self.assignments.length
681
682 return pos
683
702
705
714
715 @property
717 return self._assignment.backbone[self._relrank]
718
719 @property
721 return self._assignment
722
724 """
725 Represents a match between a fragment and its target.
726
727 @param source: source structure (must have torsion angles precomputed)
728 @type source: L{csb.bio.structure.Chain}
729 @param start: start position in C{source} (rank)
730 @type start: int
731 @param end: end position in C{source} (rank)
732 @type end: int
733 @param id: fragment ID
734 @type id: str
735 @param qstart: start position in target (rank)
736 @type qstart: int
737 @param qend: end position in target (rank)
738 @type qend: int
739 @param probability: probability of assignment
740 @type probability: float
741 @param rmsd: RMSD of the fragment, compared to target's native structure
742 @type rmsd: float
743 """
744
745 - def __init__(self, source, start, end, qstart, qend, id=None, probability=None, rmsd=None,
746 tm_score=None, score=None, neff=None, internal_id=None):
747
748 assert source.has_torsion
749 sub = source.subregion(start, end, clone=True)
750 try:
751 calpha = [r.atoms['CA'].vector.copy() for r in sub.residues]
752 except csb.core.ItemNotFoundError:
753 raise csb.bio.structure.Broken3DStructureError()
754 torsion = [r.torsion.copy() for r in sub.residues]
755
756 self._calpha = csb.core.ReadOnlyCollectionContainer(items=calpha, type=numpy.ndarray)
757 self._torsion = torsion
758 self._sequence = sub.sequence
759
760 self._source_id = source.accession[:4] + source.id
761 self._start = start
762 self._end = end
763
764 self._score = score
765 self._neff = neff
766 self._ss = None
767
768 self.internal_id = internal_id
769
770 if id is None:
771 id = "{0}:{1}-{2}".format(self.source_id, self.start, self.end)
772
773 super(Assignment, self).__init__(id, qstart, qend, probability, rmsd, tm_score, None)
774
775 self._ss = SecondaryStructure('-' * self.length)
776
777 @staticmethod
800
801 @property
804
805 @property
807 return self._sequence
808
809 @property
812
813 @property
815 return self._source_id
816
817 @property
820
821 @property
824
825 @property
828
829 @property
832
833 @property
836 @secondary_structure.setter
845
855
860
862 """
863 @return: True if the fragment is centered around position=C{rank}
864 @rtype: bool
865 """
866
867 if self.qstart < rank < self.qend:
868 if (rank - self.qstart + 1) > 0.4 * (self.qend - self.qstart + 1):
869 return True
870
871 return False
872
874 """
875 @return: the CA coordinates of the fragment at the specified subregion.
876 @rtype: list
877 """
878
879 self._check_range(qstart, qend)
880
881 relstart = qstart - self.qstart
882 relend = qend - self.qstart + 1
883
884 return self.backbone[relstart : relend]
885
887 """
888 @return: torsion angles of the fragment at the specified subregion
889 @rtype: list
890 """
891 self._check_range(qstart, qend)
892
893 relstart = qstart - self.qstart
894 relend = qend - self.qstart + 1
895
896 return self.torsion[relstart : relend]
897
909
911 """
912 @type other: L{Assignment}
913 @return: target positions, covered by both C{self} and C{other}
914 @rtype: set of int
915 """
916
917 qranks = set(range(self.qstart, self.qend + 1))
918 sranks = set(range(other.qstart, other.qend + 1))
919
920 return qranks.intersection(sranks)
921
922 - def rmsd_to(self, other, min_overlap=5):
923 """
924 @return: the CA RMSD between C{self} and C{other}.
925
926 @param other: another fragment
927 @type other: L{Assignment}
928 @param min_overlap: require at least that number of overlapping residues
929 (return None if not satisfied)
930 @type min_overlap: int
931
932 @rtype: float
933 """
934 if self is other:
935 return 0
936
937 common = self.overlap(other)
938
939 if len(common) >= min_overlap:
940
941 qstart, qend = min(common), max(common)
942
943 q = self.backbone_at(qstart, qend)
944 s = other.backbone_at(qstart, qend)
945
946 if len(q) > 0 and len(s) > 0:
947 return csb.bio.utils.rmsd(numpy.array(q), numpy.array(s))
948
949 return None
950
951 - def nrmsd_to(self, other, min_overlap=5):
952 """
953 @return: the normalized CA RMSD between C{self} and C{other}.
954
955 @param other: another fragment
956 @type other: L{Assignment}
957 @param min_overlap: require at least that number of overlapping residues
958 (return None if not satisfied)
959 @type min_overlap: int
960
961 @rtype: float
962 """
963
964 common = self.overlap(other)
965
966 if len(common) >= min_overlap:
967
968 qstart, qend = min(common), max(common)
969
970 q = self.backbone_at(qstart, qend)
971 s = other.backbone_at(qstart, qend)
972
973 if len(q) > 0 and len(s) > 0:
974 return csb.bio.utils.rmsd(q, s) / RANDOM_RMSD[ len(common) ]
975
976 return None
977
978 - def mda_to(self, other, min_overlap=5):
979 """
980 @return: the MDA (maximum deviation in torsion angles) between
981 C{self} and C{other}.
982
983 @param other: another fragment
984 @type other: L{Assignment}
985 @param min_overlap: require at least that number of overlapping residues
986 (return None if not satisfied)
987 @type min_overlap: int
988
989 @rtype: float
990 """
991 common = self.overlap(other)
992
993 if len(common) >= min_overlap:
994
995 qstart, qend = min(common), max(common)
996
997 q = self.torsion_at(qstart, qend)
998 s = other.torsion_at(qstart, qend)
999
1000 if len(q) > 0 and len(s) > 0:
1001
1002 maxphi = max(numpy.abs(i.phi - j.phi) for i, j in zip(q, s)[1:])
1003 maxpsi = max(numpy.abs(i.psi - j.psi) for i, j in zip(q, s)[:-1])
1004
1005 return max(maxphi, maxpsi)
1006
1007 return None
1008
1011
1012 BIT_SCORE_THRESHOLD = 1.1
1013
1014 - def __init__(self, source, start, end, qstart, qend, window, score, rmsd):
1015
1016 self._window = window
1017
1018 super(ChemShiftAssignment, self).__init__(
1019 source, start, end, qstart, qend, id=None, probability=1.0,
1020 rmsd=rmsd, tm_score=None, score=score, neff=None, internal_id=None)
1021
1022 @property
1025
1028
1031
1034
1036 """
1037 Provides clustering/filtering of the fragments, covering a common residue
1038 in the target. Clustering is done via iterative shrinking of the cluster.
1039 At each iteration, node rejection (deletion) is attempted for each node. The
1040 node rejection, causing the most significant drop in the average pairwise
1041 distance (RMSD) in the cluster, is retained. This procedure is repeated
1042 until: 1) the average pairwise RMSD drops below the C{threshold} (converged),
1043 2) the cluster gets exhausted or 3) node rejection no longer
1044 causes a drop in the average distance (not converging).
1045
1046 @param items: cluster members
1047 @type items: iterable of L{ClusterNode}s
1048 @param threshold: RMSD threshold; continue shrinking until the mean distance
1049 drops below this value (default=1.5)
1050 @type threshold: float
1051 @param connectedness: when calculating centroids, consider only nodes
1052 connected to at least c% of all surviving vertices
1053 (default=0.5)
1054 @type connectedness: float
1055 """
1056
1057 MIN_LENGTH = 6
1058
1059 - def __init__(self, items, threshold=1.5, connectedness=0.5):
1096
1097 @property
1099 return len(self._items)
1100
1101 @property
1103 return tuple(self._items)
1104
1105 @property
1107 return tuple(i.fragment for i in self._items)
1108
1109 @property
1111 return self._threshold
1112 @threshold.setter
1114 self._threshold = float(value)
1115
1116 @property
1118 return self._connectedness
1119
1121
1122 d = []
1123
1124 for i in self._matrix:
1125 if skip is i:
1126 continue
1127
1128 for j in self._matrix[i]:
1129 if skip is not j:
1130 d.append(self._matrix[i][j])
1131
1132 return d
1133
1135
1136 if j in self._matrix[i]:
1137 return self._matrix[i][j]
1138 else:
1139 return None
1140
1141 - def mean(self, skip=None):
1142 """
1143 @return: the current mean distance in the cluster
1144 @rtype: float
1145 """
1146 if self._edges == 0:
1147 raise ClusterExhaustedError()
1148
1149 if not skip:
1150 return float(self._weight) / self._edges
1151
1152 else:
1153 weight = self._weight - skip.weight
1154 edges = self._edges - len(self._matrix[skip])
1155
1156 if edges < 1:
1157 return 0
1158 else:
1159 return float(weight) / edges
1160
1162 """
1163 @return: the current representative fragment
1164 @rtype: L{ClusterRep}
1165
1166 @note: the cluster rep is the node with the lowest average distance
1167 to all other nodes. If a fixed fragment exists, structurally similar
1168 to the rep, but longer, this fragment may be suggested as an alternative
1169 (see also L{ClusterRep}).
1170 """
1171
1172 alt = None
1173 cen = None
1174 avg = None
1175
1176 for i in self._matrix:
1177 edges = len(self._matrix[i]) or (1.0 / self.count)
1178 curravg = float(i.weight) / edges
1179 conn = len(self._matrix[i]) / float(self.count)
1180
1181 if avg is None or (curravg < avg and conn >= self.connectedness):
1182 avg = curravg
1183 cen = i
1184 elif curravg == avg:
1185 if i.fragment.length > cen.fragment.length:
1186 cen = i
1187
1188 d = self._distances()
1189 mean = numpy.mean(d)
1190 cons = sum(1.0 for i in d if i <= self.threshold) / len(d)
1191
1192 for i in self._matrix:
1193 if i is not cen and i.fixed and i.fragment.length > cen.fragment.length:
1194 distance = self._distance(i, cen)
1195 if distance is not None and distance < 0.5 * self.threshold:
1196 if alt is None or alt.fragment.length < i.fragment.length:
1197 alt = i
1198
1199 return ClusterRep(cen, mean, cons, len(self._matrix[cen]), alternative=alt,
1200 rejections=(self._initcount - self.count))
1201
1203 """
1204 Remove C{item} from the cluster.
1205
1206 @type item: L{ClusterNode}
1207 @raise ClusterExhaustedError: if this is the last remaining item
1208 """
1209 if self.count == 1:
1210 raise ClusterExhaustedError()
1211
1212 assert not item.fixed
1213
1214 for i in self._matrix:
1215 if item in self._matrix[i]:
1216 distance = self._matrix[i][item]
1217 self._weight -= distance
1218 i.weight -= distance
1219
1220 del self._matrix[i][item]
1221 self._edges -= 1
1222
1223 del self._matrix[item]
1224 self._items.remove(item)
1225
1227 """
1228 Shrink the cluster by a single node.
1229
1230 @return: True on successful shrink, False otherwise (e.g. if
1231 already converged)
1232 @rtype: bool
1233 @raise ClusterExhaustedError: if exhausted
1234 @raise ClusterDivergingError: if not converging
1235 """
1236
1237 mean = self.mean()
1238 if mean <= self.threshold or self.count == 1:
1239 return False
1240
1241 m = {}
1242
1243 for i in self._matrix:
1244 if not i.fixed:
1245 newmean = self.mean(skip=i)
1246 m[newmean] = i
1247
1248 if len(m) == 0:
1249 raise ClusterExhaustedError()
1250
1251 newmean = min(m)
1252
1253 if newmean > mean:
1254 raise ClusterDivergingError()
1255 elif newmean < mean:
1256 junk = m[newmean]
1257 self.reject(junk)
1258 return True
1259 else:
1260 return False
1261
1262 - def shrink(self, minitems=2):
1263 """
1264 Start automatic shrinking.
1265
1266 @param minitems: absolute minimum of the number of nodes in the cluster
1267 @type minitems: int
1268
1269 @return: cluster's representative: the node with the lowest average
1270 distance to all other nodes in the cluster
1271 @rtype: L{ClusterRep}
1272
1273 @raise ClusterExhaustedError: if C{self.count} < C{minitems} and
1274 still not converged
1275 """
1276
1277 if self.count > minitems:
1278
1279 while self.shrinkone():
1280 if self.count <= minitems:
1281 raise ClusterExhaustedError()
1282 else:
1283 raise ClusterExhaustedError()
1284
1285 return self.centroid()
1286
1288 """
1289 Cluster node.
1290
1291 @param fragment: fragment
1292 @type fragment: L{Assignment}
1293 @param distance: distance metric (a L{Metrics} member, default is RMSD)
1294 @type distance: str
1295 @param fixed: mark this node as fixed (cannot be rejected)
1296 @type fixed: bool
1297 """
1298
1299 FIXED = 0.7
1300
1301 @staticmethod
1313
1324
1326 """
1327 @return: the distance between self and another node
1328 @type other: L{ClusterNode}
1329 @rtype: float
1330 """
1331 return self._distance(other.fragment)
1332
1334 """
1335 Cluster's representative (centroid) node. This object carries the
1336 result of shrinking itself.
1337
1338 @param centroid: rep node
1339 @type centroid: L{ClusterNode}
1340 @param mean: current mean distance in the cluster
1341 @type mean: float
1342 @param consistency: percentage of pairwise distances below the RMSD C{threshold}
1343 @type consistency: float
1344 @param count: current number of nodes in the cluster
1345 @type count: int
1346 @param rejections: total number of rejections
1347 @type rejections: int
1348 @param alternative: suggested cluster rep alternative (e.g. structurally
1349 similar to the centroid, but longer)
1350 @type param:
1351 """
1352
1353 - def __init__(self, centroid, mean, consistency, count, rejections=0, alternative=None):
1366
1367 @property
1369 """
1370 Confidence of assignment: log10(count) * consistency
1371 """
1372 if self.count <= 0 or self.count is None or self.consistency is None:
1373 return 0
1374 else:
1375 return numpy.log10(self.count) * self.consistency
1376
1377 @property
1379 return self._centroid
1380
1381 @property
1383 return self._alternative
1384
1385 @property
1387 return self._alternative is not None
1388
1389 @property
1392
1393 @property
1395 return self._consistency
1396
1397 @property
1400
1401 @property
1403 return self._rejections
1404
1406 """
1407 If an alternative is available, swap the centroid and the alternative.
1408 """
1409
1410 if self._alternative is not None:
1411
1412 centroid = self._centroid
1413 self._centroid = self._alternative
1414 self._alternative = centroid
1415
1418
1419 - def __init__(self, residue, confidence=0, count=0, confident=True, gap=False, rep=None):
1427
1428 @property
1431
1432 @property
1435
1436 @property
1438 if self.rep:
1439 return self.rep.torsion_at(self.rank, self.rank)[0]
1440 else:
1441 return None
1442
1445 """
1446 Simplifies the construction of fragment libraries.
1447 """
1448
1452
1469
1471 """
1472 Build a fixed-length fragment library from a list of
1473 variable-length L{Assignment}s.
1474
1475 @param fragments: source fragments
1476 @type fragments: iterable of L{RosettaFragment}s
1477 @param window: fixed-length fragment size (for classic Rosetta: choose 9)
1478 @type window: int
1479
1480 @return: fixed-length fragment library
1481 @rtype: L{RosettaFragmentMap}
1482 """
1483
1484 frags = []
1485
1486 for f in fragments:
1487 for qs in range(f.qstart, f.qend - window + 1):
1488 frags.append(f.subregion(qs, qs + window - 1))
1489
1490 return self.rosetta.RosettaFragmentMap(frags)
1491
1492 - def make_combined(self, target, filling, threshold=0.5, callback=None):
1493 """
1494 Complement C{target}'s assignments with C{filling} (e.g. rosetta fragments).
1495 The regions to be complemented are determined by calculating the confidence
1496 at each residue (by filtering).
1497
1498
1499 @param target: target protein
1500 @type target: L{Target}
1501 @param filling: additional fragments to place in the low-conf regions
1502 @type filling: L{RosettaFragmentMap} or iterable of L{RosettaFragment}
1503 @param threshold: confidence threshold
1504 @type threshold: float
1505
1506 @return: complemented fragment library
1507 @rtype: L{RosettaFragmentMap}
1508 """
1509
1510 fragmap = self.make_fragset(target)
1511 covered = set()
1512
1513 for r in target.residues:
1514
1515 if r.assignments.length == 0:
1516 if callback:
1517 callback(ResidueEventInfo(r.native, gap=True))
1518 continue
1519
1520 cluster = r.filter()
1521 if cluster is None:
1522 if callback:
1523 callback(ResidueEventInfo(r.native, 0, 0, confident=False))
1524 continue
1525
1526 if cluster.confidence >= threshold:
1527 covered.add(r.native.rank)
1528 confident = True
1529 else:
1530 confident = False
1531
1532 if callback:
1533 callback(ResidueEventInfo(r.native, cluster.confidence, cluster.count, confident))
1534
1535 for r in target.residues:
1536 if r.native.rank not in covered:
1537 fragmap.mark_unconfident(r.native.rank)
1538
1539 for frag in filling:
1540 fragmap.complement(frag)
1541
1542 return fragmap
1543
1545 """
1546 Builed a filtered fragment library (by clustering), containing only
1547 representative fragments (cluster centroids).
1548
1549 @param target: target protein
1550 @type target: L{Target}
1551 @param extend: if True, pick alternative reps if available
1552 @type extend: bool
1553
1554 @return: filtered fragment library
1555 @rtype: L{RosettaFragmentMap}
1556 """
1557
1558 fragments = []
1559
1560 for r in target.residues:
1561 if r.assignments.length == 0:
1562 if callback:
1563 callback(ResidueEventInfo(r.native, gap=True))
1564 continue
1565
1566 cluster = r.filter(extend=extend)
1567 if cluster is None:
1568 if callback:
1569 callback(ResidueEventInfo(r.native, 0, 0, confident=False))
1570 continue
1571
1572 if extend and cluster.has_alternative:
1573 best = cluster.alternative
1574 else:
1575 best = cluster.centroid
1576
1577 fragment = self.rosetta.RosettaFragment.from_object(best)
1578 fragments.append(fragment)
1579 if callback:
1580 callback(ResidueEventInfo(r.native, cluster.confidence, cluster.count, rep=cluster.centroid))
1581
1582 fragments.sort()
1583 return self.rosetta.RosettaFragmentMap(fragments, target.length)
1584
1585 - def mix(self, *fragsets):
1605
1608
1610
1611 FACTORY = None
1612 DSN = None
1613
1614 - def __init__(self, factory=None, dsn=None):
1615
1616 self.factory = factory or self.__class__.FACTORY
1617 self.cs = dsn or self.__class__.DSN
1618 self.connection = None
1619 self.cursor = None
1620
1622
1623 self.connection = self.factory(self.cs)
1624 try:
1625 self.cursor = self.connection.cursor()
1626 except:
1627 self.connection.close()
1628 raise
1629 return self
1630
1632 try:
1633 if not self.cursor.closed:
1634 self.cursor.close()
1635 finally:
1636 if not self.connection.closed:
1637 self.connection.close()
1638
1657
1658 @staticmethod
1659 - def connection_string(database='FragmentBenchmarks', host='', username='', password=''):
1670
1677
1684
1691
1698
1699 - def scores(self, benchmark_id, type):
1705
1712
1713 - def structure(self, accession, chain=None):
1724
1725 - def prediction(self, target_id, type, ss=False):
1726
1727 info = self.target_details(target_id)
1728 if not info:
1729 raise ValueError('No such Target ID in the database: {0}'.format(target_id))
1730 row = info[0]
1731
1732 id = row["Accession"]
1733 length = float(row["Length"])
1734 overlap = float(row["MaxOverlap"]) / (length or 1.)
1735
1736 native = self.structure(id[:4], id[4]).chains[id[4]]
1737 target = self._factory.target(id, length, native.residues, overlap)
1738
1739 source = None
1740
1741 for row in self.assignments(target_id, type):
1742
1743 src_accession = row['Source'][:4]
1744 src_chain = row['Source'][4]
1745
1746 if source is None or source.accession != src_accession:
1747 try:
1748 source = self.structure(src_accession, src_chain)
1749 except (IOError, ValueError) as ex:
1750 target.errors.append(ex)
1751 continue
1752
1753 if src_chain == '_':
1754 frag_chain = source.first_chain
1755 else:
1756 frag_chain = source.chains[src_chain]
1757 if not frag_chain.has_torsion:
1758 frag_chain.compute_torsion()
1759
1760 fragment = self._factory.assignment(
1761 source=frag_chain,
1762 start=row['SourceStart'],
1763 end=row['SourceEnd'],
1764 id=row['FragmentName'],
1765 qstart=row['Start'],
1766 qend=row['End'],
1767 probability=row['Probability'],
1768 score=row['Score'],
1769 neff=row['Neff'],
1770 rmsd=row['RMSD'],
1771 tm_score=row['TMScore'],
1772 internal_id=row['InternalID'])
1773
1774 target.assign(fragment)
1775
1776 if ss:
1777 self._attach_sec_structure(target, target_id, type)
1778
1779 return target
1780
1796