1 """
2 HHpred and Hidden Markov Model APIs.
3
4 This package defines the abstractions for working with HHpred's HMMs and
5 hit lists. L{ProfileHMM} is the most important object of this module.
6 It describes a sequence profile hidden Markov model in the way HHpred
7 sees this concept:
8
9 - a profile is composed of a list of L{HMMLayer}s, which contain a
10 number of L{State}s
11 - these L{States} can be of different types: Match, Insertion Deletion, etc.
12 - a profile contains a multiple alignment, from which it is derived
13 - this multiple alignment is an A3M (condensed) Alignment, where the
14 first sequence is a master sequence
15 - the match states in all layers correspond to the residues of the master
16 sequence
17
18 L{ProfileHMM} objects provide list-like access to their layers:
19
20 >>> hmm.layers[1]
21 <HMMLayer> # first layer: layer at master residue=1
22
23 Every layer provides dictionary-like access to its states:
24
25 >>> layer[States.Match]
26 <Match State>
27
28 and every state provides dictionary-like access to its transitions to other
29 states:
30
31 >>> state = hmm.layers[1][States.match]
32 >>> state.transitions[States.Insertion]
33 <Transition> # Match > Insertion
34 >>> transition.predecessor
35 <Match State> # source state
36 >>> transition.successor
37 <Insertion State> # target state
38
39 Whether this transition points to a state at the same (i) or the next layer
40 (i+1) depends on the semantics of the source and the target states.
41
42 Building HMMs from scratch is supported through a number of C{append} methods
43 at various places:
44
45 >>> layer = HMMLayer(...)
46 >>> layer.append(State(...))
47 >>> hmm.layers.append(layer)
48
49 See L{HMMLayersCollection}, L{HMMLayer}, L{EmissionTable} and L{TransitionTable}
50 for details.
51 """
52
53 import sys
54 import math
55
56 import csb.core
57 import csb.io
58 import csb.bio.structure as structure
59 import csb.bio.sequence as sequence
60
61 from csb.core import Enum
66
69
72
75
78
81
84
87
88
89 -class States(csb.core.enum):
94
96 """
97 Enumeration of HMM emission and transition score units
98 """
99 LogScales='LogScales'; Probability='Probability'
100
101 BACKGROUND = [ 0.076627178753322270, 0.018866884241976509, 0.053996136712517316,
102 0.059788009880742142, 0.034939432842683173, 0.075415244982547675,
103 0.036829356494115069, 0.050485048600600511, 0.059581159080509941,
104 0.099925728794059046, 0.021959667190729986, 0.040107059298840765,
105 0.045310838527464106, 0.032644867589507229, 0.051296350550656143,
106 0.046617000834108295, 0.071051060827250878, 0.072644631719882335,
107 0.012473412286822654, 0.039418044025976547 ]
108 """
109 Background amino acid probabilities
110 """
111
112 RELATIVE_SA = { 'A': 0.02, 'B': 0.14, 'C': 0.33, 'D': 0.55, 'E': 1.00 }
113 """
114 Relative solvent accessibility codes (upper bounds)
115 """
119 """
120 Describes a protein profile Hidden Markov Model.
121 Optional parameters:
122
123 @param units: defines the units of the transition and emission scores
124 @type units: L{ScoreUnits}
125 @param scale: the scaling factor used to convert emission/transition
126 probabilities
127 @type scale: float
128 @param logbase: the base of the logarithm used for scaling the emission and
129 transition probabilities
130 @type logbase: float
131 """
132
134
135 self._name = None
136 self._id = None
137 self._family = None
138 self._length = ProfileLength(0, 0)
139 self._alignment = None
140 self._consensus = None
141 self._dssp = None
142 self._dssp_solvent = None
143 self._psipred = None
144 self._effective_matches = None
145 self._evd = EVDParameters(None, None)
146 self._version = None
147 self._pseudocounts = False
148 self._emission_pseudocounts = False
149 self._transition_pseudocounts = False
150 self._layers = HMMLayersCollection()
151 self._start = State(States.Start)
152 self._start_insertion = None
153 self._end = State(States.End)
154 self._scale = scale
155 self._logbase = logbase
156 if units is None:
157 self._score_units = ScoreUnits.LogScales
158 else:
159 self._score_units = units
160
161 @property
163 """
164 Profile name (NAME)
165 @rtype: str
166 """
167 return self._name
168 @name.setter
169 - def name(self, value):
171
172 @property
174 """
175 Profile entry ID (FILE)
176 @rtype: str
177 """
178 return self._id
179 @id.setter
180 - def id(self, value):
181 self._id = str(value)
182
183 @property
185 """
186 Alternative entry ID (FAM)
187 @rtype: str
188 """
189 return self._family
190 @family.setter
192 self._family = str(value)
193
194 @property
196 """
197 Profile length
198 @rtype: L{ProfileLength}
199 """
200 return self._length
201 @length.setter
206
207 @property
209 """
210 Source multiple alignment
211 @rtype: L{A3MAlignment}
212 """
213 return self._alignment
214 @alignment.setter
219
220 @property
222 """
223 Consensus sequence
224 @rtype: L{AbstractSequence}
225 """
226 return self._consensus
227 @consensus.setter
232
233 @property
235 """
236 DSSP (calculated) secondary structure
237 @rtype: L{SecondaryStructure}
238 """
239 return self._dssp
240 @dssp.setter
241 - def dssp(self, value):
245
246 @property
248 """
249 Solvent accessibility values
250 @rtype: str
251 """
252 return self._dssp_solvent
253 @dssp_solvent.setter
255 self._dssp_solvent = str(value)
256
257 @property
259 """
260 PSIPRED (predicted) secondary structure
261 @rtype: L{SecondaryStructure}
262 """
263 return self._psipred
264 @psipred.setter
269
270 @property
272 """
273 Number of effective matches (NEFF)
274 """
275 return self._effective_matches
276 @effective_matches.setter
278 self._effective_matches = value
279
280 @property
282 """
283 Extreme-value distribution parameters (EVD)
284 @rtype: L{EVDParameters}
285 """
286 return self._evd
287 @evd.setter
288 - def evd(self, value):
292
293 @property
295 """
296 Format version number (HHsearch)
297 @rtype: str
298 """
299 return self._version
300 @version.setter
302 self._version = str(value)
303
304 @property
306 """
307 @rtype: bool
308 """
309 return self._pseudocounts
310 @pseudocounts.setter
312 self._pseudocounts = bool(value)
313
314 @property
316 """
317 @rtype: bool
318 """
319 return self._emission_pseudocounts
320 @emission_pseudocounts.setter
322 self._emission_pseudocounts = bool(value)
323
324 @property
326 """
327 @rtype: bool
328 """
329 return self._transition_pseudocounts
330 @transition_pseudocounts.setter
332 self._transition_pseudocounts = bool(value)
333
334 @property
336 """
337 List-like access to the HMM's layers
338 @rtype: L{HMMLayersCollection}
339 """
340 return self._layers
341
342 @property
344 """
345 Start state (at the start layer)
346 @rtype: L{State}
347 """
348 return self._start
349 @start.setter
355
356 @property
358 """
359 Insertion state at the start layer
360 @rtype: L{State}
361 """
362 return self._start_insertion
363 @start_insertion.setter
369
370 @property
372 """
373 Final state (at the end layer)
374 @rtype: L{State}
375 """
376 return self._end
377 @end.setter
378 - def end(self, value):
383
384 @property
386 """
387 Score scaling factor
388 @rtype: float
389 """
390 return self._scale
391
392 @property
394 """
395 Base of the logarithm used for score scaling
396 @rtype: float
397 """
398 return self._logbase
399
400 @property
402 """
403 Current score units
404 @rtype: L{ScoreUnits} member
405 """
406 return self._score_units
407
408 @property
417
418 @property
420 """
421 A list of layers including start and start_insertion
422 @rtype: list of L{HMMLayer}
423 """
424 complete_layers = []
425 first_layer = HMMLayer(rank=0, residue=None)
426 first_layer.append(self.start)
427 if self.start_insertion:
428 first_layer.append(self.start_insertion)
429 complete_layers.append(first_layer)
430 for layer in self.layers:
431 complete_layers.append(layer)
432
433 return complete_layers
434
435 @property
437 """
438 True if this profile contains structural data
439 @rtype: bool
440 """
441 has = False
442 for layer in self.layers:
443 if layer.residue.has_structure:
444 return True
445 return has
446
448 """
449 Serialize this HMM to a file.
450
451 @param file_name: target file name
452 @type file_name: str
453 """
454 rec = sys.getrecursionlimit()
455 sys.setrecursionlimit(10000)
456 csb.io.Pickle.dump(self, open(file_name, 'wb'))
457 sys.setrecursionlimit(rec)
458
459 @staticmethod
461 """
462 De-serialize an HMM from a file.
463
464 @param file_name: source file name (pickle)
465 @type file_name: str
466 """
467 rec = sys.getrecursionlimit()
468 sys.setrecursionlimit(10000)
469 try:
470 return csb.io.Pickle.load(open(file_name, 'rb'))
471 finally:
472 sys.setrecursionlimit(rec)
473
474 - def _convert(self, units, score, scale, logbase):
485
486 - def to_hmm(self, output_file=None, convert_scores=False):
515
517 """
518 Extract a sub-segment of the profile.
519
520 @param start: start layer of the segment (rank)
521 @type start: int
522 @param end: end layer of the segment (rank)
523 @type end: int
524
525 @return: a deepcopy of the extracted HMM segment
526 @rtype: L{ProfileHMMSegment}
527 """
528 return ProfileHMMSegment(self, start, end)
529
533
540
547
548 - def structure(self, chain_id=None, accession=None):
549 """
550 Extract the structural information from the HMM.
551
552 @param accession: defines the accession number of the structure
553 @type accession: str
554 @param chain_id: defines explicitly the chain identifier
555 @type chain_id: str
556
557 @return: a shallow L{Structure} wrapper around the residues in the HMM.
558 @rtype: L{Structure}
559 """
560 struct = structure.Structure(accession or self.id)
561 chain = self.chain(chain_id)
562 struct.chains.append(chain)
563
564 return struct
565
566 - def chain(self, chain_id=None):
586
588 """
589 Extract the emission scores of all match states in the profile.
590 The metric of the emission scores returned depends on the current
591 hmm.score_units setting - you may need to call hmm.convert_scores()
592 to adjust the hmm to your particular needs.
593
594 @return: a list of dictionaries; each dict key is a single amino acid
595 @rtype: list
596 """
597 profile = []
598
599 for layer in self.layers:
600 emission = {}
601
602 for aa in layer[States.Match].emission:
603 emission[str(aa)] = layer[States.Match].emission[aa] or 0.0
604 profile.append(emission)
605
606 return profile
607
609 """
610 Convert emission and transition scores to the specified units.
611
612 @param units: the target units for the conversion (a member of
613 L{ScoreUnits}).
614 @type units: L{csb.core.EnumItem}
615 @param method: if defined, implements the exact mathematical
616 transformation that will be applied. It must be a
617 function or lambda expression with the following
618 signature::
619 def (target_units, score, scale, logbase)
620
621 and it has to return the score converted to
622 C{target_units}. If method performs a conversion from
623 probabilities to scaled logs, you should also update
624 C{hmm.scale} and C{hmm.logbase}.
625 @type method: function, lambda
626 """
627
628 if self._score_units == units:
629 return
630
631 if method is not None:
632 convert = method
633 else:
634 convert = self._convert
635
636 for layer in self.layers:
637 for state_kind in layer:
638 state = layer[state_kind]
639 if not state.silent:
640 for residue in state.emission:
641 if state.emission[residue] is not None:
642 state.emission.update(residue, convert(
643 units, state.emission[residue],
644 self.scale, self.logbase))
645 for residue in state.background:
646 if state.background[residue] is not None:
647 state.background.update(residue, convert(
648 units, state.background[residue],
649 self.scale, self.logbase))
650 for tran_kind in state.transitions:
651 transition = state.transitions[tran_kind]
652 transition.probability = convert(units, transition.probability,
653 self.scale, self.logbase)
654
655
656
657 if self.start_insertion:
658 for t_it in self.start_insertion.transitions:
659 transition = self.start_insertion.transitions[t_it]
660 transition.probability = convert(units, transition.probability,
661 self.scale, self.logbase)
662
663 for residue in self.start_insertion.emission:
664 state = self.start_insertion
665 if state.emission[residue] is not None:
666 state.emission.update(residue,
667 convert(units, state.emission[residue], self.scale, self.logbase))
668 state.background.update(residue,
669 convert(units, state.background[residue], self.scale, self.logbase))
670
671 for tran_kind in self.start.transitions:
672 transition = self.start.transitions[tran_kind]
673 transition.probability = convert(units,
674 transition.probability, self.scale, self.logbase)
675
676
677 self._score_units = units
678
680 """
681 Compute the Log-sum-of-odds score between the emission tables of self
682 and other (Soeding 2004). If no observable Match state is found at a
683 given layer, the Insertion state is used instead.
684
685 @note: This is not a full implementation of the formula since only
686 emission vectors are involved in the computation and any transition
687 probabilities are ignored.
688
689 @param other: the subject HMM
690 @type other: L{ProfileHMM}
691
692 @return: emission log-sum-of-odds similarity between C{self} and
693 C{other}
694 @rtype: float
695
696 @raise ValueError: when self and other differ in their length, when the
697 score_units are not Probability, or when no
698 observable states are present
699 """
700 score = 1
701
702 if self.layers.length != other.layers.length or self.layers.length < 1:
703 raise ValueError('Both HMMs must have the same nonzero number of layers')
704 if self.score_units != ScoreUnits.Probability or \
705 other.score_units != ScoreUnits.Probability:
706 raise ValueError('Scores must be converted to probabilities first.')
707
708 for q_layer, s_layer in zip(self.layers, other.layers):
709
710 try:
711 if States.Match in q_layer and not q_layer[States.Match].silent:
712 q_state = q_layer[States.Match]
713 else:
714 q_state = q_layer[States.Insertion]
715
716 if States.Match in s_layer and not s_layer[States.Match].silent:
717 s_state = s_layer[States.Match]
718 else:
719 s_state = s_layer[States.Insertion]
720 except csb.core.ItemNotFoundError:
721 raise ValueError('Query and subject must contain observable states '
722 'at each layer')
723
724 emission_dotproduct = 0
725
726 for aa in q_state.emission:
727
728 q_emission = q_state.emission[aa] or sys.float_info.min
729 s_emission = s_state.emission[aa] or sys.float_info.min
730 emission_dotproduct += (q_emission * s_emission /
731 q_state.background[aa])
732
733 score *= emission_dotproduct
734
735 return math.log(score)
736
738 """
739 Attach references from each profile layer to the relevant DSSP secondary
740 structure element.
741 """
742 assert self.dssp is not None
743
744 for motif in self.dssp:
745 for i in range(motif.start, motif.end + 1):
746 self.layers[i].residue.secondary_structure = motif
747
750 """
751 Represents a segment (fragment) of a ProfileHMM.
752
753 @param hmm: source HMM
754 @type hmm: ProfileHMM
755 @param start: start layer of the segment (rank)
756 @type start: int
757 @param end: end layer of the segment (rank)
758 @type end: int
759
760 @raise ValueError: when start or end positions are out of range
761 """
762
764
765 if start < hmm.layers.start_index or start > hmm.layers.last_index:
766 raise IndexError('Start position {0} is out of range'.format(start))
767 if end < hmm.layers.start_index or end > hmm.layers.last_index:
768 raise IndexError('End position {0} is out of range'.format(end))
769
770
771
772 super(ProfileHMMSegment, self).__init__(units=hmm.score_units,
773 scale=hmm.scale, logbase=hmm.logbase)
774 self.id = hmm.id
775 self.family = hmm.family
776 self.name = hmm.name
777 self.pseudocounts = hmm.pseudocounts
778 self.evd = hmm.evd
779 self.version = hmm.version
780 self.source = hmm.id
781 self._source_start = start
782 self._source_end = end
783
784 if hmm.alignment:
785 self.alignment = hmm.alignment.hmm_subregion(start, end)
786 self.consensus = hmm.consensus.subregion(start, end)
787
788 layers = csb.core.deepcopy(hmm.layers[start : end + 1])
789 max_score = 1.0
790 if hmm.score_units != ScoreUnits.Probability:
791 max_score = hmm._convert(hmm.score_units,
792 max_score, hmm.scale, hmm.logbase)
793 self._build_graph(layers, max_score)
794
795 if hmm.dssp:
796 self.dssp = hmm.dssp.subregion(start, end)
797 self._assign_secstructure()
798 if hmm.psipred:
799 self.psipred = hmm.psipred.subregion(start, end)
800
801 self.length.layers = self.layers.length
802 self.length.matches = self.layers.length
803 self.effective_matches = sum([(l.effective_matches or 0.0) for l in self.layers]) / self.layers.length
804
805 @property
807 """
808 Start position of this segment in its source HMM
809 @rtype: int
810 """
811 return self._source_start
812
813 @property
815 """
816 End position of this segment in its source HMM
817 @rtype: int
818 """
819 return self._source_end
820
845
848 """
849 Represents a segment of the Match state emission probabilities of a L{ProfileHMM}.
850 Contains only Match states, connected with equal transition probabilities of 100%.
851 """
852
854
855 factory = StateFactory()
856
857 for rank, source_layer in enumerate(source_layers, start=1):
858
859 emission = source_layer[States.Match].emission
860 background = source_layer[States.Match].background
861
862 match = factory.create_match(emission, background)
863 match.rank = rank
864
865 layer = HMMLayer(rank, source_layer.residue)
866 layer.append(match)
867 self.layers.append(layer)
868
869 if rank == 1:
870 self.start.transitions.append(Transition(self.start, match, 1.0))
871 elif rank < len(source_layers):
872 prev_match = self.layers[rank - 1][States.Match]
873 prev_match.transitions.append(Transition(prev_match, match, 1.0))
874 elif rank == len(source_layers):
875 match.transitions.append(Transition(match, self.end, 1.0))
876 else:
877 assert False
878
910
911 @property
913 """
914 Start position of this segment in its source HMM
915 @rtype: int
916 """
917 return self._source_start
918
919 @property
921 """
922 End position of this segment in its source HMM
923 @rtype: int
924 """
925 return self._source_end
926
933
936
938 self.lamda = lamda
939 self.mu = mu
940
943
945 return (self.lamda is not None or self.mu is not None)
946
949 """
950 Represents a lookup table of emission probabilities. Provides dictionary-like
951 access:
952
953 >>> state.emission[ProteinAlphabet.ALA]
954 emission probability for ALA
955
956 @param emission: an initialization dictionary of emission probabilities
957 @type emission: dict
958 @param restrict: a list of residue types allowed for this emission table.
959 Defaults to the members of L{csb.bio.sequence.ProteinAlphabet}
960 @type restrict: list
961 """
962
965
966 - def append(self, residue, probability):
967 """
968 Append a new emission probability to the table.
969
970 @param residue: residue name (type) - a member of
971 L{csb.bio.sequence.ProteinAlphabet}
972 @type residue: L{csb.core.EnumItem}
973 @param probability: emission score
974 @type probability: float
975
976 @raise EmissionExistsError: if residue is already defined
977 """
978 if residue in self:
979 raise EmissionExistsError('Residue {0} is already defined.'.format(residue))
980
981 super(EmissionTable, self).append(residue, probability)
982
983 - def set(self, table):
984 """
985 Set the emission table using the dictionary provided in the argument.
986
987 @param table: the new emission table
988 @type table: dict
989 """
990 super(EmissionTable, self)._set(table)
991
992 - def update(self, residue, probability):
993 """
994 Update the emission C{probability} of a given emission C{residue}.
995
996 @param residue: name (type) of the residue to be updated
997 @type residue: L{csb.core.EnumItem}
998 @param probability: new emission score
999 @type probability: float
1000 """
1001 super(EmissionTable, self)._update({residue: probability})
1002
1005 """
1006 Represents a lookup table of transitions that are possible from within a given state.
1007
1008 Provides dictionary-like access, where dictionary keys are target states.
1009 These are members of the L{States} enumeration, e.g.:
1010
1011 >>> state.transitions[States.Match]
1012 transition info regarding transition from the current state to a Match state
1013 >>> state.transitions[States.Match].predecessor
1014 state
1015 >>> state.transitions[States.Match].successor
1016 the next match state
1017
1018 @param transitions: an initialization dictionary of target L{State}:L{Transition} pairs
1019 @type transitions: dict
1020 @param restrict: a list of target states allowed for this transition table.
1021 Defaults to the L{States} enum members
1022 @type restrict: list
1023 """
1024
1026 super(TransitionTable, self).__init__(transitions, restrict)
1027
1028 @property
1031
1032 - def append(self, transition):
1033 """
1034 Append a new C{transition} to the table.
1035
1036 @param transition: transition info
1037 @type transition: L{Transition}
1038
1039 @raise TransitionExistsError: when a transition to the same target state
1040 already exists for the current state
1041 """
1042
1043 if transition.successor.type in self:
1044 msg = 'Transition to a {0} state is already defined.'
1045 raise TransitionExistsError(msg.format(transition.successor.type))
1046
1047 super(TransitionTable, self).append(transition.successor.type, transition)
1048
1049 - def set(self, table):
1050 """
1051 Set the transition table using the dictionary provided in the argument.
1052
1053 @param table: the new transition table
1054 @type table: dict
1055 """
1056 super(TransitionTable, self)._set(table)
1057
1058 - def update(self, target_statekind, transition):
1059 """
1060 Update the information of a transition, which points to a target
1061 state of the specified L{States} kind.
1062
1063 @param target_statekind: the key of the transition to be updated
1064 @type target_statekind: L{csb.core.EnumItem}
1065 @param transition: new transition info object
1066 @type transition: L{Transition}
1067
1068 @raise ValueError: if I{transition.successor.type} differs from
1069 C{target_statekind}
1070 """
1071 if transition.successor.type != target_statekind:
1072 raise ValueError("Successor's type differs from the specified target state.")
1073
1074 super(TransitionTable, self)._update({target_statekind: transition})
1075
1078 """
1079 Provides consecutive, 1-based access to all of the layers in the profile.
1080 Each profile layer contains a catalog of available states at that index, e.g.:
1081
1082 >>> profile.layers[i]
1083 the catalog at profile layer i
1084 >>> profile.layers[i][States.Deletion]
1085 the deletion state at index i
1086
1087 @param layers: initialization list of L{HMMLayer}s
1088 @type layers: list
1089 """
1092
1093 @property
1096
1097 -class HMMLayer(csb.core.DictionaryContainer):
1098 """
1099 Provides a dictionary-like catalog of the available states at this layer.
1100 Lookup keys are members of the L{States} enumeration, e.g.:
1101
1102 >>> profile.layers[i][States.Deletion]
1103 the deletion state at layer number i
1104
1105 @param rank: layer's number
1106 @type rank: int
1107 @param residue: a representative L{ProteinResidue} that is associated with
1108 this layer
1109 @type residue: L{ProteinResidue}
1110 @param states: initialization dictionary of L{States}.Item:L{State} pairs
1111 @type states: dict
1112 """
1113 - def __init__(self, rank, residue, states=None):
1124
1125 @property
1128
1129 @property
1131 """
1132 Layer's position
1133 @rtype: int
1134 """
1135 return self._rank
1136 @rank.setter
1137 - def rank(self, value):
1138 self._rank = int(value)
1139
1140 @property
1142 """
1143 Representative residue
1144 @rtype: L{Residue}
1145 """
1146 return self._residue
1147 @residue.setter
1152
1153 @property
1155 """
1156 Number of effective matches at this layer
1157 @rtype: int
1158 """
1159 return self._effective_matches
1160 @effective_matches.setter
1162 self._effective_matches = value
1163
1164 @property
1166 """
1167 Number of effective insertions at this layer
1168 @rtype: int
1169 """
1170 return self._effective_insertions
1171 @effective_insertions.setter
1173 self._effective_insertions = value
1174
1175 @property
1177 """
1178 Number of effective deletions at this layer
1179 @rtype: int
1180 """
1181 return self._effective_deletions
1182 @effective_deletions.setter
1184 self._effective_deletions = value
1185
1187 """
1188 Append a new C{state} to the catalog.
1189
1190 @param state: the new state
1191 @type state: L{State}
1192
1193 @raise StateExistsError: when a state of the same type is already defined
1194 """
1195 if state.type in self:
1196 raise StateExistsError(
1197 'State {0} is already defined at this position.'.format(state.type))
1198
1199 super(HMMLayer, self).append(state.type, state)
1200
1201 - def update(self, state_kind, state):
1202 """
1203 Update the sate of the specified kind under the current layer.
1204
1205 @param state_kind: state type (key) - a member of L{States}
1206 @type state_kind: L{csb.core.EnumItem}
1207 @param state: the new state info
1208 @type state: L{State}
1209
1210 @raise ValueError: if state.type differs from state_kind
1211 """
1212 if state.type != state_kind:
1213 raise ValueError("State's type differs from the specified state_kind")
1214
1215 super(HMMLayer, self)._update({state_kind: state})
1216
1217
1218 -class State(object):
1219 """
1220 Describes a Hidden Markov Model state.
1221
1222 @param type: one of the L{States} enumeration values, e.g. States.Match
1223 @type type: L{csb.core.EnumItem}
1224 @param emit: a collection of emittable state names allowed for the state,
1225 e.g. the members of I{SequenceAlphabets.Protein}. If not defined,
1226 the state will be created as a silent (unobservable).
1227 @type emit: list
1228
1229 @raise ValueError: if type is not a member of the States enum
1230 """
1231
1245
1247 return "<HMM {0.type!r} State>".format(self)
1248
1249 @property
1251 """
1252 State type: one of the L{States}
1253 @rtype: enum item
1254 """
1255 return self._type
1256 @type.setter
1257 - def type(self, value):
1261
1262 @property
1264 return self._rank
1265 @rank.setter
1266 - def rank(self, value):
1267 self._rank = int(value)
1268
1269 @property
1271 """
1272 Lookup table with available transitions to other states
1273 @rtype: L{TransitionTable}
1274 """
1275 return self._transitions
1276
1277 @property
1279 """
1280 Lookup table with available emission probabilities
1281 @rtype: L{EmissionTable}
1282 """
1283 if self._emission is None:
1284 raise UnobservableStateError('Silent {0!r} state'.format(self.type))
1285 return self._emission
1286
1287 @property
1289 """
1290 Lookup table with background probabilities
1291 @rtype: L{EmissionTable}
1292 """
1293 return self._background
1294
1295 @property
1297 """
1298 Whether this state can emit something
1299 @rtype: bool
1300 """
1301 try:
1302 return self.emission is None
1303 except UnobservableStateError:
1304 return True
1305
1307 """
1308 Simplifies the construction of protein profile HMM states.
1309 """
1310
1313
1320
1327
1329 return State(States.Deletion)
1330
1333
1335 self.source_state = source.type
1336 self.target_state = target.type
1337
1339 return '{0}->{1}'.format(self.source_state, self.target_state)
1340
1342 """
1343 Describes a Hidden Markov Model transition between two states.
1344
1345 @param predecessor: source state
1346 @type predecessor: L{State}
1347 @param successor: target state
1348 @type successor: L{State}
1349 @param probability: transition score
1350 @type probability: float
1351 """
1352
1353 - def __init__(self, predecessor, successor, probability):
1364
1366 return '<HMM Transition: {0.type} {0.probability}>'.format(self)
1367
1368 @property
1370 """
1371 Transition source state
1372 @rtype: L{State}
1373 """
1374 return self._predecessor
1375
1376 @property
1378 """
1379 Transition target state
1380 @rtype: L{State}
1381 """
1382 return self._successor
1383
1384 @property
1386 """
1387 Transition score
1388 @rtype: float
1389 """
1390 return self._probability
1391 @probability.setter
1393 if not (value >=0):
1394 raise ValueError('Transition probability must be a positive number.')
1395 self._probability = float(value)
1396
1397 @property
1399 """
1400 Struct, containing information about the source and target state types
1401 @rtype: L{TransitionType}
1402 """
1403 return self._type
1404
1407 """
1408 Represents a query-template alignment in an HHpred result.
1409
1410 @param hit: relevant hit object
1411 @type param: L{HHpredHit}
1412 @param query: the query sequence in the alignment region, with gaps
1413 @type query: str
1414 @param subject: the subject sequence in the alignment region, with gaps
1415 @type subject: str
1416 """
1417
1418 GAP = sequence.ProteinAlphabet.GAP
1419
1420 - def __init__(self, hit, query, subject):
1431
1432 @property
1434 """
1435 Query sequence (with gaps)
1436 @rtype: str
1437 """
1438 return self.rows[1].sequence
1439
1440 @property
1442 """
1443 Subject sequence (with gaps)
1444 @rtype: str
1445 """
1446 return self.rows[2].sequence
1447
1448 @property
1450 """
1451 Find all ungapped query-subject segments in the alignment.
1452 Return a generator over all ungapped alignment segments, represented
1453 by L{HHpredHit} objects
1454
1455 @rtype: generator
1456 """
1457
1458 def make_segment(sstart, send, qstart, qend):
1459
1460 seg = HHpredHit(self._hit.rank, self._hit.id, sstart, send,
1461 qstart, qend, self._hit.probability, self._hit.qlength)
1462
1463 seg.slength = self._hit.slength
1464 seg.evalue = self._hit.evalue
1465 seg.pvalue = self._hit.pvalue
1466 seg.score = self._hit.score
1467 seg.ss_score = self._hit.ss_score
1468 seg.identity = self._hit.identity
1469 seg.similarity = self._hit.similarity
1470 seg.prob_sum = self._hit.prob_sum
1471
1472 return seg
1473
1474 in_segment = False
1475 qs = self._hit.qstart - 1
1476 ss = self._hit.start - 1
1477 qi, si = qs, ss
1478 qe, se = qs, ss
1479
1480 for q, s in zip(self.query, self.subject):
1481
1482 if q != HHpredHitAlignment.GAP:
1483 qi += 1
1484 if s != HHpredHitAlignment.GAP:
1485 si += 1
1486
1487 if HHpredHitAlignment.GAP in (q, s):
1488 if in_segment:
1489 yield make_segment(ss, se, qs, qe)
1490 in_segment = False
1491 qs, ss = 0, 0
1492 qe, se = 0, 0
1493 else:
1494 if not in_segment:
1495 in_segment = True
1496 qs, ss = qi, si
1497
1498 qe, se = qi, si
1499
1500 if in_segment:
1501 yield make_segment(ss, se, qs, qe)
1502
1510
1512 """
1513 Represents a single HHsearch hit.
1514
1515 @param rank: rank of the hit
1516 @type rank: int
1517 @param id: id of the hit
1518 @type id: str
1519 @param start: subject start
1520 @type start: int
1521 @param end: subject end
1522 @type end: int
1523 @param qstart: query start
1524 @type qstart: int
1525 @param qend: query end
1526 @type qend: int
1527 @param probability: probability of the hit
1528 @type probability: float
1529 @param qlength: length of the query
1530 @type qlength: int
1531 """
1532
1533 - def __init__(self, rank, id, start, end, qstart, qend, probability,
1534 qlength):
1535
1536 self._rank = None
1537 self._id = None
1538 self._start = None
1539 self._end = None
1540 self._qstart = None
1541 self._qend = None
1542 self._probability = None
1543 self._qlength = None
1544 self._alignment = None
1545
1546 self._slength = None
1547 self._evalue = None
1548 self._pvalue = None
1549 self._score = None
1550 self._ss_score = None
1551 self._identity = None
1552 self._similarity = None
1553 self._prob_sum = None
1554
1555
1556 self.rank = rank
1557 self.id = id
1558 self.start = start
1559 self.end = end
1560 self.qstart = qstart
1561 self.qend = qend
1562 self.probability = probability
1563 self.qlength = qlength
1564
1566 return "{0.id} {0.probability} {0.start}-{0.end}".format(self)
1567
1569 return "<HHpredHit: {0!s}>".format(self)
1570
1573
1575 """
1576 Return True if C{self} is completely identical to C{other} (same id, same start
1577 and end positions).
1578
1579 @param other: right-hand-term
1580 @type other: HHpredHit
1581
1582 @rtype: bool
1583 """
1584 return (self.id == other.id and self.start == other.start and self.end == other.end)
1585
1587 """
1588 Return True if C{self} is a superior to C{other} in terms of length
1589 and probability. These criteria are applied in the following order:
1590
1591 1. Length (the longer hit is better)
1592 2. Probability (if they have the same length, the one with the higher
1593 probability is better)
1594 3. Address (if they have the same length and probability, the one with
1595 higher memory ID wins; for purely practical reasons)
1596
1597 @param other: right-hand-term
1598 @type other: HHpredHit
1599
1600 @rtype: bool
1601 """
1602 if self.length > other.length:
1603 return True
1604 elif self.length == other.length:
1605 if self.probability > other.probability:
1606 return True
1607 elif self.probability == other.probability:
1608 if id(self) > id(other):
1609 return True
1610 return False
1611
1612 - def includes(self, other, tolerance=1):
1613 """
1614 Return True if C{other} overlaps with C{self}, that means C{other}
1615 is fully or partially included in C{self} when aligned over the query.
1616
1617 @param other: right-hand-term
1618 @type other: HHpredHit
1619 @param tolerance: allow partial overlaps for that number of residues at
1620 either end
1621 @type tolerance: int
1622
1623 @rtype: bool
1624 """
1625 if self.id == other.id:
1626 if other.start >= self.start:
1627 if (other.end - self.end) <= tolerance:
1628 return True
1629 elif other.end <= self.end:
1630 if (self.start - other.start) <= tolerance:
1631 return True
1632
1633 return False
1634
1636 """
1637 Add query/subject alignment to the hit.
1638
1639 @param query: the query sequence within the alignment region, with gaps
1640 @type query: str
1641 @param subject: the subject sequence within the alignment region, with
1642 gaps
1643 @type subject: str
1644 """
1645 self._alignment = HHpredHitAlignment(self, query, subject)
1646
1647 @property
1650 @rank.setter
1651 - def rank(self, value):
1657
1658 @property
1661 @id.setter
1662 - def id(self, value):
1668
1669 @property
1672 @start.setter
1673 - def start(self, value):
1679
1680 @property
1683 @end.setter
1684 - def end(self, value):
1690
1691 @property
1694 @qstart.setter
1701
1702 @property
1705 @qend.setter
1706 - def qend(self, value):
1712
1713 @property
1715 return self._qlength
1716 @qlength.setter
1723
1724 @property
1726 return self._probability
1727 @probability.setter
1729 try:
1730 value = float(value)
1731 except:
1732 raise TypeError('probability must be float, not {0}'.format(type(value)))
1733 self._probability = value
1734
1735 @property
1737 return self._alignment
1738
1739 @property
1741 try:
1742 return self.end - self.start + 1
1743 except:
1744 return 0
1745
1746 @property
1748 return self._slength
1749 @slength.setter
1751 self._slength = value
1752
1753 @property
1756 @evalue.setter
1758 self._evalue = value
1759
1760 @property
1763 @pvalue.setter
1765 self._pvalue = value
1766
1767 @property
1770 @score.setter
1771 - def score(self, value):
1773
1774 @property
1776 return self._ss_score
1777 @ss_score.setter
1779 self._ss_score = value
1780
1781 @property
1783 return self._identity
1784 @identity.setter
1786 self._identity = value
1787
1788 @property
1790 return self._similarity
1791 @similarity.setter
1793 self._similarity = value
1794
1795 @property
1797 return self._prob_sum
1798 @prob_sum.setter
1800 self._prob_sum = value
1801
1804 """
1805 Represents a collection of L{HHpredHit}s.
1806 """
1807
1808 - def __init__(self, hits, query_name='', match_columns=-1, no_of_seqs='',
1809 neff=-1., searched_hmms=-1, date='', command=''):
1827
1828 @property
1830 return self._query_name
1831 @query_name.setter
1833 self._query_name = value
1834
1835 @property
1837 return self._match_columns
1838 @match_columns.setter
1840 self._match_columns = value
1841
1842 @property
1844 return self._no_of_seqs
1845 @no_of_seqs.setter
1847 self._no_of_seqs = value
1848
1849 @property
1852 @neff.setter
1853 - def neff(self, value):
1855
1856 @property
1858 return self._searched_hmms
1859 @searched_hmms.setter
1861 self._searched_hmms = value
1862
1863 @property
1866 @date.setter
1867 - def date(self, value):
1869
1870 @property
1872 return self._command
1873 @command.setter
1875 self._command = value
1876
1878 return "HHpredHitList\n\tquery={0.query_name}\n\tmatch_columns={0.match_columns}\n\tno_of_seqs={0.no_of_seqs}\n\tneff={0.neff}\n\tsearched_hmms={0.searched_hmms}\n\tdate={0.date}\n\tcommand={0.command}".format(self)
1879
1881 return "<HHpredHitList: {0} hits>".format(len(self))
1882
1884 return self._hits[index]
1885
1887 return iter(self._hits)
1888
1890 return len(self._hits)
1891
1893 self._hits.sort(key=lambda i: i.rank)
1894