Package csb :: Package bio :: Package hmm
[frames] | no frames]

Source Code for Package csb.bio.hmm

   1  """ 
   2  HHpred and Hidden Markov Model APIs. 
   3   
   4  This package defines the abstractions for working with HHpred's HMMs and 
   5  hit lists. L{ProfileHMM} is the most important object of this module. 
   6  It describes a sequence profile hidden Markov model in the way HHpred 
   7  sees this concept: 
   8   
   9      - a profile is composed of a list of L{HMMLayer}s, which contain a 
  10        number of L{State}s 
  11      - these L{States} can be of different types: Match, Insertion Deletion, etc. 
  12      - a profile contains a multiple alignment, from which it is derived 
  13      - this multiple alignment is an A3M (condensed) Alignment, where the  
  14        first sequence is a master sequence 
  15      - the match states in all layers correspond to the residues of the master 
  16        sequence 
  17   
  18  L{ProfileHMM} objects provide list-like access to their layers: 
  19   
  20      >>> hmm.layers[1] 
  21      <HMMLayer>    # first layer: layer at master residue=1 
  22       
  23  Every layer provides dictionary-like access to its states: 
  24   
  25      >>> layer[States.Match] 
  26      <Match State> 
  27       
  28  and every state provides dictionary-like access to its transitions to other 
  29  states: 
  30   
  31      >>> state = hmm.layers[1][States.match] 
  32      >>> state.transitions[States.Insertion] 
  33      <Transition>       # Match > Insertion 
  34      >>> transition.predecessor 
  35      <Match State>      # source state 
  36      >>> transition.successor 
  37      <Insertion State>  # target state 
  38   
  39  Whether this transition points to a state at the same (i) or the next layer 
  40  (i+1) depends on the semantics of the source and the target states. 
  41   
  42  Building HMMs from scratch is supported through a number of C{append} methods 
  43  at various places: 
  44   
  45      >>> layer = HMMLayer(...) 
  46      >>> layer.append(State(...)) 
  47      >>> hmm.layers.append(layer) 
  48   
  49  See L{HMMLayersCollection}, L{HMMLayer}, L{EmissionTable} and L{TransitionTable} 
  50  for details. 
  51  """ 
  52   
  53  import sys 
  54  import math 
  55   
  56  import csb.core 
  57  import csb.io 
  58  import csb.bio.structure as structure 
  59  import csb.bio.sequence as sequence 
  60   
  61  from csb.core import Enum 
62 63 64 -class UnobservableStateError(AttributeError):
65 pass
66
67 -class StateNotFoundError(csb.core.ItemNotFoundError):
68 pass
69
70 -class TransitionNotFoundError(StateNotFoundError):
71 pass
72
73 -class LayerIndexError(csb.core.CollectionIndexError):
74 pass
75
76 -class StateExistsError(KeyError):
77 pass
78
79 -class TransitionExistsError(KeyError):
80 pass
81
82 -class EmissionExistsError(KeyError):
83 pass
84
85 -class HMMArgumentError(ValueError):
86 pass
87
88 89 -class States(csb.core.enum):
90 """ 91 Enumeration of HMM state types 92 """ 93 Match='M'; Insertion='I'; Deletion='D'; Start='S'; End='E'
94
95 -class ScoreUnits(csb.core.enum):
96 """ 97 Enumeration of HMM emission and transition score units 98 """ 99 LogScales='LogScales'; Probability='Probability'
100 101 BACKGROUND = [ 0.076627178753322270, 0.018866884241976509, 0.053996136712517316, 102 0.059788009880742142, 0.034939432842683173, 0.075415244982547675, 103 0.036829356494115069, 0.050485048600600511, 0.059581159080509941, 104 0.099925728794059046, 0.021959667190729986, 0.040107059298840765, 105 0.045310838527464106, 0.032644867589507229, 0.051296350550656143, 106 0.046617000834108295, 0.071051060827250878, 0.072644631719882335, 107 0.012473412286822654, 0.039418044025976547 ] 108 """ 109 Background amino acid probabilities 110 """ 111 112 RELATIVE_SA = { 'A': 0.02, 'B': 0.14, 'C': 0.33, 'D': 0.55, 'E': 1.00 } 113 """ 114 Relative solvent accessibility codes (upper bounds) 115 """
116 117 118 -class ProfileHMM(object):
119 """ 120 Describes a protein profile Hidden Markov Model. 121 Optional parameters: 122 123 @param units: defines the units of the transition and emission scores 124 @type units: L{ScoreUnits} 125 @param scale: the scaling factor used to convert emission/transition 126 probabilities 127 @type scale: float 128 @param logbase: the base of the logarithm used for scaling the emission and 129 transition probabilities 130 @type logbase: float 131 """ 132
133 - def __init__(self, units=ScoreUnits.LogScales, scale=-1000., logbase=2):
134 135 self._name = None 136 self._id = None 137 self._family = None 138 self._length = ProfileLength(0, 0) 139 self._alignment = None 140 self._consensus = None 141 self._dssp = None 142 self._dssp_solvent = None 143 self._psipred = None 144 self._effective_matches = None 145 self._evd = EVDParameters(None, None) 146 self._version = None 147 self._pseudocounts = False 148 self._emission_pseudocounts = False 149 self._transition_pseudocounts = False 150 self._layers = HMMLayersCollection() 151 self._start = State(States.Start) 152 self._start_insertion = None 153 self._end = State(States.End) 154 self._scale = scale 155 self._logbase = logbase 156 if units is None: 157 self._score_units = ScoreUnits.LogScales 158 else: 159 self._score_units = units
160 161 @property
162 - def name(self):
163 """ 164 Profile name (NAME) 165 @rtype: str 166 """ 167 return self._name
168 @name.setter
169 - def name(self, value):
170 self._name = str(value)
171 172 @property
173 - def id(self):
174 """ 175 Profile entry ID (FILE) 176 @rtype: str 177 """ 178 return self._id
179 @id.setter
180 - def id(self, value):
181 self._id = str(value)
182 183 @property
184 - def family(self):
185 """ 186 Alternative entry ID (FAM) 187 @rtype: str 188 """ 189 return self._family
190 @family.setter
191 - def family(self, value):
192 self._family = str(value)
193 194 @property
195 - def length(self):
196 """ 197 Profile length 198 @rtype: L{ProfileLength} 199 """ 200 return self._length
201 @length.setter
202 - def length(self, value):
203 if not isinstance(value, ProfileLength): 204 raise TypeError(value) 205 self._length = value
206 207 @property
208 - def alignment(self):
209 """ 210 Source multiple alignment 211 @rtype: L{A3MAlignment} 212 """ 213 return self._alignment
214 @alignment.setter
215 - def alignment(self, value):
216 if not isinstance(value, sequence.A3MAlignment): 217 raise TypeError(value) 218 self._alignment = value
219 220 @property
221 - def consensus(self):
222 """ 223 Consensus sequence 224 @rtype: L{AbstractSequence} 225 """ 226 return self._consensus
227 @consensus.setter
228 - def consensus(self, value):
229 if not isinstance(value, sequence.AbstractSequence): 230 raise TypeError(value) 231 self._consensus = value
232 233 @property
234 - def dssp(self):
235 """ 236 DSSP (calculated) secondary structure 237 @rtype: L{SecondaryStructure} 238 """ 239 return self._dssp
240 @dssp.setter
241 - def dssp(self, value):
242 if not isinstance(value, structure.SecondaryStructure): 243 raise TypeError(value) 244 self._dssp = value
245 246 @property
247 - def dssp_solvent(self):
248 """ 249 Solvent accessibility values 250 @rtype: str 251 """ 252 return self._dssp_solvent
253 @dssp_solvent.setter
254 - def dssp_solvent(self, value):
255 self._dssp_solvent = str(value)
256 257 @property
258 - def psipred(self):
259 """ 260 PSIPRED (predicted) secondary structure 261 @rtype: L{SecondaryStructure} 262 """ 263 return self._psipred
264 @psipred.setter
265 - def psipred(self, value):
266 if not isinstance(value, structure.SecondaryStructure): 267 raise TypeError(value) 268 self._psipred = value
269 270 @property
271 - def effective_matches(self):
272 """ 273 Number of effective matches (NEFF) 274 """ 275 return self._effective_matches
276 @effective_matches.setter
277 - def effective_matches(self, value):
278 self._effective_matches = value
279 280 @property
281 - def evd(self):
282 """ 283 Extreme-value distribution parameters (EVD) 284 @rtype: L{EVDParameters} 285 """ 286 return self._evd
287 @evd.setter
288 - def evd(self, value):
289 if not isinstance(value, EVDParameters): 290 raise TypeError(value) 291 self._evd = value
292 293 @property
294 - def version(self):
295 """ 296 Format version number (HHsearch) 297 @rtype: str 298 """ 299 return self._version
300 @version.setter
301 - def version(self, value):
302 self._version = str(value)
303 304 @property
305 - def pseudocounts(self):
306 """ 307 @rtype: bool 308 """ 309 return self._pseudocounts
310 @pseudocounts.setter
311 - def pseudocounts(self, value):
312 self._pseudocounts = bool(value)
313 314 @property
315 - def emission_pseudocounts(self):
316 """ 317 @rtype: bool 318 """ 319 return self._emission_pseudocounts
320 @emission_pseudocounts.setter
321 - def emission_pseudocounts(self, value):
322 self._emission_pseudocounts = bool(value)
323 324 @property
325 - def transition_pseudocounts(self):
326 """ 327 @rtype: bool 328 """ 329 return self._transition_pseudocounts
330 @transition_pseudocounts.setter
331 - def transition_pseudocounts(self, value):
332 self._transition_pseudocounts = bool(value)
333 334 @property
335 - def layers(self):
336 """ 337 List-like access to the HMM's layers 338 @rtype: L{HMMLayersCollection} 339 """ 340 return self._layers
341 342 @property
343 - def start(self):
344 """ 345 Start state (at the start layer) 346 @rtype: L{State} 347 """ 348 return self._start
349 @start.setter
350 - def start(self, value):
351 if value is None or (isinstance(value, State) and value.type == States.Start): 352 self._start = value 353 else: 354 raise TypeError(value)
355 356 @property
357 - def start_insertion(self):
358 """ 359 Insertion state at the start layer 360 @rtype: L{State} 361 """ 362 return self._start_insertion
363 @start_insertion.setter
364 - def start_insertion(self, value):
365 if value is None or (isinstance(value, State) and value.type == States.Insertion): 366 self._start_insertion = value 367 else: 368 raise TypeError(value)
369 370 @property
371 - def end(self):
372 """ 373 Final state (at the end layer) 374 @rtype: L{State} 375 """ 376 return self._end
377 @end.setter
378 - def end(self, value):
379 if value is None or (isinstance(value, State) and value.type == States.End): 380 self._end = value 381 else: 382 raise TypeError(value)
383 384 @property
385 - def scale(self):
386 """ 387 Score scaling factor 388 @rtype: float 389 """ 390 return self._scale
391 392 @property
393 - def logbase(self):
394 """ 395 Base of the logarithm used for score scaling 396 @rtype: float 397 """ 398 return self._logbase
399 400 @property
401 - def score_units(self):
402 """ 403 Current score units 404 @rtype: L{ScoreUnits} member 405 """ 406 return self._score_units
407 408 @property
409 - def residues(self):
410 """ 411 List of representative residues, attached to each layer 412 @rtype: collection of L{Residue} 413 """ 414 res = [layer.residue for layer in self.layers] 415 return csb.core.ReadOnlyCollectionContainer( 416 res, type=structure.Residue, start_index=1)
417 418 @property
419 - def all_layers(self):
420 """ 421 A list of layers including start and start_insertion 422 @rtype: list of L{HMMLayer} 423 """ 424 complete_layers = [] 425 first_layer = HMMLayer(rank=0, residue=None) 426 first_layer.append(self.start) 427 if self.start_insertion: 428 first_layer.append(self.start_insertion) 429 complete_layers.append(first_layer) 430 for layer in self.layers: 431 complete_layers.append(layer) 432 433 return complete_layers
434 435 @property
436 - def has_structure(self):
437 """ 438 True if this profile contains structural data 439 @rtype: bool 440 """ 441 has = False 442 for layer in self.layers: 443 if layer.residue.has_structure: 444 return True 445 return has
446
447 - def serialize(self, file_name):
448 """ 449 Serialize this HMM to a file. 450 451 @param file_name: target file name 452 @type file_name: str 453 """ 454 rec = sys.getrecursionlimit() 455 sys.setrecursionlimit(10000) 456 csb.io.Pickle.dump(self, open(file_name, 'wb')) 457 sys.setrecursionlimit(rec)
458 459 @staticmethod
460 - def deserialize(file_name):
461 """ 462 De-serialize an HMM from a file. 463 464 @param file_name: source file name (pickle) 465 @type file_name: str 466 """ 467 rec = sys.getrecursionlimit() 468 sys.setrecursionlimit(10000) 469 try: 470 return csb.io.Pickle.load(open(file_name, 'rb')) 471 finally: 472 sys.setrecursionlimit(rec)
473
474 - def _convert(self, units, score, scale, logbase):
475 476 if units == ScoreUnits.Probability: 477 return logbase ** (score / scale) 478 elif units == ScoreUnits.LogScales: 479 if score == 0: 480 #score = sys.float_info.min 481 return None 482 return math.log(score, logbase) * scale 483 else: 484 raise ValueError('Unknown target unit {0}'.format(units))
485
486 - def to_hmm(self, output_file=None, convert_scores=False):
487 """ 488 Dump the profile in HHM format. 489 490 @param output_file: the output file name 491 @type output_file: str 492 @param convert_scores: if True, forces automatic convertion to 493 L{ScoreUnits}.LogScales, which is required 494 by the output file format 495 @type convert_scores: bool 496 """ 497 from csb.bio.io.hhpred import HHMFileBuilder 498 499 if convert_scores: 500 self.convert_scores(ScoreUnits.LogScales) 501 502 temp = csb.io.MemoryStream() 503 504 builder = HHMFileBuilder(temp) 505 builder.add_hmm(self) 506 507 data = temp.getvalue() 508 temp.close() 509 510 if not output_file: 511 return data 512 else: 513 with csb.io.EntryWriter(output_file, close=False) as out: 514 out.write(data)
515
516 - def segment(self, start, end):
517 """ 518 Extract a sub-segment of the profile. 519 520 @param start: start layer of the segment (rank) 521 @type start: int 522 @param end: end layer of the segment (rank) 523 @type end: int 524 525 @return: a deepcopy of the extracted HMM segment 526 @rtype: L{ProfileHMMSegment} 527 """ 528 return ProfileHMMSegment(self, start, end)
529
530 - def subregion(self, start, end):
531 532 return ProfileHMMRegion(self, start, end)
533
534 - def add_emission_pseudocounts(self, *a, **k):
535 """ 536 See L{csb.bio.hmm.pseudocounts.PseudocountBuilder} 537 """ 538 from csb.bio.hmm.pseudocounts import PseudocountBuilder 539 PseudocountBuilder(self).add_emission_pseudocounts(*a, **k)
540
541 - def add_transition_pseudocounts(self, *a, **k):
542 """ 543 See L{csb.bio.hmm.pseudocounts.PseudocountBuilder} 544 """ 545 from csb.bio.hmm.pseudocounts import PseudocountBuilder 546 PseudocountBuilder(self).add_transition_pseudocounts(*a, **k)
547
548 - def structure(self, chain_id=None, accession=None):
549 """ 550 Extract the structural information from the HMM. 551 552 @param accession: defines the accession number of the structure 553 @type accession: str 554 @param chain_id: defines explicitly the chain identifier 555 @type chain_id: str 556 557 @return: a shallow L{Structure} wrapper around the residues in the HMM. 558 @rtype: L{Structure} 559 """ 560 struct = structure.Structure(accession or self.id) 561 chain = self.chain(chain_id) 562 struct.chains.append(chain) 563 564 return struct
565
566 - def chain(self, chain_id=None):
567 """ 568 Extract the structural information from the HMM. 569 570 @param chain_id: defines explicitly the chain identifier 571 @type chain_id: str 572 573 @return: a shallow L{Chain} wrapper around the residues in the HMM. 574 @rtype: L{Chain} 575 """ 576 if chain_id is None: 577 if self.id: 578 chain_id = self.id.rstrip()[-1] 579 else: 580 chain_id = '_' 581 582 chain = structure.Chain(chain_id, type=sequence.SequenceTypes.Protein, 583 residues=self.residues) 584 chain._torsion_computed = True 585 return chain
586
587 - def emission_profile(self):
588 """ 589 Extract the emission scores of all match states in the profile. 590 The metric of the emission scores returned depends on the current 591 hmm.score_units setting - you may need to call hmm.convert_scores() 592 to adjust the hmm to your particular needs. 593 594 @return: a list of dictionaries; each dict key is a single amino acid 595 @rtype: list 596 """ 597 profile = [] 598 599 for layer in self.layers: 600 emission = {} 601 602 for aa in layer[States.Match].emission: 603 emission[str(aa)] = layer[States.Match].emission[aa] or 0.0 604 profile.append(emission) 605 606 return profile
607
608 - def convert_scores(self, units=ScoreUnits.Probability, method=None):
609 """ 610 Convert emission and transition scores to the specified units. 611 612 @param units: the target units for the conversion (a member of 613 L{ScoreUnits}). 614 @type units: L{csb.core.EnumItem} 615 @param method: if defined, implements the exact mathematical 616 transformation that will be applied. It must be a 617 function or lambda expression with the following 618 signature:: 619 def (target_units, score, scale, logbase) 620 621 and it has to return the score converted to 622 C{target_units}. If method performs a conversion from 623 probabilities to scaled logs, you should also update 624 C{hmm.scale} and C{hmm.logbase}. 625 @type method: function, lambda 626 """ 627 628 if self._score_units == units: 629 return 630 631 if method is not None: 632 convert = method 633 else: 634 convert = self._convert 635 636 for layer in self.layers: 637 for state_kind in layer: 638 state = layer[state_kind] 639 if not state.silent: 640 for residue in state.emission: 641 if state.emission[residue] is not None: 642 state.emission.update(residue, convert( 643 units, state.emission[residue], 644 self.scale, self.logbase)) 645 for residue in state.background: 646 if state.background[residue] is not None: 647 state.background.update(residue, convert( 648 units, state.background[residue], 649 self.scale, self.logbase)) 650 for tran_kind in state.transitions: 651 transition = state.transitions[tran_kind] 652 transition.probability = convert(units, transition.probability, 653 self.scale, self.logbase) 654 # The Neff-s are interger numbers and should not be transformed 655 # (except when writing the profile to a hhm file) 656 657 if self.start_insertion: 658 for t_it in self.start_insertion.transitions: 659 transition = self.start_insertion.transitions[t_it] 660 transition.probability = convert(units, transition.probability, 661 self.scale, self.logbase) 662 663 for residue in self.start_insertion.emission: 664 state = self.start_insertion 665 if state.emission[residue] is not None: 666 state.emission.update(residue, 667 convert(units, state.emission[residue], self.scale, self.logbase)) 668 state.background.update(residue, 669 convert(units, state.background[residue], self.scale, self.logbase)) 670 671 for tran_kind in self.start.transitions: 672 transition = self.start.transitions[tran_kind] 673 transition.probability = convert(units, 674 transition.probability, self.scale, self.logbase) 675 676 677 self._score_units = units
678
679 - def emission_similarity(self, other):
680 """ 681 Compute the Log-sum-of-odds score between the emission tables of self 682 and other (Soeding 2004). If no observable Match state is found at a 683 given layer, the Insertion state is used instead. 684 685 @note: This is not a full implementation of the formula since only 686 emission vectors are involved in the computation and any transition 687 probabilities are ignored. 688 689 @param other: the subject HMM 690 @type other: L{ProfileHMM} 691 692 @return: emission log-sum-of-odds similarity between C{self} and 693 C{other} 694 @rtype: float 695 696 @raise ValueError: when self and other differ in their length, when the 697 score_units are not Probability, or when no 698 observable states are present 699 """ 700 score = 1 701 702 if self.layers.length != other.layers.length or self.layers.length < 1: 703 raise ValueError('Both HMMs must have the same nonzero number of layers') 704 if self.score_units != ScoreUnits.Probability or \ 705 other.score_units != ScoreUnits.Probability: 706 raise ValueError('Scores must be converted to probabilities first.') 707 708 for q_layer, s_layer in zip(self.layers, other.layers): 709 710 try: 711 if States.Match in q_layer and not q_layer[States.Match].silent: 712 q_state = q_layer[States.Match] 713 else: 714 q_state = q_layer[States.Insertion] 715 716 if States.Match in s_layer and not s_layer[States.Match].silent: 717 s_state = s_layer[States.Match] 718 else: 719 s_state = s_layer[States.Insertion] 720 except csb.core.ItemNotFoundError: 721 raise ValueError('Query and subject must contain observable states ' 722 'at each layer') 723 724 emission_dotproduct = 0 725 726 for aa in q_state.emission: 727 728 q_emission = q_state.emission[aa] or sys.float_info.min 729 s_emission = s_state.emission[aa] or sys.float_info.min 730 emission_dotproduct += (q_emission * s_emission / 731 q_state.background[aa]) 732 733 score *= emission_dotproduct 734 735 return math.log(score)
736
737 - def _assign_secstructure(self):
738 """ 739 Attach references from each profile layer to the relevant DSSP secondary 740 structure element. 741 """ 742 assert self.dssp is not None 743 744 for motif in self.dssp: 745 for i in range(motif.start, motif.end + 1): 746 self.layers[i].residue.secondary_structure = motif
747
748 749 -class ProfileHMMSegment(ProfileHMM):
750 """ 751 Represents a segment (fragment) of a ProfileHMM. 752 753 @param hmm: source HMM 754 @type hmm: ProfileHMM 755 @param start: start layer of the segment (rank) 756 @type start: int 757 @param end: end layer of the segment (rank) 758 @type end: int 759 760 @raise ValueError: when start or end positions are out of range 761 """ 762
763 - def __init__(self, hmm, start, end):
764 765 if start < hmm.layers.start_index or start > hmm.layers.last_index: 766 raise IndexError('Start position {0} is out of range'.format(start)) 767 if end < hmm.layers.start_index or end > hmm.layers.last_index: 768 raise IndexError('End position {0} is out of range'.format(end)) 769 770 #hmm = csb.core.deepcopy(hmm) 771 772 super(ProfileHMMSegment, self).__init__(units=hmm.score_units, 773 scale=hmm.scale, logbase=hmm.logbase) 774 self.id = hmm.id 775 self.family = hmm.family 776 self.name = hmm.name 777 self.pseudocounts = hmm.pseudocounts 778 self.evd = hmm.evd 779 self.version = hmm.version 780 self.source = hmm.id 781 self._source_start = start 782 self._source_end = end 783 784 if hmm.alignment: 785 self.alignment = hmm.alignment.hmm_subregion(start, end) 786 self.consensus = hmm.consensus.subregion(start, end) 787 788 layers = csb.core.deepcopy(hmm.layers[start : end + 1]) 789 max_score = 1.0 790 if hmm.score_units != ScoreUnits.Probability: 791 max_score = hmm._convert(hmm.score_units, 792 max_score, hmm.scale, hmm.logbase) 793 self._build_graph(layers, max_score) 794 795 if hmm.dssp: 796 self.dssp = hmm.dssp.subregion(start, end) 797 self._assign_secstructure() 798 if hmm.psipred: 799 self.psipred = hmm.psipred.subregion(start, end) 800 801 self.length.layers = self.layers.length 802 self.length.matches = self.layers.length 803 self.effective_matches = sum([(l.effective_matches or 0.0) for l in self.layers]) / self.layers.length
804 805 @property
806 - def source_start(self):
807 """ 808 Start position of this segment in its source HMM 809 @rtype: int 810 """ 811 return self._source_start
812 813 @property
814 - def source_end(self):
815 """ 816 End position of this segment in its source HMM 817 @rtype: int 818 """ 819 return self._source_end
820
821 - def _build_graph(self, source_layers, max_score):
822 823 for rank, layer in enumerate(source_layers, start=1): 824 825 for atom_kind in layer.residue.atoms: 826 layer.residue.atoms[atom_kind].rank = rank 827 layer.residue._rank = rank 828 layer.rank = rank 829 830 self.layers.append(layer) 831 832 if rank == 1: 833 for state_kind in layer: 834 if state_kind in(States.Match, States.Deletion): 835 start_tran = Transition(self.start, layer[state_kind], max_score) 836 self.start.transitions.append(start_tran) 837 elif rank == len(source_layers): 838 for state_kind in layer: 839 state = layer[state_kind] 840 if not (States.End in state.transitions or States.Match in state.transitions): 841 state.transitions.set({}) 842 else: 843 end_tran = Transition(state, self.end, max_score) 844 state.transitions.set({States.End: end_tran}) # TODO: I->I ?
845
846 847 -class EmissionProfileSegment(ProfileHMMSegment):
848 """ 849 Represents a segment of the Match state emission probabilities of a L{ProfileHMM}. 850 Contains only Match states, connected with equal transition probabilities of 100%. 851 """ 852
853 - def _build_graph(self, source_layers):
854 855 factory = StateFactory() 856 857 for rank, source_layer in enumerate(source_layers, start=1): 858 859 emission = source_layer[States.Match].emission 860 background = source_layer[States.Match].background 861 862 match = factory.create_match(emission, background) 863 match.rank = rank 864 865 layer = HMMLayer(rank, source_layer.residue) 866 layer.append(match) 867 self.layers.append(layer) 868 869 if rank == 1: 870 self.start.transitions.append(Transition(self.start, match, 1.0)) 871 elif rank < len(source_layers): 872 prev_match = self.layers[rank - 1][States.Match] 873 prev_match.transitions.append(Transition(prev_match, match, 1.0)) 874 elif rank == len(source_layers): 875 match.transitions.append(Transition(match, self.end, 1.0)) 876 else: 877 assert False
878
879 880 -class ProfileHMMRegion(ProfileHMM):
881 """ 882 A shallow proxy referring to a sub-region of a given Profile HMM. 883 884 @param hmm: source HMM 885 @type hmm: L{ProfileHMM} 886 @param start: start layer of the segment (rank) 887 @type start: int 888 @param end: end layer of the segment (rank) 889 @type end: int 890 891 @raise ValueError: when start or end positions are out of range 892 """ 893
894 - def __init__(self, hmm, start, end):
895 896 if start < hmm.layers.start_index or start > hmm.layers.last_index: 897 raise IndexError('Start position {0} is out of range'.format(start)) 898 if end < hmm.layers.start_index or end > hmm.layers.last_index: 899 raise IndexError('End position {0} is out of range'.format(end)) 900 if hmm.score_units != ScoreUnits.Probability: 901 raise ValueError('Scores must be converted to probabilities first.') 902 903 self._layers = HMMLayersCollection(hmm.layers[start : end + 1]) 904 self._score_units = hmm.score_units 905 self.id = hmm.id 906 self.name = hmm.name 907 self.family = hmm.family 908 self._source_start = start 909 self._source_end = end
910 911 @property
912 - def source_start(self):
913 """ 914 Start position of this segment in its source HMM 915 @rtype: int 916 """ 917 return self._source_start
918 919 @property
920 - def source_end(self):
921 """ 922 End position of this segment in its source HMM 923 @rtype: int 924 """ 925 return self._source_end
926
927 928 -class ProfileLength(object):
929
930 - def __init__(self, matches, layers):
931 self.matches = matches 932 self.layers = layers
933
934 935 -class EVDParameters(object):
936
937 - def __init__(self, lamda, mu):
938 self.lamda = lamda 939 self.mu = mu
940
941 - def __nonzero__(self):
942 return self.__bool__()
943
944 - def __bool__(self):
945 return (self.lamda is not None or self.mu is not None)
946
947 948 -class EmissionTable(csb.core.DictionaryContainer):
949 """ 950 Represents a lookup table of emission probabilities. Provides dictionary-like 951 access: 952 953 >>> state.emission[ProteinAlphabet.ALA] 954 emission probability for ALA 955 956 @param emission: an initialization dictionary of emission probabilities 957 @type emission: dict 958 @param restrict: a list of residue types allowed for this emission table. 959 Defaults to the members of L{csb.bio.sequence.ProteinAlphabet} 960 @type restrict: list 961 """ 962
963 - def __init__(self, emission=None, restrict=Enum.members(sequence.ProteinAlphabet)):
964 super(EmissionTable, self).__init__(emission, restrict)
965
966 - def append(self, residue, probability):
967 """ 968 Append a new emission probability to the table. 969 970 @param residue: residue name (type) - a member of 971 L{csb.bio.sequence.ProteinAlphabet} 972 @type residue: L{csb.core.EnumItem} 973 @param probability: emission score 974 @type probability: float 975 976 @raise EmissionExistsError: if residue is already defined 977 """ 978 if residue in self: 979 raise EmissionExistsError('Residue {0} is already defined.'.format(residue)) 980 981 super(EmissionTable, self).append(residue, probability)
982
983 - def set(self, table):
984 """ 985 Set the emission table using the dictionary provided in the argument. 986 987 @param table: the new emission table 988 @type table: dict 989 """ 990 super(EmissionTable, self)._set(table)
991
992 - def update(self, residue, probability):
993 """ 994 Update the emission C{probability} of a given emission C{residue}. 995 996 @param residue: name (type) of the residue to be updated 997 @type residue: L{csb.core.EnumItem} 998 @param probability: new emission score 999 @type probability: float 1000 """ 1001 super(EmissionTable, self)._update({residue: probability})
1002
1003 1004 -class TransitionTable(csb.core.DictionaryContainer):
1005 """ 1006 Represents a lookup table of transitions that are possible from within a given state. 1007 1008 Provides dictionary-like access, where dictionary keys are target states. 1009 These are members of the L{States} enumeration, e.g.: 1010 1011 >>> state.transitions[States.Match] 1012 transition info regarding transition from the current state to a Match state 1013 >>> state.transitions[States.Match].predecessor 1014 state 1015 >>> state.transitions[States.Match].successor 1016 the next match state 1017 1018 @param transitions: an initialization dictionary of target L{State}:L{Transition} pairs 1019 @type transitions: dict 1020 @param restrict: a list of target states allowed for this transition table. 1021 Defaults to the L{States} enum members 1022 @type restrict: list 1023 """ 1024
1025 - def __init__(self, transitions=None, restrict=Enum.members(States)):
1026 super(TransitionTable, self).__init__(transitions, restrict) 1027 1028 @property
1029 - def _exception(self):
1030 return TransitionNotFoundError
1031
1032 - def append(self, transition):
1033 """ 1034 Append a new C{transition} to the table. 1035 1036 @param transition: transition info 1037 @type transition: L{Transition} 1038 1039 @raise TransitionExistsError: when a transition to the same target state 1040 already exists for the current state 1041 """ 1042 1043 if transition.successor.type in self: 1044 msg = 'Transition to a {0} state is already defined.' 1045 raise TransitionExistsError(msg.format(transition.successor.type)) 1046 1047 super(TransitionTable, self).append(transition.successor.type, transition)
1048
1049 - def set(self, table):
1050 """ 1051 Set the transition table using the dictionary provided in the argument. 1052 1053 @param table: the new transition table 1054 @type table: dict 1055 """ 1056 super(TransitionTable, self)._set(table)
1057
1058 - def update(self, target_statekind, transition):
1059 """ 1060 Update the information of a transition, which points to a target 1061 state of the specified L{States} kind. 1062 1063 @param target_statekind: the key of the transition to be updated 1064 @type target_statekind: L{csb.core.EnumItem} 1065 @param transition: new transition info object 1066 @type transition: L{Transition} 1067 1068 @raise ValueError: if I{transition.successor.type} differs from 1069 C{target_statekind} 1070 """ 1071 if transition.successor.type != target_statekind: 1072 raise ValueError("Successor's type differs from the specified target state.") 1073 1074 super(TransitionTable, self)._update({target_statekind: transition})
1075
1076 1077 -class HMMLayersCollection(csb.core.CollectionContainer):
1078 """ 1079 Provides consecutive, 1-based access to all of the layers in the profile. 1080 Each profile layer contains a catalog of available states at that index, e.g.: 1081 1082 >>> profile.layers[i] 1083 the catalog at profile layer i 1084 >>> profile.layers[i][States.Deletion] 1085 the deletion state at index i 1086 1087 @param layers: initialization list of L{HMMLayer}s 1088 @type layers: list 1089 """
1090 - def __init__(self, layers=None):
1092 1093 @property
1094 - def _exception(self):
1095 return LayerIndexError
1096
1097 -class HMMLayer(csb.core.DictionaryContainer):
1098 """ 1099 Provides a dictionary-like catalog of the available states at this layer. 1100 Lookup keys are members of the L{States} enumeration, e.g.: 1101 1102 >>> profile.layers[i][States.Deletion] 1103 the deletion state at layer number i 1104 1105 @param rank: layer's number 1106 @type rank: int 1107 @param residue: a representative L{ProteinResidue} that is associated with 1108 this layer 1109 @type residue: L{ProteinResidue} 1110 @param states: initialization dictionary of L{States}.Item:L{State} pairs 1111 @type states: dict 1112 """
1113 - def __init__(self, rank, residue, states=None):
1114 1115 super(HMMLayer, self).__init__(states, restrict=Enum.members(States)) 1116 1117 self._rank = int(rank) 1118 self._residue = None 1119 self._effective_matches = None 1120 self._effective_insertions = None 1121 self._effective_deletions = None 1122 1123 self.residue = residue
1124 1125 @property
1126 - def _exception(self):
1127 return StateNotFoundError
1128 1129 @property
1130 - def rank(self):
1131 """ 1132 Layer's position 1133 @rtype: int 1134 """ 1135 return self._rank
1136 @rank.setter
1137 - def rank(self, value):
1138 self._rank = int(value)
1139 1140 @property
1141 - def residue(self):
1142 """ 1143 Representative residue 1144 @rtype: L{Residue} 1145 """ 1146 return self._residue
1147 @residue.setter
1148 - def residue(self, residue):
1149 if residue and residue.type == sequence.SequenceAlphabets.Protein.GAP: 1150 raise HMMArgumentError('HMM match states cannot be gaps') 1151 self._residue = residue
1152 1153 @property
1154 - def effective_matches(self):
1155 """ 1156 Number of effective matches at this layer 1157 @rtype: int 1158 """ 1159 return self._effective_matches
1160 @effective_matches.setter
1161 - def effective_matches(self, value):
1162 self._effective_matches = value
1163 1164 @property
1165 - def effective_insertions(self):
1166 """ 1167 Number of effective insertions at this layer 1168 @rtype: int 1169 """ 1170 return self._effective_insertions
1171 @effective_insertions.setter
1172 - def effective_insertions(self, value):
1173 self._effective_insertions = value
1174 1175 @property
1176 - def effective_deletions(self):
1177 """ 1178 Number of effective deletions at this layer 1179 @rtype: int 1180 """ 1181 return self._effective_deletions
1182 @effective_deletions.setter
1183 - def effective_deletions(self, value):
1184 self._effective_deletions = value
1185
1186 - def append(self, state):
1187 """ 1188 Append a new C{state} to the catalog. 1189 1190 @param state: the new state 1191 @type state: L{State} 1192 1193 @raise StateExistsError: when a state of the same type is already defined 1194 """ 1195 if state.type in self: 1196 raise StateExistsError( 1197 'State {0} is already defined at this position.'.format(state.type)) 1198 1199 super(HMMLayer, self).append(state.type, state)
1200
1201 - def update(self, state_kind, state):
1202 """ 1203 Update the sate of the specified kind under the current layer. 1204 1205 @param state_kind: state type (key) - a member of L{States} 1206 @type state_kind: L{csb.core.EnumItem} 1207 @param state: the new state info 1208 @type state: L{State} 1209 1210 @raise ValueError: if state.type differs from state_kind 1211 """ 1212 if state.type != state_kind: 1213 raise ValueError("State's type differs from the specified state_kind") 1214 1215 super(HMMLayer, self)._update({state_kind: state})
1216
1217 1218 -class State(object):
1219 """ 1220 Describes a Hidden Markov Model state. 1221 1222 @param type: one of the L{States} enumeration values, e.g. States.Match 1223 @type type: L{csb.core.EnumItem} 1224 @param emit: a collection of emittable state names allowed for the state, 1225 e.g. the members of I{SequenceAlphabets.Protein}. If not defined, 1226 the state will be created as a silent (unobservable). 1227 @type emit: list 1228 1229 @raise ValueError: if type is not a member of the States enum 1230 """ 1231
1232 - def __init__(self, type, emit=None):
1233 1234 self._type = None 1235 self._rank = None 1236 self._transitions = TransitionTable() 1237 self._emission = None 1238 self._background = None 1239 1240 self.type = type 1241 1242 if emit is not None: 1243 self._emission = EmissionTable(restrict=emit) 1244 self._background = EmissionTable(restrict=emit)
1245
1246 - def __repr__(self):
1247 return "<HMM {0.type!r} State>".format(self)
1248 1249 @property
1250 - def type(self):
1251 """ 1252 State type: one of the L{States} 1253 @rtype: enum item 1254 """ 1255 return self._type
1256 @type.setter
1257 - def type(self, value):
1258 if value.enum is not States: 1259 raise TypeError(value) 1260 self._type = value
1261 1262 @property
1263 - def rank(self):
1264 return self._rank
1265 @rank.setter
1266 - def rank(self, value):
1267 self._rank = int(value)
1268 1269 @property
1270 - def transitions(self):
1271 """ 1272 Lookup table with available transitions to other states 1273 @rtype: L{TransitionTable} 1274 """ 1275 return self._transitions
1276 1277 @property
1278 - def emission(self):
1279 """ 1280 Lookup table with available emission probabilities 1281 @rtype: L{EmissionTable} 1282 """ 1283 if self._emission is None: 1284 raise UnobservableStateError('Silent {0!r} state'.format(self.type)) 1285 return self._emission
1286 1287 @property
1288 - def background(self):
1289 """ 1290 Lookup table with background probabilities 1291 @rtype: L{EmissionTable} 1292 """ 1293 return self._background
1294 1295 @property
1296 - def silent(self):
1297 """ 1298 Whether this state can emit something 1299 @rtype: bool 1300 """ 1301 try: 1302 return self.emission is None 1303 except UnobservableStateError: 1304 return True
1305
1306 -class StateFactory(object):
1307 """ 1308 Simplifies the construction of protein profile HMM states. 1309 """ 1310
1311 - def __init__(self):
1313
1314 - def create_match(self, emission, background):
1315 1316 state = State(States.Match, emit=self._aa) 1317 state.emission.set(emission) 1318 state.background.set(background) 1319 return state
1320
1321 - def create_insertion(self, background):
1322 1323 state = State(States.Insertion, emit=self._aa) 1324 state.emission.set(background) 1325 state.background.set(background) 1326 return state
1327
1328 - def create_deletion(self):
1329 return State(States.Deletion)
1330
1331 1332 -class TransitionType(object):
1333
1334 - def __init__(self, source, target):
1335 self.source_state = source.type 1336 self.target_state = target.type
1337
1338 - def __repr__(self):
1339 return '{0}->{1}'.format(self.source_state, self.target_state)
1340
1341 -class Transition(object):
1342 """ 1343 Describes a Hidden Markov Model transition between two states. 1344 1345 @param predecessor: source state 1346 @type predecessor: L{State} 1347 @param successor: target state 1348 @type successor: L{State} 1349 @param probability: transition score 1350 @type probability: float 1351 """ 1352
1353 - def __init__(self, predecessor, successor, probability):
1354 1355 if not (isinstance(predecessor, State) or isinstance(successor, State)): 1356 raise TypeError('Predecessor and successor must be State instances.') 1357 1358 self._predecessor = predecessor 1359 self._successor = successor 1360 self._probability = None 1361 self._type = TransitionType(predecessor, successor) 1362 1363 self.probability = probability
1364
1365 - def __str__(self):
1366 return '<HMM Transition: {0.type} {0.probability}>'.format(self)
1367 1368 @property
1369 - def predecessor(self):
1370 """ 1371 Transition source state 1372 @rtype: L{State} 1373 """ 1374 return self._predecessor
1375 1376 @property
1377 - def successor(self):
1378 """ 1379 Transition target state 1380 @rtype: L{State} 1381 """ 1382 return self._successor
1383 1384 @property
1385 - def probability(self):
1386 """ 1387 Transition score 1388 @rtype: float 1389 """ 1390 return self._probability
1391 @probability.setter
1392 - def probability(self, value):
1393 if not (value >=0): 1394 raise ValueError('Transition probability must be a positive number.') 1395 self._probability = float(value)
1396 1397 @property
1398 - def type(self):
1399 """ 1400 Struct, containing information about the source and target state types 1401 @rtype: L{TransitionType} 1402 """ 1403 return self._type
1404
1405 1406 -class HHpredHitAlignment(sequence.SequenceAlignment):
1407 """ 1408 Represents a query-template alignment in an HHpred result. 1409 1410 @param hit: relevant hit object 1411 @type param: L{HHpredHit} 1412 @param query: the query sequence in the alignment region, with gaps 1413 @type query: str 1414 @param subject: the subject sequence in the alignment region, with gaps 1415 @type subject: str 1416 """ 1417 1418 GAP = sequence.ProteinAlphabet.GAP 1419
1420 - def __init__(self, hit, query, subject):
1421 1422 if not isinstance(hit, HHpredHit): 1423 raise TypeError(hit) 1424 1425 self._hit = hit 1426 1427 q = sequence.Sequence('query', '', ''.join(query), type=sequence.SequenceTypes.Protein) 1428 s = sequence.Sequence(hit.id, '', ''.join(subject), type=sequence.SequenceTypes.Protein) 1429 1430 super(HHpredHitAlignment, self).__init__((q, s))
1431 1432 @property
1433 - def query(self):
1434 """ 1435 Query sequence (with gaps) 1436 @rtype: str 1437 """ 1438 return self.rows[1].sequence
1439 1440 @property
1441 - def subject(self):
1442 """ 1443 Subject sequence (with gaps) 1444 @rtype: str 1445 """ 1446 return self.rows[2].sequence
1447 1448 @property
1449 - def segments(self):
1450 """ 1451 Find all ungapped query-subject segments in the alignment. 1452 Return a generator over all ungapped alignment segments, represented 1453 by L{HHpredHit} objects 1454 1455 @rtype: generator 1456 """ 1457 1458 def make_segment(sstart, send, qstart, qend): 1459 1460 seg = HHpredHit(self._hit.rank, self._hit.id, sstart, send, 1461 qstart, qend, self._hit.probability, self._hit.qlength) 1462 1463 seg.slength = self._hit.slength 1464 seg.evalue = self._hit.evalue 1465 seg.pvalue = self._hit.pvalue 1466 seg.score = self._hit.score 1467 seg.ss_score = self._hit.ss_score 1468 seg.identity = self._hit.identity 1469 seg.similarity = self._hit.similarity 1470 seg.prob_sum = self._hit.prob_sum 1471 1472 return seg
1473 1474 in_segment = False 1475 qs = self._hit.qstart - 1 1476 ss = self._hit.start - 1 1477 qi, si = qs, ss 1478 qe, se = qs, ss 1479 1480 for q, s in zip(self.query, self.subject): 1481 1482 if q != HHpredHitAlignment.GAP: 1483 qi += 1 1484 if s != HHpredHitAlignment.GAP: 1485 si += 1 1486 1487 if HHpredHitAlignment.GAP in (q, s): 1488 if in_segment: 1489 yield make_segment(ss, se, qs, qe) 1490 in_segment = False 1491 qs, ss = 0, 0 1492 qe, se = 0, 0 1493 else: 1494 if not in_segment: 1495 in_segment = True 1496 qs, ss = qi, si 1497 1498 qe, se = qi, si 1499 1500 if in_segment: 1501 yield make_segment(ss, se, qs, qe)
1502
1503 - def to_a3m(self):
1504 """ 1505 @return: a query-centric A3M alignment. 1506 @rtype: L{csb.bio.sequence.A3MAlignment} 1507 """ 1508 a3m = self.format(sequence.AlignmentFormats.A3M) 1509 return sequence.A3MAlignment.parse(a3m, strict=False)
1510
1511 -class HHpredHit(object):
1512 """ 1513 Represents a single HHsearch hit. 1514 1515 @param rank: rank of the hit 1516 @type rank: int 1517 @param id: id of the hit 1518 @type id: str 1519 @param start: subject start 1520 @type start: int 1521 @param end: subject end 1522 @type end: int 1523 @param qstart: query start 1524 @type qstart: int 1525 @param qend: query end 1526 @type qend: int 1527 @param probability: probability of the hit 1528 @type probability: float 1529 @param qlength: length of the query 1530 @type qlength: int 1531 """ 1532
1533 - def __init__(self, rank, id, start, end, qstart, qend, probability, 1534 qlength):
1535 1536 self._rank = None 1537 self._id = None 1538 self._start = None 1539 self._end = None 1540 self._qstart = None 1541 self._qend = None 1542 self._probability = None 1543 self._qlength = None 1544 self._alignment = None 1545 1546 self._slength = None 1547 self._evalue = None 1548 self._pvalue = None 1549 self._score = None 1550 self._ss_score = None 1551 self._identity = None 1552 self._similarity = None 1553 self._prob_sum = None 1554 1555 # managed properties 1556 self.rank = rank 1557 self.id = id 1558 self.start = start 1559 self.end = end 1560 self.qstart = qstart 1561 self.qend = qend 1562 self.probability = probability 1563 self.qlength = qlength
1564
1565 - def __str__(self):
1566 return "{0.id} {0.probability} {0.start}-{0.end}".format(self)
1567
1568 - def __repr__(self):
1569 return "<HHpredHit: {0!s}>".format(self)
1570
1571 - def __lt__(self, other):
1572 return self.rank < other.rank
1573
1574 - def equals(self, other):
1575 """ 1576 Return True if C{self} is completely identical to C{other} (same id, same start 1577 and end positions). 1578 1579 @param other: right-hand-term 1580 @type other: HHpredHit 1581 1582 @rtype: bool 1583 """ 1584 return (self.id == other.id and self.start == other.start and self.end == other.end)
1585
1586 - def surpasses(self, other):
1587 """ 1588 Return True if C{self} is a superior to C{other} in terms of length 1589 and probability. These criteria are applied in the following order: 1590 1591 1. Length (the longer hit is better) 1592 2. Probability (if they have the same length, the one with the higher 1593 probability is better) 1594 3. Address (if they have the same length and probability, the one with 1595 higher memory ID wins; for purely practical reasons) 1596 1597 @param other: right-hand-term 1598 @type other: HHpredHit 1599 1600 @rtype: bool 1601 """ 1602 if self.length > other.length: 1603 return True 1604 elif self.length == other.length: 1605 if self.probability > other.probability: 1606 return True 1607 elif self.probability == other.probability: 1608 if id(self) > id(other): 1609 return True 1610 return False
1611
1612 - def includes(self, other, tolerance=1):
1613 """ 1614 Return True if C{other} overlaps with C{self}, that means C{other} 1615 is fully or partially included in C{self} when aligned over the query. 1616 1617 @param other: right-hand-term 1618 @type other: HHpredHit 1619 @param tolerance: allow partial overlaps for that number of residues at 1620 either end 1621 @type tolerance: int 1622 1623 @rtype: bool 1624 """ 1625 if self.id == other.id: 1626 if other.start >= self.start: 1627 if (other.end - self.end) <= tolerance: 1628 return True 1629 elif other.end <= self.end: 1630 if (self.start - other.start) <= tolerance: 1631 return True 1632 1633 return False
1634
1635 - def add_alignment(self, query, subject):
1636 """ 1637 Add query/subject alignment to the hit. 1638 1639 @param query: the query sequence within the alignment region, with gaps 1640 @type query: str 1641 @param subject: the subject sequence within the alignment region, with 1642 gaps 1643 @type subject: str 1644 """ 1645 self._alignment = HHpredHitAlignment(self, query, subject)
1646 1647 @property
1648 - def rank(self):
1649 return self._rank
1650 @rank.setter
1651 - def rank(self, value):
1652 try: 1653 value = int(value) 1654 except: 1655 raise TypeError('rank must be int, not {1}'.format(type(value))) 1656 self._rank = value
1657 1658 @property
1659 - def id(self):
1660 return self._id
1661 @id.setter
1662 - def id(self, value):
1663 try: 1664 value = str(value) 1665 except: 1666 raise TypeError('id must be string, not {0}'.format(type(value))) 1667 self._id = value
1668 1669 @property
1670 - def start(self):
1671 return self._start
1672 @start.setter
1673 - def start(self, value):
1674 try: 1675 value = int(value) 1676 except: 1677 raise TypeError('start must be int, not {0}'.format(type(value))) 1678 self._start = value
1679 1680 @property
1681 - def end(self):
1682 return self._end
1683 @end.setter
1684 - def end(self, value):
1685 try: 1686 value = int(value) 1687 except: 1688 raise TypeError('end must be int, not {0}'.format(type(value))) 1689 self._end = value
1690 1691 @property
1692 - def qstart(self):
1693 return self._qstart
1694 @qstart.setter
1695 - def qstart(self, value):
1696 try: 1697 value = int(value) 1698 except: 1699 raise TypeError('qstart must be int, not {0}'.format(type(value))) 1700 self._qstart = value
1701 1702 @property
1703 - def qend(self):
1704 return self._qend
1705 @qend.setter
1706 - def qend(self, value):
1707 try: 1708 value = int(value) 1709 except: 1710 raise TypeError('qend must be int, not {0}'.format(type(value))) 1711 self._qend = value
1712 1713 @property
1714 - def qlength(self):
1715 return self._qlength
1716 @qlength.setter
1717 - def qlength(self, value):
1718 try: 1719 value = int(value) 1720 except: 1721 raise TypeError('qlength must be int, not {0}'.format(type(value))) 1722 self._qlength = value
1723 1724 @property
1725 - def probability(self):
1726 return self._probability
1727 @probability.setter
1728 - def probability(self, value):
1729 try: 1730 value = float(value) 1731 except: 1732 raise TypeError('probability must be float, not {0}'.format(type(value))) 1733 self._probability = value
1734 1735 @property
1736 - def alignment(self):
1737 return self._alignment
1738 1739 @property
1740 - def length(self):
1741 try: 1742 return self.end - self.start + 1 1743 except: 1744 return 0
1745 1746 @property
1747 - def slength(self):
1748 return self._slength
1749 @slength.setter
1750 - def slength(self, value):
1751 self._slength = value
1752 1753 @property
1754 - def evalue(self):
1755 return self._evalue
1756 @evalue.setter
1757 - def evalue(self, value):
1758 self._evalue = value
1759 1760 @property
1761 - def pvalue(self):
1762 return self._pvalue
1763 @pvalue.setter
1764 - def pvalue(self, value):
1765 self._pvalue = value
1766 1767 @property
1768 - def score(self):
1769 return self._score
1770 @score.setter
1771 - def score(self, value):
1772 self._score = value
1773 1774 @property
1775 - def ss_score(self):
1776 return self._ss_score
1777 @ss_score.setter
1778 - def ss_score(self, value):
1779 self._ss_score = value
1780 1781 @property
1782 - def identity(self):
1783 return self._identity
1784 @identity.setter
1785 - def identity(self, value):
1786 self._identity = value
1787 1788 @property
1789 - def similarity(self):
1790 return self._similarity
1791 @similarity.setter
1792 - def similarity(self, value):
1793 self._similarity = value
1794 1795 @property
1796 - def prob_sum(self):
1797 return self._prob_sum
1798 @prob_sum.setter
1799 - def prob_sum(self, value):
1800 self._prob_sum = value
1801
1802 1803 -class HHpredHitList(object):
1804 """ 1805 Represents a collection of L{HHpredHit}s. 1806 """ 1807
1808 - def __init__(self, hits, query_name='', match_columns=-1, no_of_seqs='', 1809 neff=-1., searched_hmms=-1, date='', command=''):
1810 self._hits = list(hits) 1811 1812 self._query_name = None 1813 self._match_columns = None 1814 self._no_of_seqs = None 1815 self._neff = None 1816 self._searched_hmms = None 1817 self._date = None 1818 self._command = None 1819 1820 self.query_name = query_name 1821 self.match_columns = match_columns 1822 self.no_of_seqs = no_of_seqs 1823 self.neff = neff 1824 self.searched_hmms = searched_hmms 1825 self.date = date 1826 self.command = command
1827 1828 @property
1829 - def query_name(self):
1830 return self._query_name
1831 @query_name.setter
1832 - def query_name(self, value):
1833 self._query_name = value
1834 1835 @property
1836 - def match_columns(self):
1837 return self._match_columns
1838 @match_columns.setter
1839 - def match_columns(self, value):
1840 self._match_columns = value
1841 1842 @property
1843 - def no_of_seqs(self):
1844 return self._no_of_seqs
1845 @no_of_seqs.setter
1846 - def no_of_seqs(self, value):
1847 self._no_of_seqs = value
1848 1849 @property
1850 - def neff(self):
1851 return self._neff
1852 @neff.setter
1853 - def neff(self, value):
1854 self._neff = value
1855 1856 @property
1857 - def searched_hmms(self):
1858 return self._searched_hmms
1859 @searched_hmms.setter
1860 - def searched_hmms(self, value):
1861 self._searched_hmms = value
1862 1863 @property
1864 - def date(self):
1865 return self._date
1866 @date.setter
1867 - def date(self, value):
1868 self._date = value
1869 1870 @property
1871 - def command(self):
1872 return self._command
1873 @command.setter
1874 - def command(self, value):
1875 self._command = value
1876
1877 - def __str__(self):
1878 return "HHpredHitList\n\tquery={0.query_name}\n\tmatch_columns={0.match_columns}\n\tno_of_seqs={0.no_of_seqs}\n\tneff={0.neff}\n\tsearched_hmms={0.searched_hmms}\n\tdate={0.date}\n\tcommand={0.command}".format(self)
1879
1880 - def __repr__(self):
1881 return "<HHpredHitList: {0} hits>".format(len(self))
1882
1883 - def __getitem__(self, index):
1884 return self._hits[index]
1885
1886 - def __iter__(self):
1887 return iter(self._hits)
1888
1889 - def __len__(self):
1890 return len(self._hits)
1891
1892 - def sort(self):
1893 self._hits.sort(key=lambda i: i.rank)
1894