csb.bio.hmm

1 """ 2 HHpred and Hidden Markov Model APIs. 3 4 This package defines the abstractions for working with HHpred's HMMs and 5 hit lists. L{ProfileHMM} is the most important object of this module. 6 It describes a sequence profile hidden Markov model in the way HHpred 7 sees this concept: 8 9 - a profile is composed of a list of L{HMMLayer}s, which contain a 10 number of L{State}s 11 - these L{States} can be of different types: Match, Insertion Deletion, etc. 12 - a profile contains a multiple alignment, from which it is derived 13 - this multiple alignment is an A3M (condensed) Alignment, where the 14 first sequence is a master sequence 15 - the match states in all layers correspond to the residues of the master 16 sequence 17 18 L{ProfileHMM} objects provide list-like access to their layers: 19 20 >>> hmm.layers[1] 21 <HMMLayer> # first layer: layer at master residue=1 22 23 Every layer provides dictionary-like access to its states: 24 25 >>> layer[States.Match] 26 <Match State> 27 28 and every state provides dictionary-like access to its transitions to other 29 states: 30 31 >>> state = hmm.layers[1][States.match] 32 >>> state.transitions[States.Insertion] 33 <Transition> # Match > Insertion 34 >>> transition.predecessor 35 <Match State> # source state 36 >>> transition.successor 37 <Insertion State> # target state 38 39 Whether this transition points to a state at the same (i) or the next layer 40 (i+1) depends on the semantics of the source and the target states. 41 42 Building HMMs from scratch is supported through a number of C{append} methods 43 at various places: 44 45 >>> layer = HMMLayer(...) 46 >>> layer.append(State(...)) 47 >>> hmm.layers.append(layer) 48 49 See L{HMMLayersCollection}, L{HMMLayer}, L{EmissionTable} and L{TransitionTable} 50 for details. 51 """ 52 53 import sys 54 import math 55 56 import csb.core 57 import csb.io 58 import csb.bio.structure as structure 59 import csb.bio.sequence as sequence 60 61 from csb.core import Enum

62 63 64 -class UnobservableStateError(AttributeError):

65 pass

66

67 -class StateNotFoundError(csb.core.ItemNotFoundError):

68 pass

69

70 -class TransitionNotFoundError(StateNotFoundError):

71 pass

72

73 -class LayerIndexError(csb.core.CollectionIndexError):

74 pass

75

76 -class StateExistsError(KeyError):

77 pass

78

79 -class TransitionExistsError(KeyError):

80 pass

81

82 -class EmissionExistsError(KeyError):

83 pass

84

85 -class HMMArgumentError(ValueError):

86 pass

87

88 89 -class States(csb.core.enum):

90 """ 91 Enumeration of HMM state types 92 """ 93 Match='M'; Insertion='I'; Deletion='D'; Start='S'; End='E'

94

95 -class ScoreUnits(csb.core.enum):

96 """ 97 Enumeration of HMM emission and transition score units 98 """ 99 LogScales='LogScales'; Probability='Probability'

100 101 BACKGROUND = [ 0.076627178753322270, 0.018866884241976509, 0.053996136712517316, 102 0.059788009880742142, 0.034939432842683173, 0.075415244982547675, 103 0.036829356494115069, 0.050485048600600511, 0.059581159080509941, 104 0.099925728794059046, 0.021959667190729986, 0.040107059298840765, 105 0.045310838527464106, 0.032644867589507229, 0.051296350550656143, 106 0.046617000834108295, 0.071051060827250878, 0.072644631719882335, 107 0.012473412286822654, 0.039418044025976547 ] 108 """ 109 Background amino acid probabilities 110 """ 111 112 RELATIVE_SA = { 'A': 0.02, 'B': 0.14, 'C': 0.33, 'D': 0.55, 'E': 1.00 } 113 """ 114 Relative solvent accessibility codes (upper bounds) 115 """

116 117 118 -class ProfileHMM(object):

119 """ 120 Describes a protein profile Hidden Markov Model. 121 Optional parameters: 122 123 @param units: defines the units of the transition and emission scores 124 @type units: L{ScoreUnits} 125 @param scale: the scaling factor used to convert emission/transition 126 probabilities 127 @type scale: float 128 @param logbase: the base of the logarithm used for scaling the emission and 129 transition probabilities 130 @type logbase: float 131 """ 132

133 - def __init__(self, units=ScoreUnits.LogScales, scale=-1000., logbase=2):

134 135 self._name = None 136 self._id = None 137 self._family = None 138 self._length = ProfileLength(0, 0) 139 self._alignment = None 140 self._consensus = None 141 self._dssp = None 142 self._dssp_solvent = None 143 self._psipred = None 144 self._effective_matches = None 145 self._evd = EVDParameters(None, None) 146 self._version = None 147 self._pseudocounts = False 148 self._emission_pseudocounts = False 149 self._transition_pseudocounts = False 150 self._layers = HMMLayersCollection() 151 self._start = State(States.Start) 152 self._start_insertion = None 153 self._end = State(States.End) 154 self._scale = scale 155 self._logbase = logbase 156 if units is None: 157 self._score_units = ScoreUnits.LogScales 158 else: 159 self._score_units = units

160 161 @property

162 - def name(self):

163 """ 164 Profile name (NAME) 165 @rtype: str 166 """ 167 return self._name

168 @name.setter

169 - def name(self, value):

170 self._name = str(value)

171 172 @property

173 - def id(self):

174 """ 175 Profile entry ID (FILE) 176 @rtype: str 177 """ 178 return self._id

179 @id.setter

180 - def id(self, value):

181 self._id = str(value)

182 183 @property

184 - def family(self):

185 """ 186 Alternative entry ID (FAM) 187 @rtype: str 188 """ 189 return self._family

190 @family.setter

191 - def family(self, value):

192 self._family = str(value)

193 194 @property

195 - def length(self):

196 """ 197 Profile length 198 @rtype: L{ProfileLength} 199 """ 200 return self._length

201 @length.setter

202 - def length(self, value):

203 if not isinstance(value, ProfileLength): 204 raise TypeError(value) 205 self._length = value

206 207 @property

208 - def alignment(self):

209 """ 210 Source multiple alignment 211 @rtype: L{A3MAlignment} 212 """ 213 return self._alignment

214 @alignment.setter

215 - def alignment(self, value):

216 if not isinstance(value, sequence.A3MAlignment): 217 raise TypeError(value) 218 self._alignment = value

219 220 @property

221 - def consensus(self):

222 """ 223 Consensus sequence 224 @rtype: L{AbstractSequence} 225 """ 226 return self._consensus

227 @consensus.setter

228 - def consensus(self, value):

229 if not isinstance(value, sequence.AbstractSequence): 230 raise TypeError(value) 231 self._consensus = value

232 233 @property

234 - def dssp(self):

235 """ 236 DSSP (calculated) secondary structure 237 @rtype: L{SecondaryStructure} 238 """ 239 return self._dssp

240 @dssp.setter

241 - def dssp(self, value):

242 if not isinstance(value, structure.SecondaryStructure): 243 raise TypeError(value) 244 self._dssp = value

245 246 @property

247 - def dssp_solvent(self):

248 """ 249 Solvent accessibility values 250 @rtype: str 251 """ 252 return self._dssp_solvent

253 @dssp_solvent.setter

254 - def dssp_solvent(self, value):

255 self._dssp_solvent = str(value)

256 257 @property

258 - def psipred(self):

259 """ 260 PSIPRED (predicted) secondary structure 261 @rtype: L{SecondaryStructure} 262 """ 263 return self._psipred

264 @psipred.setter

265 - def psipred(self, value):

266 if not isinstance(value, structure.SecondaryStructure): 267 raise TypeError(value) 268 self._psipred = value

269 270 @property

271 - def effective_matches(self):

272 """ 273 Number of effective matches (NEFF) 274 """ 275 return self._effective_matches

276 @effective_matches.setter

277 - def effective_matches(self, value):

278 self._effective_matches = value

279 280 @property

281 - def evd(self):

282 """ 283 Extreme-value distribution parameters (EVD) 284 @rtype: L{EVDParameters} 285 """ 286 return self._evd

287 @evd.setter

288 - def evd(self, value):

289 if not isinstance(value, EVDParameters): 290 raise TypeError(value) 291 self._evd = value

292 293 @property

294 - def version(self):

295 """ 296 Format version number (HHsearch) 297 @rtype: str 298 """ 299 return self._version

300 @version.setter

301 - def version(self, value):

302 self._version = str(value)

303 304 @property

305 - def pseudocounts(self):

306 """ 307 @rtype: bool 308 """ 309 return self._pseudocounts

310 @pseudocounts.setter

311 - def pseudocounts(self, value):

312 self._pseudocounts = bool(value)

313 314 @property

315 - def emission_pseudocounts(self):

316 """ 317 @rtype: bool 318 """ 319 return self._emission_pseudocounts

320 @emission_pseudocounts.setter

321 - def emission_pseudocounts(self, value):

322 self._emission_pseudocounts = bool(value)

323 324 @property

325 - def transition_pseudocounts(self):

326 """ 327 @rtype: bool 328 """ 329 return self._transition_pseudocounts

330 @transition_pseudocounts.setter

331 - def transition_pseudocounts(self, value):

332 self._transition_pseudocounts = bool(value)

333 334 @property

335 - def layers(self):

336 """ 337 List-like access to the HMM's layers 338 @rtype: L{HMMLayersCollection} 339 """ 340 return self._layers

341 342 @property

343 - def start(self):

344 """ 345 Start state (at the start layer) 346 @rtype: L{State} 347 """ 348 return self._start

349 @start.setter

350 - def start(self, value):

351 if value is None or (isinstance(value, State) and value.type == States.Start): 352 self._start = value 353 else: 354 raise TypeError(value)

355 356 @property

357 - def start_insertion(self):

358 """ 359 Insertion state at the start layer 360 @rtype: L{State} 361 """ 362 return self._start_insertion

363 @start_insertion.setter

364 - def start_insertion(self, value):

365 if value is None or (isinstance(value, State) and value.type == States.Insertion): 366 self._start_insertion = value 367 else: 368 raise TypeError(value)

369 370 @property

371 - def end(self):

372 """ 373 Final state (at the end layer) 374 @rtype: L{State} 375 """ 376 return self._end

377 @end.setter

378 - def end(self, value):

379 if value is None or (isinstance(value, State) and value.type == States.End): 380 self._end = value 381 else: 382 raise TypeError(value)

383 384 @property

385 - def scale(self):

386 """ 387 Score scaling factor 388 @rtype: float 389 """ 390 return self._scale

391 392 @property

393 - def logbase(self):

394 """ 395 Base of the logarithm used for score scaling 396 @rtype: float 397 """ 398 return self._logbase

399 400 @property

401 - def score_units(self):

402 """ 403 Current score units 404 @rtype: L{ScoreUnits} member 405 """ 406 return self._score_units

407 408 @property

409 - def residues(self):

410 """ 411 List of representative residues, attached to each layer 412 @rtype: collection of L{Residue} 413 """ 414 res = [layer.residue for layer in self.layers] 415 return csb.core.ReadOnlyCollectionContainer( 416 res, type=structure.Residue, start_index=1)

417 418 @property

419 - def all_layers(self):

420 """ 421 A list of layers including start and start_insertion 422 @rtype: list of L{HMMLayer} 423 """ 424 complete_layers = [] 425 first_layer = HMMLayer(rank=0, residue=None) 426 first_layer.append(self.start) 427 if self.start_insertion: 428 first_layer.append(self.start_insertion) 429 complete_layers.append(first_layer) 430 for layer in self.layers: 431 complete_layers.append(layer) 432 433 return complete_layers

434 435 @property

436 - def has_structure(self):

437 """ 438 True if this profile contains structural data 439 @rtype: bool 440 """ 441 has = False 442 for layer in self.layers: 443 if layer.residue.has_structure: 444 return True 445 return has

446

447 - def serialize(self, file_name):

448 """ 449 Serialize this HMM to a file. 450 451 @param file_name: target file name 452 @type file_name: str 453 """ 454 rec = sys.getrecursionlimit() 455 sys.setrecursionlimit(10000) 456 csb.io.Pickle.dump(self, open(file_name, 'wb')) 457 sys.setrecursionlimit(rec)

458 459 @staticmethod

460 - def deserialize(file_name):

461 """ 462 De-serialize an HMM from a file. 463 464 @param file_name: source file name (pickle) 465 @type file_name: str 466 """ 467 rec = sys.getrecursionlimit() 468 sys.setrecursionlimit(10000) 469 try: 470 return csb.io.Pickle.load(open(file_name, 'rb')) 471 finally: 472 sys.setrecursionlimit(rec)

473

474 - def _convert(self, units, score, scale, logbase):

475 476 if units == ScoreUnits.Probability: 477 return logbase ** (score / scale) 478 elif units == ScoreUnits.LogScales: 479 if score == 0: 480 #score = sys.float_info.min 481 return None 482 return math.log(score, logbase) * scale 483 else: 484 raise ValueError('Unknown target unit {0}'.format(units))

485

486 - def to_hmm(self, output_file=None, convert_scores=False):

487 """ 488 Dump the profile in HHM format. 489 490 @param output_file: the output file name 491 @type output_file: str 492 @param convert_scores: if True, forces automatic convertion to 493 L{ScoreUnits}.LogScales, which is required 494 by the output file format 495 @type convert_scores: bool 496 """ 497 from csb.bio.io.hhpred import HHMFileBuilder 498 499 if convert_scores: 500 self.convert_scores(ScoreUnits.LogScales) 501 502 temp = csb.io.MemoryStream() 503 504 builder = HHMFileBuilder(temp) 505 builder.add_hmm(self) 506 507 data = temp.getvalue() 508 temp.close() 509 510 if not output_file: 511 return data 512 else: 513 with csb.io.EntryWriter(output_file, close=False) as out: 514 out.write(data)

515

516 - def segment(self, start, end):

517 """ 518 Extract a sub-segment of the profile. 519 520 @param start: start layer of the segment (rank) 521 @type start: int 522 @param end: end layer of the segment (rank) 523 @type end: int 524 525 @return: a deepcopy of the extracted HMM segment 526 @rtype: L{ProfileHMMSegment} 527 """ 528 return ProfileHMMSegment(self, start, end)

529

530 - def subregion(self, start, end):

531 532 return ProfileHMMRegion(self, start, end)

533

534 - def add_emission_pseudocounts(self, *a, **k):

535 """ 536 See L{csb.bio.hmm.pseudocounts.PseudocountBuilder} 537 """ 538 from csb.bio.hmm.pseudocounts import PseudocountBuilder 539 PseudocountBuilder(self).add_emission_pseudocounts(*a, **k)

540

541 - def add_transition_pseudocounts(self, *a, **k):

542 """ 543 See L{csb.bio.hmm.pseudocounts.PseudocountBuilder} 544 """ 545 from csb.bio.hmm.pseudocounts import PseudocountBuilder 546 PseudocountBuilder(self).add_transition_pseudocounts(*a, **k)

547

548 - def structure(self, chain_id=None, accession=None):

549 """ 550 Extract the structural information from the HMM. 551 552 @param accession: defines the accession number of the structure 553 @type accession: str 554 @param chain_id: defines explicitly the chain identifier 555 @type chain_id: str 556 557 @return: a shallow L{Structure} wrapper around the residues in the HMM. 558 @rtype: L{Structure} 559 """ 560 struct = structure.Structure(accession or self.id) 561 chain = self.chain(chain_id) 562 struct.chains.append(chain) 563 564 return struct

565

566 - def chain(self, chain_id=None):

567 """ 568 Extract the structural information from the HMM. 569 570 @param chain_id: defines explicitly the chain identifier 571 @type chain_id: str 572 573 @return: a shallow L{Chain} wrapper around the residues in the HMM. 574 @rtype: L{Chain} 575 """ 576 if chain_id is None: 577 if self.id: 578 chain_id = self.id.rstrip()[-1] 579 else: 580 chain_id = '_' 581 582 chain = structure.Chain(chain_id, type=sequence.SequenceTypes.Protein, 583 residues=self.residues) 584 chain._torsion_computed = True 585 return chain

586

587 - def emission_profile(self):

588 """ 589 Extract the emission scores of all match states in the profile. 590 The metric of the emission scores returned depends on the current 591 hmm.score_units setting - you may need to call hmm.convert_scores() 592 to adjust the hmm to your particular needs. 593 594 @return: a list of dictionaries; each dict key is a single amino acid 595 @rtype: list 596 """ 597 profile = [] 598 599 for layer in self.layers: 600 emission = {} 601 602 for aa in layer[States.Match].emission: 603 emission[str(aa)] = layer[States.Match].emission[aa] or 0.0 604 profile.append(emission) 605 606 return profile

607

608 - def convert_scores(self, units=ScoreUnits.Probability, method=None):

609 """ 610 Convert emission and transition scores to the specified units. 611 612 @param units: the target units for the conversion (a member of 613 L{ScoreUnits}). 614 @type units: L{csb.core.EnumItem} 615 @param method: if defined, implements the exact mathematical 616 transformation that will be applied. It must be a 617 function or lambda expression with the following 618 signature:: 619 def (target_units, score, scale, logbase) 620 621 and it has to return the score converted to 622 C{target_units}. If method performs a conversion from 623 probabilities to scaled logs, you should also update 624 C{hmm.scale} and C{hmm.logbase}. 625 @type method: function, lambda 626 """ 627 628 if self._score_units == units: 629 return 630 631 if method is not None: 632 convert = method 633 else: 634 convert = self._convert 635 636 for layer in self.layers: 637 for state_kind in layer: 638 state = layer[state_kind] 639 if not state.silent: 640 for residue in state.emission: 641 if state.emission[residue] is not None: 642 state.emission.update(residue, convert( 643 units, state.emission[residue], 644 self.scale, self.logbase)) 645 for residue in state.background: 646 if state.background[residue] is not None: 647 state.background.update(residue, convert( 648 units, state.background[residue], 649 self.scale, self.logbase)) 650 for tran_kind in state.transitions: 651 transition = state.transitions[tran_kind] 652 transition.probability = convert(units, transition.probability, 653 self.scale, self.logbase) 654 # The Neff-s are interger numbers and should not be transformed 655 # (except when writing the profile to a hhm file) 656 657 if self.start_insertion: 658 for t_it in self.start_insertion.transitions: 659 transition = self.start_insertion.transitions[t_it] 660 transition.probability = convert(units, transition.probability, 661 self.scale, self.logbase) 662 663 for residue in self.start_insertion.emission: 664 state = self.start_insertion 665 if state.emission[residue] is not None: 666 state.emission.update(residue, 667 convert(units, state.emission[residue], self.scale, self.logbase)) 668 state.background.update(residue, 669 convert(units, state.background[residue], self.scale, self.logbase)) 670 671 for tran_kind in self.start.transitions: 672 transition = self.start.transitions[tran_kind] 673 transition.probability = convert(units, 674 transition.probability, self.scale, self.logbase) 675 676 677 self._score_units = units

678

679 - def emission_similarity(self, other):

680 """ 681 Compute the Log-sum-of-odds score between the emission tables of self 682 and other (Soeding 2004). If no observable Match state is found at a 683 given layer, the Insertion state is used instead. 684 685 @note: This is not a full implementation of the formula since only 686 emission vectors are involved in the computation and any transition 687 probabilities are ignored. 688 689 @param other: the subject HMM 690 @type other: L{ProfileHMM} 691 692 @return: emission log-sum-of-odds similarity between C{self} and 693 C{other} 694 @rtype: float 695 696 @raise ValueError: when self and other differ in their length, when the 697 score_units are not Probability, or when no 698 observable states are present 699 """ 700 score = 1 701 702 if self.layers.length != other.layers.length or self.layers.length < 1: 703 raise ValueError('Both HMMs must have the same nonzero number of layers') 704 if self.score_units != ScoreUnits.Probability or \ 705 other.score_units != ScoreUnits.Probability: 706 raise ValueError('Scores must be converted to probabilities first.') 707 708 for q_layer, s_layer in zip(self.layers, other.layers): 709 710 try: 711 if States.Match in q_layer and not q_layer[States.Match].silent: 712 q_state = q_layer[States.Match] 713 else: 714 q_state = q_layer[States.Insertion] 715 716 if States.Match in s_layer and not s_layer[States.Match].silent: 717 s_state = s_layer[States.Match] 718 else: 719 s_state = s_layer[States.Insertion] 720 except csb.core.ItemNotFoundError: 721 raise ValueError('Query and subject must contain observable states ' 722 'at each layer') 723 724 emission_dotproduct = 0 725 726 for aa in q_state.emission: 727 728 q_emission = q_state.emission[aa] or sys.float_info.min 729 s_emission = s_state.emission[aa] or sys.float_info.min 730 emission_dotproduct += (q_emission * s_emission / 731 q_state.background[aa]) 732 733 score *= emission_dotproduct 734 735 return math.log(score)

736

737 - def _assign_secstructure(self):

738 """ 739 Attach references from each profile layer to the relevant DSSP secondary 740 structure element. 741 """ 742 assert self.dssp is not None 743 744 for motif in self.dssp: 745 for i in range(motif.start, motif.end + 1): 746 self.layers[i].residue.secondary_structure = motif

747

748 749 -class ProfileHMMSegment(ProfileHMM):

750 """ 751 Represents a segment (fragment) of a ProfileHMM. 752 753 @param hmm: source HMM 754 @type hmm: ProfileHMM 755 @param start: start layer of the segment (rank) 756 @type start: int 757 @param end: end layer of the segment (rank) 758 @type end: int 759 760 @raise ValueError: when start or end positions are out of range 761 """ 762

763 - def __init__(self, hmm, start, end):

764 765 if start < hmm.layers.start_index or start > hmm.layers.last_index: 766 raise IndexError('Start position {0} is out of range'.format(start)) 767 if end < hmm.layers.start_index or end > hmm.layers.last_index: 768 raise IndexError('End position {0} is out of range'.format(end)) 769 770 #hmm = csb.core.deepcopy(hmm) 771 772 super(ProfileHMMSegment, self).__init__(units=hmm.score_units, 773 scale=hmm.scale, logbase=hmm.logbase) 774 self.id = hmm.id 775 self.family = hmm.family 776 self.name = hmm.name 777 self.pseudocounts = hmm.pseudocounts 778 self.evd = hmm.evd 779 self.version = hmm.version 780 self.source = hmm.id 781 self._source_start = start 782 self._source_end = end 783 784 if hmm.alignment: 785 self.alignment = hmm.alignment.hmm_subregion(start, end) 786 self.consensus = hmm.consensus.subregion(start, end) 787 788 layers = csb.core.deepcopy(hmm.layers[start : end + 1]) 789 max_score = 1.0 790 if hmm.score_units != ScoreUnits.Probability: 791 max_score = hmm._convert(hmm.score_units, 792 max_score, hmm.scale, hmm.logbase) 793 self._build_graph(layers, max_score) 794 795 if hmm.dssp: 796 self.dssp = hmm.dssp.subregion(start, end) 797 self._assign_secstructure() 798 if hmm.psipred: 799 self.psipred = hmm.psipred.subregion(start, end) 800 801 self.length.layers = self.layers.length 802 self.length.matches = self.layers.length 803 self.effective_matches = sum([(l.effective_matches or 0.0) for l in self.layers]) / self.layers.length

804 805 @property

806 - def source_start(self):

807 """ 808 Start position of this segment in its source HMM 809 @rtype: int 810 """ 811 return self._source_start

812 813 @property

814 - def source_end(self):

815 """ 816 End position of this segment in its source HMM 817 @rtype: int 818 """ 819 return self._source_end

820

821 - def _build_graph(self, source_layers, max_score):

822 823 for rank, layer in enumerate(source_layers, start=1): 824 825 for atom_kind in layer.residue.atoms: 826 layer.residue.atoms[atom_kind].rank = rank 827 layer.residue._rank = rank 828 layer.rank = rank 829 830 self.layers.append(layer) 831 832 if rank == 1: 833 for state_kind in layer: 834 if state_kind in(States.Match, States.Deletion): 835 start_tran = Transition(self.start, layer[state_kind], max_score) 836 self.start.transitions.append(start_tran) 837 elif rank == len(source_layers): 838 for state_kind in layer: 839 state = layer[state_kind] 840 if not (States.End in state.transitions or States.Match in state.transitions): 841 state.transitions.set({}) 842 else: 843 end_tran = Transition(state, self.end, max_score) 844 state.transitions.set({States.End: end_tran}) # TODO: I->I ?

845

846 847 -class EmissionProfileSegment(ProfileHMMSegment):

848 """ 849 Represents a segment of the Match state emission probabilities of a L{ProfileHMM}. 850 Contains only Match states, connected with equal transition probabilities of 100%. 851 """ 852

853 - def _build_graph(self, source_layers):

854 855 factory = StateFactory() 856 857 for rank, source_layer in enumerate(source_layers, start=1): 858 859 emission = source_layer[States.Match].emission 860 background = source_layer[States.Match].background 861 862 match = factory.create_match(emission, background) 863 match.rank = rank 864 865 layer = HMMLayer(rank, source_layer.residue) 866 layer.append(match) 867 self.layers.append(layer) 868 869 if rank == 1: 870 self.start.transitions.append(Transition(self.start, match, 1.0)) 871 elif rank < len(source_layers): 872 prev_match = self.layers[rank - 1][States.Match] 873 prev_match.transitions.append(Transition(prev_match, match, 1.0)) 874 elif rank == len(source_layers): 875 match.transitions.append(Transition(match, self.end, 1.0)) 876 else: 877 assert False

878

879 880 -class ProfileHMMRegion(ProfileHMM):

881 """ 882 A shallow proxy referring to a sub-region of a given Profile HMM. 883 884 @param hmm: source HMM 885 @type hmm: L{ProfileHMM} 886 @param start: start layer of the segment (rank) 887 @type start: int 888 @param end: end layer of the segment (rank) 889 @type end: int 890 891 @raise ValueError: when start or end positions are out of range 892 """ 893

894 - def __init__(self, hmm, start, end):

895 896 if start < hmm.layers.start_index or start > hmm.layers.last_index: 897 raise IndexError('Start position {0} is out of range'.format(start)) 898 if end < hmm.layers.start_index or end > hmm.layers.last_index: 899 raise IndexError('End position {0} is out of range'.format(end)) 900 if hmm.score_units != ScoreUnits.Probability: 901 raise ValueError('Scores must be converted to probabilities first.') 902 903 self._layers = HMMLayersCollection(hmm.layers[start : end + 1]) 904 self._score_units = hmm.score_units 905 self.id = hmm.id 906 self.name = hmm.name 907 self.family = hmm.family 908 self._source_start = start 909 self._source_end = end

910 911 @property

912 - def source_start(self):

913 """ 914 Start position of this segment in its source HMM 915 @rtype: int 916 """ 917 return self._source_start

918 919 @property

920 - def source_end(self):

921 """ 922 End position of this segment in its source HMM 923 @rtype: int 924 """ 925 return self._source_end

926

927 928 -class ProfileLength(object):

929

930 - def __init__(self, matches, layers):

931 self.matches = matches 932 self.layers = layers

933

934 935 -class EVDParameters(object):

936

937 - def __init__(self, lamda, mu):

938 self.lamda = lamda 939 self.mu = mu

940

941 - def __nonzero__(self):

942 return self.__bool__()

943

944 - def __bool__(self):

945 return (self.lamda is not None or self.mu is not None)

946

947 948 -class EmissionTable(csb.core.DictionaryContainer):

949 """ 950 Represents a lookup table of emission probabilities. Provides dictionary-like 951 access: 952 953 >>> state.emission[ProteinAlphabet.ALA] 954 emission probability for ALA 955 956 @param emission: an initialization dictionary of emission probabilities 957 @type emission: dict 958 @param restrict: a list of residue types allowed for this emission table. 959 Defaults to the members of L{csb.bio.sequence.ProteinAlphabet} 960 @type restrict: list 961 """ 962

963 - def __init__(self, emission=None, restrict=Enum.members(sequence.ProteinAlphabet)):

964 super(EmissionTable, self).__init__(emission, restrict)

965

966 - def append(self, residue, probability):

967 """ 968 Append a new emission probability to the table. 969 970 @param residue: residue name (type) - a member of 971 L{csb.bio.sequence.ProteinAlphabet} 972 @type residue: L{csb.core.EnumItem} 973 @param probability: emission score 974 @type probability: float 975 976 @raise EmissionExistsError: if residue is already defined 977 """ 978 if residue in self: 979 raise EmissionExistsError('Residue {0} is already defined.'.format(residue)) 980 981 super(EmissionTable, self).append(residue, probability)

982

983 - def set(self, table):

984 """ 985 Set the emission table using the dictionary provided in the argument. 986 987 @param table: the new emission table 988 @type table: dict 989 """ 990 super(EmissionTable, self)._set(table)

991

992 - def update(self, residue, probability):

993 """ 994 Update the emission C{probability} of a given emission C{residue}. 995 996 @param residue: name (type) of the residue to be updated 997 @type residue: L{csb.core.EnumItem} 998 @param probability: new emission score 999 @type probability: float 1000 """ 1001 super(EmissionTable, self)._update({residue: probability})

1002

1003 1004 -class TransitionTable(csb.core.DictionaryContainer):

1005 """ 1006 Represents a lookup table of transitions that are possible from within a given state. 1007 1008 Provides dictionary-like access, where dictionary keys are target states. 1009 These are members of the L{States} enumeration, e.g.: 1010 1011 >>> state.transitions[States.Match] 1012 transition info regarding transition from the current state to a Match state 1013 >>> state.transitions[States.Match].predecessor 1014 state 1015 >>> state.transitions[States.Match].successor 1016 the next match state 1017 1018 @param transitions: an initialization dictionary of target L{State}:L{Transition} pairs 1019 @type transitions: dict 1020 @param restrict: a list of target states allowed for this transition table. 1021 Defaults to the L{States} enum members 1022 @type restrict: list 1023 """ 1024

1025 - def __init__(self, transitions=None, restrict=Enum.members(States)):

1026 super(TransitionTable, self).__init__(transitions, restrict) 1027 1028 @property

1029 - def _exception(self):

1030 return TransitionNotFoundError

1031

1032 - def append(self, transition):

1033 """ 1034 Append a new C{transition} to the table. 1035 1036 @param transition: transition info 1037 @type transition: L{Transition} 1038 1039 @raise TransitionExistsError: when a transition to the same target state 1040 already exists for the current state 1041 """ 1042 1043 if transition.successor.type in self: 1044 msg = 'Transition to a {0} state is already defined.' 1045 raise TransitionExistsError(msg.format(transition.successor.type)) 1046 1047 super(TransitionTable, self).append(transition.successor.type, transition)

1048

1049 - def set(self, table):

1050 """ 1051 Set the transition table using the dictionary provided in the argument. 1052 1053 @param table: the new transition table 1054 @type table: dict 1055 """ 1056 super(TransitionTable, self)._set(table)

1057

1058 - def update(self, target_statekind, transition):

1059 """ 1060 Update the information of a transition, which points to a target 1061 state of the specified L{States} kind. 1062 1063 @param target_statekind: the key of the transition to be updated 1064 @type target_statekind: L{csb.core.EnumItem} 1065 @param transition: new transition info object 1066 @type transition: L{Transition} 1067 1068 @raise ValueError: if I{transition.successor.type} differs from 1069 C{target_statekind} 1070 """ 1071 if transition.successor.type != target_statekind: 1072 raise ValueError("Successor's type differs from the specified target state.") 1073 1074 super(TransitionTable, self)._update({target_statekind: transition})

1075

1076 1077 -class HMMLayersCollection(csb.core.CollectionContainer):

1078 """ 1079 Provides consecutive, 1-based access to all of the layers in the profile. 1080 Each profile layer contains a catalog of available states at that index, e.g.: 1081 1082 >>> profile.layers[i] 1083 the catalog at profile layer i 1084 >>> profile.layers[i][States.Deletion] 1085 the deletion state at index i 1086 1087 @param layers: initialization list of L{HMMLayer}s 1088 @type layers: list 1089 """

1090 - def __init__(self, layers=None):

1091 super(HMMLayersCollection, self).__init__(layers, type=HMMLayer, start_index=1)

1092 1093 @property

1094 - def _exception(self):

1095 return LayerIndexError

1096

1097 -class HMMLayer(csb.core.DictionaryContainer):

1098 """ 1099 Provides a dictionary-like catalog of the available states at this layer. 1100 Lookup keys are members of the L{States} enumeration, e.g.: 1101 1102 >>> profile.layers[i][States.Deletion] 1103 the deletion state at layer number i 1104 1105 @param rank: layer's number 1106 @type rank: int 1107 @param residue: a representative L{ProteinResidue} that is associated with 1108 this layer 1109 @type residue: L{ProteinResidue} 1110 @param states: initialization dictionary of L{States}.Item:L{State} pairs 1111 @type states: dict 1112 """

1113 - def __init__(self, rank, residue, states=None):

1114 1115 super(HMMLayer, self).__init__(states, restrict=Enum.members(States)) 1116 1117 self._rank = int(rank) 1118 self._residue = None 1119 self._effective_matches = None 1120 self._effective_insertions = None 1121 self._effective_deletions = None 1122 1123 self.residue = residue

1124 1125 @property

1126 - def _exception(self):

1127 return StateNotFoundError

1128 1129 @property

1130 - def rank(self):

1131 """ 1132 Layer's position 1133 @rtype: int 1134 """ 1135 return self._rank

1136 @rank.setter

1137 - def rank(self, value):

1138 self._rank = int(value)

1139 1140 @property

1141 - def residue(self):

1142 """ 1143 Representative residue 1144 @rtype: L{Residue} 1145 """ 1146 return self._residue

1147 @residue.setter

1148 - def residue(self, residue):

1149 if residue and residue.type == sequence.SequenceAlphabets.Protein.GAP: 1150 raise HMMArgumentError('HMM match states cannot be gaps') 1151 self._residue = residue

1152 1153 @property

1154 - def effective_matches(self):

1155 """ 1156 Number of effective matches at this layer 1157 @rtype: int 1158 """ 1159 return self._effective_matches

1160 @effective_matches.setter

1161 - def effective_matches(self, value):

1162 self._effective_matches = value

1163 1164 @property

1165 - def effective_insertions(self):

1166 """ 1167 Number of effective insertions at this layer 1168 @rtype: int 1169 """ 1170 return self._effective_insertions

1171 @effective_insertions.setter

1172 - def effective_insertions(self, value):

1173 self._effective_insertions = value

1174 1175 @property

1176 - def effective_deletions(self):

1177 """ 1178 Number of effective deletions at this layer 1179 @rtype: int 1180 """ 1181 return self._effective_deletions

1182 @effective_deletions.setter

1183 - def effective_deletions(self, value):

1184 self._effective_deletions = value

1185

1186 - def append(self, state):

1187 """ 1188 Append a new C{state} to the catalog. 1189 1190 @param state: the new state 1191 @type state: L{State} 1192 1193 @raise StateExistsError: when a state of the same type is already defined 1194 """ 1195 if state.type in self: 1196 raise StateExistsError( 1197 'State {0} is already defined at this position.'.format(state.type)) 1198 1199 super(HMMLayer, self).append(state.type, state)

1200

1201 - def update(self, state_kind, state):

1202 """ 1203 Update the sate of the specified kind under the current layer. 1204 1205 @param state_kind: state type (key) - a member of L{States} 1206 @type state_kind: L{csb.core.EnumItem} 1207 @param state: the new state info 1208 @type state: L{State} 1209 1210 @raise ValueError: if state.type differs from state_kind 1211 """ 1212 if state.type != state_kind: 1213 raise ValueError("State's type differs from the specified state_kind") 1214 1215 super(HMMLayer, self)._update({state_kind: state})

1216

1217 1218 -class State(object):

1219 """ 1220 Describes a Hidden Markov Model state. 1221 1222 @param type: one of the L{States} enumeration values, e.g. States.Match 1223 @type type: L{csb.core.EnumItem} 1224 @param emit: a collection of emittable state names allowed for the state, 1225 e.g. the members of I{SequenceAlphabets.Protein}. If not defined, 1226 the state will be created as a silent (unobservable). 1227 @type emit: list 1228 1229 @raise ValueError: if type is not a member of the States enum 1230 """ 1231

1232 - def __init__(self, type, emit=None):

1233 1234 self._type = None 1235 self._rank = None 1236 self._transitions = TransitionTable() 1237 self._emission = None 1238 self._background = None 1239 1240 self.type = type 1241 1242 if emit is not None: 1243 self._emission = EmissionTable(restrict=emit) 1244 self._background = EmissionTable(restrict=emit)

1245

1246 - def __repr__(self):

1247 return "<HMM {0.type!r} State>".format(self)

1248 1249 @property

1250 - def type(self):

1251 """ 1252 State type: one of the L{States} 1253 @rtype: enum item 1254 """ 1255 return self._type

1256 @type.setter

1257 - def type(self, value):

1258 if value.enum is not States: 1259 raise TypeError(value) 1260 self._type = value

1261 1262 @property

1263 - def rank(self):

1264 return self._rank

1265 @rank.setter

1266 - def rank(self, value):

1267 self._rank = int(value)

1268 1269 @property

1270 - def transitions(self):

1271 """ 1272 Lookup table with available transitions to other states 1273 @rtype: L{TransitionTable} 1274 """ 1275 return self._transitions

1276 1277 @property

1278 - def emission(self):

1279 """ 1280 Lookup table with available emission probabilities 1281 @rtype: L{EmissionTable} 1282 """ 1283 if self._emission is None: 1284 raise UnobservableStateError('Silent {0!r} state'.format(self.type)) 1285 return self._emission

1286 1287 @property

1288 - def background(self):

1289 """ 1290 Lookup table with background probabilities 1291 @rtype: L{EmissionTable} 1292 """ 1293 return self._background

1294 1295 @property

1296 - def silent(self):

1297 """ 1298 Whether this state can emit something 1299 @rtype: bool 1300 """ 1301 try: 1302 return self.emission is None 1303 except UnobservableStateError: 1304 return True

1305

1306 -class StateFactory(object):

1307 """ 1308 Simplifies the construction of protein profile HMM states. 1309 """ 1310

1311 - def __init__(self):

1312 self._aa = Enum.members(sequence.ProteinAlphabet)

1313

1314 - def create_match(self, emission, background):

1315 1316 state = State(States.Match, emit=self._aa) 1317 state.emission.set(emission) 1318 state.background.set(background) 1319 return state

1320

1321 - def create_insertion(self, background):

1322 1323 state = State(States.Insertion, emit=self._aa) 1324 state.emission.set(background) 1325 state.background.set(background) 1326 return state

1327

1328 - def create_deletion(self):

1329 return State(States.Deletion)

1330

1331 1332 -class TransitionType(object):

1333

1334 - def __init__(self, source, target):

1335 self.source_state = source.type 1336 self.target_state = target.type

1337

1338 - def __repr__(self):

1339 return '{0}->{1}'.format(self.source_state, self.target_state)

1340

1341 -class Transition(object):

1342 """ 1343 Describes a Hidden Markov Model transition between two states. 1344 1345 @param predecessor: source state 1346 @type predecessor: L{State} 1347 @param successor: target state 1348 @type successor: L{State} 1349 @param probability: transition score 1350 @type probability: float 1351 """ 1352

1353 - def __init__(self, predecessor, successor, probability):

1354 1355 if not (isinstance(predecessor, State) or isinstance(successor, State)): 1356 raise TypeError('Predecessor and successor must be State instances.') 1357 1358 self._predecessor = predecessor 1359 self._successor = successor 1360 self._probability = None 1361 self._type = TransitionType(predecessor, successor) 1362 1363 self.probability = probability

1364

1365 - def __str__(self):

1366 return '<HMM Transition: {0.type} {0.probability}>'.format(self)

1367 1368 @property

1369 - def predecessor(self):

1370 """ 1371 Transition source state 1372 @rtype: L{State} 1373 """ 1374 return self._predecessor

1375 1376 @property

1377 - def successor(self):

1378 """ 1379 Transition target state 1380 @rtype: L{State} 1381 """ 1382 return self._successor

1383 1384 @property

1385 - def probability(self):

1386 """ 1387 Transition score 1388 @rtype: float 1389 """ 1390 return self._probability

1391 @probability.setter

1392 - def probability(self, value):

1393 if not (value >=0): 1394 raise ValueError('Transition probability must be a positive number.') 1395 self._probability = float(value)

1396 1397 @property

1398 - def type(self):

1399 """ 1400 Struct, containing information about the source and target state types 1401 @rtype: L{TransitionType} 1402 """ 1403 return self._type

1404

1405 1406 -class HHpredHitAlignment(sequence.SequenceAlignment):

1407 """ 1408 Represents a query-template alignment in an HHpred result. 1409 1410 @param hit: relevant hit object 1411 @type param: L{HHpredHit} 1412 @param query: the query sequence in the alignment region, with gaps 1413 @type query: str 1414 @param subject: the subject sequence in the alignment region, with gaps 1415 @type subject: str 1416 """ 1417 1418 GAP = sequence.ProteinAlphabet.GAP 1419

1420 - def __init__(self, hit, query, subject):

1421 1422 if not isinstance(hit, HHpredHit): 1423 raise TypeError(hit) 1424 1425 self._hit = hit 1426 1427 q = sequence.Sequence('query', '', ''.join(query), type=sequence.SequenceTypes.Protein) 1428 s = sequence.Sequence(hit.id, '', ''.join(subject), type=sequence.SequenceTypes.Protein) 1429 1430 super(HHpredHitAlignment, self).__init__((q, s))

1431 1432 @property

1433 - def query(self):

1434 """ 1435 Query sequence (with gaps) 1436 @rtype: str 1437 """ 1438 return self.rows[1].sequence

1439 1440 @property

1441 - def subject(self):

1442 """ 1443 Subject sequence (with gaps) 1444 @rtype: str 1445 """ 1446 return self.rows[2].sequence

1447 1448 @property

1449 - def segments(self):

1450 """ 1451 Find all ungapped query-subject segments in the alignment. 1452 Return a generator over all ungapped alignment segments, represented 1453 by L{HHpredHit} objects 1454 1455 @rtype: generator 1456 """ 1457 1458 def make_segment(sstart, send, qstart, qend): 1459 1460 seg = HHpredHit(self._hit.rank, self._hit.id, sstart, send, 1461 qstart, qend, self._hit.probability, self._hit.qlength) 1462 1463 seg.slength = self._hit.slength 1464 seg.evalue = self._hit.evalue 1465 seg.pvalue = self._hit.pvalue 1466 seg.score = self._hit.score 1467 seg.ss_score = self._hit.ss_score 1468 seg.identity = self._hit.identity 1469 seg.similarity = self._hit.similarity 1470 seg.prob_sum = self._hit.prob_sum 1471 1472 return seg

1473 1474 in_segment = False 1475 qs = self._hit.qstart - 1 1476 ss = self._hit.start - 1 1477 qi, si = qs, ss 1478 qe, se = qs, ss 1479 1480 for q, s in zip(self.query, self.subject): 1481 1482 if q != HHpredHitAlignment.GAP: 1483 qi += 1 1484 if s != HHpredHitAlignment.GAP: 1485 si += 1 1486 1487 if HHpredHitAlignment.GAP in (q, s): 1488 if in_segment: 1489 yield make_segment(ss, se, qs, qe) 1490 in_segment = False 1491 qs, ss = 0, 0 1492 qe, se = 0, 0 1493 else: 1494 if not in_segment: 1495 in_segment = True 1496 qs, ss = qi, si 1497 1498 qe, se = qi, si 1499 1500 if in_segment: 1501 yield make_segment(ss, se, qs, qe)

1502

1503 - def to_a3m(self):

1504 """ 1505 @return: a query-centric A3M alignment. 1506 @rtype: L{csb.bio.sequence.A3MAlignment} 1507 """ 1508 a3m = self.format(sequence.AlignmentFormats.A3M) 1509 return sequence.A3MAlignment.parse(a3m, strict=False)

1510

1511 -class HHpredHit(object):

1512 """ 1513 Represents a single HHsearch hit. 1514 1515 @param rank: rank of the hit 1516 @type rank: int 1517 @param id: id of the hit 1518 @type id: str 1519 @param start: subject start 1520 @type start: int 1521 @param end: subject end 1522 @type end: int 1523 @param qstart: query start 1524 @type qstart: int 1525 @param qend: query end 1526 @type qend: int 1527 @param probability: probability of the hit 1528 @type probability: float 1529 @param qlength: length of the query 1530 @type qlength: int 1531 """ 1532

1533 - def __init__(self, rank, id, start, end, qstart, qend, probability, 1534 qlength):

1535 1536 self._rank = None 1537 self._id = None 1538 self._start = None 1539 self._end = None 1540 self._qstart = None 1541 self._qend = None 1542 self._probability = None 1543 self._qlength = None 1544 self._alignment = None 1545 1546 self._slength = None 1547 self._evalue = None 1548 self._pvalue = None 1549 self._score = None 1550 self._ss_score = None 1551 self._identity = None 1552 self._similarity = None 1553 self._prob_sum = None 1554 1555 # managed properties 1556 self.rank = rank 1557 self.id = id 1558 self.start = start 1559 self.end = end 1560 self.qstart = qstart 1561 self.qend = qend 1562 self.probability = probability 1563 self.qlength = qlength

1564

1565 - def __str__(self):

1566 return "{0.id} {0.probability} {0.start}-{0.end}".format(self)

1567

1568 - def __repr__(self):

1569 return "<HHpredHit: {0!s}>".format(self)

1570

1571 - def __lt__(self, other):

1572 return self.rank < other.rank

1573

1574 - def equals(self, other):

1575 """ 1576 Return True if C{self} is completely identical to C{other} (same id, same start 1577 and end positions). 1578 1579 @param other: right-hand-term 1580 @type other: HHpredHit 1581 1582 @rtype: bool 1583 """ 1584 return (self.id == other.id and self.start == other.start and self.end == other.end)

1585

1586 - def surpasses(self, other):

1587 """ 1588 Return True if C{self} is a superior to C{other} in terms of length 1589 and probability. These criteria are applied in the following order: 1590 1591 1. Length (the longer hit is better) 1592 2. Probability (if they have the same length, the one with the higher 1593 probability is better) 1594 3. Address (if they have the same length and probability, the one with 1595 higher memory ID wins; for purely practical reasons) 1596 1597 @param other: right-hand-term 1598 @type other: HHpredHit 1599 1600 @rtype: bool 1601 """ 1602 if self.length > other.length: 1603 return True 1604 elif self.length == other.length: 1605 if self.probability > other.probability: 1606 return True 1607 elif self.probability == other.probability: 1608 if id(self) > id(other): 1609 return True 1610 return False

1611

1612 - def includes(self, other, tolerance=1):

1613 """ 1614 Return True if C{other} overlaps with C{self}, that means C{other} 1615 is fully or partially included in C{self} when aligned over the query. 1616 1617 @param other: right-hand-term 1618 @type other: HHpredHit 1619 @param tolerance: allow partial overlaps for that number of residues at 1620 either end 1621 @type tolerance: int 1622 1623 @rtype: bool 1624 """ 1625 if self.id == other.id: 1626 if other.start >= self.start: 1627 if (other.end - self.end) <= tolerance: 1628 return True 1629 elif other.end <= self.end: 1630 if (self.start - other.start) <= tolerance: 1631 return True 1632 1633 return False

1634

1635 - def add_alignment(self, query, subject):

1636 """ 1637 Add query/subject alignment to the hit. 1638 1639 @param query: the query sequence within the alignment region, with gaps 1640 @type query: str 1641 @param subject: the subject sequence within the alignment region, with 1642 gaps 1643 @type subject: str 1644 """ 1645 self._alignment = HHpredHitAlignment(self, query, subject)

1646 1647 @property

1648 - def rank(self):

1649 return self._rank

1650 @rank.setter

1651 - def rank(self, value):

1652 try: 1653 value = int(value) 1654 except: 1655 raise TypeError('rank must be int, not {1}'.format(type(value))) 1656 self._rank = value

1657 1658 @property

1659 - def id(self):

1660 return self._id

1661 @id.setter

1662 - def id(self, value):

1663 try: 1664 value = str(value) 1665 except: 1666 raise TypeError('id must be string, not {0}'.format(type(value))) 1667 self._id = value

1668 1669 @property

1670 - def start(self):

1671 return self._start

1672 @start.setter

1673 - def start(self, value):

1674 try: 1675 value = int(value) 1676 except: 1677 raise TypeError('start must be int, not {0}'.format(type(value))) 1678 self._start = value

1679 1680 @property

1681 - def end(self):

1682 return self._end

1683 @end.setter

1684 - def end(self, value):

1685 try: 1686 value = int(value) 1687 except: 1688 raise TypeError('end must be int, not {0}'.format(type(value))) 1689 self._end = value

1690 1691 @property

1692 - def qstart(self):

1693 return self._qstart

1694 @qstart.setter

1695 - def qstart(self, value):

1696 try: 1697 value = int(value) 1698 except: 1699 raise TypeError('qstart must be int, not {0}'.format(type(value))) 1700 self._qstart = value

1701 1702 @property

1703 - def qend(self):

1704 return self._qend

1705 @qend.setter

1706 - def qend(self, value):

1707 try: 1708 value = int(value) 1709 except: 1710 raise TypeError('qend must be int, not {0}'.format(type(value))) 1711 self._qend = value

1712 1713 @property

1714 - def qlength(self):

1715 return self._qlength

1716 @qlength.setter

1717 - def qlength(self, value):

1718 try: 1719 value = int(value) 1720 except: 1721 raise TypeError('qlength must be int, not {0}'.format(type(value))) 1722 self._qlength = value

1723 1724 @property

1725 - def probability(self):

1726 return self._probability

1727 @probability.setter

1728 - def probability(self, value):

1729 try: 1730 value = float(value) 1731 except: 1732 raise TypeError('probability must be float, not {0}'.format(type(value))) 1733 self._probability = value

1734 1735 @property

1736 - def alignment(self):

1737 return self._alignment

1738 1739 @property

1740 - def length(self):

1741 try: 1742 return self.end - self.start + 1 1743 except: 1744 return 0

1745 1746 @property

1747 - def slength(self):

1748 return self._slength

1749 @slength.setter

1750 - def slength(self, value):

1751 self._slength = value

1752 1753 @property

1754 - def evalue(self):

1755 return self._evalue

1756 @evalue.setter

1757 - def evalue(self, value):

1758 self._evalue = value

1759 1760 @property

1761 - def pvalue(self):

1762 return self._pvalue

1763 @pvalue.setter

1764 - def pvalue(self, value):

1765 self._pvalue = value

1766 1767 @property

1768 - def score(self):

1769 return self._score

1770 @score.setter

1771 - def score(self, value):

1772 self._score = value

1773 1774 @property

1775 - def ss_score(self):

1776 return self._ss_score

1777 @ss_score.setter

1778 - def ss_score(self, value):

1779 self._ss_score = value

1780 1781 @property

1782 - def identity(self):

1783 return self._identity

1784 @identity.setter

1785 - def identity(self, value):

1786 self._identity = value

1787 1788 @property

1789 - def similarity(self):

1790 return self._similarity

1791 @similarity.setter

1792 - def similarity(self, value):

1793 self._similarity = value

1794 1795 @property

1796 - def prob_sum(self):

1797 return self._prob_sum

1798 @prob_sum.setter

1799 - def prob_sum(self, value):

1800 self._prob_sum = value

1801

1802 1803 -class HHpredHitList(object):

1804 """ 1805 Represents a collection of L{HHpredHit}s. 1806 """ 1807

1808 - def __init__(self, hits, query_name='', match_columns=-1, no_of_seqs='', 1809 neff=-1., searched_hmms=-1, date='', command=''):

1810 self._hits = list(hits) 1811 1812 self._query_name = None 1813 self._match_columns = None 1814 self._no_of_seqs = None 1815 self._neff = None 1816 self._searched_hmms = None 1817 self._date = None 1818 self._command = None 1819 1820 self.query_name = query_name 1821 self.match_columns = match_columns 1822 self.no_of_seqs = no_of_seqs 1823 self.neff = neff 1824 self.searched_hmms = searched_hmms 1825 self.date = date 1826 self.command = command

1827 1828 @property

1829 - def query_name(self):

1830 return self._query_name

1831 @query_name.setter

1832 - def query_name(self, value):

1833 self._query_name = value

1834 1835 @property

1836 - def match_columns(self):

1837 return self._match_columns

1838 @match_columns.setter

1839 - def match_columns(self, value):

1840 self._match_columns = value

1841 1842 @property

1843 - def no_of_seqs(self):

1844 return self._no_of_seqs

1845 @no_of_seqs.setter

1846 - def no_of_seqs(self, value):

1847 self._no_of_seqs = value

1848 1849 @property

1850 - def neff(self):

1851 return self._neff

1852 @neff.setter

1853 - def neff(self, value):

1854 self._neff = value

1855 1856 @property

1857 - def searched_hmms(self):

1858 return self._searched_hmms

1859 @searched_hmms.setter

1860 - def searched_hmms(self, value):

1861 self._searched_hmms = value

1862 1863 @property

1864 - def date(self):

1865 return self._date

1866 @date.setter

1867 - def date(self, value):

1868 self._date = value

1869 1870 @property

1871 - def command(self):

1872 return self._command

1873 @command.setter

1874 - def command(self, value):

1875 self._command = value

1876

1877 - def __str__(self):

1878 return "HHpredHitList\n\tquery={0.query_name}\n\tmatch_columns={0.match_columns}\n\tno_of_seqs={0.no_of_seqs}\n\tneff={0.neff}\n\tsearched_hmms={0.searched_hmms}\n\tdate={0.date}\n\tcommand={0.command}".format(self)

1879

1880 - def __repr__(self):

1881 return "<HHpredHitList: {0} hits>".format(len(self))

1882

1883 - def __getitem__(self, index):

1884 return self._hits[index]

1885

1886 - def __iter__(self):

1887 return iter(self._hits)

1888

1889 - def __len__(self):

1890 return len(self._hits)

1891

1892 - def sort(self):

1893 self._hits.sort(key=lambda i: i.rank)

1894

Source Code for Package csb.bio.hmm