Package csb :: Package bio :: Package structure
[frames] | no frames]

Source Code for Package csb.bio.structure

   1  """ 
   2  3D and secondary structure APIs. 
   3   
   4  This module defines some of the most fundamental abstractions in the library: 
   5  L{Structure}, L{Chain}, L{Residue} and L{Atom}. Instances of these objects may 
   6  exist independently and that is perfectly fine, but usually they are part of a 
   7  Composite aggregation. The root node in this Composite is a L{Structure} (or 
   8  L{Ensemble}). L{Structure}s are composed of L{Chain}s, and each L{Chain} is a 
   9  collection of L{Residue}s. The leaf node is L{Atom}.  
  10   
  11  All of these objects implement the base L{AbstractEntity} interface. Therefore, 
  12  every node in the Composite can be transformed: 
  13       
  14      >>> r, t = [rotation matrix], [translation vector] 
  15      >>> entity.transform(r, t) 
  16       
  17  and it knows its immediate children: 
  18   
  19      >>> entity.items 
  20      <iterator>    # over all immediate child entities 
  21       
  22  If you want to traverse the complete Composite tree, starting at arbitrary level, 
  23  and down to the lowest level, use one of the L{CompositeEntityIterator}s. Or just 
  24  call L{AbstractEntity.components}: 
  25   
  26      >>> entity.components() 
  27      <iterator>   # over all descendants, of any type, at any level 
  28      >>> entity.components(klass=Residue) 
  29      <iterator>   # over all Residue descendants 
  30       
  31  Some of the inner objects in this hierarchy behave just like dictionaries 
  32  (but are not): 
  33   
  34      >>> structure.chains['A']       # access chain A by ID 
  35      <Chain A: Protein> 
  36      >>> structure['A']              # the same 
  37      <Chain A: Protein> 
  38      >>> residue.atoms['CS']           
  39      <Atom: CA>                      # access an atom by its name 
  40      >>> residue.atoms['CS']           
  41      <Atom: CA>                      # the same 
  42           
  43  Others behave like list collections: 
  44   
  45      >>> chain.residues[10]               # 1-based access to the residues in the chain 
  46      <ProteinResidue [10]: PRO 10> 
  47      >>> chain[10]                        # 0-based, list-like access 
  48      <ProteinResidue [11]: GLY 11> 
  49       
  50  Step-wise building of L{Ensemble}s, L{Chain}s and L{Residue}s is supported through 
  51  a number of C{append} methods, for example: 
  52   
  53      >>> residue = ProteinResidue(401, ProteinAlphabet.ALA) 
  54      >>> s.chains['A'].residues.append(residue) 
  55       
  56  See L{EnsembleModelsCollection}, L{StructureChainsTable}, L{ChainResiduesCollection} 
  57  and L{ResidueAtomsTable} for more details. 
  58   
  59  Some other objects in this module of potential interest are the self-explanatory 
  60  L{SecondaryStructure} and L{TorsionAngles}.      
  61  """ 
  62   
  63  import os 
  64  import re 
  65  import copy 
  66  import math 
  67  import numpy 
  68   
  69  import csb.io 
  70  import csb.core 
  71  import csb.numeric 
  72  import csb.bio.utils 
  73   
  74  from abc import ABCMeta, abstractmethod, abstractproperty 
  75   
  76  from csb.bio.sequence import SequenceTypes, SequenceAlphabets, AlignmentTypes 
77 78 79 -class AngleUnits(csb.core.enum):
80 """ 81 Torsion angle unit types 82 """ 83 Degrees='deg'; Radians='rad'
84
85 -class SecStructures(csb.core.enum):
86 """ 87 Secondary structure types 88 """ 89 Helix='H'; Strand='E'; Coil='C'; Turn='T'; Bend='S'; 90 Helix3='G'; PiHelix='I'; BetaBridge='B'; Gap='-'
91
92 -class ChemElements(csb.core.enum):
93 """ 94 Periodic table elements 95 """ 96 H=1; He=2; Li=3; Be=4; B=5; C=6; N=7; O=8; F=9; Ne=10; Na=11; Mg=12; Al=13; Si=14; P=15; 97 S=16; Cl=17; Ar=18; K=19; Ca=20; Sc=21; Ti=22; V=23; Cr=24; Mn=25; Fe=26; Co=27; Ni=28; 98 Cu=29; Zn=30; Ga=31; Ge=32; As=33; Se=34; Br=35; Kr=36; Rb=37; Sr=38; Y=39; Zr=40; Nb=41; 99 Mo=42; Tc=43; Ru=44; Rh=45; Pd=46; Ag=47; Cd=48; In=49; Sn=50; Sb=51; Te=52; I=53; Xe=54; 100 Cs=55; Ba=56; Hf=72; Ta=73; W=74; Re=75; Os=76; Ir=77; Pt=78; Au=79; Hg=80; Tl=81; Pb=82; 101 Bi=83; Po=84; At=85; Rn=86; Fr=87; Ra=88; Rf=104; Db=105; Sg=106; Bh=107; Hs=108; Mt=109; 102 Ds=110; Rg=111; La=57; Ce=58; Pr=59; Nd=60; Pm=61; Sm=62; Eu=63; Gd=64; Tb=65; Dy=66; 103 Ho=67; Er=68; Tm=69; Yb=70; Lu=71; Ac=89; Th=90; Pa=91; U=92; Np=93; Pu=94; Am=95; Cm=96; 104 Bk=97; Cf=98; Es=99; Fm=100; Md=101; No=102; Lr=103; x=-1
105
106 107 -class Broken3DStructureError(ValueError):
108 pass
109
110 -class Missing3DStructureError(Broken3DStructureError):
111 pass
112
113 -class InvalidOperation(Exception):
114 pass
115
116 -class EntityNotFoundError(csb.core.ItemNotFoundError):
117 pass
118
119 -class ChainNotFoundError(EntityNotFoundError):
120 pass
121
122 -class AtomNotFoundError(EntityNotFoundError):
123 pass
124
125 -class EntityIndexError(csb.core.CollectionIndexError):
126 pass
127
128 -class DuplicateModelIDError(csb.core.DuplicateKeyError):
129 pass
130
131 -class DuplicateChainIDError(csb.core.DuplicateKeyError):
132 pass
133
134 -class DuplicateResidueIDError(csb.core.DuplicateKeyError):
135 pass
136
137 -class DuplicateAtomIDError(csb.core.DuplicateKeyError):
138 pass
139
140 -class AlignmentArgumentLengthError(ValueError):
141 pass
142
143 -class BrokenSecStructureError(ValueError):
144 pass
145
146 -class UnknownSecStructureError(BrokenSecStructureError):
147 pass
148
149 -class AbstractEntity(object):
150 """ 151 Base class for all protein structure entities. 152 153 This class defines uniform interface of all entities (e.g. L{Structure}, 154 L{Chain}, L{Residue}) according to the Composite pattern. 155 """ 156 157 __metaclass__ = ABCMeta 158 159 @abstractproperty
160 - def items(self):
161 """ 162 Iterator over all immediate children of the entity 163 @rtype: iterator of L{AbstractEntity} 164 """ 165 pass
166
167 - def components(self, klass=None):
168 """ 169 Return an iterator over all descendants of the entity. 170 171 @param klass: return entities of the specified L{AbstractEntity} subclass 172 only. If None, traverse the hierarchy down to the lowest level. 173 @param klass: class 174 """ 175 for entity in CompositeEntityIterator.create(self, klass): 176 if klass is None or isinstance(entity, klass): 177 yield entity
178
179 - def transform(self, rotation, translation):
180 """ 181 Apply in place RotationMatrix and translation Vector to all atoms. 182 183 @type rotation: numpy array 184 @type translation: numpy array 185 """ 186 for node in self.items: 187 node.transform(rotation, translation)
188
189 - def get_coordinates(self, what=None, skip=False):
190 """ 191 Extract the coordinates of the specified kind(s) of atoms and return 192 them as a list. 193 194 @param what: a list of atom kinds, e.g. ['N', 'CA', 'C'] 195 @type what: list or None 196 197 @return: a list of lists, each internal list corresponding to the coordinates 198 of a 3D vector 199 @rtype: list 200 201 @raise Broken3DStructureError: if a specific atom kind cannot be retrieved from a residue 202 """ 203 coords = [ ] 204 205 for residue in self.components(klass=Residue): 206 for atom_kind in (what or residue.atoms): 207 try: 208 coords.append(residue.atoms[atom_kind].vector) 209 except csb.core.ItemNotFoundError: 210 if skip: 211 continue 212 raise Broken3DStructureError('Could not retrieve {0} atom from the structure'.format(atom_kind)) 213 214 return numpy.array(coords)
215
216 -class CompositeEntityIterator(object):
217 """ 218 Iterates over composite L{AbstractEntity} hierarchies. 219 220 @param node: root entity to traverse 221 @type node: L{AbstractEntity} 222 """ 223
224 - def __init__(self, node):
225 226 if not isinstance(node, AbstractEntity): 227 raise TypeError(node) 228 229 self._node = node 230 self._stack = csb.core.Stack() 231 232 self._inspect(node)
233
234 - def __iter__(self):
235 return self
236
237 - def __next__(self):
238 return self.next()
239
240 - def next(self):
241 242 while True: 243 if self._stack.empty(): 244 raise StopIteration() 245 246 try: 247 current = self._stack.peek() 248 node = next(current) 249 self._inspect(node) 250 return node 251 252 except StopIteration: 253 self._stack.pop()
254
255 - def _inspect(self, node):
256 """ 257 Push C{node}'s children to the stack. 258 """ 259 self._stack.push(node.items)
260 261 @staticmethod
262 - def create(node, leaf=None):
263 """ 264 Create a new composite iterator. 265 266 @param leaf: if not None, return a L{ConfinedEntityIterator} 267 @type leaf: class 268 @rtype: L{CompositeEntityIterator} 269 """ 270 if leaf is None: 271 return CompositeEntityIterator(node) 272 else: 273 return ConfinedEntityIterator(node, leaf)
274
275 -class ConfinedEntityIterator(CompositeEntityIterator):
276 """ 277 Iterates over composite L{AbstractEntity} hierarchies, but terminates 278 the traversal of a branch once a specific node type is encountered. 279 280 @param node: root entity to traverse 281 @type node: L{AbstractEntity} 282 @param leaf: traverse the hierarchy down to the specified L{AbstractEntity} 283 @type leaf: class 284 """
285 - def __init__(self, node, leaf):
286 287 if not issubclass(leaf, AbstractEntity): 288 raise TypeError(leaf) 289 290 self._leaf = leaf 291 super(ConfinedEntityIterator, self).__init__(node)
292
293 - def _inspect(self, node):
294 295 if not isinstance(node, self._leaf): 296 self._stack.push(node.items)
297
298 -class Ensemble(csb.core.AbstractNIContainer, AbstractEntity):
299 """ 300 Represents an ensemble of multiple L{Structure} models. 301 Provides a list-like access to these models: 302 303 >>> ensemble[0] 304 <Structure Model 1: accn, x chains> 305 >>> ensemble.models[1] 306 <Structure Model 1: accn, x chains> 307 """ 308
309 - def __init__(self):
310 self._models = EnsembleModelsCollection()
311
312 - def __repr__(self):
313 return "<Ensemble: {0} models>".format(self.models.length)
314 315 @property
316 - def _children(self):
317 return self._models
318 319 @property
320 - def models(self):
321 """ 322 Access Ensembles's models by model ID 323 @rtype: L{EnsembleModelsCollection} 324 """ 325 return self._models
326 327 @property
328 - def items(self):
329 return iter(self._models)
330 331 @property
332 - def first_model(self):
333 """ 334 The first L{Structure} in the ensemble (if available) 335 @rtype: L{Structure} or None 336 """ 337 if len(self._models) > 0: 338 return self[0] 339 return None
340
341 - def to_pdb(self, output_file=None):
342 """ 343 Dump the ensemble in PDB format. 344 345 @param output_file: output file name or open stream 346 @type output_file: str or stream 347 """ 348 from csb.bio.io.wwpdb import PDBEnsembleFileBuilder 349 350 if self.models.length < 1: 351 raise InvalidOperation("Can't dump an empty ensemble") 352 353 temp = csb.io.MemoryStream() 354 355 builder = PDBEnsembleFileBuilder(temp) 356 builder.add_header(self.first_model) 357 358 for model in self.models: 359 builder.add_structure(model) 360 361 builder.finalize() 362 363 data = temp.getvalue() 364 temp.close() 365 366 if not output_file: 367 return data 368 else: 369 with csb.io.EntryWriter(output_file, close=False) as out: 370 out.write(data)
371
372 -class EnsembleModelsCollection(csb.core.CollectionContainer):
373
374 - def __init__(self):
375 376 super(EnsembleModelsCollection, self).__init__(type=Structure, start_index=1) 377 self._models = set()
378
379 - def append(self, structure):
380 """ 381 Add a new model 382 383 @param structure: model to append 384 @type structure: L{Structure} 385 """ 386 387 if not structure.model_id or not str(structure.model_id).strip(): 388 raise ValueError("Invalid model identifier: '{0.model_id}'".format(structure)) 389 if structure.model_id in self._models: 390 raise DuplicateModelIDError(structure.model_id) 391 else: 392 return super(EnsembleModelsCollection, self).append(structure)
393 394 @property
395 - def _exception(self):
396 return EntityIndexError
397
398 399 -class Structure(csb.core.AbstractNIContainer, AbstractEntity):
400 """ 401 Represents a single model of a PDB 3-Dimensional molecular structure. 402 Provides access to the L{Chain} objects, contained in the model: 403 404 >>> structure['A'] 405 <Chain A: Protein> 406 >>> structure.chains['A'] 407 <Chain A: Protein> 408 >>> structure.items 409 <iterator of Chain-s> 410 411 @param accession: accession number of the structure 412 @type accession: str 413 """
414 - def __init__(self, accession):
415 416 self._accession = None 417 self._chains = StructureChainsTable(self) 418 self._model_id = None 419 self._resolution = None 420 421 self.accession = accession
422
423 - def __repr__(self):
424 return "<Structure Model {0.model_id}: {0.accession}, {1} chains>".format(self, self.chains.length)
425 426 @property
427 - def _children(self):
428 return self._chains
429 430 @property
431 - def chains(self):
432 """ 433 Access chains by their chain identifiers 434 @rtype: L{StructureChainsTable} 435 """ 436 return self._chains
437 438 @property
439 - def items(self):
440 for chain in self._chains: 441 yield self._chains[chain]
442 443 @property
444 - def first_chain(self):
445 """ 446 The first L{Chain} in the structure (if available) 447 @rtype: L{Chain} or None 448 """ 449 if len(self._chains) > 0: 450 return next(self.items) 451 return None
452 453 @property
454 - def accession(self):
455 """ 456 Accession number 457 @rtype: str 458 """ 459 return self._accession
460 @accession.setter
461 - def accession(self, accession):
462 if accession is None: 463 raise ValueError(accession) 464 self._accession = str(accession).strip().lower() 465 for c in self.chains: 466 self.chains[c]._accession = self._accession
467 468 @property
469 - def model_id(self):
470 """ 471 Model ID 472 @rtype: int 473 """ 474 return self._model_id
475 @model_id.setter
476 - def model_id(self, value):
477 self._model_id = value
478 479 @property
480 - def resolution(self):
481 """ 482 Resolution in Angstroms 483 """ 484 return self._resolution
485 @resolution.setter
486 - def resolution(self, value):
487 if value is not None: 488 value = float(value) 489 self._resolution = value
490
491 - def to_fasta(self):
492 """ 493 Dump the structure in FASTA format. 494 495 @return: FASTA-formatted string with all chains in the structure 496 @rtype: str 497 498 @deprecated: this method will be removed soon. Use 499 L{csb.bio.sequence.ChainSequence.create} instead 500 """ 501 fasta = [] 502 503 for chain in self.items: 504 505 if chain.length > 0: 506 fasta.append('>{0}'.format(chain.header)) 507 fasta.append(chain.sequence) 508 509 return os.linesep.join(fasta)
510
511 - def to_pdb(self, output_file=None):
512 """ 513 Dump the whole structure in PDB format. 514 515 @param output_file: output file name or open stream 516 @type output_file: str or stream 517 """ 518 from csb.bio.io.wwpdb import PDBFileBuilder 519 520 temp = csb.io.MemoryStream() 521 builder = PDBFileBuilder(temp) 522 523 builder.add_header(self) 524 builder.add_structure(self) 525 builder.finalize() 526 527 data = temp.getvalue() 528 temp.close() 529 530 if not output_file: 531 return data 532 else: 533 with csb.io.EntryWriter(output_file, close=False) as out: 534 out.write(data)
535 536 @staticmethod
537 - def from_chain(chain):
538 """ 539 A Structure factory, which instantiates and returns a new Structure with 540 chain as deep cpoy of chain 541 542 @param chain: the chain which will comprise the new structure 543 @type chain: L{Chain} 544 545 @rtype: L{Structure} 546 """ 547 structure = Structure("NONE") 548 structure.chains.append(chain.clone()) 549 550 return structure
551
552 553 -class StructureChainsTable(csb.core.DictionaryContainer):
554
555 - def __init__(self, structure=None, chains=None):
556 self.__container = structure 557 super(StructureChainsTable, self).__init__() 558 559 if chains is not None: 560 for chain in chains: 561 self.append(chain)
562
563 - def __repr__(self):
564 if len(self) > 0: 565 return "<StructureChains: {0}>".format(', '.join(self)) 566 else: 567 return "<StructureChains: empty>"
568 569 @property
570 - def _exception(self):
571 return ChainNotFoundError
572
573 - def append(self, chain):
574 """ 575 Add a new Chain to the structure. 576 577 @param chain: the new chain to be appended 578 @type chain: L{Chain} 579 580 @raise DuplicateChainIDError: if a chain with same ID is already defined 581 @raise InvalidOperation: if the chain is already associated with a structure 582 """ 583 584 if chain._structure and chain._structure is not self.__container: 585 raise InvalidOperation('This chain is already part of another structure') 586 if chain.id in self: 587 raise DuplicateChainIDError('A chain with ID {0} is already defined'.format(chain.id)) 588 589 super(StructureChainsTable, self).append(chain.id, chain) 590 591 if self.__container: 592 chain._accession = self.__container.accession 593 chain._structure = self.__container
594
595 - def remove(self, id):
596 """ 597 Remove a chain from the structure. 598 599 @param id: ID of the chain to be detached 600 @type id: str 601 @raise ChainNotFoundError: if C{id} is not a valid chain ID 602 """ 603 chain = self[id] 604 self._remove(id) 605 chain._structure = None
606
607 - def _update_chain_id(self, chain, new_id):
608 609 if chain.id not in self or self[chain.id] is not chain: 610 raise InvalidOperation(chain) 611 612 self._remove(chain.id) 613 614 if new_id in self: 615 raise DuplicateChainIDError('Chain ID {0} is already defined'.format(id)) 616 617 super(StructureChainsTable, self).append(new_id, chain)
618
619 -class Chain(csb.core.AbstractNIContainer, AbstractEntity):
620 """ 621 Represents a polymeric chain. Provides list-like and rank-based access to 622 the residues in the chain: 623 624 >>> chain[0] 625 <ProteinResidue [1]: SER None> 626 >>> chain.residues[1] 627 <ProteinResidue [1]: SER None> 628 629 You can also access residues by their PDB sequence number: 630 631 >>> chain.find(sequence_number=5, insertion_code='A') 632 <ProteinResidue [1]: SER 5A> 633 634 @param chain_id: ID of the new chain 635 @type chain_id: str 636 @param type: sequence type (a member of the L{SequenceTypes} enum) 637 @type type: L{csb.core.EnumItem} 638 @param name: name of the chain 639 @type name: str 640 @param residues: initialization list of L{Residue}-s 641 @type residues: list 642 @param accession: accession number of the chain 643 @type accession: str 644 @param molecule_id: MOL ID of the chain, if part of a polymer 645 646 """
647 - def __init__(self, chain_id, type=SequenceTypes.Protein, name='', 648 residues=None, accession=None, molecule_id=None):
649 650 self._id = str(chain_id).strip() 651 self._accession = None 652 self._type = None 653 self._residues = ChainResiduesCollection(self, residues) 654 self._secondary_structure = None 655 self._molecule_id = molecule_id 656 self._torsion_computed = False 657 self._name = str(name).strip() 658 659 self._structure = None 660 661 self.type = type 662 if accession is not None: 663 self.accession = accession
664 665 @staticmethod
666 - def from_sequence(sequence, id="_"):
667 """ 668 Create a new chain from an existing sequence. 669 670 @param sequence: source sequence 671 @type sequence: L{csb.bio.sequence.AbstractSequence} 672 673 @rtype: L{Chain} 674 """ 675 676 chain = Chain(id, type=sequence.type) 677 678 for ri in sequence.residues: 679 residue = Residue.create(sequence.type, ri.rank, ri.type, sequence_number=ri.rank) 680 chain.residues.append(residue) 681 682 return chain
683 684 @property
685 - def _children(self):
686 return self._residues
687
688 - def __repr__(self):
689 return "<Chain {0.id}: {0.type!r}>".format(self)
690
691 - def __len__(self):
692 return self._residues.length
693 694 @property
695 - def id(self):
696 """ 697 Chain's ID 698 @rtype: str 699 """ 700 return self._id
701 @id.setter
702 - def id(self, id):
703 if not isinstance(id, csb.core.string): 704 raise ValueError(id) 705 id = id.strip() 706 if self._structure: 707 self._structure.chains._update_chain_id(self, id) 708 self._id = id
709 710 @property
711 - def accession(self):
712 """ 713 Accession number 714 @rtype: str 715 """ 716 return self._accession
717 @accession.setter
718 - def accession(self, accession):
719 if self._structure: 720 raise InvalidOperation("Only the accession of the parent structure can be altered") 721 if accession is None: 722 raise ValueError(accession) 723 self._accession = str(accession).strip()
724 725 @property
726 - def type(self):
727 """ 728 Chain type - any member of L{SequenceTypes} 729 @rtype: enum item 730 """ 731 return self._type
732 @type.setter
733 - def type(self, type):
734 if type.enum is not SequenceTypes: 735 raise TypeError(type) 736 self._type = type
737 738 @property
739 - def residues(self):
740 """ 741 Rank-based access to Chain's L{Residue}s 742 @rtype: L{ChainResiduesCollection} 743 """ 744 return self._residues
745 746 @property
747 - def items(self):
748 return iter(self._residues)
749 750 @property
751 - def torsion(self):
752 """ 753 Torsion angles 754 @rtype: L{TorsionAnglesCollection} 755 """ 756 if not self._torsion_computed: 757 raise InvalidOperation('The correctness of the data is not guaranteed ' 758 'until chain.compute_torsion() is invoked.') 759 760 torsion = TorsionAnglesCollection() 761 762 for r in self.residues: 763 if r.torsion is None: 764 torsion.append(TorsionAngles(None, None, None)) 765 else: 766 torsion.append(r.torsion) 767 768 return torsion
769 770 @property
771 - def has_torsion(self):
772 """ 773 True if C{Chain.compute_torsion} had been invoked 774 @rtype: bool 775 """ 776 return self._torsion_computed
777 778 @property
779 - def length(self):
780 """ 781 Number of residues 782 @rtype: int 783 """ 784 return self._residues.length
785 786 @property
787 - def entry_id(self):
788 """ 789 Accession number + chain ID 790 @rtype: str 791 """ 792 if self._accession and self._id: 793 return self._accession + self._id 794 else: 795 return None
796 797 @property
798 - def name(self):
799 """ 800 Chain name 801 @rtype: str 802 """ 803 return self._name
804 @name.setter
805 - def name(self, value):
806 if value is not None: 807 value = str(value).strip() 808 self._name = value
809 810 @property
811 - def molecule_id(self):
812 """ 813 PDB MOL ID of this chain 814 @rtype: int 815 """ 816 return self._molecule_id
817 @molecule_id.setter
818 - def molecule_id(self, value):
819 self._molecule_id = value
820 821 @property
822 - def header(self):
823 """ 824 FASTA header in PDB format 825 @rtype: str 826 """ 827 header = "{0._accession}_{0._id} mol:{1} length:{0.length} {0.name}" 828 return header.format(self, str(self.type).lower())
829 830 @property
831 - def sequence(self):
832 """ 833 Chain sequence 834 @rtype: str 835 """ 836 sequence = [] 837 gap = str(self.alphabet.GAP) 838 839 for residue in self.residues: 840 if residue and residue.type: 841 sequence.append(str(residue.type)) 842 else: 843 sequence.append(gap) 844 845 return ''.join(sequence)
846 847 @property
848 - def alphabet(self):
849 """ 850 Sequence alphabet corresponding to the current chain type 851 @rtype: L{csb.core.enum} 852 """ 853 return SequenceAlphabets.get(self.type)
854 855 @property
856 - def secondary_structure(self):
857 """ 858 Secondary structure (if available) 859 @rtype: L{SecondaryStructure} 860 """ 861 return self._secondary_structure
862 @secondary_structure.setter
863 - def secondary_structure(self, ss):
864 if not isinstance(ss, SecondaryStructure): 865 raise TypeError(ss) 866 if len(ss) > 0: 867 if (ss[ss.last_index].end > self._residues.last_index): 868 raise ValueError('Secondary structure out of range') 869 self._secondary_structure = ss
870
871 - def clone(self):
872 """ 873 Make a deep copy of the chain. If this chain is part of a structure, 874 detach from it. 875 876 @return: a deep copy of self 877 @rtype: L{Chain} 878 """ 879 start, end = self.residues.start_index, self.residues.last_index 880 return self.subregion(start, end, clone=True)
881
882 - def subregion(self, start, end, clone=False):
883 """ 884 Extract a subchain defined by [start, end]. If clone is True, this 885 is a deep copy of the chain. Otherwise same as: 886 887 >>> chain.residues[start : end + 1] 888 889 but coordinates are checked and a Chain instance is returned. 890 891 @param start: start position of the sub-region 892 @type start: int 893 @param end: end position 894 @type end: int 895 @param clone: if True, a deep copy of the sub-region is returned, 896 otherwise - a shallow one 897 @type clone: bool 898 899 900 @return: a new chain, made from the residues of the extracted region 901 @rtype: L{Chain} 902 903 @raise IndexError: if start/end positions are out of range 904 """ 905 if start < self.residues.start_index or start > self.residues.last_index: 906 raise IndexError('The start position is out of range {0.start_index} .. {0.last_index}'.format(self.residues)) 907 if end < self.residues.start_index or end > self.residues.last_index: 908 raise IndexError('The end position is out of range {0.start_index} .. {0.last_index}'.format(self.residues)) 909 910 residues = self.residues[start : end + 1] 911 912 if clone: 913 residues = [r.clone() for r in residues] 914 915 chain = Chain(self.id, accession=self.accession, name=self.name, 916 type=self.type, residues=residues, molecule_id=self.molecule_id) 917 if chain.secondary_structure: 918 chain.secondary_structure = self.secondary_structure.subregion(start, end) 919 chain._torsion_computed = self._torsion_computed 920 921 return chain
922
923 - def find(self, sequence_number, insertion_code=None):
924 """ 925 Get a residue by its original Residue Sequence Number and Insertion Code. 926 927 @param sequence_number: PDB sequence number of the residue 928 @type sequence_number: str 929 @param insertion_code: PDB insertion code of the residue (if any) 930 @type insertion_code: str 931 932 @return: the residue object with such an ID 933 @rtype: L{Residue} 934 935 @raise EntityNotFoundError: if no residue with that ID exists 936 """ 937 res_id = str(sequence_number).strip() 938 939 if insertion_code is not None: 940 insertion_code = str(insertion_code).strip() 941 res_id += insertion_code 942 943 return self.residues._get_residue(res_id)
944
945 - def compute_torsion(self):
946 """ 947 Iterate over all residues in the chain, compute and set their torsion property. 948 949 @raise Missing3DStructureError: when a 3D structure is absent 950 @raise Broken3DStructureError: when a given atom cannot be retrieved from any residue 951 """ 952 if self.type != SequenceTypes.Protein: 953 raise NotImplementedError() 954 955 for i, residue in enumerate(self.residues, start=self.residues.start_index): 956 957 prev_residue, next_residue = None, None 958 959 if i > self.residues.start_index: 960 prev_residue = self.residues[i - 1] 961 if i < self.residues.last_index: 962 next_residue = self.residues[i + 1] 963 964 residue.torsion = residue.compute_torsion(prev_residue, next_residue, strict=False) 965 966 self._torsion_computed = True
967
968 - def superimpose(self, other, what=['CA'], how=AlignmentTypes.Global):
969 """ 970 Find the optimal fit between C{self} and C{other}. Return L{SuperimposeInfo} 971 (RotationMatrix, translation Vector and RMSD), such that: 972 973 >>> other.transform(rotation_matrix, translation_vector) 974 975 will result in C{other}'s coordinates superimposed over C{self}. 976 977 @param other: the subject (movable) chain 978 @type other: L{Chain} 979 @param what: a list of atom kinds, e.g. ['CA'] 980 @type what: list 981 @param how: fitting method (global or local) - a member of the L{AlignmentTypes} enum 982 @type how: L{csb.core.EnumItem} 983 984 @return: superimposition info object, containing rotation matrix, translation 985 vector and computed RMSD 986 @rtype: L{SuperimposeInfo} 987 988 @raise AlignmentArgumentLengthError: when the lengths of the argument chains differ 989 """ 990 if self.length != other.length or self.length < 1: 991 raise AlignmentArgumentLengthError('Both chains must be of the same and positive length') 992 993 x = self.get_coordinates(what) 994 y = other.get_coordinates(what) 995 assert len(x) == len(y) 996 997 if how == AlignmentTypes.Global: 998 r, t = csb.bio.utils.fit(x, y) 999 else: 1000 r, t = csb.bio.utils.fit_wellordered(x, y) 1001 1002 rmsd = csb.bio.utils.rmsd(x, y) 1003 1004 return SuperimposeInfo(r, t, rmsd=rmsd) 1005
1006 - def align(self, other, what=['CA'], how=AlignmentTypes.Global):
1007 """ 1008 Align C{other}'s alpha carbons over self in space and return L{SuperimposeInfo}. 1009 Coordinates of C{other} are overwritten in place using the rotation matrix 1010 and translation vector in L{SuperimposeInfo}. Alias for:: 1011 1012 R, t = self.superimpose(other, what=['CA']) 1013 other.transform(R, t) 1014 1015 @param other: the subject (movable) chain 1016 @type other: L{Chain} 1017 @param what: a list of atom kinds, e.g. ['CA'] 1018 @type what: list 1019 @param how: fitting method (global or local) - a member of the L{AlignmentTypes} enum 1020 @type how: L{csb.core.EnumItem} 1021 1022 @return: superimposition info object, containing rotation matrix, translation 1023 vector and computed RMSD 1024 @rtype: L{SuperimposeInfo} 1025 """ 1026 result = self.superimpose(other, what=what, how=how) 1027 other.transform(result.rotation, result.translation) 1028 1029 return result 1030
1031 - def rmsd(self, other, what=['CA']):
1032 """ 1033 Compute the C-alpha RMSD against another chain (assuming equal length). 1034 Chains are superimposed with Least Squares Fit / Singular Value Decomposition. 1035 1036 @param other: the subject (movable) chain 1037 @type other: L{Chain} 1038 @param what: a list of atom kinds, e.g. ['CA'] 1039 @type what: list 1040 1041 @return: computed RMSD over the specified atom kinds 1042 @rtype: float 1043 """ 1044 1045 if self.length != other.length or self.length < 1: 1046 raise ValueError('Both chains must be of the same and positive length ' 1047 '(got {0} and {1})'.format(self.length, other.length)) 1048 1049 x = self.get_coordinates(what) 1050 y = other.get_coordinates(what) 1051 assert len(x) == len(y) 1052 1053 return csb.bio.utils.rmsd(x, y)
1054
1055 - def tm_superimpose(self, other, what=['CA'], how=AlignmentTypes.Global):
1056 """ 1057 Find the optimal fit between C{self} and C{other}. Return L{SuperimposeInfo} 1058 (RotationMatrix, translation Vector and TM-score), such that: 1059 1060 >>> other.transform(rotation_matrix, translation_vector) 1061 1062 will result in C{other}'s coordinates superimposed over C{self}. 1063 1064 @param other: the subject (movable) chain 1065 @type other: L{Chain} 1066 @param what: a list of atom kinds, e.g. ['CA'] 1067 @type what: list 1068 @param how: fitting method (global or local) - a member of the L{AlignmentTypes} enum 1069 @type how: L{csb.core.EnumItem} 1070 1071 @return: superimposition info object, containing rotation matrix, translation 1072 vector and computed TM-score 1073 @rtype: L{SuperimposeInfo} 1074 1075 @raise AlignmentArgumentLengthError: when the lengths of the argument chains differ 1076 """ 1077 1078 if self.length != other.length or self.length < 1: 1079 raise ValueError('Both chains must be of the same and positive length') 1080 1081 x = self.get_coordinates(what) 1082 y = other.get_coordinates(what) 1083 assert len(x) == len(y) 1084 1085 L_ini_min = 0 1086 if how == AlignmentTypes.Global: 1087 fit = csb.bio.utils.fit 1088 elif how == AlignmentTypes.Local: 1089 fit = csb.bio.utils.fit_wellordered 1090 else: 1091 # TMscore.f like search (slow) 1092 fit = csb.bio.utils.fit 1093 L_ini_min = 4 1094 1095 r, t, tm = csb.bio.utils.tm_superimpose(x, y, fit, None, None, L_ini_min) 1096 1097 return SuperimposeInfo(r,t, tm_score=tm) 1098
1099 - def tm_score(self, other, what=['CA']):
1100 """ 1101 Compute the C-alpha TM-Score against another chain (assuming equal chain length 1102 and optimal configuration - no fitting is done). 1103 1104 @param other: the subject (movable) chain 1105 @type other: L{Chain} 1106 @param what: a list of atom kinds, e.g. ['CA'] 1107 @type what: list 1108 1109 @return: computed TM-Score over the specified atom kinds 1110 @rtype: float 1111 """ 1112 1113 if self.length != other.length or self.length < 1: 1114 raise ValueError('Both chains must be of the same and positive length') 1115 1116 x = self.get_coordinates(what) 1117 y = other.get_coordinates(what) 1118 assert len(x) == len(y) 1119 1120 return csb.bio.utils.tm_score(x, y)
1121
1122 -class ChainResiduesCollection(csb.core.CollectionContainer):
1123
1124 - def __init__(self, chain, residues):
1125 super(ChainResiduesCollection, self).__init__(type=Residue, start_index=1) 1126 self.__container = chain 1127 self.__lookup = { } 1128 1129 if residues is not None: 1130 for residue in residues: 1131 self.append(residue)
1132
1133 - def __repr__(self):
1134 if len(self) > 0: 1135 return "<ChainResidues: {0} ... {1}>".format(self[self.start_index], self[self.last_index]) 1136 else: 1137 return "<ChainResidues: empty>"
1138 1139 @property
1140 - def _exception(self):
1141 return EntityIndexError
1142
1143 - def append(self, residue):
1144 """ 1145 Append a new residue to the chain. 1146 1147 @param residue: the new residue 1148 @type residue: L{Residue} 1149 1150 @raise DuplicateResidueIDError: if a residue with the same ID already exists 1151 """ 1152 if residue.id and residue.id in self.__lookup: 1153 raise DuplicateResidueIDError('A residue with ID {0} is already defined within the chain'.format(residue.id)) 1154 index = super(ChainResiduesCollection, self).append(residue) 1155 residue._container = self 1156 self.__container._torsion_computed = False 1157 self._add(residue) 1158 return index
1159
1160 - def _contains(self, id):
1161 return id in self.__lookup
1162
1163 - def _remove(self, id):
1164 if id in self.__lookup: 1165 del self.__lookup[id]
1166
1167 - def _add(self, residue):
1168 self.__lookup[residue.id] = residue
1169
1170 - def _get_residue(self, id):
1171 try: 1172 return self.__lookup[id] 1173 except KeyError: 1174 raise EntityNotFoundError(id)
1175
1176 -class Residue(csb.core.AbstractNIContainer, AbstractEntity):
1177 """ 1178 Base class representing a single residue. Provides a dictionary-like 1179 access to the atoms contained in the residue: 1180 1181 >>> residue['CA'] 1182 <Atom [3048]: CA> 1183 >>> residue.atoms['CA'] 1184 <Atom [3048]: CA> 1185 >>> residue.items 1186 <iterator of Atom-s> 1187 1188 @param rank: rank of the residue with respect to the chain 1189 @type rank: int 1190 @param type: residue type - a member of any L{SequenceAlphabets} 1191 @type type: L{csb.core.EnumItem} 1192 @param sequence_number: PDB sequence number of the residue 1193 @type sequence_number: str 1194 @param insertion_code: PDB insertion code, if any 1195 @type insertion_code: str 1196 """
1197 - def __init__(self, rank, type, sequence_number=None, insertion_code=None):
1198 1199 self._type = None 1200 self._label = None 1201 self._rank = int(rank) 1202 self._atoms = ResidueAtomsTable(self) 1203 self._secondary_structure = None 1204 self._torsion = None 1205 self._sequence_number = None 1206 self._insertion_code = None 1207 self._container = None 1208 1209 self.type = type 1210 self.id = sequence_number, insertion_code 1211 self.label = repr(type)
1212 1213 @property
1214 - def _children(self):
1215 return self._atoms
1216
1217 - def __repr__(self):
1218 return '<{1} [{0.rank}]: {0.type!r} {0.id}>'.format(self, self.__class__.__name__)
1219 1220 @property
1221 - def label(self):
1222 """ 1223 Original residue label (different from C{Residue.type} for modified 1224 residues) 1225 @rtype: str 1226 """ 1227 return self._label
1228 @label.setter
1229 - def label(self, value):
1230 self._label = str(value)
1231 1232 @property
1233 - def is_modified(self):
1234 """ 1235 Return True id this is a modified residue 1236 @rtype: bool 1237 """ 1238 return self.label != repr(self.type)
1239 1240 @property
1241 - def type(self):
1242 """ 1243 Residue type - a member of any sequence alphabet 1244 @rtype: enum item 1245 """ 1246 return self._type
1247 @type.setter
1248 - def type(self, type):
1249 if type.enum not in (SequenceAlphabets.Protein, SequenceAlphabets.Nucleic, SequenceAlphabets.Unknown): 1250 raise TypeError(type) 1251 self._type = type
1252 1253 @property
1254 - def rank(self):
1255 """ 1256 Residue's position in the sequence (1-based) 1257 @rtype: int 1258 """ 1259 return self._rank
1260 1261 @property
1262 - def secondary_structure(self):
1263 """ 1264 Secondary structure element this residue is part of 1265 @rtype: L{SecondaryStructureElement} 1266 """ 1267 return self._secondary_structure
1268 @secondary_structure.setter
1269 - def secondary_structure(self, structure):
1270 if not isinstance(structure, SecondaryStructureElement): 1271 raise TypeError(structure) 1272 self._secondary_structure = structure
1273 1274 @property
1275 - def torsion(self):
1276 """ 1277 Torsion angles 1278 @rtype: L{TorsionAngles} 1279 """ 1280 return self._torsion
1281 @torsion.setter
1282 - def torsion(self, torsion):
1283 if not isinstance(torsion, TorsionAngles): 1284 raise TypeError(torsion) 1285 self._torsion = torsion
1286 1287 @property
1288 - def atoms(self):
1289 """ 1290 Access residue's atoms by atom name 1291 @rtype: L{ResidueAtomsTable} 1292 """ 1293 return self._atoms
1294 1295 @property
1296 - def items(self):
1297 for atom in self._atoms: 1298 yield self._atoms[atom]
1299 1300 @property
1301 - def sequence_number(self):
1302 """ 1303 PDB sequence number (if residue.has_structure is True) 1304 @rtype: int 1305 """ 1306 return self._sequence_number
1307 1308 @property
1309 - def insertion_code(self):
1310 """ 1311 PDB insertion code (if defined) 1312 @rtype: str 1313 """ 1314 return self._insertion_code
1315 1316 @property
1317 - def id(self):
1318 """ 1319 PDB sequence number [+ insertion code] 1320 @rtype: str 1321 """ 1322 if self._sequence_number is None: 1323 return None 1324 elif self._insertion_code is not None: 1325 return str(self._sequence_number) + self._insertion_code 1326 else: 1327 return str(self._sequence_number)
1328 @id.setter
1329 - def id(self, value):
1330 sequence_number, insertion_code = value 1331 old_id = self.id 1332 id = '' 1333 if sequence_number is not None: 1334 sequence_number = int(sequence_number) 1335 id = str(sequence_number) 1336 if insertion_code is not None: 1337 insertion_code = str(insertion_code).strip() 1338 id += insertion_code 1339 if sequence_number is None: 1340 raise InvalidOperation('sequence_number must be defined when an insertion_code is specified.') 1341 if old_id != id: 1342 if self._container: 1343 if self._container._contains(id): 1344 raise DuplicateResidueIDError('A residue with ID {0} is already defined within the chain'.format(id)) 1345 self._container._remove(old_id) 1346 self._sequence_number = sequence_number 1347 self._insertion_code = insertion_code 1348 if self._container: 1349 self._container._add(self)
1350 1351 @property
1352 - def has_structure(self):
1353 """ 1354 True if this residue has any atoms 1355 @rtype: bool 1356 """ 1357 return len(self.atoms) > 0
1358
1359 - def get_coordinates(self, what=None, skip=False):
1360 1361 coords = [] 1362 1363 if not self.has_structure: 1364 if skip: 1365 return numpy.array([]) 1366 raise Missing3DStructureError(self) 1367 1368 for atom_kind in (what or self.atoms): 1369 if atom_kind in self.atoms: 1370 coords.append(self.atoms[atom_kind].vector) 1371 else: 1372 if skip: 1373 continue 1374 raise Broken3DStructureError('Could not retrieve {0} atom'.format(atom_kind)) 1375 1376 return numpy.array(coords)
1377
1378 - def clone(self):
1379 1380 container = self._container 1381 self._container = None 1382 clone = copy.deepcopy(self) 1383 self._container = container 1384 1385 return clone
1386 1387 @staticmethod
1388 - def create(sequence_type, *a, **k):
1389 """ 1390 Residue factory method, which returns the proper L{Residue} instance based on 1391 the specified C{sequence_type}. All additional arguments are used to initialize 1392 the subclass by passing them automatically to the underlying constructor. 1393 1394 @param sequence_type: create a Residue of that SequenceType 1395 @type sequence_type: L{csb.core.EnumItem} 1396 1397 @return: a new residue of the proper subclass 1398 @rtype: L{Residue} subclass 1399 1400 @raise ValueError: if the sequence type is not known 1401 """ 1402 if sequence_type == SequenceTypes.Protein: 1403 return ProteinResidue(*a, **k) 1404 elif sequence_type == SequenceTypes.NucleicAcid: 1405 return NucleicResidue(*a, **k) 1406 elif sequence_type == SequenceTypes.Unknown: 1407 return UnknownResidue(*a, **k) 1408 else: 1409 raise ValueError(sequence_type)
1410
1411 -class ProteinResidue(Residue):
1412 """ 1413 Represents a single amino acid residue. 1414 1415 @param rank: rank of the residue with respect to the chain 1416 @type rank: int 1417 @param type: residue type - a member of 1418 L{csb.bio.sequence.SequenceAlphabets.Protein} 1419 @type type: L{csb.core.EnumItem} 1420 @param sequence_number: PDB sequence number of the residue 1421 @type sequence_number: str 1422 @param insertion_code: PDB insertion code, if any 1423 @type insertion_code: str 1424 """ 1425
1426 - def __init__(self, rank, type, sequence_number=None, insertion_code=None):
1427 1428 if isinstance(type, csb.core.string): 1429 try: 1430 if len(type) == 3: 1431 type = csb.core.Enum.parsename(SequenceAlphabets.Protein, type) 1432 else: 1433 type = csb.core.Enum.parse(SequenceAlphabets.Protein, type) 1434 except (csb.core.EnumMemberError, csb.core.EnumValueError): 1435 raise ValueError("'{0}' is not a valid amino acid".format(type)) 1436 elif type.enum is not SequenceAlphabets.Protein: 1437 raise TypeError(type) 1438 1439 super(ProteinResidue, self).__init__(rank, type, sequence_number, insertion_code)
1440
1441 - def compute_torsion(self, prev_residue, next_residue, strict=True):
1442 """ 1443 Compute the torsion angles of the current residue with neighboring residues 1444 C{prev_residue} and C{next_residue}. 1445 1446 @param prev_residue: the previous residue in the chain 1447 @type prev_residue: L{Residue} 1448 @param next_residue: the next residue in the chain 1449 @type next_residue: L{Residue} 1450 @param strict: if True, L{Broken3DStructureError} is raised if either C{prev_residue} 1451 or C{next_residue} has a broken structure, else the error is silently 1452 ignored and an empty L{TorsionAngles} instance is created 1453 @type strict: bool 1454 1455 @return: a L{TorsionAngles} object, holding the phi, psi and omega values 1456 @rtype: L{TorsionAngles} 1457 1458 @raise Broken3DStructureError: when a specific atom cannot be found 1459 """ 1460 if prev_residue is None and next_residue is None: 1461 raise ValueError('At least one neighboring residue is required to compute the torsion.') 1462 1463 angles = TorsionAngles(None, None, None, units=AngleUnits.Degrees) 1464 1465 for residue in (self, prev_residue, next_residue): 1466 if residue is not None and not residue.has_structure: 1467 if strict: 1468 raise Missing3DStructureError(repr(residue)) 1469 elif residue is self: 1470 return angles 1471 1472 try: 1473 n = self._atoms['N'].vector 1474 ca = self._atoms['CA'].vector 1475 c = self._atoms['C'].vector 1476 except csb.core.ItemNotFoundError as missing_atom: 1477 if strict: 1478 raise Broken3DStructureError('Could not retrieve {0} atom from the current residue {1!r}.'.format( 1479 missing_atom, self)) 1480 else: 1481 return angles 1482 1483 try: 1484 if prev_residue is not None and prev_residue.has_structure: 1485 prev_c = prev_residue._atoms['C'].vector 1486 angles.phi = csb.numeric.dihedral_angle(prev_c, n, ca, c) 1487 except csb.core.ItemNotFoundError as missing_prevatom: 1488 if strict: 1489 raise Broken3DStructureError('Could not retrieve {0} atom from the i-1 residue {1!r}.'.format( 1490 missing_prevatom, prev_residue)) 1491 try: 1492 if next_residue is not None and next_residue.has_structure: 1493 next_n = next_residue._atoms['N'].vector 1494 angles.psi = csb.numeric.dihedral_angle(n, ca, c, next_n) 1495 next_ca = next_residue._atoms['CA'].vector 1496 angles.omega = csb.numeric.dihedral_angle(ca, c, next_n, next_ca) 1497 except csb.core.ItemNotFoundError as missing_nextatom: 1498 if strict: 1499 raise Broken3DStructureError('Could not retrieve {0} atom from the i+1 residue {1!r}.'.format( 1500 missing_nextatom, next_residue)) 1501 1502 return angles
1503
1504 -class NucleicResidue(Residue):
1505 """ 1506 Represents a single nucleotide residue. 1507 1508 @param rank: rank of the residue with respect to the chain 1509 @type rank: int 1510 @param type: residue type - a member of 1511 L{csb.bio.sequence.SequenceAlphabets.Nucleic} 1512 @type type: L{csb.core.EnumItem} 1513 @param sequence_number: PDB sequence number of the residue 1514 @type sequence_number: str 1515 @param insertion_code: PDB insertion code, if any 1516 @type insertion_code: str 1517 """ 1518
1519 - def __init__(self, rank, type, sequence_number=None, insertion_code=None):
1520 1521 if isinstance(type, csb.core.string): 1522 try: 1523 if len(type) > 1: 1524 type = csb.core.Enum.parsename(SequenceAlphabets.Nucleic, type) 1525 else: 1526 type = csb.core.Enum.parse(SequenceAlphabets.Nucleic, type) 1527 except (csb.core.EnumMemberError, csb.core.EnumValueError): 1528 raise ValueError("'{0}' is not a valid nucleotide".format(type)) 1529 elif type.enum is not SequenceAlphabets.Nucleic: 1530 raise TypeError(type) 1531 1532 super(NucleicResidue, self).__init__(rank, type, sequence_number, insertion_code) 1533 self.label = str(type)
1534 1535 @property
1536 - def is_modified(self):
1537 return self.label != str(self.type)
1538
1539 -class UnknownResidue(Residue):
1540
1541 - def __init__(self, rank, type, sequence_number=None, insertion_code=None):
1545
1546 -class ResidueAtomsTable(csb.core.DictionaryContainer):
1547 """ 1548 Represents a collection of atoms. Provides dictionary-like access, 1549 where PDB atom names are used for lookup. 1550 """
1551 - def __init__(self, residue, atoms=None):
1552 1553 self.__residue = residue 1554 super(ResidueAtomsTable, self).__init__() 1555 1556 if atoms is not None: 1557 for atom in atoms: 1558 self.append(atom)
1559
1560 - def __repr__(self):
1561 if len(self) > 0: 1562 return "<ResidueAtoms: {0}>".format(', '.join(self.keys())) 1563 else: 1564 return "<ResidueAtoms: empty>"
1565 1566 @property
1567 - def _exception(self):
1568 return AtomNotFoundError
1569
1570 - def append(self, atom):
1571 """ 1572 Append a new Atom to the catalog. 1573 1574 If the atom has an alternate position, a disordered proxy will be created instead and the 1575 atom will be appended to the L{DisorderedAtom}'s list of children. If a disordered atom 1576 with that name already exists, the atom will be appended to its children only. 1577 If an atom with the same name exists, but it was erroneously not marked as disordered, 1578 that terrible condition will be fixed too. 1579 1580 @param atom: the new atom to append 1581 @type atom: L{Atom} 1582 1583 @raise DuplicateAtomIDError: if an atom with the same sequence number and 1584 insertion code already exists in that residue 1585 """ 1586 if atom.residue and atom.residue is not self.__residue: 1587 raise InvalidOperation('This atom is part of another residue') 1588 if atom.alternate or (atom.name in self and isinstance(self[atom.name], DisorderedAtom)): 1589 if atom.name not in self: 1590 atom._residue = self.__residue 1591 dis_atom = DisorderedAtom(atom) 1592 super(ResidueAtomsTable, self).append(dis_atom.name, dis_atom) 1593 else: 1594 if not isinstance(self[atom.name], DisorderedAtom): 1595 buggy_atom = self[atom.name] 1596 assert buggy_atom.alternate in (None, False) 1597 buggy_atom.alternate = True 1598 self.update(atom.name, DisorderedAtom(buggy_atom)) 1599 if not atom.alternate: 1600 atom.alternate = True 1601 atom._residue = self.__residue 1602 self[atom.name].append(atom) 1603 else: 1604 if atom.name in self: 1605 raise DuplicateAtomIDError('Atom {0} is already defined for {1}'.format( 1606 atom.name, self.__residue)) 1607 else: 1608 super(ResidueAtomsTable, self).append(atom.name, atom) 1609 atom._residue = self.__residue
1610
1611 - def update(self, atom_name, atom):
1612 """ 1613 Update the atom with the specified name. 1614 1615 @param atom_name: update key 1616 @type atom_name: str 1617 @param atom: new value for this key 1618 @type atom: L{Atom} 1619 1620 @raise ValueError: if C{atom} has a different name than C{atom_name} 1621 """ 1622 if atom.name != atom_name: 1623 raise ValueError("Atom's name differs from the specified key.") 1624 if atom.residue is not self.__residue: 1625 atom._residue = self.__residue 1626 1627 super(ResidueAtomsTable, self)._update({atom_name: atom})
1628
1629 -class Atom(AbstractEntity):
1630 """ 1631 Represents a single atom in space. 1632 1633 @param serial_number: atom's UID 1634 @type serial_number: int 1635 @param name: atom's name 1636 @type name: str 1637 @param element: corresponding L{ChemElements} 1638 @type element: L{csb.core.EnumItem} 1639 @param vector: atom's coordinates 1640 @type vector: numpy array 1641 @param alternate: if True, means that this is a wobbling atom with multiple alternative 1642 locations 1643 @type alternate: bool 1644 """
1645 - def __init__(self, serial_number, name, element, vector, alternate=False):
1646 1647 self._serial_number = None 1648 self._name = None 1649 self._element = None 1650 self._residue = None 1651 self._vector = None 1652 self._alternate = False 1653 self._bfactor = None 1654 self._occupancy = None 1655 self._charge = None 1656 1657 if not isinstance(name, csb.core.string): 1658 raise TypeError(name) 1659 name_compact = name.strip() 1660 if len(name_compact) < 1: 1661 raise ValueError(name) 1662 self._name = name_compact 1663 self._full_name = name 1664 1665 if isinstance(element, csb.core.string): 1666 element = csb.core.Enum.parsename(ChemElements, element) 1667 elif element is None: 1668 pass 1669 elif element.enum is not ChemElements: 1670 raise TypeError(element) 1671 self._element = element 1672 1673 # pass type- and value-checking control to setters 1674 self.serial_number = serial_number 1675 self.vector = vector 1676 self.alternate = alternate
1677
1678 - def __repr__(self):
1679 return "<Atom [{0.serial_number}]: {0.name}>".format(self)
1680
1681 - def __lt__(self, other):
1682 return self.serial_number < other.serial_number
1683
1684 - def transform(self, rotation, translation):
1685 1686 vector = numpy.dot(self.vector, numpy.transpose(rotation)) + translation 1687 self.vector = vector
1688
1689 - def get_coordinates(self, what=None, skip=False):
1690 1691 if what is None: 1692 what = [self.name] 1693 1694 if self.name in what: 1695 return numpy.array([self.vector.copy()]) 1696 elif skip: 1697 return numpy.array([]) 1698 else: 1699 raise Missing3DStructureError()
1700
1701 - def clone(self):
1702 1703 residue = self._residue 1704 self._residue = None 1705 clone = copy.deepcopy(self) 1706 self._residue = residue 1707 1708 return clone
1709 1710 @property
1711 - def serial_number(self):
1712 """ 1713 PDB serial number 1714 @rtype: int 1715 """ 1716 return self._serial_number
1717 @serial_number.setter
1718 - def serial_number(self, number):
1719 if not isinstance(number, int) or number < 1: 1720 raise TypeError(number) 1721 self._serial_number = number
1722 1723 @property
1724 - def name(self):
1725 """ 1726 PDB atom name (trimmed) 1727 @rtype: str 1728 """ 1729 return self._name
1730 1731 @property
1732 - def element(self):
1733 """ 1734 Chemical element - a member of L{ChemElements} 1735 @rtype: enum item 1736 """ 1737 return self._element
1738 1739 @property
1740 - def residue(self):
1741 """ 1742 Residue instance that owns this atom (if available) 1743 @rtype: L{Residue} 1744 """ 1745 return self._residue
1746 @residue.setter
1747 - def residue(self, residue):
1748 if self._residue: 1749 raise InvalidOperation('This atom is already part of a residue.') 1750 if not isinstance(residue, Residue): 1751 raise TypeError(residue) 1752 self._residue = residue
1753 1754 @property
1755 - def vector(self):
1756 """ 1757 Atom's 3D coordinates (x, y, z) 1758 @rtype: numpy.array 1759 """ 1760 return self._vector
1761 @vector.setter
1762 - def vector(self, vector):
1763 if numpy.shape(vector) != (3,): 1764 raise ValueError("Three dimensional vector expected") 1765 self._vector = numpy.array(vector)
1766 1767 @property
1768 - def alternate(self):
1769 """ 1770 Alternative location flag 1771 @rtype: str 1772 """ 1773 return self._alternate
1774 @alternate.setter
1775 - def alternate(self, value):
1776 self._alternate = value
1777 1778 @property
1779 - def bfactor(self):
1780 """ 1781 Temperature factor 1782 @rtype: float 1783 """ 1784 return self._bfactor
1785 @bfactor.setter
1786 - def bfactor(self, value):
1787 self._bfactor = value
1788 1789 @property
1790 - def occupancy(self):
1791 """ 1792 Occupancy number 1793 @rtype: float 1794 """ 1795 return self._occupancy
1796 @occupancy.setter
1797 - def occupancy(self, value):
1798 self._occupancy = value
1799 1800 @property
1801 - def charge(self):
1802 """ 1803 Charge 1804 @rtype: int 1805 """ 1806 return self._charge
1807 @charge.setter
1808 - def charge(self, value):
1809 self._charge = value
1810 1811 @property
1812 - def items(self):
1813 return iter([])
1814
1815 -class DisorderedAtom(csb.core.CollectionContainer, Atom):
1816 """ 1817 A wobbling atom, which has alternative locations. Each alternative is represented 1818 as a 'normal' L{Atom}. The atom with a highest occupancy is selected as a representative, 1819 hence a DisorderedAtom behaves as a regular L{Atom} (proxy of the representative) as well 1820 as a collection of Atoms. 1821 1822 @param atom: the first atom to be appended to the collection of alternatives. It 1823 is automatically defined as a representative, until a new atom with 1824 higher occupancy is appended to the collection 1825 @type atom: L{Atom} 1826 """ 1827
1828 - def __init__(self, atom):
1829 1830 super(DisorderedAtom, self).__init__(type=Atom) 1831 1832 self.__rep = None 1833 self.__alt = {} 1834 1835 self.append(atom)
1836
1837 - def __getattr__(self, name):
1838 try: 1839 return object.__getattribute__(self, name) 1840 except AttributeError: 1841 subject = object.__getattribute__(self, '_DisorderedAtom__rep') 1842 return getattr(subject, name)
1843
1844 - def append(self, atom):
1845 """ 1846 Append a new atom to the collection of alternatives. 1847 1848 @param atom: the new alternative 1849 @type atom: L{Atom} 1850 """ 1851 self.__update_rep(atom) 1852 self.__alt[atom.alternate] = atom 1853 1854 super(DisorderedAtom, self).append(atom)
1855
1856 - def find(self, altloc):
1857 """ 1858 Retrieve a specific atom by its altloc identifier. 1859 1860 @param altloc: alternative location identifier 1861 @type altloc: str 1862 1863 @rtype: L{Atom} 1864 """ 1865 if altloc in self.__alt: 1866 return self.__alt[altloc] 1867 else: 1868 for atom in self: 1869 if atom.alternate == altloc: 1870 return Atom 1871 1872 raise EntityNotFoundError(altloc)
1873
1874 - def transform(self, rotation, translation):
1875 1876 for atom in self: 1877 atom.transform(rotation, translation)
1878
1879 - def __update_rep(self, atom):
1880 1881 if self.__rep is None or \ 1882 ((self.__rep.occupancy != atom.occupancy) and (self.__rep.occupancy < atom.occupancy)): 1883 1884 self.__rep = atom
1885
1886 - def __repr__(self):
1887 return "<DisorderedAtom: {0.length} alternative locations>".format(self)
1888
1889 -class SuperimposeInfo(object):
1890 """ 1891 Describes a structural alignment result. 1892 1893 @type rotation: Numpy Array 1894 @type translation: L{Vector} 1895 @type rmsd: float 1896 """
1897 - def __init__(self, rotation, translation, rmsd=None, tm_score=None):
1898 1899 self.rotation = rotation 1900 self.translation = translation 1901 self.rmsd = rmsd 1902 self.tm_score = tm_score
1903
1904 -class SecondaryStructureElement(object):
1905 """ 1906 Describes a Secondary Structure Element. 1907 1908 @param start: start position with reference to the chain 1909 @type start: float 1910 @param end: end position with reference to the chain 1911 @type end: float 1912 @param type: element type - a member of the L{SecStructures} enum 1913 @type type: csb.core.EnumItem 1914 @param score: secondary structure prediction confidence, if available 1915 @type score: int 1916 1917 @raise IndexError: if start/end positions are out of range 1918 """
1919 - def __init__(self, start, end, type, score=None):
1920 1921 if not (0 < start <= end): 1922 raise IndexError('Element coordinates are out of range: 1 <= start <= end.') 1923 1924 self._start = None 1925 self._end = None 1926 self._type = None 1927 self._score = None 1928 1929 self.start = start 1930 self.end = end 1931 self.type = type 1932 1933 if score is not None: 1934 self.score = score
1935
1936 - def __lt__(self, other):
1937 return self.start < other.start
1938
1939 - def __eq__(self, other):
1940 return (self.type == other.type 1941 and self.start == other.start 1942 and self.end == other.end)
1943
1944 - def __str__(self):
1945 return self.to_string()
1946
1947 - def __repr__(self):
1948 return "<{0.type!r}: {0.start}-{0.end}>".format(self)
1949 1950 @property
1951 - def start(self):
1952 """ 1953 Start position (1-based) 1954 @rtype: int 1955 """ 1956 return self._start
1957 @start.setter
1958 - def start(self, value):
1959 if value is not None: 1960 value = int(value) 1961 if value < 1: 1962 raise ValueError(value) 1963 self._start = value
1964 1965 @property
1966 - def end(self):
1967 """ 1968 End position (1-based) 1969 @rtype: int 1970 """ 1971 return self._end
1972 @end.setter
1973 - def end(self, value):
1974 if value is not None: 1975 value = int(value) 1976 if value < 1: 1977 raise ValueError(value) 1978 self._end = value
1979 1980 @property
1981 - def type(self):
1982 """ 1983 Secondary structure type - a member of L{SecStructures} 1984 @rtype: enum item 1985 """ 1986 return self._type
1987 @type.setter
1988 - def type(self, value):
1989 if isinstance(value, csb.core.string): 1990 value = csb.core.Enum.parse(SecStructures, value) 1991 if not value.enum is SecStructures: 1992 raise TypeError(value) 1993 self._type = value
1994 1995 @property
1996 - def length(self):
1997 """ 1998 Number of residues covered by this element 1999 @rtype: int 2000 """ 2001 return self.end - self.start + 1
2002 2003 @property
2004 - def score(self):
2005 """ 2006 Secondary structure confidence values for each residue 2007 @rtype: L{CollectionContainer} 2008 """ 2009 return self._score
2010 @score.setter
2011 - def score(self, scores):
2012 if not len(scores) == self.length: 2013 raise ValueError('There must be a score entry for each residue in the element.') 2014 self._score = csb.core.CollectionContainer( 2015 items=list(scores), type=int, start_index=self.start)
2016
2017 - def overlaps(self, other):
2018 """ 2019 Return True if C{self} overlaps with C{other}. 2020 2021 @type other: L{SecondaryStructureElement} 2022 @rtype: bool 2023 """ 2024 this = set(range(self.start, self.end + 1)) 2025 that = set(range(other.start, other.end + 1)) 2026 return not this.isdisjoint(that)
2027
2028 - def merge(self, other):
2029 """ 2030 Merge C{self} and C{other}. 2031 2032 @type other: L{SecondaryStructureElement} 2033 2034 @return: a new secondary structure element 2035 @rtype: L{SecondaryStructureElement} 2036 2037 @bug: confidence scores are lost 2038 """ 2039 if not self.overlaps(other): 2040 raise ValueError("Can't merge non-overlapping secondary structures") 2041 elif self.type != other.type: 2042 raise ValueError("Can't merge secondary structures of different type") 2043 2044 start = min(self.start, other.start) 2045 end = max(self.end, other.end) 2046 assert self.type == other.type 2047 2048 return SecondaryStructureElement(start, end, self.type)
2049
2050 - def to_string(self):
2051 """ 2052 Dump the element as a string. 2053 2054 @return: string representation of the element 2055 @rtype: str 2056 """ 2057 return str(self.type) * self.length
2058
2059 - def simplify(self):
2060 """ 2061 Convert to three-state secondary structure (Helix, Strand, Coil). 2062 """ 2063 if self.type in (SecStructures.Helix, SecStructures.Helix3, SecStructures.PiHelix): 2064 self.type = SecStructures.Helix 2065 elif self.type in (SecStructures.Strand, SecStructures.BetaBridge): 2066 self.type = SecStructures.Strand 2067 elif self.type in (SecStructures.Coil, SecStructures.Turn, SecStructures.Bend): 2068 self.type = SecStructures.Coil 2069 elif self.type == SecStructures.Gap or self.type is None: 2070 pass 2071 else: 2072 assert False, 'Unhandled SS type: ' + repr(self.type)
2073
2074 -class SecondaryStructure(csb.core.CollectionContainer):
2075 """ 2076 Describes the secondary structure of a chain. 2077 Provides an index-based access to the secondary structure elements of the chain. 2078 2079 @param string: a secondary structure string (e.g. a PSI-PRED output) 2080 @type string: str 2081 @param conf_string: secondary structure prediction confidence values, if available 2082 @type conf_string: str 2083 """
2084 - def __init__(self, string=None, conf_string=None):
2085 2086 super(SecondaryStructure, self).__init__(type=SecondaryStructureElement, start_index=1) 2087 2088 self._minstart = None 2089 self._maxend = None 2090 2091 if string is not None: 2092 for motif in SecondaryStructure.parse(string, conf_string): 2093 self.append(motif)
2094
2095 - def __str__(self):
2096 return self.to_string()
2097
2098 - def append(self, element):
2099 """ 2100 Add a new SecondaryStructureElement. Then sort all elements by 2101 their start position. 2102 """ 2103 super(SecondaryStructure, self).append(element) 2104 super(SecondaryStructure, self)._sort() 2105 2106 if self._minstart is None or element.start < self._minstart: 2107 self._minstart = element.start 2108 if self._maxend is None or element.end > self._maxend: 2109 self._maxend = element.end
2110 2111 @staticmethod
2112 - def parse(string, conf_string=None):
2113 """ 2114 Parse secondary structure from DSSP/PSI-PRED output string. 2115 2116 @param string: a secondary structure string (e.g. a PSI-PRED output) 2117 @type string: str 2118 @param conf_string: secondary structure prediction confidence values, if available 2119 @type conf_string: str 2120 2121 @return: a list of L{SecondaryStructureElement}s. 2122 @rtype: list 2123 2124 @raise ValueError: if the confidence string is not of the same length 2125 """ 2126 if not isinstance(string, csb.core.string): 2127 raise TypeError(string) 2128 2129 string = ''.join(re.split('\s+', string)) 2130 if conf_string is not None: 2131 conf_string = ''.join(re.split('\s+', conf_string)) 2132 if not len(string) == len(conf_string): 2133 raise ValueError('The confidence string has unexpected length.') 2134 motifs = [ ] 2135 2136 if not len(string) > 0: 2137 raise ValueError('Empty Secondary Structure string') 2138 2139 currel = string[0] 2140 start = 0 2141 2142 for i, char in enumerate(string + '.'): 2143 2144 if currel != char: 2145 try: 2146 type = csb.core.Enum.parse(SecStructures, currel) 2147 except csb.core.EnumValueError: 2148 raise UnknownSecStructureError(currel) 2149 confidence = None 2150 if conf_string is not None: 2151 confidence = list(conf_string[start : i]) 2152 confidence = list(map(int, confidence)) 2153 motif = SecondaryStructureElement(start + 1, i, type, confidence) 2154 motifs.append(motif) 2155 2156 currel = char 2157 start = i 2158 2159 return motifs
2160 2161 @property
2162 - def start(self):
2163 """ 2164 Start position of the leftmost element 2165 @rtype: int 2166 """ 2167 return self._minstart
2168 2169 @property
2170 - def end(self):
2171 """ 2172 End position of the rightmost element 2173 @rtype: int 2174 """ 2175 return self._maxend
2176
2177 - def clone(self):
2178 """ 2179 @return: a deep copy of the object 2180 """ 2181 return copy.deepcopy(self)
2182
2183 - def to_three_state(self):
2184 """ 2185 Convert to three-state secondary structure (Helix, Strand, Coil). 2186 """ 2187 for e in self: 2188 e.simplify()
2189
2190 - def to_string(self, chain_length=None):
2191 """ 2192 Get back the string representation of the secondary structure. 2193 2194 @return: a string of secondary structure elements 2195 @rtype: str 2196 2197 @bug: [CSB 0000003] If conflicting elements are found at a given rank, 2198 this position is represented as a coil. 2199 """ 2200 gap = str(SecStructures.Gap) 2201 coil = str(SecStructures.Coil) 2202 2203 if chain_length is None: 2204 chain_length = max(e.end for e in self) 2205 2206 ss = [] 2207 2208 for pos in range(1, chain_length + 1): 2209 elements = self.at(pos) 2210 if len(elements) > 0: 2211 if len(set(e.type for e in elements)) > 1: 2212 ss.append(coil) # [CSB 0000003] 2213 else: 2214 ss.append(elements[0].to_string()) 2215 else: 2216 ss.append(gap) 2217 2218 return ''.join(ss)
2219
2220 - def at(self, rank, type=None):
2221 """ 2222 @return: all secondary structure elements covering the specifid position 2223 @rtype: tuple of L{SecondaryStructureElement}s 2224 """ 2225 return self.scan(start=rank, end=rank, filter=type, loose=True, cut=True)
2226
2227 - def scan(self, start, end, filter=None, loose=True, cut=True):
2228 """ 2229 Get all secondary structure elements within the specified [start, end] region. 2230 2231 @param start: the start position of the region, 1-based, inclusive 2232 @type start: int 2233 @param end: the end position of the region, 1-based, inclusive 2234 @type end: int 2235 @param filter: return only elements of the specified L{SecStructures} kind 2236 @type filter: L{csb.core.EnumItem} 2237 @param loose: grab all fully or partially matching elements within the region. 2238 if False, return only the elements which strictly reside within 2239 the region 2240 @type loose: bool 2241 @param cut: if an element is partially overlapping with the start..end region, 2242 cut its start and/or end to make it fit into the region. If False, 2243 return the elements with their real lengths 2244 @type cut: bool 2245 2246 @return: a list of deep-copied L{SecondaryStructureElement}s, sorted by their 2247 start position 2248 @rtype: tuple of L{SecondaryStructureElement}s 2249 """ 2250 matches = [ ] 2251 2252 for m in self: 2253 if filter and m.type != filter: 2254 continue 2255 2256 if loose: 2257 if start <= m.start <= end or start <= m.end <= end or (m.start <= start and m.end >= end): 2258 partmatch = copy.deepcopy(m) 2259 if cut: 2260 if partmatch.start < start: 2261 partmatch.start = start 2262 if partmatch.end > end: 2263 partmatch.end = end 2264 if partmatch.score: 2265 partmatch.score = partmatch.score[start : end + 1] 2266 matches.append(partmatch) 2267 else: 2268 if m.start >= start and m.end <= end: 2269 matches.append(copy.deepcopy(m)) 2270 2271 matches.sort() 2272 return tuple(matches)
2273
2274 - def q3(self, reference, relaxed=True):
2275 """ 2276 Compute Q3 score. 2277 2278 @param reference: reference secondary structure 2279 @type reference: L{SecondaryStructure} 2280 @param relaxed: if True, treat gaps as coils 2281 @type relaxed: bool 2282 2283 @return: the percentage of C{reference} residues with identical 2284 3-state secondary structure. 2285 @rtype: float 2286 """ 2287 2288 this = self.clone() 2289 this.to_three_state() 2290 2291 ref = reference.clone() 2292 ref.to_three_state() 2293 2294 total = 0 2295 identical = 0 2296 2297 def at(ss, rank): 2298 elements = ss.at(rank) 2299 if len(elements) == 0: 2300 return None 2301 elif len(elements) > 1: 2302 raise ValueError('Flat secondary structure expected') 2303 else: 2304 return elements[0]
2305 2306 for rank in range(ref.start, ref.end + 1): 2307 q = at(this, rank) 2308 s = at(ref, rank) 2309 2310 if s: 2311 if relaxed or s.type != SecStructures.Gap: 2312 total += 1 2313 if q: 2314 if q.type == s.type: 2315 identical += 1 2316 elif relaxed: 2317 pair = set([q.type, s.type]) 2318 match = set([SecStructures.Gap, SecStructures.Coil]) 2319 if pair.issubset(match): 2320 identical += 1 2321 2322 if total == 0: 2323 return 0.0 2324 else: 2325 return identical * 100.0 / total
2326
2327 - def subregion(self, start, end):
2328 """ 2329 Same as C{ss.scan(...cut=True)}, but also shift the start-end positions 2330 of all motifs and return a L{SecondaryStructure} instance instead of a list. 2331 2332 @param start: start position of the subregion, with reference to the chain 2333 @type start: int 2334 @param end: start position of the subregion, with reference to the chain 2335 @type end: int 2336 2337 @return: a deep-copy sub-fragment of the original L{SecondaryStructure} 2338 @rtype: L{SecondaryStructure} 2339 """ 2340 sec_struct = SecondaryStructure() 2341 2342 for motif in self.scan(start, end, loose=True, cut=True): 2343 2344 motif.start = motif.start - start + 1 2345 motif.end = motif.end - start + 1 2346 if motif.score: 2347 motif.score = list(motif.score) # this will automatically fix the score indices in the setter 2348 sec_struct.append(motif) 2349 2350 return sec_struct
2351
2352 -class TorsionAnglesCollection(csb.core.CollectionContainer):
2353 """ 2354 Describes a collection of torsion angles. Provides 1-based list-like access. 2355 2356 @param items: an initialization list of L{TorsionAngles} 2357 @type items: list 2358 """
2359 - def __init__(self, items=None, start=1):
2363
2364 - def __repr__(self):
2365 if len(self) > 0: 2366 return "<TorsionAnglesList: {0} ... {1}>".format(self[self.start_index], self[self.last_index]) 2367 else: 2368 return "<TorsionAnglesList: empty>"
2369 2370 @property
2371 - def phi(self):
2372 """ 2373 List of all phi angles 2374 @rtype: list 2375 """ 2376 return [a.phi for a in self]
2377 2378 @property
2379 - def psi(self):
2380 """ 2381 List of all psi angles 2382 @rtype: list 2383 """ 2384 return [a.psi for a in self]
2385 2386 @property
2387 - def omega(self):
2388 """ 2389 List of all omega angles 2390 @rtype: list 2391 """ 2392 return [a.omega for a in self]
2393
2394 - def update(self, angles):
2395 self._update(angles)
2396
2397 - def rmsd(self, other):
2398 """ 2399 Calculate the Circular RSMD against another TorsionAnglesCollection. 2400 2401 @param other: subject (right-hand-term) 2402 @type other: L{TorsionAnglesCollection} 2403 2404 @return: RMSD based on torsion angles 2405 @rtype: float 2406 2407 @raise Broken3DStructureError: on discontinuous torsion angle collections 2408 (phi and psi values are still allowed to be absent at the termini) 2409 @raise ValueError: on mismatching torsion angles collection lengths 2410 """ 2411 if len(self) != len(other) or len(self) < 1: 2412 raise ValueError('Both collections must be of the same and positive length') 2413 2414 length = len(self) 2415 query, subject = [], [] 2416 2417 for n, (q, s) in enumerate(zip(self, other), start=1): 2418 2419 q = q.copy() 2420 q.to_radians() 2421 2422 s = s.copy() 2423 s.to_radians() 2424 2425 if q.phi is None or s.phi is None: 2426 if n == 1: 2427 q.phi = s.phi = 0.0 2428 else: 2429 raise Broken3DStructureError('Discontinuous torsion angles collection at {0}'.format(n)) 2430 2431 if q.psi is None or s.psi is None: 2432 if n == length: 2433 q.psi = s.psi = 0.0 2434 else: 2435 raise Broken3DStructureError('Discontinuous torsion angles collection at {0}'.format(n)) 2436 2437 query.append([q.phi, q.psi]) 2438 subject.append([s.phi, s.psi]) 2439 2440 return csb.bio.utils.torsion_rmsd(numpy.array(query), numpy.array(subject))
2441
2442 -class TorsionAngles(object):
2443 """ 2444 Describes a collection of phi, psi and omega backbone torsion angles. 2445 2446 It is assumed that the supplied values are either None, or fitting into 2447 the range of [-180, +180] for AngleUnites.Degrees and [0, 2pi] for Radians. 2448 2449 @param phi: phi angle value in C{units} 2450 @type phi: float 2451 @param psi: psi angle value in C{units} 2452 @type psi: float 2453 @param omega: omega angle value in C{units} 2454 @type omega: float 2455 @param units: any of L{AngleUnits}'s enum members 2456 @type units: L{csb.core.EnumItem} 2457 2458 @raise ValueError: on invalid/unknown units 2459 """ 2460
2461 - def __init__(self, phi, psi, omega, units=AngleUnits.Degrees):
2462 2463 try: 2464 if isinstance(units, csb.core.string): 2465 units = csb.core.Enum.parse(AngleUnits, units, ignore_case=True) 2466 else: 2467 if units.enum is not AngleUnits: 2468 raise TypeError(units) 2469 2470 except ValueError: 2471 raise ValueError('Unknown angle unit type {0}'.format(units)) 2472 2473 self._units = units 2474 2475 self._phi = None 2476 self._psi = None 2477 self._omega = None 2478 2479 self.phi = phi 2480 self.psi = psi 2481 self.omega = omega
2482
2483 - def __repr__(self):
2484 return "<TorsionAngles: phi={0.phi}, psi={0.psi}, omega={0.omega}>".format(self)
2485
2486 - def __nonzero__(self):
2487 return self.__bool__()
2488
2489 - def __bool__(self):
2490 return self.phi is not None \ 2491 or self.psi is not None \ 2492 or self.omega is not None
2493 2494 @property
2495 - def units(self):
2496 """ 2497 Current torsion angle units - a member of L{AngleUnits} 2498 @rtype: enum item 2499 """ 2500 return self._units
2501 2502 @property
2503 - def phi(self):
2504 return self._phi
2505 @phi.setter
2506 - def phi(self, phi):
2507 TorsionAngles.check_angle(phi, self._units) 2508 self._phi = phi
2509 2510 @property
2511 - def psi(self):
2512 return self._psi
2513 @psi.setter
2514 - def psi(self, psi):
2515 TorsionAngles.check_angle(psi, self._units) 2516 self._psi = psi
2517 2518 @property
2519 - def omega(self):
2520 return self._omega
2521 @omega.setter
2522 - def omega(self, omega):
2523 TorsionAngles.check_angle(omega, self._units) 2524 self._omega = omega
2525
2526 - def copy(self):
2527 """ 2528 @return: a deep copy of C{self} 2529 """ 2530 return TorsionAngles(self.phi, self.psi, self.omega, self.units)
2531
2532 - def to_degrees(self):
2533 """ 2534 Set angle measurement units to degrees. 2535 Convert the angles in this TorsionAngles instance to degrees. 2536 """ 2537 2538 if self._units != AngleUnits.Degrees: 2539 2540 phi = TorsionAngles.deg(self._phi) 2541 psi = TorsionAngles.deg(self._psi) 2542 omega = TorsionAngles.deg(self._omega) 2543 2544 # if no ValueError is raised by TorsionAngles.check_angle in TorsionAngles.deg: 2545 # (we assign directly to the instance variables to avoid check_angle being invoked again in setters) 2546 self._phi, self._psi, self._omega = phi, psi, omega 2547 self._units = AngleUnits.Degrees
2548 2549
2550 - def to_radians(self):
2551 """ 2552 Set angle measurement units to radians. 2553 Convert the angles in this TorsionAngles instance to radians. 2554 """ 2555 2556 if self._units != AngleUnits.Radians: 2557 2558 phi = TorsionAngles.rad(self._phi) 2559 psi = TorsionAngles.rad(self._psi) 2560 omega = TorsionAngles.rad(self._omega) 2561 2562 # if no ValueError is raised by TorsionAngles.check_angle in TorsionAngles.rad: 2563 # (we assign directly to the instance variables to avoid check_angle being invoked again in setters) 2564 self._phi, self._psi, self._omega = phi, psi, omega 2565 self._units = AngleUnits.Radians
2566 2567 @staticmethod
2568 - def check_angle(angle, units):
2569 """ 2570 Check the value of a torsion angle expressed in the specified units. 2571 """ 2572 if angle is None: 2573 return 2574 elif units == AngleUnits.Degrees: 2575 if not (-180 <= angle <= 180): 2576 raise ValueError('Torsion angle {0} is out of range -180..180'.format(angle)) 2577 elif units == AngleUnits.Radians: 2578 if not (0 <= angle <= (2 * math.pi)): 2579 raise ValueError('Torsion angle {0} is out of range 0..2pi'.format(angle)) 2580 else: 2581 raise ValueError('Unknown angle unit type {0}'.format(units))
2582 2583 @staticmethod
2584 - def rad(angle):
2585 """ 2586 Convert a torsion angle value, expressed in degrees, to radians. 2587 Negative angles are converted to their positive counterparts: rad(ang + 360deg). 2588 2589 Return the calculated value in the range of [0, 2pi] radians. 2590 """ 2591 TorsionAngles.check_angle(angle, AngleUnits.Degrees) 2592 2593 if angle is not None: 2594 if angle < 0: 2595 angle += 360. 2596 angle = math.radians(angle) 2597 return angle
2598 2599 @staticmethod
2600 - def deg(angle):
2601 """ 2602 Convert a torsion angle value, expressed in radians, to degrees. 2603 Negative angles are not accepted, it is assumed that negative torsion angles have been 2604 converted to their ang+2pi counterparts beforehand. 2605 2606 Return the calculated value in the range of [-180, +180] degrees. 2607 """ 2608 TorsionAngles.check_angle(angle, AngleUnits.Radians) 2609 2610 if angle is not None: 2611 if angle > math.pi: 2612 angle = -((2. * math.pi) - angle) 2613 angle = math.degrees(angle) 2614 2615 return angle
2616