csb.bio.nmr

1 """ 2 NMR related objects. 3 """ 4 5 import os 6 import numpy.linalg 7 import xml.dom.minidom 8 9 import csb.io.tsv 10 import csb.core as pu 11 12 from csb.statistics.pdf import GeneralizedNormal 13 from csb.bio.sequence import ProteinAlphabet 14 from csb.bio.structure import ChemElements

15 16 17 -class InvalidResidueError(ValueError):

18 pass

19

20 -class EntityNotSupportedError(KeyError):

21 pass

22

23 24 -class RandomCoil(object):

25 """ 26 Utility class containing all necessary data and methods for computing 27 secondary chemical shifts. 28 29 @note: You are supposed to obtain an instance of this object only via 30 the dedicated factory (see L{RandomCoil.get}). The factory 31 ensures a "singleton with lazy instantiation" behavior. This is 32 needed since this object loads static data from the file system. 33 """ 34 35 RESOURCES = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'resources') 36 37 _instance = None 38 39 40 @staticmethod

41 - def get():

42 """ 43 Get the current L{RandomCoil} instance (and create it, if this 44 method is called for the first time). 45 """ 46 if RandomCoil._instance is None: 47 RandomCoil._instance = RandomCoil() 48 49 return RandomCoil._instance

50

51 - def __init__(self):

52 53 if RandomCoil._instance is not None: 54 raise NotImplementedError("Can't instantiate a singleton") 55 56 RandomCoil._instance = self 57 58 self._offsets = (-2, -1, 1, 2) 59 self._reference = {} 60 self._corrections = {} 61 62 self._initialize()

63

64 - def _initialize(self):

65 66 ref = os.path.join(RandomCoil.RESOURCES, 'RandomCoil.Reference.tsv') 67 cor = os.path.join(RandomCoil.RESOURCES, 'RandomCoil.Corrections.tsv') 68 69 self._load(ref, cor)

70

71 - def _load(self, ref, cor):

72 73 self._reference = {} 74 self._corrections = {} 75 76 header = 'Residue:str Nucleus:str Value:float' 77 78 for row in csb.io.tsv.Table.from_tsv(ref, header): 79 residue = pu.Enum.parsename(ProteinAlphabet, row[0]) 80 nucleus, value = row[1:] 81 82 if residue not in self._reference: 83 self._reference[residue] = {} 84 85 self._reference[residue][nucleus] = value 86 87 header = 'Residue:str Nucleus:str CS1:float CS2:float CS3:float CS4:float' 88 89 for row in csb.io.tsv.Table.from_tsv(cor, header): 90 residue = pu.Enum.parsename(ProteinAlphabet, row[0]) 91 nucleus = row[1] 92 values = row[2:] 93 94 if residue not in self._corrections: 95 self._corrections[residue] = {} 96 97 self._corrections[residue][nucleus] = dict(zip(self._offsets, values))

98

99 - def simple_secondary_shift(self, residue, nucleus, value):

100 """ 101 Compute a secondary shift given a raw shift C{value}. 102 Residue neighborhood is not taken into account. 103 104 @param residue: residue type (amino acid code) 105 @type residue: str or L{EnumItem} 106 @param nucleus: atom name (PDB format) 107 @type nucleus: str 108 @param value: raw chemical shift value 109 @type value: float 110 111 @return: float 112 113 @raise EntityNotSupportedError: on unsupported residue or nucleus 114 """ 115 116 try: 117 if isinstance(residue, pu.string): 118 if len(residue) == 1: 119 residue = pu.Enum.parse(ProteinAlphabet, residue) 120 else: 121 residue = pu.Enum.parsename(ProteinAlphabet, residue) 122 else: 123 if residue.enum is not ProteinAlphabet: 124 raise TypeError(residue) 125 126 return value - self._reference[residue][nucleus] 127 128 except (pu.EnumValueError, pu.EnumMemberError): 129 raise InvalidResidueError('{0} is not a protein residue'.format(residue)) 130 131 except KeyError as ke: 132 raise EntityNotSupportedError('{0!s}, context: {1!r} {2}'.format(ke, residue, nucleus))

133

134 - def secondary_shift(self, chain, residue, nucleus, value):

135 """ 136 Compute a secondary shift given a raw shift C{value} for a specific 137 residue and its neighboring residues. 138 139 @param chain: the protein chain containing the C{nucleus} 140 @type chain: L{Chain} 141 @param residue: the residue containing the C{nucleus}. This can be 142 a residue object, id (sequence number + insertion 143 code, string) or rank (integer, 1-based) 144 @type residue: L{Residue}, str or int 145 @param nucleus: atom name (PDB format) 146 @type nucleus: str 147 @param value: raw chemical shift value 148 @type value: float 149 """ 150 try: 151 if isinstance(residue, int): 152 residue = chain.residues[residue] 153 elif isinstance(residue, pu.string): 154 residue = chain.find(residue) 155 else: 156 residue = chain.residues[residue.rank] 157 except (pu.ItemNotFoundError, pu.CollectionIndexError): 158 raise InvalidResidueError("Can't find residue {0} in {1}".format(residue, chain)) 159 160 shift = self.simple_secondary_shift(residue.type, nucleus, value) 161 162 for offset in self._offsets: 163 164 if 1 <= (residue.rank + offset) <= chain.length: 165 try: 166 neighbor = chain.residues[residue.rank + offset] 167 shift -= self._corrections[neighbor.type][nucleus][offset * -1] 168 169 except KeyError: 170 continue 171 172 return shift

173

174 175 -class AtomConnectivity(object):

176 177 RESOURCES = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'resources') 178 179 _instance = None 180 181 @staticmethod

182 - def get():

183 """ 184 Get the current L{AtomConnectivity} instance (and create it if this 185 method is invoked for the first time). 186 @rtype: L{AtomConnectivity} 187 """ 188 if AtomConnectivity._instance is None: 189 AtomConnectivity._instance = AtomConnectivity() 190 return AtomConnectivity._instance

191

192 - def __init__(self):

193 194 self._table = {} 195 self._initialize()

196

197 - def _initialize(self):

198 199 resource = os.path.join(AtomConnectivity.RESOURCES, 'AtomConnectivity.xml') 200 root = xml.dom.minidom.parse(resource) 201 202 for r in root.documentElement.getElementsByTagName('residue'): 203 residue = pu.Enum.parsename(ProteinAlphabet, r.getAttribute('type')) 204 self._table[residue] = {} 205 206 for a in r.getElementsByTagName('atom'): 207 atom = a.getAttribute('name') 208 self._table[residue][atom] = set() 209 210 for b in r.getElementsByTagName('bond'): 211 atom1 = b.getAttribute('atom1') 212 atom2 = b.getAttribute('atom2') 213 self._table[residue][atom1].add(atom2) 214 self._table[residue][atom2].add(atom1)

215

216 - def connected(self, residue, atom1, atom2):

217 """ 218 Return True if C{atom1} is covalently connected to C{atom2} in C{residue} 219 220 @param residue: residue type (a member of L{ProteinAlphabet}) 221 @type residue: L{EnumItem} 222 @param atom1: first atom name (IUPAC) 223 @type atom1: str 224 @param atom2: second atom name (IUPAC) 225 @type atom2: str 226 227 @rtype: boolean 228 """ 229 if residue in self._table: 230 r = self._table[residue] 231 if atom1 in r: 232 return atom2 in r[atom1] 233 234 return False

235

236 - def connected_atoms(self, residue, atom):

237 """ 238 Return all atoms covalently connected to C{atom} in C{residue}. 239 240 @param residue: residue type (a member of L{ProteinAlphabet}) 241 @type residue: L{EnumItem} 242 @param atom: source atom name (IUPAC) 243 @type atom: str 244 245 @rtype: tuple of str 246 """ 247 if residue in self._table: 248 r = self._table[residue] 249 if atom in r: 250 return tuple(r[atom]) 251 252 return tuple()

253

254 - def contains(self, residue, atom):

255 """ 256 Return True if C{atom} name is contained in C{residue}. 257 258 @param residue: residue type (a member of L{ProteinAlphabet}) 259 @type residue: L{EnumItem} 260 @param atom: atom name (IUPAC) 261 @type atom: str 262 263 @rtype: bool 264 """ 265 if residue in self._table: 266 return atom in self._table[residue] 267 268 return False

269

270 - def get_atoms(self, residue, prefix=''):

271 """ 272 Get all atoms contained in C{residue}. 273 274 @param residue: residue type (a member of L{ProteinAlphabet}) 275 @type residue: L{EnumItem} 276 @param prefix: atom name prefix wildcard (IUPAC) 277 @type prefix: str 278 279 @return: set of atom names 280 @rtype: frozenset of str 281 """ 282 t = self._table[residue] 283 if residue in self._table: 284 return frozenset(a for a in t if a.startswith(prefix)) 285 286 return frozenset()

287

288 289 -class Filters(object):

290 """ 291 Pre-built atom filters for L{ContactMap}s. 292 """ 293 294 @staticmethod

295 - def ALL(a):

296 return True

297 298 @staticmethod

299 - def HYDROGENS(a):

300 return a.element == ChemElements.H

301 302 @staticmethod

303 - def CARBONS(a):

304 return a.element == ChemElements.C

305 306 @staticmethod

307 - def CALPHAS(a):

308 return a.name == 'CA'

309

310 -class ContactMap(object):

311 """ 312 Describes a protein contact map. Atoms positioned at distance below 313 a given cutoff are considered to be in contact. 314 315 @param chain: source protein chain 316 @type chain: L{csb.bio.structure.Chain} 317 @param cutoff: distance cutoff in angstroms 318 @type cutoff: float 319 @param filter: a callable with signature 'bool def(csb.bio.structure.Atom)', 320 invoked for every atom, which determines whether a given atom 321 should be skipped (False) or considered (True). See L{Filters} 322 @type filter: lambda 323 """ 324 325 DISTANCE_CUTOFF = 6.0 326 327 @staticmethod

328 - def load(filename):

329 """ 330 Deserialize from a pickle. 331 """ 332 with open(filename, 'rb') as stream: 333 return csb.io.Pickle.load(stream)

334

335 - def __init__(self, chain, cutoff=DISTANCE_CUTOFF, filter=None):

336 337 self._cutoff = float(cutoff) 338 self._chain = chain 339 self._atoms = [] 340 self._atomset = set() 341 self._map = {} 342 self._coords = {} 343 344 if filter is None: 345 filter = lambda i: True 346 347 for residue in chain.residues: 348 self._coords[residue.rank] = {} 349 atoms = [a for a in residue.items if filter(a)] 350 351 if len(atoms) == 0: 352 continue 353 354 step = 1.0 / len(atoms) 355 n = 0 356 357 for atom in atoms: 358 self._atoms.append(atom) 359 self._atomset.add(atom) 360 self._coords[residue.rank][atom.name] = residue.rank + n * step 361 n += 1

362

363 - def __iter__(self):

364 return self.contacts

365

366 - def __contains__(self, atom):

367 return atom in self._atomset

368 369 @property

370 - def cutoff(self):

371 """ 372 Distance cutoff in Angstroms 373 @rtype: float 374 """ 375 return self._cutoff

376 377 @property

378 - def chain(self):

379 """ 380 Source protein chain 381 @rtype: L{Chain} 382 """ 383 return self._chain

384 385 @property

386 - def atoms(self):

387 """ 388 All atoms involved in this map, sorted by residue number 389 @rtype: tuple of L{Atom} 390 """ 391 return tuple(self._atoms)

392 393 @property

394 - def contacts(self):

395 """ 396 All atom contacts: an iterator over all contacting 397 (L{Atom}, L{Atom}) pairs. 398 @rtype: iterator of 2-tuples 399 """ 400 visited = set() 401 402 for a1 in self._map: 403 for a2 in self._map[a1]: 404 if (a1, a2) not in visited: 405 visited.add((a1, a2)) 406 visited.add((a2, a1)) 407 yield (a1, a2)

408

409 - def build(self):

410 """ 411 Extract all contacts from the chain using the current distance cutoff. 412 """ 413 414 self._map = {} 415 416 for atom1 in self._atoms: 417 for atom2 in self._atoms: 418 if atom1 is not atom2: 419 distance = numpy.linalg.norm(atom1.vector - atom2.vector) 420 if distance <= self._cutoff: 421 self._connect(atom1, atom2)

422

423 - def connect(self, atom1, atom2):

424 """ 425 Define a contact between C{atom1} and C{atom2}. 426 427 @param atom1: first atom 428 @type atom1: L{Atom} 429 @param atom2: second atom 430 @type atom2: L{Atom} 431 """ 432 for atom in [atom1, atom2]: 433 if atom not in self._atomset: 434 raise ValueError("No such atom in contact map: {0}".format(atom)) 435 436 self._connect(atom1, atom2)

437

438 - def _connect(self, atom1, atom2):

439 440 if atom1 not in self._map: 441 self._map[atom1] = set() 442 self._map[atom1].add(atom2) 443 444 if atom2 not in self._map: 445 self._map[atom2] = set() 446 self._map[atom2].add(atom1)

447

448 - def connected(self, atom1, atom2):

449 """ 450 Return True if the specified atoms are in contact. 451 452 @param atom1: first atom 453 @type atom1: L{Atom} 454 @param atom2: second atom 455 @type atom2: L{Atom} 456 """ 457 if atom1 in self._map: 458 return atom2 in self._map[atom1] 459 return False

460

461 - def atom_contacts(self, atom):

462 """ 463 Return all atoms within C{self.cutoff} angstroms of C{atom}. 464 465 @param atom: anchor atom 466 @type atom: L{Atom} 467 468 @rtype: frozenset of L{Atom} 469 """ 470 471 if atom in self._map: 472 return frozenset(self._map[atom]) 473 else: 474 return frozenset()

475

476 - def residue_contacts(self, residue):

477 """ 478 Return all residues, having neighboring atoms within C{self.cutoff} 479 angstroms from any of the C{residue}'s atoms. 480 481 @param residue: anchor residue 482 @type residue: L{Residue} 483 484 @rtype: frozenset of L{Residue} 485 """ 486 487 partners = set() 488 489 for atom in residue.items: 490 if atom in self._map: 491 for partner in self._map[atom]: 492 partners.add(partner.residue) 493 494 return frozenset(partners)

495

496 - def position(self, rank, atom_name):

497 """ 498 Compute the location of C{atom} on the contact map. 499 500 @param rank: residue rank (1-based) 501 @type rank: int 502 @param atom_name: atom name 503 @type atom_name: str 504 505 @rtype: float 506 """ 507 residue = self._chain.residues[rank] 508 atom = residue.atoms[atom_name] 509 510 try: 511 return self._coords[residue.rank][atom.name] 512 except KeyError: 513 msg = "No atom {0} at #{1} in contact map: {2}" 514 raise ValueError(msg.format(atom_name, rank, self._coords[residue.rank].values()))

515

516 - def atom_matrix(self):

517 """ 518 Build a 2D binary contact matrix (0=no contact, 1=contact). The order of elements 519 in each dimension will match the order of atoms in the contact map 520 (see L{ContactMap.atoms} and iter(L{ContactMap}). That means, the atoms in 521 each dimension are sorted by residue number first. 522 523 @deprecated: This method can be removed in future versions 524 525 @rtype: numpy.array (2D) 526 """ 527 528 matrix = [] 529 530 for i, atom1 in enumerate(self.atoms): 531 matrix.append([]) 532 533 for atom2 in self.atoms: 534 if atom1 in self._map and atom2 in self._map[atom1]: 535 matrix[i].append(1) 536 else: 537 matrix[i].append(0) 538 539 return numpy.array(matrix)

540

541 - def draw(self, plot, color="black"):

542 """ 543 Visualize this contact map. 544 545 @param plot: L{csb.io.plots.Chart}'s plot to draw on 546 @type plot: matplotlib.AxesSubplot 547 @param color: pixel color (must be a matplotlib color constant) 548 @type color: str 549 """ 550 551 x, y = [], [] 552 553 for atom1 in self.atoms: 554 for atom2 in self.atom_contacts(atom1): 555 pos1 = self.position(atom1.residue.rank, atom1.name) 556 pos2 = self.position(atom2.residue.rank, atom2.name) 557 558 assert None not in (pos1, pos2), (atom1, atom2) 559 x.append(pos1) 560 y.append(pos2) 561 562 plot.plot(x, y, color=color, marker=",", linestyle='none') 563 564 plot.set_xlim(0, self.chain.length) 565 plot.set_ylim(0, self.chain.length) 566 567 return plot

568 569 @staticmethod

570 - def compare(query, reference, min_distance=0):

571 """ 572 Compare a query contact map against a reference. 573 574 @type query: L{ContactMap} 575 @type reference: L{ContactMap} 576 577 @param min_distance: consider only contacts between atoms, separated by 578 the given minimum number of residues 579 @type min_distance: int 580 581 @return: precision and coverage 582 @rtype: L{ContactMapComparisonInfo} 583 """ 584 if query.chain is not reference.chain: 585 raise ValueError("Contact maps are not comparable") 586 if not query._map and not reference._map: 587 raise ValueError("Can't compare empty contact maps") 588 589 true_pos = 0.0 590 false_pos = 0.0 591 false_neg = 0.0 592 593 for a1, a2 in query.contacts: 594 if abs(a1.residue.rank - a2.residue.rank) >= min_distance: 595 if reference.connected(a1, a2): 596 true_pos += 1.0 597 else: 598 false_pos += 1.0 599 600 for a1, a2 in reference.contacts: 601 if abs(a1.residue.rank - a2.residue.rank) >= min_distance: 602 if not query.connected(a1, a2): 603 false_neg += 1.0 604 605 try: 606 precision = true_pos / (true_pos + false_pos) 607 coverage = true_pos / (true_pos + false_neg) 608 return ContactMapComparisonInfo(precision, coverage) 609 610 except ZeroDivisionError: 611 return ContactMapComparisonInfo(0, 0)

612

613 -class ContactMapComparisonInfo(object):

614

615 - def __init__(self, precision, coverage):

616 617 self.precision = precision 618 self.coverage = coverage

619

620 621 -class Label(object):

622 """ 623 Utility class for working with chemical shift labels. 624 625 @param residue: residue type 626 @type residue: L{EnumItem} 627 @param rank: residue position (1-based) 628 @type rank: int 629 @param atom_name: nucleus name 630 @type atom_name: str 631 """ 632 633 @staticmethod

634 - def build(residue_type, position, atom_name):

635 """ 636 Build a new string label by specifying its components. 637 @rtype: str 638 """ 639 return '{0!s}#{1}:{2}'.format(residue_type, position, atom_name)

640 641 @staticmethod

642 - def from_shift(shift):

643 """ 644 Build a new string label from a L{ChemShiftInfo}. 645 @rtype: str 646 """ 647 return Label.build(shift.residue, shift.position, shift.name)

648 649 @staticmethod

650 - def from_atom(atom):

651 """ 652 Build a new string label from an L{Atom}. 653 @rtype: str 654 """ 655 return Label.build(atom.residue.type, atom.residue.rank, atom.name)

656 657 @staticmethod

658 - def match(shift, atom):

659 """ 660 Return True if the labels of a L{ChemShiftInfo} and an L{Atom} match. 661 @rtype: bool 662 """ 663 664 l = Label.from_shift(shift) 665 r = Label.from_atom(atom) 666 667 return r == l

668 669 @staticmethod

670 - def get_atom(chain, label):

671 """ 672 Get the L{Atom} in a L{Chain}, designated by a given string label. 673 @rtype: L{Atom} 674 """ 675 dummy, rank, atom = Label.parse(label) 676 return chain.residues[rank].atoms[atom]

677 678 @staticmethod

679 - def parse(label):

680 """ 681 Parse the components of a string nucleus label. 682 @return: (residue, rank, atom) 683 @rtype: 3-tuple 684 """ 685 parts = label.split("#") 686 residue = parts[0] 687 688 subparts = parts[1].split(":") 689 rank = int(subparts[0]) 690 atom = subparts[1] 691 692 return (residue, rank, atom)

693 694 @staticmethod

695 - def from_string(label):

696 """ 697 Parse the a string nucleus label and create a new L{Label}. 698 @rtype: L{Label} 699 """ 700 residue, rank, atom = Label.parse(label) 701 return Label(residue, rank, atom)

702

703 - def __init__(self, residue, rank, atom_name):

704 705 self._residue = residue 706 self._rank = rank 707 self._atom = atom_name

708 709 @property

710 - def residue(self):

711 """ 712 Residue type (a L{ProteinAlphabet} member) 713 """ 714 return self._residue

715 716 @property

717 - def rank(self):

718 """ 719 Residue rank (1-based) 720 """ 721 return self._rank

722 723 @property

724 - def atom_name(self):

725 """ 726 Nucleus name 727 """ 728 return self._atom

729

730 - def __str__(self):

731 return Label.build(self._residue, self._rank, self._atom)

732

733 734 -class ChemShiftInfo(object):

735 """ 736 Chemical shift struct. 737 738 @param position: residue rank (1-based) 739 @type position: int 740 @param residue: amino acid type (a member of L{ProteinAlphabet}) 741 @type residue: str or L{EnumItem} 742 @param name: nucleus label 743 @type name: str 744 @param element: nucleus type (a member of L{ChemElements}) 745 @type element: str or L{EnumItem} 746 @param shift: chemical shift value 747 @type shift: float 748 """ 749

750 - def __init__(self, position, residue, name, element, shift):

751 752 if not isinstance(residue, pu.EnumItem) or residue.enum is not ProteinAlphabet: 753 residue = pu.Enum.parsename(ProteinAlphabet, str(residue)) 754 755 if not isinstance(element, pu.EnumItem) or element.enum is not ChemElements: 756 element = pu.Enum.parsename(ChemElements, str(element)) 757 758 self.position = int(position) 759 self.residue = residue 760 self.name = str(name) 761 self.element = element 762 self.shift = float(shift)

763

764 - def clone(self, name):

765 """ 766 Clone the current shift and create a new one with the specified 767 nucleus label. 768 769 @rtype: L{ChemShiftInfo} 770 """ 771 ni = self 772 return ChemShiftInfo(ni.position, repr(ni.residue), name, repr(ni.element), ni.shift)

773

774 - def __str__(self):

775 return "{0!s}#{1}:{2}".format(self.residue, self.position, self.name)

776 777 @property

778 - def label(self):

779 """ 780 String label representation 781 @rtype: str 782 """ 783 return str(self)

784

785 -class ChemicalShiftNetwork(object):

786 """ 787 Describes a network of covalently connected, chemical shift visible nuclei. 788 789 @param shifts: chemical shift instances 790 @type shifts: iterable of L{ChemShiftInfo} 791 """ 792

793 - def __init__(self, shifts):

794 795 self._neighbors = {} 796 797 labels = {} 798 799 for cs in shifts: 800 self._neighbors[cs] = set() 801 id = Label.from_shift(cs) 802 labels[id] = cs 803 804 conn = AtomConnectivity.get() 805 806 for cs in shifts: 807 for atom_name in conn.connected_atoms(cs.residue, cs.name): 808 target = Label.build(cs.residue, cs.position, atom_name) 809 if target in labels: 810 self.connect(cs, labels[target])

811

812 - def connect(self, cs1, cs2):

813 """ 814 Connect two nuclei. 815 816 @param cs1: first chemical shift instance 817 @type cs1: L{ChemShiftInfo} 818 @param cs2: second chemical shift instance 819 @type cs2: L{ChemShiftInfo} 820 """ 821 822 try: 823 self._neighbors[cs1].add(cs2) 824 self._neighbors[cs2].add(cs1) 825 except KeyError: 826 raise ValueError("Unknown chemical shift")

827

828 - def connected_shifts(self, source, element=None):

829 """ 830 Return an iterator over all covalently connected neuclei to a given 831 C{source}. 832 833 @param source: source chemical shift 834 @type source: L{ChemShiftInfo} 835 836 @rtype: iterator of L{ChemShiftInfo} 837 """ 838 839 840 if source not in self._neighbors: 841 raise ValueError("No such chemical shift in this network") 842 843 for cs in self._neighbors[source]: 844 if element is None or cs.element == element: 845 yield cs

846

847 - def __iter__(self):

848 return iter(self._neighbors)

849

850 -class ChemShiftScoringModel(object):

851 """ 852 Chemical shift similarity scoring model. See C{ScoringModel.NUCLEI} for 853 a list of supported chemical shift types. 854 """ 855 856 NUCLEI = ('CA', 'CB', 'C', 'N', 'HA') 857 858

859 - def __init__(self):

860 861 self._pos = {} 862 self._neg = {} 863 864 self._pos['CA'] = GeneralizedNormal(0.02, 1.32, 1.1) 865 self._neg['CA'] = GeneralizedNormal(-0.08, 4.23, 2.2) 866 867 self._pos['CB'] = GeneralizedNormal(0.06, 1.32, 1.0) 868 self._neg['CB'] = GeneralizedNormal(0.08, 2.41, 1.2) 869 870 self._pos['C'] = GeneralizedNormal(0.12, 1.52, 1.4) 871 self._neg['C'] = GeneralizedNormal(-0.13, 3.42, 2.1) 872 873 self._pos['N'] = GeneralizedNormal(0.23, 4.39, 1.4) 874 self._neg['N'] = GeneralizedNormal(0.17, 7.08, 1.9) 875 876 self._pos['HA'] = GeneralizedNormal(0.00, 0.27, 1.0) 877 self._neg['HA'] = GeneralizedNormal(-0.01, 0.66, 1.4) 878 879 assert set(self._pos) == set(ChemShiftScoringModel.NUCLEI) 880 assert set(self._neg) == set(ChemShiftScoringModel.NUCLEI)

881

882 - def positive(self, nucleus, deltas):

883 """ 884 Return the probability that a given chemical shift difference 885 indicates structural similarity (true positive match). 886 887 @param nucleus: chemical shift (a member of C{ScoringModel.NUCLEI}) 888 @type nucleus: str 889 @param deltas: chemical shift difference(s): q-s 890 @type deltas: float or list of floats 891 892 @return: the raw value of the probability density function 893 @rtype: float or array of floats 894 """ 895 results = self._pos[nucleus].evaluate([deltas]) 896 return results[0]

897

898 - def negative(self, nucleus, deltas):

899 """ 900 Return the probability that a given chemical shift difference 901 indicates no structural similarity (true negative match). 902 903 @param nucleus: chemical shift (a member of C{ScoringModel.NUCLEI}) 904 @type nucleus: str 905 @param deltas: chemical shift difference(s): q-s 906 @type deltas: float or list of floats 907 908 @return: the raw value of the probability density function 909 @rtype: float or array of floats 910 """ 911 results = self._neg[nucleus].evaluate([deltas]) 912 return results[0]

913

914 - def score(self, nucleus, deltas):

915 """ 916 Return the bit score for a given chemical shift difference. 917 918 @param nucleus: chemical shift (a member of C{ScoringModel.NUCLEI}) 919 @type nucleus: str 920 @param deltas: chemical shift difference(s): q-s 921 @type deltas: float or list of floats 922 923 @return: bit score 924 @rtype: float or array of floats 925 """ 926 pos = self.positive(nucleus, deltas) 927 neg = self.negative(nucleus, deltas) 928 929 return numpy.log2(pos / neg)

930

931 932 -class NOEPeak(object):

933 """ 934 Describes a single NOE peak. 935 936 @param intensity: peak intensity 937 @type intensity: float 938 @param dimensions: list of dimension values 939 @type dimensions: iterable of float 940 @param spectrum: owning NOE spectrum 941 @type spectrum: L{NOESpectrum} 942 """ 943

944 - def __init__(self, intensity, dimensions, spectrum):

945 946 self._dimensions = list(dimensions) 947 self._intensity = float(intensity) 948 self._spectrum = spectrum

949 950 @property

951 - def intensity(self):

952 """ 953 Peak intensity 954 @rtype: float 955 """ 956 return self._intensity

957 958 @property

959 - def num_dimensions(self):

960 """ 961 Number of dimensions 962 @rtype: int 963 """ 964 return len(self._dimensions)

965

966 - def has_element(self, e):

967 """ 968 Return True if the owning spectrum contains a dimension of the specified type 969 970 @param e: element (dimension) type (see L{ChemElements}) 971 @type e: L{EnumItem} 972 973 @rtype: bool 974 """ 975 return self._spectrum.has_element(e)

976

977 - def __getitem__(self, column):

978 return self.get(column)

979

980 - def __iter__(self):

981 return iter(self._dimensions)

982

983 - def __str__(self):

984 return '<NOEPeak: {0}, I={1}>'.format(self._dimensions, self._intensity)

985

986 - def element(self, i):

987 """ 988 Return the dimension (nucleus) type at dimension index i 989 990 @param i: dimension index (0-based) 991 @type i: int 992 993 @return: nucleus type 994 @rtype: L{EnumItem} 995 """ 996 return self._spectrum.element(i)

997

998 - def get(self, column):

999 """ 1000 Get the value of the specified dimension. 1001 1002 @param column: dimension index (0-based) 1003 @type column: int 1004 1005 @return: dimension value 1006 @rtype: float 1007 """ 1008 if 0 <= column < len(self._dimensions): 1009 return self._dimensions[column] 1010 else: 1011 raise IndexError("Dimension index out of range")

1012

1013 - def has_connected_dimensions(self, i):

1014 """ 1015 Return True of dimension index C{i} has covalently connected dimensions. 1016 1017 @param i: dimension index (0-based) 1018 @type i: int 1019 1020 @rtype: bool 1021 """ 1022 return self._spectrum.has_connected_dimensions(i)

1023

1024 - def connected_dimensions(self, i):

1025 """ 1026 Return a list of all dimension indices, covalently connected to 1027 dimension C{i}. 1028 1029 @param i: dimension index (0-based) 1030 @type i: int 1031 1032 @rtype: iterable of L{EnumItem} 1033 """ 1034 return self._spectrum.connected_dimensions(i)

1035

1036 1037 -class NOESpectrum(object):

1038 """ 1039 Describes an NOE spectrum. 1040 1041 @param elements: list of dimension (nucleus) types for each dimension 1042 @type elements: iterable of L{EnumItem} (L{ChemElements}) or str 1043 """

1044 - def __init__(self, elements):

1045 1046 self._elements = [] 1047 self._elemset = set() 1048 self._connected = {} 1049 self._protondim = set() 1050 self._peaks = [] 1051 self._min = float("inf") 1052 self._max = float("-inf") 1053 1054 for i, n in enumerate(elements): 1055 1056 if not isinstance(n, pu.EnumItem) or n.enum is not ChemElements: 1057 element = pu.Enum.parsename(ChemElements, n) 1058 else: 1059 element = n 1060 self._elements.append(element) 1061 1062 if element == ChemElements.H: 1063 self._protondim.add(i) 1064 1065 self._elemset = set(self._elements)

1066 1067 @staticmethod

1068 - def join(spectrum, *spectra):

1069 """ 1070 Merge multiple L{NOESpectrum} instances. All C{spectra} must have matching 1071 dimensions according to the master C{spectrum}. 1072 1073 @return: merged spectrum 1074 @rtype: L{NOESpectrum} 1075 """ 1076 elements = tuple(spectrum.dimensions) 1077 joint = NOESpectrum(map(repr, elements)) 1078 1079 for i, dummy in enumerate(elements): 1080 for j in spectrum.connected_dimensions(i): 1081 joint.connect(i, j) 1082 1083 for s in [spectrum] + list(spectra): 1084 if tuple(s.dimensions) != elements: 1085 raise ValueError("Incompatible spectrum: {0}".format(s)) 1086 for p in s: 1087 joint.add(p.intensity, list(p)) 1088 1089 return joint

1090 1091

1092 - def __iter__(self):

1093 return iter(self._peaks)

1094

1095 - def __len__(self):

1096 return len(self._peaks)

1097

1098 - def __str__(self):

1099 return '<NOESpectrum: {0}>'.format(self._elements)

1100

1101 - def __getitem__(self, i):

1102 try: 1103 return self._peaks[i] 1104 except IndexError: 1105 raise IndexError("Peak index out of range")

1106 1107 @property

1108 - def min_intensity(self):

1109 """ 1110 Minimum intensity 1111 @rtype: float 1112 """ 1113 return self._min

1114 1115 @property

1116 - def max_intensity(self):

1117 """ 1118 Maximum intensity 1119 @rtype: float 1120 """ 1121 return self._max

1122 1123 @property

1124 - def dimensions(self):

1125 """ 1126 Tuple of all dimensions (nucleus types) 1127 @rtype: tuple of L{EnumItem} 1128 """ 1129 return tuple(self._elements)

1130 1131 @property

1132 - def proton_dimensions(self):

1133 """ 1134 Tuple of all proton dimension indices 1135 @rtype: tuple of int 1136 """ 1137 return tuple(self._protondim)

1138 1139 @property

1140 - def num_dimensions(self):

1141 """ 1142 Number of dimensions 1143 @rtype: int 1144 """ 1145 return len(self._elements)

1146 1147 @property

1148 - def num_proton_dimensions(self):

1149 """ 1150 Number of proton dimensions 1151 @rtype: int 1152 """ 1153 return len(self._protondim)

1154

1155 - def has_element(self, e):

1156 """ 1157 Return True if the spectrum contains a dimension of the specified type 1158 1159 @param e: element (dimension) type (see L{ChemElements}) 1160 @type e: L{EnumItem} 1161 1162 @rtype: bool 1163 """ 1164 return e in self._elemset

1165

1166 - def connect(self, i1, i2):

1167 """ 1168 Mark dimensions with indices C{i1} and C{i2} as covalently connected. 1169 1170 @param i1: dimension index 1 (0-based) 1171 @type i1: int 1172 @param i2: dimension index 2 (0-based) 1173 @type i2: int 1174 """ 1175 1176 for i in [i1, i2]: 1177 if not 0 <= i < self.num_dimensions: 1178 raise IndexError("Dimension index out of range") 1179 1180 if i1 == i2: 1181 raise ValueError("Can't connect a dimension to itself") 1182 if not self._can_connect(i1, i2): 1183 raise ValueError("Only proton-nonproton bonds are allowed") 1184 1185 self._connected.setdefault(i1, set()).add(i2) 1186 self._connected.setdefault(i2, set()).add(i1)

1187

1188 - def _can_connect(self, i1, i2):

1189 1190 pair = set() 1191 1192 for i in [i1, i2]: 1193 is_proton = self.element(i) == ChemElements.H 1194 pair.add(is_proton) 1195 1196 if True in pair and False in pair: 1197 return True 1198 1199 return False

1200

1201 - def has_connected_dimensions(self, i):

1202 """ 1203 Return True of dimension index C{i} has covalently connected dimensions. 1204 1205 @param i: dimension index (0-based) 1206 @type i: int 1207 1208 @rtype: bool 1209 """ 1210 if i in self._connected: 1211 return len(self._connected[i]) > 0 1212 1213 return False

1214

1215 - def connected_dimensions(self, i):

1216 """ 1217 Return a list of all dimension indices, covalently connected to 1218 dimension C{i}. 1219 1220 @param i: dimension index (0-based) 1221 @type i: int 1222 1223 @rtype: iterable of int 1224 """ 1225 if i in self._connected: 1226 return tuple(self._connected[i]) 1227 1228 return tuple()

1229

1230 - def add(self, intensity, dimensions):

1231 """ 1232 Add a new NOE peak. 1233 1234 @param intensity: peak intensity 1235 @type intensity: float 1236 @param dimensions: list of dimension values 1237 @param dimensions: iterable of float 1238 """ 1239 1240 dimensions = list(dimensions) 1241 if len(dimensions) != self.num_dimensions: 1242 raise ValueError("Invalid number of dimensions") 1243 1244 peak = NOEPeak(intensity, dimensions, self) 1245 self._peaks.append(peak) 1246 1247 if peak.intensity < self._min: 1248 self._min = peak.intensity 1249 if peak.intensity > self._max: 1250 self._max = peak.intensity

1251

1252 - def element(self, i):

1253 """ 1254 Return the chemical element (nucleus) type at dimension index C{i}. 1255 @rtype: L{EnumItem} 1256 """ 1257 return self._elements[i]

1258

Source Code for Package csb.bio.nmr