1  """ 
   2  NMR related objects. 
   3  """ 
   4   
   5  import os 
   6  import numpy.linalg 
   7  import xml.dom.minidom 
   8   
   9  import csb.io.tsv 
  10  import csb.core as pu 
  11   
  12  from csb.statistics.pdf import GeneralizedNormal 
  13  from csb.bio.sequence import ProteinAlphabet 
  14  from csb.bio.structure import ChemElements 
  19   
  22   
  25      """ 
  26      Utility class containing all necessary data and methods for computing 
  27      secondary chemical shifts. 
  28       
  29      @note: You are supposed to obtain an instance of this object only via 
  30             the dedicated factory (see L{RandomCoil.get}). The factory 
  31             ensures a "singleton with lazy instantiation" behavior. This is 
  32             needed since this object loads static data from the file system. 
  33      """ 
  34       
  35      RESOURCES = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'resources') 
  36       
  37      _instance = None     
  38   
  39       
  40      @staticmethod 
  50           
  52           
  53          if RandomCoil._instance is not None: 
  54              raise NotImplementedError("Can't instantiate a singleton") 
  55   
  56          RandomCoil._instance = self 
  57   
  58          self._offsets = (-2, -1, 1, 2) 
  59          self._reference = {} 
  60          self._corrections = {} 
  61           
  62          self._initialize() 
   63       
  70   
  71 -    def _load(self, ref, cor): 
   98       
 133   
 135          """ 
 136          Compute a secondary shift given a raw shift C{value} for a specific 
 137          residue and its neighboring residues. 
 138           
 139          @param chain: the protein chain containing the C{nucleus} 
 140          @type chain: L{Chain} 
 141          @param residue: the residue containing the C{nucleus}. This can be 
 142                          a residue object, id (sequence number + insertion 
 143                          code, string) or rank (integer, 1-based) 
 144          @type residue: L{Residue}, str or int 
 145          @param nucleus: atom name (PDB format) 
 146          @type nucleus: str 
 147          @param value: raw chemical shift value 
 148          @type value: float             
 149          """ 
 150          try: 
 151              if isinstance(residue, int): 
 152                  residue = chain.residues[residue] 
 153              elif isinstance(residue, pu.string): 
 154                  residue = chain.find(residue) 
 155              else: 
 156                  residue = chain.residues[residue.rank] 
 157          except (pu.ItemNotFoundError, pu.CollectionIndexError): 
 158              raise InvalidResidueError("Can't find residue {0} in {1}".format(residue, chain)) 
 159               
 160          shift = self.simple_secondary_shift(residue.type, nucleus, value) 
 161           
 162          for offset in self._offsets: 
 163               
 164              if 1 <= (residue.rank + offset) <= chain.length: 
 165                  try: 
 166                      neighbor = chain.residues[residue.rank + offset] 
 167                      shift -= self._corrections[neighbor.type][nucleus][offset * -1] 
 168                                        
 169                  except KeyError: 
 170                      continue      
 171           
 172          return shift 
   173   
 176       
 177      RESOURCES = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'resources') 
 178       
 179      _instance = None 
 180       
 181      @staticmethod 
 191       
 193           
 194          self._table = {} 
 195          self._initialize() 
  196           
 198           
 199          resource = os.path.join(AtomConnectivity.RESOURCES, 'AtomConnectivity.xml') 
 200          root = xml.dom.minidom.parse(resource) 
 201           
 202          for r in root.documentElement.getElementsByTagName('residue'): 
 203              residue = pu.Enum.parsename(ProteinAlphabet, r.getAttribute('type')) 
 204              self._table[residue] = {} 
 205               
 206              for a in r.getElementsByTagName('atom'): 
 207                  atom = a.getAttribute('name') 
 208                  self._table[residue][atom] = set() 
 209               
 210              for b in r.getElementsByTagName('bond'): 
 211                  atom1 = b.getAttribute('atom1') 
 212                  atom2 = b.getAttribute('atom2') 
 213                  self._table[residue][atom1].add(atom2) 
 214                  self._table[residue][atom2].add(atom1) 
  215                   
 217          """ 
 218          Return True if C{atom1} is covalently connected to C{atom2} in C{residue} 
 219           
 220          @param residue: residue type (a member of L{ProteinAlphabet}) 
 221          @type residue: L{EnumItem}  
 222          @param atom1: first atom name (IUPAC) 
 223          @type atom1: str 
 224          @param atom2: second atom name (IUPAC) 
 225          @type atom2: str 
 226           
 227          @rtype: boolean 
 228          """ 
 229          if residue in self._table: 
 230              r = self._table[residue] 
 231              if atom1 in r: 
 232                  return atom2 in r[atom1] 
 233           
 234          return False 
  235       
 237          """ 
 238          Return all atoms covalently connected to C{atom} in C{residue}. 
 239   
 240          @param residue: residue type (a member of L{ProteinAlphabet}) 
 241          @type residue: L{EnumItem}          
 242          @param atom: source atom name (IUPAC) 
 243          @type atom: str 
 244           
 245          @rtype: tuple of str 
 246          """ 
 247          if residue in self._table: 
 248              r = self._table[residue] 
 249              if atom in r: 
 250                  return tuple(r[atom]) 
 251           
 252          return tuple() 
  253       
 255          """ 
 256          Return True if C{atom} name is contained in C{residue}. 
 257           
 258          @param residue: residue type (a member of L{ProteinAlphabet}) 
 259          @type residue: L{EnumItem}   
 260          @param atom: atom name (IUPAC) 
 261          @type atom: str 
 262           
 263          @rtype: bool         
 264          """ 
 265          if residue in self._table: 
 266              return atom in self._table[residue] 
 267           
 268          return False         
  269       
 271          """ 
 272          Get all atoms contained in C{residue}. 
 273   
 274          @param residue: residue type (a member of L{ProteinAlphabet}) 
 275          @type residue: L{EnumItem}          
 276          @param prefix: atom name prefix wildcard (IUPAC) 
 277          @type prefix: str 
 278           
 279          @return: set of atom names 
 280          @rtype: frozenset of str 
 281          """ 
 282          t = self._table[residue] 
 283          if residue in self._table: 
 284              return frozenset(a for a in t if a.startswith(prefix)) 
 285           
 286          return frozenset() 
   287       
 290      """ 
 291      Pre-built atom filters for L{ContactMap}s.  
 292      """ 
 293   
 294      @staticmethod 
 297           
 298      @staticmethod 
 301       
 302      @staticmethod 
 305       
 306      @staticmethod 
 308          return a.name == 'CA' 
   309   
 612           
 619           
 622      """ 
 623      Utility class for working with chemical shift labels. 
 624       
 625      @param residue: residue type 
 626      @type residue: L{EnumItem} 
 627      @param rank: residue position (1-based) 
 628      @type rank: int 
 629      @param atom_name: nucleus name 
 630      @type atom_name: str  
 631      """ 
 632       
 633      @staticmethod 
 634 -    def build(residue_type, position, atom_name): 
  635          """ 
 636          Build a new string label by specifying its components. 
 637          @rtype: str         
 638          """ 
 639          return '{0!s}#{1}:{2}'.format(residue_type, position, atom_name) 
  640   
 641      @staticmethod     
 643          """ 
 644          Build a new string label from a L{ChemShiftInfo}. 
 645          @rtype: str         
 646          """ 
 647          return Label.build(shift.residue, shift.position, shift.name) 
  648   
 649      @staticmethod     
 656       
 657      @staticmethod 
 659          """ 
 660          Return True if the labels of a L{ChemShiftInfo} and an L{Atom} match. 
 661          @rtype: bool         
 662          """           
 663           
 664          l = Label.from_shift(shift) 
 665          r = Label.from_atom(atom) 
 666           
 667          return r == l 
  668       
 669      @staticmethod 
 677   
 678      @staticmethod     
 680          """ 
 681          Parse the components of a string nucleus label. 
 682          @return: (residue, rank, atom) 
 683          @rtype: 3-tuple 
 684          """         
 685          parts = label.split("#") 
 686          residue = parts[0] 
 687           
 688          subparts = parts[1].split(":") 
 689          rank = int(subparts[0]) 
 690          atom = subparts[1] 
 691           
 692          return (residue, rank, atom) 
  693       
 694      @staticmethod 
 702       
 703 -    def __init__(self, residue, rank, atom_name): 
  708           
 709      @property 
 711          """ 
 712          Residue type (a L{ProteinAlphabet} member) 
 713          """ 
 714          return self._residue 
  715       
 716      @property 
 718          """ 
 719          Residue rank (1-based) 
 720          """ 
 721          return self._rank 
  722       
 723      @property 
 725          """ 
 726          Nucleus name 
 727          """         
 728          return self._atom 
  729       
 731          return Label.build(self._residue, self._rank, self._atom) 
   732       
 735      """ 
 736      Chemical shift struct. 
 737       
 738      @param position: residue rank (1-based) 
 739      @type position: int 
 740      @param residue: amino acid type (a member of L{ProteinAlphabet}) 
 741      @type residue: str or L{EnumItem} 
 742      @param name: nucleus label 
 743      @type name: str 
 744      @param element: nucleus type (a member of L{ChemElements}) 
 745      @type element: str or L{EnumItem} 
 746      @param shift: chemical shift value 
 747      @type shift: float 
 748      """ 
 749       
 750 -    def __init__(self, position, residue, name, element, shift): 
  763           
 765          """ 
 766          Clone the current shift and create a new one with the specified 
 767          nucleus label. 
 768           
 769          @rtype: L{ChemShiftInfo} 
 770          """ 
 771          ni = self 
 772          return ChemShiftInfo(ni.position, repr(ni.residue), name, repr(ni.element), ni.shift) 
  773           
 776       
 777      @property 
 779          """ 
 780          String label representation 
 781          @rtype: str 
 782          """ 
 783          return str(self) 
   784   
 786      """ 
 787      Describes a network of covalently connected, chemical shift visible nuclei. 
 788       
 789      @param shifts: chemical shift instances 
 790      @type shifts: iterable of L{ChemShiftInfo} 
 791      """ 
 792       
 811       
 813          """ 
 814          Connect two nuclei. 
 815           
 816          @param cs1: first chemical shift instance 
 817          @type cs1: L{ChemShiftInfo} 
 818          @param cs2: second chemical shift instance          
 819          @type cs2: L{ChemShiftInfo} 
 820          """ 
 821           
 822          try: 
 823              self._neighbors[cs1].add(cs2) 
 824              self._neighbors[cs2].add(cs1) 
 825          except KeyError: 
 826              raise ValueError("Unknown chemical shift") 
  827           
 829          """ 
 830          Return an iterator over all covalently connected neuclei to a given 
 831          C{source}. 
 832           
 833          @param source: source chemical shift 
 834          @type source: L{ChemShiftInfo} 
 835           
 836          @rtype: iterator of L{ChemShiftInfo} 
 837          """ 
 838           
 839           
 840          if source not in self._neighbors: 
 841              raise ValueError("No such chemical shift in this network") 
 842   
 843          for cs in self._neighbors[source]: 
 844              if element is None or cs.element == element: 
 845                  yield cs 
  846                   
 848          return iter(self._neighbors) 
   849       
 851      """ 
 852      Chemical shift similarity scoring model. See C{ScoringModel.NUCLEI} for 
 853      a list of supported chemical shift types.  
 854      """ 
 855   
 856      NUCLEI = ('CA', 'CB', 'C', 'N', 'HA') 
 857   
 858       
 860           
 861          self._pos = {} 
 862          self._neg = {} 
 863           
 864          self._pos['CA'] = GeneralizedNormal(0.02, 1.32, 1.1) 
 865          self._neg['CA'] = GeneralizedNormal(-0.08, 4.23, 2.2) 
 866                   
 867          self._pos['CB'] = GeneralizedNormal(0.06, 1.32, 1.0) 
 868          self._neg['CB'] = GeneralizedNormal(0.08, 2.41, 1.2) 
 869                   
 870          self._pos['C']  = GeneralizedNormal(0.12, 1.52, 1.4) 
 871          self._neg['C']  = GeneralizedNormal(-0.13, 3.42, 2.1) 
 872           
 873          self._pos['N']  = GeneralizedNormal(0.23, 4.39, 1.4) 
 874          self._neg['N']  = GeneralizedNormal(0.17, 7.08, 1.9) 
 875                   
 876          self._pos['HA'] = GeneralizedNormal(0.00, 0.27, 1.0) 
 877          self._neg['HA'] = GeneralizedNormal(-0.01, 0.66, 1.4) 
 878           
 879          assert set(self._pos) == set(ChemShiftScoringModel.NUCLEI) 
 880          assert set(self._neg) == set(ChemShiftScoringModel.NUCLEI)  
  881   
 883          """ 
 884          Return the probability that a given chemical shift difference 
 885          indicates structural similarity (true positive match). 
 886           
 887          @param nucleus: chemical shift (a member of C{ScoringModel.NUCLEI}) 
 888          @type nucleus:  str 
 889          @param deltas: chemical shift difference(s): q-s 
 890          @type deltas:  float or list of floats 
 891           
 892          @return: the raw value of the probability density function 
 893          @rtype: float or array of floats 
 894          """ 
 895          results = self._pos[nucleus].evaluate([deltas])  
 896          return results[0] 
  897   
 899          """ 
 900          Return the probability that a given chemical shift difference 
 901          indicates no structural similarity (true negative match). 
 902           
 903          @param nucleus: chemical shift (a member of C{ScoringModel.NUCLEI}) 
 904          @type nucleus:  str 
 905          @param deltas: chemical shift difference(s): q-s 
 906          @type deltas:  float or list of floats 
 907           
 908          @return: the raw value of the probability density function 
 909          @rtype: float or array of floats 
 910          """         
 911          results = self._neg[nucleus].evaluate([deltas])  
 912          return results[0] 
  913               
 914 -    def score(self, nucleus, deltas): 
  915          """ 
 916          Return the bit score for a given chemical shift difference. 
 917           
 918          @param nucleus: chemical shift (a member of C{ScoringModel.NUCLEI}) 
 919          @type nucleus:  str 
 920          @param deltas: chemical shift difference(s): q-s 
 921          @type deltas:  float or list of floats 
 922           
 923          @return: bit score 
 924          @rtype: float or array of floats 
 925          """ 
 926          pos = self.positive(nucleus, deltas) 
 927          neg = self.negative(nucleus, deltas) 
 928           
 929          return numpy.log2(pos / neg) 
   930   
 933      """ 
 934      Describes a single NOE peak. 
 935       
 936      @param intensity: peak intensity 
 937      @type intensity: float 
 938      @param dimensions: list of dimension values 
 939      @type dimensions: iterable of float 
 940      @param spectrum: owning NOE spectrum 
 941      @type spectrum: L{NOESpectrum} 
 942      """ 
 943       
 944 -    def __init__(self, intensity, dimensions, spectrum): 
  945           
 946          self._dimensions = list(dimensions) 
 947          self._intensity = float(intensity) 
 948          self._spectrum = spectrum 
  949           
 950      @property 
 952          """ 
 953          Peak intensity 
 954          @rtype: float 
 955          """ 
 956          return self._intensity 
  957       
 958      @property 
 960          """ 
 961          Number of dimensions 
 962          @rtype: int 
 963          """         
 964          return len(self._dimensions) 
  965       
 967          """ 
 968          Return True if the owning spectrum contains a dimension of the specified type 
 969           
 970          @param e: element (dimension) type (see L{ChemElements}) 
 971          @type e: L{EnumItem} 
 972           
 973          @rtype: bool 
 974          """         
 975          return self._spectrum.has_element(e)  
  976       
 979       
 981          return iter(self._dimensions) 
  982       
 984          return '<NOEPeak: {0}, I={1}>'.format(self._dimensions, self._intensity) 
  985       
 987          """ 
 988          Return the dimension (nucleus) type at dimension index i 
 989           
 990          @param i: dimension index (0-based) 
 991          @type i: int 
 992           
 993          @return: nucleus type 
 994          @rtype: L{EnumItem} 
 995          """ 
 996          return self._spectrum.element(i) 
  997       
 998 -    def get(self, column): 
  999          """ 
1000          Get the value of the specified dimension.  
1001           
1002          @param column: dimension index (0-based) 
1003          @type column: int 
1004           
1005          @return: dimension value         
1006          @rtype: float 
1007          """ 
1008          if 0 <= column < len(self._dimensions):   
1009              return self._dimensions[column] 
1010          else: 
1011              raise IndexError("Dimension index out of range") 
 1012           
1014          """ 
1015          Return True of dimension index C{i} has covalently connected dimensions. 
1016           
1017          @param i: dimension index (0-based) 
1018          @type i: int 
1019           
1020          @rtype: bool 
1021          """ 
1022          return self._spectrum.has_connected_dimensions(i) 
 1023           
1025          """ 
1026          Return a list of all dimension indices, covalently connected to 
1027          dimension C{i}. 
1028   
1029          @param i: dimension index (0-based) 
1030          @type i: int 
1031           
1032          @rtype: iterable of L{EnumItem}          
1033          """ 
1034          return self._spectrum.connected_dimensions(i) 
  1035   
1038      """ 
1039      Describes an NOE spectrum. 
1040       
1041      @param elements: list of dimension (nucleus) types for each dimension 
1042      @type elements: iterable of L{EnumItem} (L{ChemElements}) or str  
1043      """ 
1066           
1067      @staticmethod 
1068 -    def join(spectrum, *spectra): 
 1069          """ 
1070          Merge multiple L{NOESpectrum} instances. All C{spectra} must have matching 
1071          dimensions according to the master C{spectrum}. 
1072           
1073          @return: merged spectrum 
1074          @rtype: L{NOESpectrum} 
1075          """ 
1076          elements = tuple(spectrum.dimensions) 
1077          joint = NOESpectrum(map(repr, elements)) 
1078           
1079          for i, dummy in enumerate(elements): 
1080              for j in spectrum.connected_dimensions(i): 
1081                  joint.connect(i, j) 
1082           
1083          for s in [spectrum] + list(spectra): 
1084              if tuple(s.dimensions) != elements: 
1085                  raise ValueError("Incompatible spectrum: {0}".format(s)) 
1086              for p in s:  
1087                  joint.add(p.intensity, list(p)) 
1088                   
1089          return joint  
 1090       
1091           
1093          return iter(self._peaks) 
 1094       
1096          return len(self._peaks) 
 1097       
1099          return '<NOESpectrum: {0}>'.format(self._elements) 
 1100       
1102          try: 
1103              return self._peaks[i] 
1104          except IndexError: 
1105              raise IndexError("Peak index out of range") 
 1106       
1107      @property 
1109          """ 
1110          Minimum intensity 
1111          @rtype: float 
1112          """ 
1113          return self._min 
 1114   
1115      @property 
1117          """ 
1118          Maximum intensity 
1119          @rtype: float 
1120          """ 
1121          return self._max 
 1122               
1123      @property 
1125          """ 
1126          Tuple of all dimensions (nucleus types) 
1127          @rtype: tuple of L{EnumItem} 
1128          """ 
1129          return tuple(self._elements) 
 1130       
1131      @property 
1133          """ 
1134          Tuple of all proton dimension indices 
1135          @rtype: tuple of int 
1136          """ 
1137          return tuple(self._protondim)     
 1138   
1139      @property     
1141          """ 
1142          Number of dimensions 
1143          @rtype: int 
1144          """         
1145          return len(self._elements) 
 1146       
1147      @property     
1149          """ 
1150          Number of proton dimensions 
1151          @rtype: int 
1152          """              
1153          return len(self._protondim)     
 1154       
1156          """ 
1157          Return True if the spectrum contains a dimension of the specified type 
1158           
1159          @param e: element (dimension) type (see L{ChemElements}) 
1160          @type e: L{EnumItem} 
1161           
1162          @rtype: bool 
1163          """           
1164          return e in self._elemset 
 1165       
1167          """ 
1168          Mark dimensions with indices C{i1} and C{i2} as covalently connected. 
1169           
1170          @param i1: dimension index 1 (0-based) 
1171          @type i1: int 
1172          @param i2: dimension index 2 (0-based) 
1173          @type i2: int          
1174          """ 
1175   
1176          for i in [i1, i2]: 
1177              if not 0 <= i < self.num_dimensions: 
1178                  raise IndexError("Dimension index out of range") 
1179               
1180          if i1 == i2: 
1181              raise ValueError("Can't connect a dimension to itself") 
1182          if not self._can_connect(i1, i2): 
1183              raise ValueError("Only proton-nonproton bonds are allowed")         
1184               
1185          self._connected.setdefault(i1, set()).add(i2) 
1186          self._connected.setdefault(i2, set()).add(i1) 
 1187           
1189           
1190          pair = set() 
1191   
1192          for i in [i1, i2]: 
1193              is_proton = self.element(i) == ChemElements.H 
1194              pair.add(is_proton) 
1195               
1196          if True in pair and False in pair: 
1197              return True 
1198           
1199          return False         
 1200           
1202          """ 
1203          Return True of dimension index C{i} has covalently connected dimensions. 
1204           
1205          @param i: dimension index (0-based) 
1206          @type i: int 
1207           
1208          @rtype: bool 
1209          """ 
1210          if i in self._connected: 
1211              return len(self._connected[i]) > 0 
1212           
1213          return False 
 1214           
1216          """ 
1217          Return a list of all dimension indices, covalently connected to 
1218          dimension C{i}. 
1219   
1220          @param i: dimension index (0-based) 
1221          @type i: int 
1222           
1223          @rtype: iterable of int         
1224          """         
1225          if i in self._connected: 
1226              return tuple(self._connected[i]) 
1227           
1228          return tuple()     
 1229           
1230 -    def add(self, intensity, dimensions): 
 1231          """ 
1232          Add a new NOE peak. 
1233           
1234          @param intensity: peak intensity 
1235          @type intensity: float 
1236          @param dimensions: list of dimension values 
1237          @param dimensions: iterable of float 
1238          """ 
1239           
1240          dimensions = list(dimensions)        
1241          if len(dimensions) != self.num_dimensions: 
1242              raise ValueError("Invalid number of dimensions") 
1243           
1244          peak = NOEPeak(intensity, dimensions, self) 
1245          self._peaks.append(peak) 
1246           
1247          if peak.intensity < self._min: 
1248              self._min = peak.intensity             
1249          if peak.intensity > self._max: 
1250              self._max = peak.intensity             
 1251           
1253          """ 
1254          Return the chemical element (nucleus) type at dimension index C{i}. 
1255          @rtype: L{EnumItem} 
1256          """ 
1257          return self._elements[i] 
  1258