1 """
2 NMR related objects.
3 """
4
5 import os
6 import numpy.linalg
7 import xml.dom.minidom
8
9 import csb.io.tsv
10 import csb.core as pu
11
12 from csb.statistics.pdf import GeneralizedNormal
13 from csb.bio.sequence import ProteinAlphabet
14 from csb.bio.structure import ChemElements
19
22
25 """
26 Utility class containing all necessary data and methods for computing
27 secondary chemical shifts.
28
29 @note: You are supposed to obtain an instance of this object only via
30 the dedicated factory (see L{RandomCoil.get}). The factory
31 ensures a "singleton with lazy instantiation" behavior. This is
32 needed since this object loads static data from the file system.
33 """
34
35 RESOURCES = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'resources')
36
37 _instance = None
38
39
40 @staticmethod
50
52
53 if RandomCoil._instance is not None:
54 raise NotImplementedError("Can't instantiate a singleton")
55
56 RandomCoil._instance = self
57
58 self._offsets = (-2, -1, 1, 2)
59 self._reference = {}
60 self._corrections = {}
61
62 self._initialize()
63
70
71 - def _load(self, ref, cor):
98
133
135 """
136 Compute a secondary shift given a raw shift C{value} for a specific
137 residue and its neighboring residues.
138
139 @param chain: the protein chain containing the C{nucleus}
140 @type chain: L{Chain}
141 @param residue: the residue containing the C{nucleus}. This can be
142 a residue object, id (sequence number + insertion
143 code, string) or rank (integer, 1-based)
144 @type residue: L{Residue}, str or int
145 @param nucleus: atom name (PDB format)
146 @type nucleus: str
147 @param value: raw chemical shift value
148 @type value: float
149 """
150 try:
151 if isinstance(residue, int):
152 residue = chain.residues[residue]
153 elif isinstance(residue, pu.string):
154 residue = chain.find(residue)
155 else:
156 residue = chain.residues[residue.rank]
157 except (pu.ItemNotFoundError, pu.CollectionIndexError):
158 raise InvalidResidueError("Can't find residue {0} in {1}".format(residue, chain))
159
160 shift = self.simple_secondary_shift(residue.type, nucleus, value)
161
162 for offset in self._offsets:
163
164 if 1 <= (residue.rank + offset) <= chain.length:
165 try:
166 neighbor = chain.residues[residue.rank + offset]
167 shift -= self._corrections[neighbor.type][nucleus][offset * -1]
168
169 except KeyError:
170 continue
171
172 return shift
173
176
177 RESOURCES = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'resources')
178
179 _instance = None
180
181 @staticmethod
191
193
194 self._table = {}
195 self._initialize()
196
198
199 resource = os.path.join(AtomConnectivity.RESOURCES, 'AtomConnectivity.xml')
200 root = xml.dom.minidom.parse(resource)
201
202 for r in root.documentElement.getElementsByTagName('residue'):
203 residue = pu.Enum.parsename(ProteinAlphabet, r.getAttribute('type'))
204 self._table[residue] = {}
205
206 for a in r.getElementsByTagName('atom'):
207 atom = a.getAttribute('name')
208 self._table[residue][atom] = set()
209
210 for b in r.getElementsByTagName('bond'):
211 atom1 = b.getAttribute('atom1')
212 atom2 = b.getAttribute('atom2')
213 self._table[residue][atom1].add(atom2)
214 self._table[residue][atom2].add(atom1)
215
217 """
218 Return True if C{atom1} is covalently connected to C{atom2} in C{residue}
219
220 @param residue: residue type (a member of L{ProteinAlphabet})
221 @type residue: L{EnumItem}
222 @param atom1: first atom name (IUPAC)
223 @type atom1: str
224 @param atom2: second atom name (IUPAC)
225 @type atom2: str
226
227 @rtype: boolean
228 """
229 if residue in self._table:
230 r = self._table[residue]
231 if atom1 in r:
232 return atom2 in r[atom1]
233
234 return False
235
237 """
238 Return all atoms covalently connected to C{atom} in C{residue}.
239
240 @param residue: residue type (a member of L{ProteinAlphabet})
241 @type residue: L{EnumItem}
242 @param atom: source atom name (IUPAC)
243 @type atom: str
244
245 @rtype: tuple of str
246 """
247 if residue in self._table:
248 r = self._table[residue]
249 if atom in r:
250 return tuple(r[atom])
251
252 return tuple()
253
255 """
256 Return True if C{atom} name is contained in C{residue}.
257
258 @param residue: residue type (a member of L{ProteinAlphabet})
259 @type residue: L{EnumItem}
260 @param atom: atom name (IUPAC)
261 @type atom: str
262
263 @rtype: bool
264 """
265 if residue in self._table:
266 return atom in self._table[residue]
267
268 return False
269
271 """
272 Get all atoms contained in C{residue}.
273
274 @param residue: residue type (a member of L{ProteinAlphabet})
275 @type residue: L{EnumItem}
276 @param prefix: atom name prefix wildcard (IUPAC)
277 @type prefix: str
278
279 @return: set of atom names
280 @rtype: frozenset of str
281 """
282 t = self._table[residue]
283 if residue in self._table:
284 return frozenset(a for a in t if a.startswith(prefix))
285
286 return frozenset()
287
290 """
291 Pre-built atom filters for L{ContactMap}s.
292 """
293
294 @staticmethod
297
298 @staticmethod
301
302 @staticmethod
305
306 @staticmethod
308 return a.name == 'CA'
309
612
619
622 """
623 Utility class for working with chemical shift labels.
624
625 @param residue: residue type
626 @type residue: L{EnumItem}
627 @param rank: residue position (1-based)
628 @type rank: int
629 @param atom_name: nucleus name
630 @type atom_name: str
631 """
632
633 @staticmethod
634 - def build(residue_type, position, atom_name):
635 """
636 Build a new string label by specifying its components.
637 @rtype: str
638 """
639 return '{0!s}#{1}:{2}'.format(residue_type, position, atom_name)
640
641 @staticmethod
643 """
644 Build a new string label from a L{ChemShiftInfo}.
645 @rtype: str
646 """
647 return Label.build(shift.residue, shift.position, shift.name)
648
649 @staticmethod
656
657 @staticmethod
659 """
660 Return True if the labels of a L{ChemShiftInfo} and an L{Atom} match.
661 @rtype: bool
662 """
663
664 l = Label.from_shift(shift)
665 r = Label.from_atom(atom)
666
667 return r == l
668
669 @staticmethod
677
678 @staticmethod
680 """
681 Parse the components of a string nucleus label.
682 @return: (residue, rank, atom)
683 @rtype: 3-tuple
684 """
685 parts = label.split("#")
686 residue = parts[0]
687
688 subparts = parts[1].split(":")
689 rank = int(subparts[0])
690 atom = subparts[1]
691
692 return (residue, rank, atom)
693
694 @staticmethod
702
703 - def __init__(self, residue, rank, atom_name):
708
709 @property
711 """
712 Residue type (a L{ProteinAlphabet} member)
713 """
714 return self._residue
715
716 @property
718 """
719 Residue rank (1-based)
720 """
721 return self._rank
722
723 @property
725 """
726 Nucleus name
727 """
728 return self._atom
729
731 return Label.build(self._residue, self._rank, self._atom)
732
735 """
736 Chemical shift struct.
737
738 @param position: residue rank (1-based)
739 @type position: int
740 @param residue: amino acid type (a member of L{ProteinAlphabet})
741 @type residue: str or L{EnumItem}
742 @param name: nucleus label
743 @type name: str
744 @param element: nucleus type (a member of L{ChemElements})
745 @type element: str or L{EnumItem}
746 @param shift: chemical shift value
747 @type shift: float
748 """
749
750 - def __init__(self, position, residue, name, element, shift):
763
765 """
766 Clone the current shift and create a new one with the specified
767 nucleus label.
768
769 @rtype: L{ChemShiftInfo}
770 """
771 ni = self
772 return ChemShiftInfo(ni.position, repr(ni.residue), name, repr(ni.element), ni.shift)
773
776
777 @property
779 """
780 String label representation
781 @rtype: str
782 """
783 return str(self)
784
786 """
787 Describes a network of covalently connected, chemical shift visible nuclei.
788
789 @param shifts: chemical shift instances
790 @type shifts: iterable of L{ChemShiftInfo}
791 """
792
811
813 """
814 Connect two nuclei.
815
816 @param cs1: first chemical shift instance
817 @type cs1: L{ChemShiftInfo}
818 @param cs2: second chemical shift instance
819 @type cs2: L{ChemShiftInfo}
820 """
821
822 try:
823 self._neighbors[cs1].add(cs2)
824 self._neighbors[cs2].add(cs1)
825 except KeyError:
826 raise ValueError("Unknown chemical shift")
827
829 """
830 Return an iterator over all covalently connected neuclei to a given
831 C{source}.
832
833 @param source: source chemical shift
834 @type source: L{ChemShiftInfo}
835
836 @rtype: iterator of L{ChemShiftInfo}
837 """
838
839
840 if source not in self._neighbors:
841 raise ValueError("No such chemical shift in this network")
842
843 for cs in self._neighbors[source]:
844 if element is None or cs.element == element:
845 yield cs
846
848 return iter(self._neighbors)
849
851 """
852 Chemical shift similarity scoring model. See C{ScoringModel.NUCLEI} for
853 a list of supported chemical shift types.
854 """
855
856 NUCLEI = ('CA', 'CB', 'C', 'N', 'HA')
857
858
860
861 self._pos = {}
862 self._neg = {}
863
864 self._pos['CA'] = GeneralizedNormal(0.02, 1.32, 1.1)
865 self._neg['CA'] = GeneralizedNormal(-0.08, 4.23, 2.2)
866
867 self._pos['CB'] = GeneralizedNormal(0.06, 1.32, 1.0)
868 self._neg['CB'] = GeneralizedNormal(0.08, 2.41, 1.2)
869
870 self._pos['C'] = GeneralizedNormal(0.12, 1.52, 1.4)
871 self._neg['C'] = GeneralizedNormal(-0.13, 3.42, 2.1)
872
873 self._pos['N'] = GeneralizedNormal(0.23, 4.39, 1.4)
874 self._neg['N'] = GeneralizedNormal(0.17, 7.08, 1.9)
875
876 self._pos['HA'] = GeneralizedNormal(0.00, 0.27, 1.0)
877 self._neg['HA'] = GeneralizedNormal(-0.01, 0.66, 1.4)
878
879 assert set(self._pos) == set(ChemShiftScoringModel.NUCLEI)
880 assert set(self._neg) == set(ChemShiftScoringModel.NUCLEI)
881
883 """
884 Return the probability that a given chemical shift difference
885 indicates structural similarity (true positive match).
886
887 @param nucleus: chemical shift (a member of C{ScoringModel.NUCLEI})
888 @type nucleus: str
889 @param deltas: chemical shift difference(s): q-s
890 @type deltas: float or list of floats
891
892 @return: the raw value of the probability density function
893 @rtype: float or array of floats
894 """
895 results = self._pos[nucleus].evaluate([deltas])
896 return results[0]
897
899 """
900 Return the probability that a given chemical shift difference
901 indicates no structural similarity (true negative match).
902
903 @param nucleus: chemical shift (a member of C{ScoringModel.NUCLEI})
904 @type nucleus: str
905 @param deltas: chemical shift difference(s): q-s
906 @type deltas: float or list of floats
907
908 @return: the raw value of the probability density function
909 @rtype: float or array of floats
910 """
911 results = self._neg[nucleus].evaluate([deltas])
912 return results[0]
913
914 - def score(self, nucleus, deltas):
915 """
916 Return the bit score for a given chemical shift difference.
917
918 @param nucleus: chemical shift (a member of C{ScoringModel.NUCLEI})
919 @type nucleus: str
920 @param deltas: chemical shift difference(s): q-s
921 @type deltas: float or list of floats
922
923 @return: bit score
924 @rtype: float or array of floats
925 """
926 pos = self.positive(nucleus, deltas)
927 neg = self.negative(nucleus, deltas)
928
929 return numpy.log2(pos / neg)
930
933 """
934 Describes a single NOE peak.
935
936 @param intensity: peak intensity
937 @type intensity: float
938 @param dimensions: list of dimension values
939 @type dimensions: iterable of float
940 @param spectrum: owning NOE spectrum
941 @type spectrum: L{NOESpectrum}
942 """
943
944 - def __init__(self, intensity, dimensions, spectrum):
945
946 self._dimensions = list(dimensions)
947 self._intensity = float(intensity)
948 self._spectrum = spectrum
949
950 @property
952 """
953 Peak intensity
954 @rtype: float
955 """
956 return self._intensity
957
958 @property
960 """
961 Number of dimensions
962 @rtype: int
963 """
964 return len(self._dimensions)
965
967 """
968 Return True if the owning spectrum contains a dimension of the specified type
969
970 @param e: element (dimension) type (see L{ChemElements})
971 @type e: L{EnumItem}
972
973 @rtype: bool
974 """
975 return self._spectrum.has_element(e)
976
979
981 return iter(self._dimensions)
982
984 return '<NOEPeak: {0}, I={1}>'.format(self._dimensions, self._intensity)
985
987 """
988 Return the dimension (nucleus) type at dimension index i
989
990 @param i: dimension index (0-based)
991 @type i: int
992
993 @return: nucleus type
994 @rtype: L{EnumItem}
995 """
996 return self._spectrum.element(i)
997
998 - def get(self, column):
999 """
1000 Get the value of the specified dimension.
1001
1002 @param column: dimension index (0-based)
1003 @type column: int
1004
1005 @return: dimension value
1006 @rtype: float
1007 """
1008 if 0 <= column < len(self._dimensions):
1009 return self._dimensions[column]
1010 else:
1011 raise IndexError("Dimension index out of range")
1012
1014 """
1015 Return True of dimension index C{i} has covalently connected dimensions.
1016
1017 @param i: dimension index (0-based)
1018 @type i: int
1019
1020 @rtype: bool
1021 """
1022 return self._spectrum.has_connected_dimensions(i)
1023
1025 """
1026 Return a list of all dimension indices, covalently connected to
1027 dimension C{i}.
1028
1029 @param i: dimension index (0-based)
1030 @type i: int
1031
1032 @rtype: iterable of L{EnumItem}
1033 """
1034 return self._spectrum.connected_dimensions(i)
1035
1038 """
1039 Describes an NOE spectrum.
1040
1041 @param elements: list of dimension (nucleus) types for each dimension
1042 @type elements: iterable of L{EnumItem} (L{ChemElements}) or str
1043 """
1066
1067 @staticmethod
1068 - def join(spectrum, *spectra):
1069 """
1070 Merge multiple L{NOESpectrum} instances. All C{spectra} must have matching
1071 dimensions according to the master C{spectrum}.
1072
1073 @return: merged spectrum
1074 @rtype: L{NOESpectrum}
1075 """
1076 elements = tuple(spectrum.dimensions)
1077 joint = NOESpectrum(map(repr, elements))
1078
1079 for i, dummy in enumerate(elements):
1080 for j in spectrum.connected_dimensions(i):
1081 joint.connect(i, j)
1082
1083 for s in [spectrum] + list(spectra):
1084 if tuple(s.dimensions) != elements:
1085 raise ValueError("Incompatible spectrum: {0}".format(s))
1086 for p in s:
1087 joint.add(p.intensity, list(p))
1088
1089 return joint
1090
1091
1093 return iter(self._peaks)
1094
1096 return len(self._peaks)
1097
1099 return '<NOESpectrum: {0}>'.format(self._elements)
1100
1102 try:
1103 return self._peaks[i]
1104 except IndexError:
1105 raise IndexError("Peak index out of range")
1106
1107 @property
1109 """
1110 Minimum intensity
1111 @rtype: float
1112 """
1113 return self._min
1114
1115 @property
1117 """
1118 Maximum intensity
1119 @rtype: float
1120 """
1121 return self._max
1122
1123 @property
1125 """
1126 Tuple of all dimensions (nucleus types)
1127 @rtype: tuple of L{EnumItem}
1128 """
1129 return tuple(self._elements)
1130
1131 @property
1133 """
1134 Tuple of all proton dimension indices
1135 @rtype: tuple of int
1136 """
1137 return tuple(self._protondim)
1138
1139 @property
1141 """
1142 Number of dimensions
1143 @rtype: int
1144 """
1145 return len(self._elements)
1146
1147 @property
1149 """
1150 Number of proton dimensions
1151 @rtype: int
1152 """
1153 return len(self._protondim)
1154
1156 """
1157 Return True if the spectrum contains a dimension of the specified type
1158
1159 @param e: element (dimension) type (see L{ChemElements})
1160 @type e: L{EnumItem}
1161
1162 @rtype: bool
1163 """
1164 return e in self._elemset
1165
1167 """
1168 Mark dimensions with indices C{i1} and C{i2} as covalently connected.
1169
1170 @param i1: dimension index 1 (0-based)
1171 @type i1: int
1172 @param i2: dimension index 2 (0-based)
1173 @type i2: int
1174 """
1175
1176 for i in [i1, i2]:
1177 if not 0 <= i < self.num_dimensions:
1178 raise IndexError("Dimension index out of range")
1179
1180 if i1 == i2:
1181 raise ValueError("Can't connect a dimension to itself")
1182 if not self._can_connect(i1, i2):
1183 raise ValueError("Only proton-nonproton bonds are allowed")
1184
1185 self._connected.setdefault(i1, set()).add(i2)
1186 self._connected.setdefault(i2, set()).add(i1)
1187
1189
1190 pair = set()
1191
1192 for i in [i1, i2]:
1193 is_proton = self.element(i) == ChemElements.H
1194 pair.add(is_proton)
1195
1196 if True in pair and False in pair:
1197 return True
1198
1199 return False
1200
1202 """
1203 Return True of dimension index C{i} has covalently connected dimensions.
1204
1205 @param i: dimension index (0-based)
1206 @type i: int
1207
1208 @rtype: bool
1209 """
1210 if i in self._connected:
1211 return len(self._connected[i]) > 0
1212
1213 return False
1214
1216 """
1217 Return a list of all dimension indices, covalently connected to
1218 dimension C{i}.
1219
1220 @param i: dimension index (0-based)
1221 @type i: int
1222
1223 @rtype: iterable of int
1224 """
1225 if i in self._connected:
1226 return tuple(self._connected[i])
1227
1228 return tuple()
1229
1230 - def add(self, intensity, dimensions):
1231 """
1232 Add a new NOE peak.
1233
1234 @param intensity: peak intensity
1235 @type intensity: float
1236 @param dimensions: list of dimension values
1237 @param dimensions: iterable of float
1238 """
1239
1240 dimensions = list(dimensions)
1241 if len(dimensions) != self.num_dimensions:
1242 raise ValueError("Invalid number of dimensions")
1243
1244 peak = NOEPeak(intensity, dimensions, self)
1245 self._peaks.append(peak)
1246
1247 if peak.intensity < self._min:
1248 self._min = peak.intensity
1249 if peak.intensity > self._max:
1250 self._max = peak.intensity
1251
1253 """
1254 Return the chemical element (nucleus) type at dimension index C{i}.
1255 @rtype: L{EnumItem}
1256 """
1257 return self._elements[i]
1258