"""
A Maximum-Entropy model for backbone torsion angles.
Reference: Rowicka and Otwinowski 2004
"""

import numpy

from csb.statistics.pdf import BaseDensity


class MaxentModel(BaseDensity):
    """
    Fourier expansion of a biangular log-probability density
    """
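
    # The model is an exponential-family (maximum-entropy) density over two
    # torsion angles: p(x, y) is proportional to exp(-beta * E(x, y)), where
    # E is the truncated Fourier series evaluated by energy() below,
    #
    #   E(x, y) = sum_{k,l} cc[k,l]*cos(k*x)*cos(l*y) + ss[k,l]*sin(k*x)*sin(l*y)
    #                     + cs[k,l]*cos(k*x)*sin(l*y) + sc[k,l]*sin(k*x)*cos(l*y)
    #
    # with the four coefficient matrices stored in _cc, _ss, _cs and _sc.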
    def __init__(self, n, beta=1.0):
        """
        @param n: order of the Fourier expansion
        @type n: int

        @param beta: inverse temperature
        @type beta: float
        """
        super(MaxentModel, self).__init__()

        self._n = int(n)

        self._cc = numpy.zeros((self._n, self._n))
        self._ss = numpy.zeros((self._n, self._n))
        self._cs = numpy.zeros((self._n, self._n))
        self._sc = numpy.zeros((self._n, self._n))

        self._beta = float(beta)

    @property
    def beta(self):
        """
        Inverse temperature

        @rtype: float
        """
        return self._beta

    @property
    def n(self):
        """
        Order of the Fourier expansion

        @rtype: int
        """
        return self._n

    def load_old(self, aa, f_name):
        """
        Load a set of expansion coefficients from isd.

        @param aa: Amino acid type
        @param f_name: File containing ramachandran definition
        """
        import os

        params, _energies = eval(open(os.path.expanduser(f_name)).read())
        params = params[self._n - 1]

        for k, l, x, f, g in params[aa]:

            if f == 'cos' and g == 'cos':
                self._cc[k, l] = -x
            elif f == 'cos' and g == 'sin':
                self._cs[k, l] = -x
            elif f == 'sin' and g == 'cos':
                self._sc[k, l] = -x
            elif f == 'sin' and g == 'sin':
                self._ss[k, l] = -x

    def load(self, aa, f_name):
        """
        Load a set of expansion coefficients from isd+.

        @param aa: Amino acid type
        @param f_name: File containing ramachandran definition
        """
        import os
        from numpy import reshape, array
        from csb.io import load

        f_name = os.path.expanduser(f_name)
        params, _energies = load(f_name)
        params = params[self._n]

        a, b, c, d = params[aa]
        a, b, c, d = reshape(array(a), (self._n, self._n)).astype('d'), \
                     reshape(array(b), (self._n, self._n)).astype('d'), \
                     reshape(array(c), (self._n, self._n)).astype('d'), \
                     reshape(array(d), (self._n, self._n)).astype('d')

        self._cc, self._cs, self._sc, self._ss = -a, -c, -b, -d

    def _periodicities(self):
        return numpy.arange(self._n)

    def log_prob(self, x, y):
        """
        Return the log-probability at positions (x, y).

        @param x: x-coordinates for evaluation
        @type x: array-like

        @param y: y-coordinates for evaluation
        @type y: array-like
        """
        return -self.energy(x, y)

    def set(self, coef):
        """
        Set the Fourier expansion coefficients and calculate the
        new partition function.

        @param coef: expansion coefficients
        @type coef: array-like, with shape (4, n, n)
        """
        self._cc[:, :], self._ss[:, :], self._cs[:, :], self._sc[:, :] = \
            numpy.reshape(coef, (4, self._n, self._n))
        self.normalize()

    def get(self):
        """
        Return the current expansion coefficients.
        """
        return numpy.array([self._cc, self._ss, self._cs, self._sc])

    def energy(self, x, y=None):
        """
        Return the energy at positions (x, y).

        @param x: x-coordinates for evaluation
        @type x: array-like

        @param y: y-coordinates for evaluation
        @type y: array-like
        """
        from numpy import sin, cos, dot, multiply

        k = self._periodicities()
        cx, sx = cos(multiply.outer(k, x)), sin(multiply.outer(k, x))
        if y is not None:
            cy, sy = cos(multiply.outer(k, y)), sin(multiply.outer(k, y))
        else:
            cy, sy = cx, sx

        return dot(dot(cx.T, self._cc), cy) + \
               dot(dot(cx.T, self._cs), sy) + \
               dot(dot(sx.T, self._sc), cy) + \
               dot(dot(sx.T, self._ss), sy)

    def sample_weights(self):
        """
        Create a random set of expansion coefficients.
        """
        from numpy import add
        from numpy.random import standard_normal

        k = self._periodicities()
        k = add.outer(k ** 2, k ** 2)
        self.set([standard_normal(k.shape) for i in range(4)])
        self.normalize(True)

    def prob(self, x, y):
        """
        Return the probability of the configurations x cross y.
        """
        from csb.numeric import exp
        return exp(-self.beta * self(x, y))

    def z(self):
        """
        Calculate the partition function.
        """
        from scipy.integrate import dblquad
        from numpy import pi

        # dblquad returns a (value, absolute-error) tuple
        return dblquad(self.prob, 0., 2 * pi, lambda x: 0., lambda x: 2 * pi)

    def log_z(self, n=500, integration='simpson'):
        """
        Calculate the log partition function.
        """
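        # Numerical strategy: evaluate -beta * E on a regular grid over
        # [0, 2*pi]^2, subtract its maximum before exponentiating (a
        # log-sum-exp shift that avoids overflow), integrate with a 2-D
        # Simpson or trapezoidal rule, and add the shift back on the log
        # scale; the 2 * log(dx) term is the grid spacing in both dimensions.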
        from numpy import pi, linspace, max
        from csb.numeric import log, exp

        if integration == 'simpson':
            from csb.numeric import simpson_2d

            x = linspace(0., 2 * pi, 2 * n + 1)
            dx = x[1] - x[0]

            f = -self.beta * self.energy(x)
            f_max = max(f)
            f -= f_max

            I = simpson_2d(exp(f))
            return log(I) + f_max + 2 * log(dx)

        elif integration == 'trapezoidal':
            from csb.numeric import trapezoidal_2d

            x = linspace(0., 2 * pi, n)
            dx = x[1] - x[0]

            f = -self.beta * self.energy(x)
            f_max = max(f)
            f -= f_max

            I = trapezoidal_2d(exp(f))
            return log(I) + f_max + 2 * log(dx)
        else:
            raise NotImplementedError(
                'Choose from trapezoidal and simpson-rule integration')

    def calculate_statistics(self, data):
        """
        Calculate the sufficient statistics for the data.
        """
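        # The sufficient statistics are the data sums of the Fourier basis
        # functions, e.g. stats_cc[k, l] = sum_i cos(k * x_i) * cos(l * y_i);
        # they pair up with the coefficient matrices _cc, _ss, _cs and _sc
        # in MaxentPosterior.__call__.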
        from numpy import cos, sin, dot, multiply

        k = self._periodicities()
        cx = cos(multiply.outer(k, data[:, 0]))
        sx = sin(multiply.outer(k, data[:, 0]))
        cy = cos(multiply.outer(k, data[:, 1]))
        sy = sin(multiply.outer(k, data[:, 1]))

        return dot(cx, cy.T), dot(sx, sy.T), dot(cx, sy.T), dot(sx, cy.T)

    def normalize(self, normalize_full=False):
        """
        Remove parameters which do not have any influence on the model
        and compute the partition function.

        @param normalize_full: compute the partition function
        @type normalize_full: boolean
        """
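        # Coefficients that multiply sin(0 * x) or sin(0 * y) never contribute
        # to the energy (sin(0) == 0), so they are pinned to zero; the constant
        # term cc[0, 0] acts as the normalization constant and is set to the
        # log partition function when normalize_full is True.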
        self._cc[0, 0] = 0.
        self._ss[:, 0] = 0.
        self._ss[0, :] = 0.
        self._cs[:, 0] = 0.
        self._sc[0, :] = 0.

        if normalize_full:
            self._cc[0, 0] = self.log_z()


class MaxentPosterior(object):
    """
    Object to hold and calculate the posterior (log)probability
    given an exponential family model and corresponding data.
    """

    def __init__(self, model, data):
        """
        @param model: MaxentModel
        @param data: two-dimensional data
        """
        self._model = model
        self._data = numpy.array(data)
        self._stats = self.model.calculate_statistics(self._data)

        self._log_likelihoods = []

    @property
    def model(self):
        return self._model

    @model.setter
    def model(self, value):
        self._model = value
        self._stats = self.model.calculate_statistics(self._data)

    @property
    def data(self):
        return self._data

    @data.setter
    def data(self, value):
        self._data = numpy.array(value)
        self._stats = self.model.calculate_statistics(value)

    @property
    def log_likelihoods(self):
        return self._log_likelihoods

    def __call__(self, weights=None, n=100):
        """
        Return the log posterior likelihood.

        @param weights: optional expansion coefficients of the model;
                        if none are specified, those of the model are used
        @param n: number of integration points for calculating the partition function
        """
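        # log L = -beta * sum_i E(x_i, y_i) - N * log Z, where the data term
        # is obtained as the inner product of the sufficient statistics with
        # the corresponding coefficient matrices.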
        from numpy import sum

        if weights is not None:
            self.model.set(weights)

        a = sum(self._stats[0] * self.model._cc)
        b = sum(self._stats[1] * self.model._ss)
        c = sum(self._stats[2] * self.model._cs)
        d = sum(self._stats[3] * self.model._sc)

        log_z = self.data.shape[0] * self.model.log_z(n=n)

        log_likelihood = -self.model.beta * (a + b + c + d) - log_z

        self._log_likelihoods.append(log_likelihood)

        return log_likelihood
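

# A minimal usage sketch: the expansion order (3), beta, the sample size and
# the uniformly distributed toy angles below are arbitrary illustrative
# choices, not values prescribed by the module.
if __name__ == '__main__':

    from numpy import pi
    from numpy.random import uniform

    # random but normalized set of expansion coefficients
    model = MaxentModel(3, beta=1.0)
    model.sample_weights()

    # synthetic (phi, psi) pairs in [0, 2*pi)
    data = uniform(0., 2 * pi, size=(50, 2))

    posterior = MaxentPosterior(model, data)
    print('log posterior likelihood: %.3f' % posterior())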