
Source Code for Module csb.statistics.maxent

  1  """ 
  2  A Maximum-Entropy model for backbone torsion angles. 
  3  Reference: Rowicka and Otwinowski 2004 
  4  """ 
  5   
  6  import numpy 
  7   
  8  from csb.statistics.pdf import BaseDensity 
9 10 11 -class MaxentModel(BaseDensity):
12 """ 13 Fourier expansion of a biangular log-probability density 14 """
15 - def __init__(self, n, beta=1.):
16 """ 17 18 @param n: order of the fourier expansion 19 @type n: int 20 21 @param beta: inverse temperature 22 @type beta: float 23 """ 24 super(MaxentModel, self).__init__() 25 26 self._n = int(n) 27 28 self._cc = numpy.zeros((self._n, self._n)) 29 self._ss = numpy.zeros((self._n, self._n)) 30 self._cs = numpy.zeros((self._n, self._n)) 31 self._sc = numpy.zeros((self._n, self._n)) 32 33 self._beta = float(beta)
34 35 @property
36 - def beta(self):
37 """ 38 Inverse temperature 39 40 @rtype: float 41 """ 42 return self._beta
43 44 @property
45 - def n(self):
46 """ 47 Order of the fourier expansion 48 49 @rtype: int 50 """ 51 return self._n
52
53 - def load_old(self, aa, f_name):
54 """ 55 Load set of expansion coefficients from isd. 56 57 @param aa: Amino acid type 58 @param f_name: File containing ramachandran definition 59 """ 60 61 import os 62 params, _energies = eval(open(os.path.expanduser(f_name)).read()) 63 params = params[self._n - 1] 64 65 for k, l, x, f, g in params[aa]: 66 67 if f == 'cos' and g == 'cos': 68 self._cc[k, l] = -x 69 elif f == 'cos' and g == 'sin': 70 self._cs[k, l] = -x 71 elif f == 'sin' and g == 'cos': 72 self._sc[k, l] = -x 73 elif f == 'sin' and g == 'sin': 74 self._ss[k, l] = -x
75
76 - def load(self, aa, f_name):
77 """ 78 Load set of expansion coefficients from isd+. 79 80 @param aa: Amino acid type 81 @param f_name: File containing ramachandran definition 82 """ 83 import os 84 from numpy import reshape, array 85 from csb.io import load 86 87 f_name = os.path.expanduser(f_name) 88 params, _energies = load(f_name) 89 params = params[self._n] 90 91 a, b, c, d = params[aa] 92 a, b, c, d = reshape(array(a), (self._n, self._n)).astype('d'), \ 93 reshape(array(b), (self._n, self._n)).astype('d'), \ 94 reshape(array(c), (self._n, self._n)).astype('d'), \ 95 reshape(array(d), (self._n, self._n)).astype('d') 96 # Not a typo, I accidently swichted cos*sin and sin*cos 97 self._cc, self._cs, self._sc, self._ss = -a, -c, -b, -d
98
99 - def _periodicities(self):
100 return numpy.arange(self._n)
101
102 - def log_prob(self, x, y):
103 """ 104 Return the energy at positions (x,y). 105 106 @param x: x-coordinates for evaluation 107 @type x: array-like 108 109 @param y: y-coordinates for evaluation 110 @type y: array-like 111 """ 112 return -self.energy(x, y)
113
114 - def set(self, coef):
115 """ 116 Set the fourier expansion coefficients and calculations the 117 new partation function. 118 119 @param coef: expansion coefficents 120 @type coef: array like, with shape (4,n,n) 121 """ 122 self._cc[:, :], self._ss[:, :], self._cs[:, :], self._sc[:, :] = \ 123 numpy.reshape(coef, (4, self._n, self._n)) 124 self.normalize()
125
126 - def get(self):
127 """ 128 Return current expansion coefficients. 129 """ 130 return numpy.array([self._cc, self._ss, self._cs, self._sc])
131
132 - def energy(self, x, y=None):
133 """ 134 Return the energy at positions (x,y). 135 136 @param x: x-coordinates for evaluation 137 @type x: array-like 138 139 @param y: y-coordinates for evaluation 140 @type y: array-like 141 """ 142 from numpy import sin, cos, dot, multiply 143 144 k = self._periodicities() 145 cx, sx = cos(multiply.outer(k, x)), sin(multiply.outer(k, x)) 146 if y is not None: 147 cy, sy = cos(multiply.outer(k, y)), sin(multiply.outer(k, y)) 148 else: 149 cy, sy = cx, sx 150 151 return dot(dot(cx.T, self._cc), cy) + \ 152 dot(dot(cx.T, self._cs), sy) + \ 153 dot(dot(sx.T, self._sc), cy) + \ 154 dot(dot(sx.T, self._ss), sy)
155
156 - def sample_weights(self):
157 """ 158 Create a random set of expansion coefficients. 159 """ 160 from numpy import add 161 from numpy.random import standard_normal 162 163 k = self._periodicities() 164 k = add.outer(k ** 2, k ** 2) 165 self.set([standard_normal(k.shape) for i in range(4)]) 166 self.normalize(True)
167
168 - def prob(self, x, y):
169 """ 170 Return the probability of the configurations x cross y. 171 """ 172 from csb.numeric import exp 173 return exp(-self.beta * self(x, y))
174
175 - def z(self):
176 """ 177 Calculate the partion function . 178 """ 179 from scipy.integrate import dblquad 180 from numpy import pi 181 182 return dblquad(self.prob, 0., 2 * pi, lambda x: 0., lambda x: 2 * pi)
183
184 - def log_z(self, n=500, integration='simpson'):
185 """ 186 Calculate the log partion function. 187 """ 188 from numpy import pi, linspace, max 189 from csb.numeric import log, exp 190 191 if integration == 'simpson': 192 from csb.numeric import simpson_2d 193 x = linspace(0., 2 * pi, 2 * n + 1) 194 dx = x[1] - x[0] 195 196 f = -self.beta * self.energy(x) 197 f_max = max(f) 198 f -= f_max 199 200 I = simpson_2d(exp(f)) 201 return log(I) + f_max + 2 * log(dx) 202 203 elif integration == 'trapezoidal': 204 205 from csb.numeric import trapezoidal_2d 206 x = linspace(0., 2 * pi, n) 207 dx = x[1] - x[0] 208 209 f = -self.beta * self.energy(x) 210 f_max = max(f) 211 f -= f_max 212 I = trapezoidal_2d(exp(f)) 213 return log(I) + f_max + 2 * log(dx) 214 else: 215 raise NotImplementedError( 216 'Choose from trapezoidal and simpson-rule Integration')
217
218 - def entropy(self, n=500):
219 """ 220 Calculate the entropy of the model. 221 222 @param n: number of integration points for numerical integration 223 @type n: integer 224 """ 225 from csb.numeric import trapezoidal_2d 226 from numpy import pi, linspace, max 227 from csb.numeric import log, exp 228 229 x = linspace(0., 2 * pi, n) 230 dx = x[1] - x[0] 231 232 f = -self.beta * self.energy(x) 233 f_max = max(f) 234 235 log_z = log(trapezoidal_2d(exp(f - f_max))) + f_max + 2 * log(dx) 236 average_energy = trapezoidal_2d(f * exp(f - f_max))\ 237 * exp(f_max + 2 * log(dx) - log_z) 238 239 return -average_energy + log_z
240
241 - def calculate_statistics(self, data):
242 """ 243 Calculate the sufficient statistics for the data. 244 """ 245 from numpy import cos, sin, dot, multiply 246 247 k = self._periodicities() 248 cx = cos(multiply.outer(k, data[:, 0])) 249 sx = sin(multiply.outer(k, data[:, 0])) 250 cy = cos(multiply.outer(k, data[:, 1])) 251 sy = sin(multiply.outer(k, data[:, 1])) 252 253 return dot(cx, cy.T), dot(sx, sy.T), dot(cx, sy.T), dot(sx, cy.T)
254
255 - def normalize(self, normalize_full=True):
256 """ 257 Remove parameter, which do not have any influence on the model 258 and compute the partition function. 259 260 @param normalize_full: compute partition function 261 @type normalize_full: boolean 262 """ 263 self._cc[0, 0] = 0. 264 self._ss[:, 0] = 0. 265 self._ss[0, :] = 0. 266 self._cs[:, 0] = 0. 267 self._sc[0, :] = 0. 268 269 if normalize_full: 270 self._cc[0, 0] = self.log_z()
271
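
A minimal usage sketch for MaxentModel (not part of the module source; the expansion order and the grid size below are arbitrary illustrative choices):

>>> import numpy
>>> from csb.statistics.maxent import MaxentModel
>>> model = MaxentModel(n=5)                   # order-5 Fourier expansion
>>> model.sample_weights()                     # random expansion coefficients
>>> angles = numpy.linspace(0., 2 * numpy.pi, 100)
>>> surface = model.energy(angles)             # (100, 100) energy surface on the grid
>>> logp = model.log_prob(angles, angles)      # log-probability on the same grid
>>> logz = model.log_z()                       # log partition function (Simpson's rule)
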

class MaxentPosterior(object):
    """
    Object to hold and calculate the posterior (log-)probability
    given an exponential family model and corresponding data.
    """

    def __init__(self, model, data):
        """
        @param model: MaxentModel
        @param data: two-dimensional data
        """

        self._model = model
        self._data = numpy.array(data)
        self._stats = self.model.calculate_statistics(self._data)

        self._log_likelihoods = []

    @property
    def model(self):
        return self._model

    @model.setter
    def model(self, value):
        self._model = value
        self._stats = self.model.calculate_statistics(self._data)

    @property
    def data(self):
        return self._data

    @data.setter
    def data(self, value):
        self._data = numpy.array(value)
        self._stats = self.model.calculate_statistics(value)

    @property
    def stats(self):
        return self._stats

    def __call__(self, weights=None, n=100):
314 """ 315 Returns the log posterior likelihood 316 317 @param weights: optional expansion coefficients of the model, 318 if none are specified those of the model are used 319 @param n: number of integration point for calculating the partition function 320 """ 321 from numpy import sum 322 323 if weights is not None: 324 self.model.set(weights) 325 326 a = sum(self._stats[0] * self.model._cc) 327 b = sum(self._stats[1] * self.model._ss) 328 c = sum(self._stats[2] * self.model._cs) 329 d = sum(self._stats[3] * self.model._sc) 330 331 log_z = self.data.shape[0] * self.model.log_z(n=n) 332 333 log_likelihood = -self.model.beta * (a + b + c + d) - log_z 334 335 self._log_likelihoods.append(log_likelihood) 336 337 return log_likelihood
338
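
A complementary sketch for MaxentPosterior (again not part of the module source): score expansion coefficients against observed torsion-angle pairs. The uniform samples below are a synthetic stand-in for real (phi, psi) data in radians, and the chosen order and integration settings are arbitrary:

>>> import numpy
>>> from csb.statistics.maxent import MaxentModel, MaxentPosterior
>>> data = numpy.random.uniform(0., 2 * numpy.pi, size=(500, 2))   # synthetic (phi, psi) pairs
>>> model = MaxentModel(n=3)
>>> model.sample_weights()
>>> posterior = MaxentPosterior(model, data)
>>> logp = posterior()                                   # log posterior with the model's current coefficients
>>> weights = numpy.random.standard_normal((4, model.n, model.n))
>>> logp_new = posterior(weights=weights, n=200)         # sets and renormalizes new coefficients, then scores them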