1 """
Approximation of a distribution as a mixture of Gaussians with zero mean but different sigmas
3 """
4
5 import numpy.random
6
7 import csb.core
8 import csb.statistics.ars
9 import csb.statistics.rand
10
11 from csb.core import typedproperty
12 from abc import abstractmethod, ABCMeta
13
14 from csb.numeric import log, exp, approx_psi, inv_psi, d_approx_psi
15 from scipy.special import psi, kve
16 from csb.statistics import harmonic_mean, geometric_mean
17 from csb.statistics.pdf import AbstractEstimator, AbstractDensity, BaseDensity
18 from csb.statistics.pdf import Gamma, InverseGamma, NullEstimator
22 """
23 Solve y = psi(alpha) - log(alpha) for alpha by fixed point
24 integration.
25 """
26 if y >= -log(6.):
27 x = 1 / (2 * (1 - exp(y)))
28 else:
29 x = 1.e-10
30 for _i in range(n_iter):
31 z = approx_psi(x) - log(x) - y
32 if abs(z) < tol:
33 break
34 x -= x * z / (x * d_approx_psi(x) - 1)
35 x = abs(x)
36 return x
37
39 """
40 Prior on the scales of a L{ScaleMixture}, which determines how the scales
41 are estimated.
42 """
43
44 __metaclass__ = ABCMeta
45
46 @abstractmethod
48 """
49 Return an appropriate estimator for the scales of the mixture distribution
50 under this prior.
51 """
52 pass
53
56 """
57 Prior on the scales of a L{ScaleMixture}, which determines how the scales
58 are estimated.
59 """
60
65
66 @property
68 return self._scale_estimator
69
70 @property
72 return self._estimator
73
74 @estimator.setter
83
86 """
87 Robust probabilistic superposition and comparison of protein structures
88 Martin Mechelke and Michael Habeck
89
    Representation of a distribution as a mixture of Gaussians with a mean of
91 zero and different inverse variances/scales. The number of scales equals
92 the number of datapoints.
93
94 The underlying family is determined by a prior L{ScaleMixturePrior} on the
    scales. Choosing a L{GammaPrior} results in a Student-t posterior, whereas an
    L{InvGammaPrior} leads to a K-distribution as posterior.
97 """
98
99 - def __init__(self, scales=numpy.array([1., 1.]), prior=None, d=3):
111
112 @property
114 return numpy.squeeze(self['scales'])
115
116 @scales.setter
118 if not isinstance(value, csb.core.string) and \
119 isinstance(value, (numpy.ndarray, list, tuple)):
120 self['scales'] = numpy.array(value)
121 else:
122 raise ValueError("numpy array expected")
123
124 @property
127
128 @prior.setter
134
136 from csb.numeric import log_sum_exp
137
138 dim = self._d
139 s = self.scales
140
141 log_p = numpy.squeeze(-numpy.multiply.outer(x * x, 0.5 * s)) + \
142 numpy.squeeze(dim * 0.5 * (log(s) - log(2 * numpy.pi)))
143
144 if self._prior is not None:
145 log_p += numpy.squeeze(self._prior.log_prob(s))
146 return log_sum_exp(log_p.T, 0)
147
148
149 - def random(self, shape=None):
162
163
class ARSPosteriorAlpha(csb.statistics.ars.LogProb):
    """
    Posterior distribution of the alpha parameter of the Gamma and
    Inverse Gamma priors, suitable for adaptive rejection sampling (L{ARS}).
    """

    def __init__(self, a, b, n):
        # store the sufficient statistics as floats
        self.a = float(a)
        self.b = float(b)
        self.n = float(n)

    def __call__(self, x):
        """
        Return the log probability and its derivative at x.
        """
        from scipy.special import gammaln

        # clip x to avoid overflow inside gammaln
        log_p = self.a * x \
                - self.n * gammaln(numpy.clip(x, 1e-308, 1e308)) \
                + self.b * log(x)
        dlog_p = self.a - self.n * psi(x) + self.b / x

        return log_p, dlog_p

    def initial_values(self, tol=1e-10):
        """
        Generate initial values by doing fixed point
        iterations to solve for alpha.
        """
        n, a, b = self.n, self.a, self.b

        # with b ~ 0 the stationarity condition reduces to psi(alpha) = a / n
        alpha = inv_psi(a / n) if abs(b) < 1e-10 else 1.

        residual = tol + 1.
        while abs(residual) > tol:
            # Newton step on the derivative of the log posterior;
            # clipping keeps the iteration away from singularities
            residual = n * psi(alpha) - b / numpy.clip(alpha, 1e-300, 1e300) - a
            alpha -= residual / (n * d_approx_psi(alpha) - b / (alpha ** 2 + 1e-300))
            alpha = numpy.clip(alpha, 1e-100, 1e300)

        # bracket the mode with a width of ~1/n on either side
        width = 1 / (n + 1e-300)
        return numpy.clip(alpha - width, 1e-100, 1e300), alpha + width, alpha
211
212
213 -class GammaPosteriorSampler(ScaleMixturePriorEstimator):
214
215 - def __init__(self):
216 super(GammaPosteriorSampler, self).__init__()
217 self.n_samples = 2
218
221
222 - def estimate(self, context, data):
223 """
224 Generate samples from the posterior of alpha and beta.
225
226 For beta the posterior is a gamma distribution and analytically
227 acessible.
228
229 The posterior of alpha can not be expressed analytically and is
230 aproximated using adaptive rejection sampling.
231 """
232 pdf = GammaPrior()
233
234
235
236 a = numpy.mean(data)
237 b = exp(numpy.mean(log(data)))
238 v = numpy.std(data) ** 2
239 n = len(data)
240
241 beta = a / v
242 alpha = beta * a
243 samples = []
244
245 for _i in range(self.n_samples):
246
247
248 beta = numpy.random.gamma(n * alpha + context._hyper_beta.alpha,
249 1 / (n * a + context._hyper_beta.beta))
250
251
252 logp = ARSPosteriorAlpha(n * log(beta * b)\
253 - context.hyper_alpha.beta,
254 context.hyper_alpha.alpha - 1., n)
255 ars = csb.statistics.ars.ARS(logp)
256 ars.initialize(logp.initial_values()[:2], z0=0.)
257 alpha = ars.sample()
258
259 if alpha is None:
260 raise ValueError("ARS failed")
261
262 samples.append((alpha, beta))
263
264 pdf.alpha, pdf.beta = samples[-1]
265
266 return pdf
267
268 -class GammaPosteriorMAP(ScaleMixturePriorEstimator):
269
270 - def __init__(self):
272
274 return GammaScaleMAP()
275
276 - def estimate(self, context, data):
277 """
278 Estimate alpha and beta from their posterior
279 """
280
281 pdf = GammaPrior()
282
283 s = data[0].mean()
284 y = data[1].mean() - log(s) - 1.
285
286 alpha = abs(inv_digamma_minus_log(numpy.clip(y,
287 - 1e308,
288 - 1e-300)))
289 beta = alpha / s
290
291
292 pdf.alpha, pdf.beta = alpha, beta
293 return pdf
294
295
296
297
298 -class InvGammaPosteriorSampler(ScaleMixturePriorEstimator):
299
300 - def __init__(self):
301 super(InvGammaPosteriorSampler, self).__init__()
302 self.n_samples = 2
303
306
307 - def estimate(self, context, data):
308 """
309 Generate samples from the posterior of alpha and beta.
310
311 For beta the posterior is a gamma distribution and analytically
312 acessible.
313
314 The posterior of alpha can not be expressed analytically and is
315 aproximated using adaptive rejection sampling.
316 """
317 pdf = GammaPrior()
318
319
320
321 h = harmonic_mean(numpy.clip(data, 1e-308, 1e308))
322 g = geometric_mean(numpy.clip(data, 1e-308, 1e308))
323
324 n = len(data)
325
326 samples = []
327
328 a = numpy.mean(1 / data)
329 v = numpy.std(1 / data) ** 2
330
331 beta = a / v
332 alpha = beta * a
333
334 for i in range(self.n_samples):
335
336
337
338 logp = ARSPosteriorAlpha(n * (log(beta) - log(g)) - context.hyper_alpha.beta,
339 context.hyper_alpha.alpha - 1., n)
340 ars = csb.statistics.ars.ARS(logp)
341 ars.initialize(logp.initial_values()[:2], z0=0.)
342
343 alpha = numpy.abs(ars.sample())
344
345 if alpha is None:
346 raise ValueError("Sampling failed")
347
348
349
350
351 beta = numpy.random.gamma(n * alpha + context.hyper_beta.alpha, \
352 1 / (n / h + context.hyper_beta.beta))
353
354 samples.append((alpha, beta))
355
356 pdf.alpha, pdf.beta = samples[-1]
357 return pdf
358
359
360 -class InvGammaPosteriorMAP(ScaleMixturePriorEstimator):
361
362 - def __init__(self):
364
367
368 - def estimate(self, context, data):
369 """
370 Generate samples from the posterior of alpha and beta.
371
372 For beta the posterior is a gamma distribution and analytically
373 acessible.
374
375 The posterior of alpha can not be expressed analytically and is
376 aproximated using adaptive rejection sampling.
377 """
378 pdf = GammaPrior()
379
380
381 y = log(data).mean() - log((data ** -1).mean())
382
383 alpha = inv_digamma_minus_log(numpy.clip(y,
384 - 1e308,
385 - 1e-300))
386 alpha = abs(alpha)
387
388 beta = numpy.clip(alpha /
389 (data ** (-1)).mean(),
390 1e-100, 1e100)
391
392 pdf.alpha, pdf.beta = alpha, beta
393 return pdf
394
398 """
    Sample the scales given the data
400 """
401
421
424 """
425 MAP estimator of the scales
426 """
427
448
452 """
453 Sample the scales given the data
454 """
455
477
481 """
482 MAP estimator of the scales
483 """
505
506
507
508 -class GammaPrior(ScaleMixturePrior, Gamma):
509 """
510 Gamma prior on mixture weights including a weak gamma prior on its parameter.
511 """
512
513 - def __init__(self, alpha=1., beta=1., hyper_alpha=(4., 1.),
514 hyper_beta=(2., 1.)):
521
522 @typedproperty(AbstractDensity)
525
526 @typedproperty(AbstractDensity)
529
531
532 a, b = self['alpha'], self['beta']
533
534 l_a = self._hyper_alpha(a)
535 l_b = self._hyper_beta(b)
536
537 return super(GammaPrior, self).log_prob(x) + l_a + l_b
538
542 """
543 Inverse Gamma prior on mixture weights including a weak gamma
544 prior on its parameter.
545 """
546
547 - def __init__(self, alpha=1., beta=1., hyper_alpha=(4., 1.),
548 hyper_beta=(2., 1.)):
555
556 @typedproperty(AbstractDensity)
559
560 @typedproperty(AbstractDensity)
563
565
566 a, b = self['alpha'], self['beta']
567
568 l_a = self._hyper_alpha(a)
569 l_b = self._hyper_beta(b)
570
571 return super(InvGammaPrior, self).log_prob(x) + l_a + l_b
572