Package csb :: Package statistics :: Module scalemixture
[frames | no frames]

Source Code for Module csb.statistics.scalemixture

  1  """ 
  2  Approximation of a distribution as a mixture of gaussians with a zero mean but different sigmas 
  3  """ 
  4   
  5  import numpy.random 
  6   
  7  import csb.core 
  8  import csb.statistics.ars 
  9  import csb.statistics.rand 
 10   
 11  from csb.core import typedproperty 
 12  from abc import abstractmethod, ABCMeta 
 13   
 14  from csb.numeric import log, exp, approx_psi, inv_psi, d_approx_psi 
 15  from scipy.special import  psi, kve 
 16  from csb.statistics import harmonic_mean, geometric_mean 
 17  from csb.statistics.pdf import AbstractEstimator, AbstractDensity, BaseDensity 
 18  from csb.statistics.pdf import Gamma, InverseGamma, NullEstimator 
19 20 21 -def inv_digamma_minus_log(y, tol=1e-10, n_iter=100):
22 """ 23 Solve y = psi(alpha) - log(alpha) for alpha by fixed point 24 integration. 25 """ 26 if y >= -log(6.): 27 x = 1 / (2 * (1 - exp(y))) 28 else: 29 x = 1.e-10 30 for _i in range(n_iter): 31 z = approx_psi(x) - log(x) - y 32 if abs(z) < tol: 33 break 34 x -= x * z / (x * d_approx_psi(x) - 1) 35 x = abs(x) 36 return x
37
38 -class ScaleMixturePriorEstimator(AbstractEstimator):
39 """ 40 Prior on the scales of a L{ScaleMixture}, which determines how the scales 41 are estimated. 42 """ 43 44 __metaclass__ = ABCMeta 45 46 @abstractmethod
47 - def get_scale_estimator(self):
48 """ 49 Return an appropriate estimator for the scales of the mixture distribution 50 under this prior. 51 """ 52 pass
53
54 55 -class ScaleMixturePrior(object):
56 """ 57 Prior on the scales of a L{ScaleMixture}, which determines how the scales 58 are estimated. 59 """ 60
61 - def __init__(self, *args):
62 super(ScaleMixturePrior, self).__init__(*args) 63 self._scale = None 64 self._scale_estimator = NullEstimator()
65 66 @property
67 - def scale_estimator(self):
68 return self._scale_estimator
69 70 @property
71 - def estimator(self):
72 return self._estimator
73 74 @estimator.setter
75 - def estimator(self, strategy):
76 if not isinstance(strategy, AbstractEstimator): 77 raise TypeError(strategy) 78 self._estimator = strategy 79 if isinstance(strategy, ScaleMixturePriorEstimator): 80 self._scale_estimator = strategy.get_scale_estimator() 81 else: 82 self._scale_estimator = NullEstimator()
83
84 85 -class ScaleMixture(BaseDensity):
86 """ 87 Robust probabilistic superposition and comparison of protein structures 88 Martin Mechelke and Michael Habeck 89 90 Represenation of a distribution as a mixture of gaussians with a mean of 91 zero and different inverse variances/scales. The number of scales equals 92 the number of datapoints. 93 94 The underlying family is determined by a prior L{ScaleMixturePrior} on the 95 scales. Choosing a L{GammaPrior} results in Stundent-t posterior, wheras a 96 L{InvGammaPrior} leads to a K-Distribution as posterior. 97 """ 98
99 - def __init__(self, scales=numpy.array([1., 1.]), prior=None, d=3):
100 101 super(ScaleMixture, self).__init__() 102 103 self._register('scales') 104 105 self._d = d 106 self.set_params(scales=scales) 107 self._prior = prior 108 109 if self._prior is not None: 110 self.estimator = self._prior.scale_estimator
111 112 @property
113 - def scales(self):
114 return numpy.squeeze(self['scales'])
115 116 @scales.setter
117 - def scales(self, value):
118 if not isinstance(value, csb.core.string) and \ 119 isinstance(value, (numpy.ndarray, list, tuple)): 120 self['scales'] = numpy.array(value) 121 else: 122 raise ValueError("numpy array expected")
123 124 @property
125 - def prior(self):
126 return self._prior
127 128 @prior.setter
129 - def prior(self, value):
130 if not isinstance(value, ScaleMixturePrior): 131 raise TypeError(value) 132 self._prior = value 133 self.estimator = self._prior.scale_estimator
134
135 - def log_prob(self, x):
136 from csb.numeric import log_sum_exp 137 138 dim = self._d 139 s = self.scales 140 141 log_p = numpy.squeeze(-numpy.multiply.outer(x * x, 0.5 * s)) + \ 142 numpy.squeeze(dim * 0.5 * (log(s) - log(2 * numpy.pi))) 143 144 if self._prior is not None: 145 log_p += numpy.squeeze(self._prior.log_prob(s)) 146 return log_sum_exp(log_p.T, 0)
147 148
149 - def random(self, shape=None):
150 151 s = self.scales 152 153 if shape is None: 154 return numpy.random.normal() * s[numpy.random.randint(len(s))] 155 156 else: 157 #n = s.shape[0] 158 nrv = numpy.random.normal(size=shape) 159 indices = numpy.random.randint(len(s), size=shape) 160 161 return s[indices] * nrv
162
163 164 -class ARSPosteriorAlpha(csb.statistics.ars.LogProb):
165 """ 166 This class represents the posterior distribution of the alpha parameter 167 of the Gamma and Inverse Gamma prior, and allows sampling using adaptive 168 rejection sampling L{ARS}. 169 """ 170
171 - def __init__(self, a, b, n):
172 173 self.a = float(a) 174 self.b = float(b) 175 self.n = float(n)
176
177 - def __call__(self, x):
178 179 from scipy.special import gammaln 180 181 return self.a * x - \ 182 self.n * gammaln(numpy.clip(x, 1e-308, 1e308)) + \ 183 self.b * log(x), \ 184 self.a - self.n * psi(x) + self.b / x
185
186 - def initial_values(self, tol=1e-10):
187 """ 188 Generate initial values by doing fixed point 189 iterations to solve for alpha 190 """ 191 n, a, b = self.n, self.a, self.b 192 193 if abs(b) < 1e-10: 194 alpha = inv_psi(a / n) 195 else: 196 alpha = 1. 197 198 z = tol + 1. 199 200 while abs(z) > tol: 201 202 z = n * psi(alpha) - \ 203 b / numpy.clip(alpha, 1e-300, 1e300) - a 204 205 alpha -= z / (n * d_approx_psi(alpha) - b 206 / (alpha ** 2 + 1e-300)) 207 alpha = numpy.clip(alpha, 1e-100, 1e300) 208 209 return numpy.clip(alpha - 1 / (n + 1e-300), 1e-100, 1e300), \ 210 alpha + 1 / (n + 1e-300), alpha
211
212 213 -class GammaPosteriorSampler(ScaleMixturePriorEstimator):
214
215 - def __init__(self):
216 super(GammaPosteriorSampler, self).__init__() 217 self.n_samples = 2
218
219 - def get_scale_estimator(self):
220 return GammaScaleSampler()
221
222 - def estimate(self, context, data):
223 """ 224 Generate samples from the posterior of alpha and beta. 225 226 For beta the posterior is a gamma distribution and analytically 227 acessible. 228 229 The posterior of alpha can not be expressed analytically and is 230 aproximated using adaptive rejection sampling. 231 """ 232 pdf = GammaPrior() 233 234 ## sufficient statistics 235 236 a = numpy.mean(data) 237 b = exp(numpy.mean(log(data))) 238 v = numpy.std(data) ** 2 239 n = len(data) 240 241 beta = a / v 242 alpha = beta * a 243 samples = [] 244 245 for _i in range(self.n_samples): 246 247 ## sample beta from Gamma distribution 248 beta = numpy.random.gamma(n * alpha + context._hyper_beta.alpha, 249 1 / (n * a + context._hyper_beta.beta)) 250 251 ## sample alpha with ARS 252 logp = ARSPosteriorAlpha(n * log(beta * b)\ 253 - context.hyper_alpha.beta, 254 context.hyper_alpha.alpha - 1., n) 255 ars = csb.statistics.ars.ARS(logp) 256 ars.initialize(logp.initial_values()[:2], z0=0.) 257 alpha = ars.sample() 258 259 if alpha is None: 260 raise ValueError("ARS failed") 261 262 samples.append((alpha, beta)) 263 264 pdf.alpha, pdf.beta = samples[-1] 265 266 return pdf
267
268 -class GammaPosteriorMAP(ScaleMixturePriorEstimator):
269
270 - def __init__(self):
271 super(GammaPosteriorMAP, self).__init__()
272
273 - def get_scale_estimator(self):
274 return GammaScaleMAP()
275
276 - def estimate(self, context, data):
277 """ 278 Estimate alpha and beta from their posterior 279 """ 280 281 pdf = GammaPrior() 282 283 s = data[0].mean() 284 y = data[1].mean() - log(s) - 1. 285 286 alpha = abs(inv_digamma_minus_log(numpy.clip(y, 287 - 1e308, 288 - 1e-300))) 289 beta = alpha / s 290 291 292 pdf.alpha, pdf.beta = alpha, beta 293 return pdf
294
295 296 297 298 -class InvGammaPosteriorSampler(ScaleMixturePriorEstimator):
299
300 - def __init__(self):
301 super(InvGammaPosteriorSampler, self).__init__() 302 self.n_samples = 2
303
304 - def get_scale_estimator(self):
305 return InvGammaScaleSampler()
306
307 - def estimate(self, context, data):
308 """ 309 Generate samples from the posterior of alpha and beta. 310 311 For beta the posterior is a gamma distribution and analytically 312 acessible. 313 314 The posterior of alpha can not be expressed analytically and is 315 aproximated using adaptive rejection sampling. 316 """ 317 pdf = GammaPrior() 318 319 ## sufficient statistics 320 321 h = harmonic_mean(numpy.clip(data, 1e-308, 1e308)) 322 g = geometric_mean(numpy.clip(data, 1e-308, 1e308)) 323 324 n = len(data) 325 326 samples = [] 327 328 a = numpy.mean(1 / data) 329 v = numpy.std(1 / data) ** 2 330 331 beta = a / v 332 alpha = beta * a 333 334 for i in range(self.n_samples): 335 336 ## sample alpha with ARS 337 338 logp = ARSPosteriorAlpha(n * (log(beta) - log(g)) - context.hyper_alpha.beta, 339 context.hyper_alpha.alpha - 1., n) 340 ars = csb.statistics.ars.ARS(logp) 341 ars.initialize(logp.initial_values()[:2], z0=0.) 342 343 alpha = numpy.abs(ars.sample()) 344 345 if alpha is None: 346 raise ValueError("Sampling failed") 347 348 349 ## sample beta from Gamma distribution 350 351 beta = numpy.random.gamma(n * alpha + context.hyper_beta.alpha, \ 352 1 / (n / h + context.hyper_beta.beta)) 353 354 samples.append((alpha, beta)) 355 356 pdf.alpha, pdf.beta = samples[-1] 357 return pdf
358
359 360 -class InvGammaPosteriorMAP(ScaleMixturePriorEstimator):
361
362 - def __init__(self):
363 super(InvGammaPosteriorMAP, self).__init__()
364
365 - def get_scale_estimator(self):
366 return InvGammaScaleMAP()
367
368 - def estimate(self, context, data):
369 """ 370 Generate samples from the posterior of alpha and beta. 371 372 For beta the posterior is a gamma distribution and analytically 373 acessible. 374 375 The posterior of alpha can not be expressed analytically and is 376 aproximated using adaptive rejection sampling. 377 """ 378 pdf = GammaPrior() 379 380 381 y = log(data).mean() - log((data ** -1).mean()) 382 383 alpha = inv_digamma_minus_log(numpy.clip(y, 384 - 1e308, 385 - 1e-300)) 386 alpha = abs(alpha) 387 388 beta = numpy.clip(alpha / 389 (data ** (-1)).mean(), 390 1e-100, 1e100) 391 392 pdf.alpha, pdf.beta = alpha, beta 393 return pdf
394
395 396 397 -class GammaScaleSampler(AbstractEstimator):
398 """ 399 Sample the scalies given the data 400 """ 401
402 - def estimate(self, context, data):
403 pdf = ScaleMixture() 404 alpha = context.prior.alpha 405 beta = context.prior.beta 406 d = context._d 407 408 if len(data.shape) == 1: 409 data = data[:, numpy.newaxis] 410 411 a = alpha + 0.5 * d * len(data.shape) 412 b = beta + 0.5 * data.sum(-1) ** 2 413 414 s = numpy.clip(numpy.random.gamma(a, 1. / b), 1e-20, 1e10) 415 pdf.scales = s 416 417 context.prior.estimate(s) 418 pdf.prior = context.prior 419 420 return pdf
421
422 423 -class GammaScaleMAP(AbstractEstimator):
424 """ 425 MAP estimator of the scales 426 """ 427
428 - def estimate(self, context, data):
429 pdf = ScaleMixture() 430 alpha = context.prior.alpha 431 beta = context.prior.beta 432 d = context._d 433 434 if len(data.shape) == 1: 435 data = data[:, numpy.newaxis] 436 437 a = alpha + 0.5 * d * len(data.shape) 438 b = beta + 0.5 * data.sum(-1) ** 2 439 440 s = a / b 441 log_s = psi(a) - log(b) 442 443 pdf.scales = s 444 context.prior.estimate([s, log_s]) 445 pdf.prior = context.prior 446 447 return pdf
448
449 450 451 -class InvGammaScaleSampler(AbstractEstimator):
452 """ 453 Sample the scales given the data 454 """ 455
456 - def estimate(self, context, data):
457 pdf = ScaleMixture() 458 alpha = context.prior.alpha 459 beta = context.prior.beta 460 d = context._d 461 462 if len(data.shape) == 1: 463 data = data[:, numpy.newaxis] 464 465 p = -alpha + 0.5 * d 466 b = 2 * beta 467 a = 1e-5 + data.sum(-1) ** 2 468 469 s = csb.statistics.rand.gen_inv_gaussian(a, b, p) 470 s = numpy.clip(s, 1e-300, 1e300) 471 472 pdf.scales = s 473 context.prior.estimate(s) 474 pdf.prior = context.prior 475 476 return pdf
477
478 479 480 -class InvGammaScaleMAP(AbstractEstimator):
481 """ 482 MAP estimator of the scales 483 """
484 - def estimate(self, context, data):
485 pdf = ScaleMixture() 486 alpha = context.prior.alpha 487 beta = context.prior.beta 488 d = context._d 489 490 if len(data.shape) == 1: 491 data = data[:, numpy.newaxis] 492 493 p = -alpha + 0.5 * d 494 b = 2 * beta 495 a = 1e-5 + data.sum(-1) ** 2 496 497 s = numpy.clip((numpy.sqrt(b) * kve(p + 1, numpy.sqrt(a * b))) 498 / (numpy.sqrt(a) * kve(p, numpy.sqrt(a * b))), 499 1e-10, 1e10) 500 pdf.scales = s 501 context.prior.estimate(s) 502 pdf.prior = context.prior 503 504 return pdf
505
506 507 508 -class GammaPrior(ScaleMixturePrior, Gamma):
509 """ 510 Gamma prior on mixture weights including a weak gamma prior on its parameter. 511 """ 512
513 - def __init__(self, alpha=1., beta=1., hyper_alpha=(4., 1.), 514 hyper_beta=(2., 1.)):
515 516 super(GammaPrior, self).__init__(alpha, beta) 517 518 self._hyper_alpha = Gamma(*hyper_alpha) 519 self._hyper_beta = Gamma(*hyper_beta) 520 self.estimator = GammaPosteriorSampler()
521 522 @typedproperty(AbstractDensity)
523 - def hyper_beta():
524 pass
525 526 @typedproperty(AbstractDensity)
527 - def hyper_alpha():
528 pass
529
530 - def log_prob(self, x):
531 532 a, b = self['alpha'], self['beta'] 533 534 l_a = self._hyper_alpha(a) 535 l_b = self._hyper_beta(b) 536 537 return super(GammaPrior, self).log_prob(x) + l_a + l_b
538
539 540 541 -class InvGammaPrior(ScaleMixturePrior, InverseGamma):
542 """ 543 Inverse Gamma prior on mixture weights including a weak gamma 544 prior on its parameter. 545 """ 546
547 - def __init__(self, alpha=1., beta=1., hyper_alpha=(4., 1.), 548 hyper_beta=(2., 1.)):
549 550 super(InvGammaPrior, self).__init__(alpha, beta) 551 552 self._hyper_alpha = Gamma(*hyper_alpha) 553 self._hyper_beta = Gamma(*hyper_beta) 554 self.estimator = InvGammaPosteriorSampler()
555 556 @typedproperty(AbstractDensity)
557 - def hyper_beta():
558 pass
559 560 @typedproperty(AbstractDensity)
561 - def hyper_alpha():
562 pass
563
564 - def log_prob(self, x):
565 566 a, b = self['alpha'], self['beta'] 567 568 l_a = self._hyper_alpha(a) 569 l_b = self._hyper_beta(b) 570 571 return super(InvGammaPrior, self).log_prob(x) + l_a + l_b
572