Package csb :: Package statistics :: Module scalemixture
[frames | no frames]

Source Code for Module csb.statistics.scalemixture

  1  """ 
  2  Approximation of a distribution as a mixture of gaussians with a zero mean but different sigmas 
  3  """ 
  4   
  5  import numpy.random 
  6   
  7  import csb.core 
  8  import csb.statistics.ars 
  9  import csb.statistics.rand 
 10   
 11  from csb.core import typedproperty 
 12  from abc import abstractmethod, ABCMeta 
 13   
 14  from csb.numeric import log, exp, approx_psi, inv_psi, d_approx_psi 
 15  from scipy.special import  psi, kve 
 16  from csb.statistics import harmonic_mean, geometric_mean 
 17  from csb.statistics.pdf import AbstractEstimator, AbstractDensity, BaseDensity 
 18  from csb.statistics.pdf import Gamma, InverseGamma, NullEstimator 
19 20 21 -def inv_digamma_minus_log(y, tol=1e-10, n_iter=100):
22 """ 23 Solve y = psi(alpha) - log(alpha) for alpha by fixed point 24 integration. 25 """ 26 if y >= -log(6.): 27 x = 1 / (2 * (1 - exp(y))) 28 else: 29 x = 1.e-10 30 for _i in range(n_iter): 31 z = approx_psi(x) - log(x) - y 32 if abs(z) < tol: 33 break 34 x -= x * z / (x * d_approx_psi(x) - 1) 35 x = abs(x) 36 return x
37
38 -class ScaleMixturePriorEstimator(AbstractEstimator):
39 """ 40 Prior on the scales of a L{ScaleMixture}, which determines how the scales 41 are estimated. 42 """ 43 44 __metaclass__ = ABCMeta 45 46 @abstractmethod
47 - def get_scale_estimator(self):
48 """ 49 Return an appropriate estimator for the scales of the mixture distribution 50 under this prior. 51 """ 52 pass
53
54 55 -class ScaleMixturePrior(object):
56 """ 57 Prior on the scales of a L{ScaleMixture}, which determines how the scales 58 are estimated. 59 """ 60
61 - def __init__(self, *args):
62 super(ScaleMixturePrior, self).__init__(*args) 63 self._scale = None 64 self._scale_estimator = NullEstimator()
65 66 @property
67 - def scale_estimator(self):
68 return self._scale_estimator
69 70 @property
71 - def estimator(self):
72 return self._estimator
73 74 @estimator.setter
75 - def estimator(self, strategy):
76 if not isinstance(strategy, AbstractEstimator): 77 raise TypeError(strategy) 78 self._estimator = strategy 79 if isinstance(strategy, ScaleMixturePriorEstimator): 80 self._scale_estimator = strategy.get_scale_estimator() 81 else: 82 self._scale_estimator = NullEstimator()
83
84 85 -class ScaleMixture(BaseDensity):
86 """ 87 Robust probabilistic superposition and comparison of protein structures 88 Martin Mechelke and Michael Habeck 89 90 Represenation of a distribution as a mixture of gaussians with a mean of 91 zero and different inverse variances/scales. The number of scales equals 92 the number of datapoints. 93 94 The underlying family is determined by a prior L{ScaleMixturePrior} on the 95 scales. Choosing a L{GammaPrior} results in Stundent-t posterior, wheras a 96 L{InvGammaPrior} leads to a K-Distribution as posterior. 97 """ 98
99 - def __init__(self, scales=numpy.array([1., 1.]), prior=None, d=3):
100 101 super(ScaleMixture, self).__init__() 102 103 self._register('scales') 104 105 self._d = d 106 self.set_params(scales=scales) 107 self._prior = prior 108 109 if self._prior is not None: 110 self.estimator = self._prior.scale_estimator
111 112 @property
113 - def scales(self):
114 return numpy.squeeze(self['scales'])
115 116 @scales.setter
117 - def scales(self, value):
118 if not isinstance(value, csb.core.string) and \ 119 isinstance(value, (numpy.ndarray, list, tuple)): 120 self['scales'] = numpy.array(value) 121 else: 122 raise ValueError("numpy array expected")
123 124 @property
125 - def prior(self):
126 return self._prior
127 128 @prior.setter
129 - def prior(self, value):
130 if not isinstance(value, ScaleMixturePrior): 131 raise TypeError(value) 132 self._prior = value 133 self.estimator = self._prior.scale_estimator
134
135 - def log_prob(self, x):
136 from csb.numeric import log_sum_exp 137 138 dim = self._d 139 s = self.scales 140 141 log_p = numpy.squeeze(-numpy.multiply.outer(x * x, 0.5 * s)) + \ 142 numpy.squeeze(dim * 0.5 * (log(s) - log(2 * numpy.pi))) 143 144 if self._prior is not None: 145 log_p += numpy.squeeze(self._prior.log_prob(s)) 146 return log_sum_exp(log_p.T, 0)
147 148
149 - def random(self, shape=None):
150 151 s = self.scales 152 153 if shape is None: 154 return numpy.random.normal() * s[numpy.random.randint(len(s))] 155 156 else: 157 #n = s.shape[0] 158 nrv = numpy.random.normal(size=shape) 159 indices = numpy.random.randint(len(s), size=shape) 160 161 return s[indices] * nrv
162
163 164 -class ARSPosteriorAlpha(csb.statistics.ars.LogProb):
165 """ 166 This class represents the posterior distribution of the alpha parameter 167 of the Gamma and Inverse Gamma prior, and allows sampling using adaptive 168 rejection sampling L{ARS}. 169 """ 170
171 - def __init__(self, a, b, n):
172 173 self.a = float(a) 174 self.b = float(b) 175 self.n = float(n)
176
177 - def __call__(self, x):
178 179 from scipy.special import gammaln 180 181 return self.a * x - \ 182 self.n * gammaln(numpy.clip(x, 1e-308, 1e308)) + \ 183 self.b * log(x), \ 184 self.a - self.n * psi(x) + self.b / x
185
186 - def initial_values(self, tol=1e-10):
187 """ 188 Generate initial values by doing fixed point 189 iterations to solve for alpha 190 """ 191 n, a, b = self.n, self.a, self.b 192 193 if abs(b) < 1e-10: 194 alpha = inv_psi(a / n) 195 else: 196 alpha = 1. 197 198 z = tol + 1. 199 200 while abs(z) > tol: 201 202 z = n * psi(alpha) - \ 203 b / numpy.clip(alpha, 1e-300, 1e300) - a 204 205 alpha -= z / (n * d_approx_psi(alpha) - b 206 / (alpha ** 2 + 1e-300)) 207 alpha = numpy.clip(alpha, 1e-100, 1e300) 208 209 return numpy.clip(alpha - 1 / (n + 1e-300), 1e-100, 1e300), \ 210 alpha + 1 / (n + 1e-300), alpha
211
212 213 -class GammaPosteriorSampler(ScaleMixturePriorEstimator):
214
215 - def __init__(self):
216 super(GammaPosteriorSampler, self).__init__() 217 self.n_samples = 2
218
219 - def get_scale_estimator(self):
220 return GammaScaleSampler()
221
222 - def estimate(self, context, data):
223 """ 224 Generate samples from the posterior of alpha and beta. 225 226 For beta the posterior is a gamma distribution and analytically 227 acessible. 228 229 The posterior of alpha can not be expressed analytically and is 230 aproximated using adaptive rejection sampling. 231 """ 232 pdf = GammaPrior() 233 234 ## sufficient statistics 235 236 a = numpy.mean(data) 237 b = exp(numpy.mean(log(data))) 238 v = numpy.std(data) ** 2 239 n = len(data) 240 241 beta = a / v 242 alpha = beta * a 243 samples = [] 244 245 for _i in range(self.n_samples): 246 247 ## sample beta from Gamma distribution 248 beta = numpy.random.gamma(n * alpha + context._hyper_beta.alpha, 249 1 / (n * a + context._hyper_beta.beta)) 250 251 ## sample alpha with ARS 252 logp = ARSPosteriorAlpha(n * log(beta * b)\ 253 - context.hyper_alpha.beta, 254 context.hyper_alpha.alpha - 1., n) 255 ars = csb.statistics.ars.ARS(logp) 256 ars.initialize(logp.initial_values()[:2], z0=0.) 257 alpha = ars.sample() 258 259 if alpha is None: 260 raise ValueError("ARS failed") 261 262 samples.append((alpha, beta)) 263 264 pdf.alpha, pdf.beta = samples[-1] 265 266 return pdf
267
268 -class GammaPosteriorMAP(ScaleMixturePriorEstimator):
269
270 - def __init__(self):
271 super(GammaPosteriorMAP, self).__init__()
272
273 - def get_scale_estimator(self):
274 return GammaScaleMAP()
275
276 - def estimate(self, context, data):
277 """ 278 Estimate alpha and beta from their posterior 279 """ 280 281 pdf = GammaPrior() 282 283 s = data[0].mean() 284 y = data[1].mean() - log(s) - 1. 285 286 alpha = abs(inv_digamma_minus_log(numpy.clip(y, 287 - 1e308, 288 - 1e-300))) 289 beta = alpha / s 290 291 292 pdf.alpha, pdf.beta = alpha, beta 293 return pdf
294
295 296 297 298 -class InvGammaPosteriorSampler(ScaleMixturePriorEstimator):
299
300 - def __init__(self):
301 super(InvGammaPosteriorSampler, self).__init__() 302 self.n_samples = 2
303
304 - def get_scale_estimator(self):
305 return InvGammaScaleSampler()
306
307 - def estimate(self, context, data):
308 """ 309 Generate samples from the posterior of alpha and beta. 310 311 For beta the posterior is a gamma distribution and analytically 312 acessible. 313 314 The posterior of alpha can not be expressed analytically and is 315 aproximated using adaptive rejection sampling. 316 """ 317 pdf = GammaPrior() 318 319 ## sufficient statistics 320 321 h = harmonic_mean(numpy.clip(data, 1e-308, 1e308)) 322 g = geometric_mean(numpy.clip(data, 1e-308, 1e308)) 323 324 n = len(data) 325 326 samples = [] 327 328 a = numpy.mean(1 / data) 329 v = numpy.std(1 / data) ** 2 330 331 beta = a / v 332 alpha = beta * a 333 334 for i in range(self.n_samples): 335 336 ## sample alpha with ARS 337 338 logp = ARSPosteriorAlpha(n * (log(beta) - log(g)) - context.hyper_alpha.beta, 339 context.hyper_alpha.alpha - 1., n) 340 ars = csb.statistics.ars.ARS(logp) 341 ars.initialize(logp.initial_values()[:2], z0=0.) 342 343 alpha = numpy.abs(ars.sample()) 344 345 if alpha is None: 346 raise ValueError("Sampling failed") 347 348 349 ## sample beta from Gamma distribution 350 351 beta = numpy.random.gamma(n * alpha + context.hyper_beta.alpha, \ 352 1 / (n / h + context.hyper_beta.beta)) 353 354 samples.append((alpha, beta)) 355 356 pdf.alpha, pdf.beta = samples[-1] 357 return pdf
358
359 360 -class InvGammaPosteriorMAP(ScaleMixturePriorEstimator):
361
362 - def __init__(self):
363 super(InvGammaPosteriorMAP, self).__init__()
364
365 - def get_scale_estimator(self):
366 return InvGammaScaleMAP()
367
368 - def estimate(self, context, data):
369 """ 370 Generate samples from the posterior of alpha and beta. 371 372 For beta the posterior is a gamma distribution and analytically 373 acessible. 374 375 The posterior of alpha can not be expressed analytically and is 376 aproximated using adaptive rejection sampling. 377 """ 378 pdf = GammaPrior() 379 380 381 y = log(data).mean() - log((data ** -1).mean()) 382 383 alpha = inv_digamma_minus_log(numpy.clip(y, 384 - 1e308, 385 - 1e-300)) 386 alpha = abs(alpha) 387 388 beta = numpy.clip(alpha / 389 (data ** (-1)).mean(), 390 1e-100, 1e100) 391 392 pdf.alpha, pdf.beta = alpha, beta 393 return pdf
394
395 396 397 -class GammaScaleSampler(AbstractEstimator):
398 """ 399 Sample the scalies given the data 400 """ 401
402 - def estimate(self, context, data):
403 pdf = ScaleMixture() 404 alpha = context.prior.alpha 405 beta = context.prior.beta 406 d = context._d 407 408 if len(data.shape) == 1: 409 data = data[:, numpy.newaxis] 410 411 a = alpha + 0.5 * d * len(data.shape) 412 b = beta + 0.5 * data.sum(-1) ** 2 413 414 s = numpy.clip(numpy.random.gamma(a, 1. / b), 1e-20, 1e10) 415 pdf.scales = s 416 417 context.prior.estimate(s) 418 pdf.prior = context.prior 419 420 return pdf
421
422 423 -class GammaScaleMAP(AbstractEstimator):
424 """ 425 MAP estimator of the scales 426 """ 427
428 - def estimate(self, context, data):
429 pdf = ScaleMixture() 430 alpha = context.prior.alpha 431 beta = context.prior.beta 432 d = context._d 433 434 if len(data.shape) == 1: 435 data = data[:, numpy.newaxis] 436 437 a = alpha + 0.5 * d * len(data.shape) 438 b = beta + 0.5 * data.sum(-1) ** 2 439 440 s = a / b 441 log_s = psi(a) - log(b) 442 443 pdf.scales = s 444 context.prior.estimate([s, log_s]) 445 pdf.prior = context.prior 446 447 return pdf
448
449 450 451 -class InvGammaScaleSampler(AbstractEstimator):
452 """ 453 Sample the scales given the data 454 """ 455
456 - def estimate(self, context, data):
457 pdf = ScaleMixture() 458 alpha = context.prior.alpha 459 beta = context.prior.beta 460 d = context._d 461 462 if len(data.shape) == 1: 463 data = data[:, numpy.newaxis] 464 465 p = -alpha + 0.5 * d 466 b = 2 * beta 467 a = 1e-5 + data.sum(-1) ** 2 468 469 s = csb.statistics.rand.gen_inv_gaussian(a, b, p) 470 s = numpy.clip(s, 1e-300, 1e300) 471 472 pdf.scales = s 473 context.prior.estimate(s) 474 pdf.prior = context.prior 475 476 return pdf
477
478 479 480 -class InvGammaScaleMAP(AbstractEstimator):
481 """ 482 MAP estimator of the scales 483 """
484 - def estimate(self, context, data):
485 pdf = ScaleMixture() 486 alpha = context.prior.alpha 487 beta = context.prior.beta 488 d = context._d 489 490 if len(data.shape) == 1: 491 data = data[:, numpy.newaxis] 492 493 p = -alpha + 0.5 * d 494 b = 2 * beta 495 a = 1e-5 + data.sum(-1) ** 2 496 497 s = numpy.clip((numpy.sqrt(b) * kve(p + 1, numpy.sqrt(a * b))) 498 / (numpy.sqrt(a) * kve(p, numpy.sqrt(a * b))), 499 1e-10, 1e10) 500 pdf.scales = s 501 context.prior.estimate(s) 502 pdf.prior = context.prior 503 504 return pdf
505
506 507 508 -class GammaPrior(ScaleMixturePrior, Gamma):
509 """ 510 Gamma prior on mixture weights including a weak gamma prior on its parameter. 511 """ 512
513 - def __init__(self, alpha=1., beta=1., hyper_alpha=(4., 1.), 514 hyper_beta=(2., 1.)):
515 516 super(GammaPrior, self).__init__(alpha, beta) 517 518 self._hyper_alpha = Gamma(*hyper_alpha) 519 self._hyper_beta = Gamma(*hyper_beta) 520 self.estimator = GammaPosteriorSampler()
521 522 @typedproperty(AbstractDensity)
523 - def hyper_beta():
524 pass
525 526 @typedproperty(AbstractDensity)
527 - def hyper_alpha():
528 pass
529
530 - def log_prob(self, x):
531 532 a, b = self['alpha'], self['beta'] 533 534 l_a = self._hyper_alpha(a) 535 l_b = self._hyper_beta(b) 536 537 return super(GammaPrior, self).log_prob(x) + l_a + l_b
538
539 540 541 -class InvGammaPrior(ScaleMixturePrior, InverseGamma):
542 """ 543 Inverse Gamma prior on mixture weights including a weak gamma 544 prior on its parameter. 545 """ 546
547 - def __init__(self, alpha=1., beta=1., hyper_alpha=(4., 1.), 548 hyper_beta=(2., 1.)):
549 550 super(InvGammaPrior, self).__init__(alpha, beta) 551 552 self._hyper_alpha = Gamma(*hyper_alpha) 553 self._hyper_beta = Gamma(*hyper_beta) 554 self.estimator = InvGammaPosteriorSampler()
555 556 @typedproperty(AbstractDensity)
557 - def hyper_beta():
558 pass
559 560 @typedproperty(AbstractDensity)
561 - def hyper_alpha():
562 pass
563
564 - def log_prob(self, x):
565 566 a, b = self['alpha'], self['beta'] 567 568 l_a = self._hyper_alpha(a) 569 l_b = self._hyper_beta(b) 570 571 return super(InvGammaPrior, self).log_prob(x) + l_a + l_b
572