1 """
2 Probability density functions.
3
4 This module defines L{AbstractDensity}: a common interface for all PDFs.
5 Each L{AbstractDensity} describes a specific type of probability distribution,
6 for example L{Normal} is an implementation of the Gaussian distribution:
7
8 >>> pdf = Normal(mu=10, sigma=1.1)
9 >>> pdf.mu, pdf['sigma']
10 10.0, 1.1
11
12 Every PDF provides an implementation of the L{AbstractDensity.evaluate}
13 method, which evaluates the PDF for a list of input data points:
14
15 >>> pdf.evaluate([10, 9, 11, 12])
16 array([ 0.3626748 , 0.2399147 , 0.2399147 , 0.06945048])
17
18 PDF instances also behave like functions:
19
20 >>> pdf(data) # the same as pdf.evaluate(data)
21
22 Some L{AbstractDensity} implementations may support drawing random numbers from
23 the distribution (or raise an exception otherwise):
24
25 >>> pdf.random(2)
26 array([ 9.86257083, 9.73760515])
27
28 Each implementation of L{AbstractDensity} may support infinite number of estimators,
29 used to estimate and re-initialize the PDF parameters from a set of observed data
30 points:
31
32 >>> pdf.estimate([5, 5, 10, 10])
33 >>> pdf.mu, pdf.sigma
34 (7.5, 2.5)
35 >>> pdf.estimator
36 <csb.statistics.pdf.GaussianMLEstimator>
37
38 Estimators implement the L{AbstractEstimator} interface. They are treated as
39 pluggable tools, which can be exchanged through the L{AbstractDensity.estimator}
40 property (you could create, initialize and plug your own estimator as well).
41 This is a classic Strategy pattern.
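For example, plugging the parameter-free L{NullEstimator} disables estimation
entirely (an illustrative sketch, reusing the C{pdf} object from above):

>>> pdf.estimator = NullEstimator()
>>> pdf.estimate([5, 5, 10, 10])
Traceback (most recent call last):
    ...
NotImplementedError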
42 """

import numpy.random
import scipy.special
import csb.core

from abc import ABCMeta, abstractmethod
from csb.core import OrderedDict

from csb.numeric import log, exp, psi, inv_psi, EULER_MASCHERONI
from scipy.special import gammaln
from numpy import array, fabs, power, sqrt, pi, mean, median, clip


class IncompatibleEstimatorError(TypeError):
    pass

class ParameterNotFoundError(AttributeError):
    pass

class ParameterValueError(ValueError):

    def __init__(self, param, value):

        self.param = param
        self.value = value

        super(ParameterValueError, self).__init__(param, value)

class EstimationFailureError(ParameterValueError):
    pass

78 """
79 Density parameter estimation strategy.
80 """
81
82 __metaclass__ = ABCMeta
83
84 @abstractmethod
86 """
87 Estimate the parameters of the distribution from same {data}.
88
89 @param context: context distribution
90 @type context: L{AbstractDensity}
91 @param data: sample values
92 @type data: array
93
94 @return: a new distribution, initialized with the estimated parameters
95 @rtype: L{AbstractDensity}
96
97 @raise EstimationFailureError: if estimation is not possible
98 """
99 pass
100
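# Illustrative sketch of a concrete strategy (the name "SampleMedianEstimator"
# is hypothetical and not part of this module): a custom estimator only needs
# to implement estimate() and return a freshly parameterized PDF, e.g.
#
#     class SampleMedianEstimator(AbstractEstimator):
#
#         def estimate(self, context, data):
#             x = array(data)
#             return Laplace(median(x), mean(fabs(x - median(x))))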
class NullEstimator(AbstractEstimator):
    """
    Does not estimate anything.
    """
    def estimate(self, context, data):
        raise NotImplementedError()

class LaplaceMLEstimator(AbstractEstimator):

    def estimate(self, context, data):

        x = array(data)

        # ML estimates: location = sample median, scale = mean absolute deviation
        mu = median(x)
        b = mean(fabs(x - mu))

        return Laplace(mu, b)

class GaussianMLEstimator(AbstractEstimator):

    def estimate(self, context, data):

        x = array(data)

        # ML estimates: sample mean and (biased) sample standard deviation
        mu = mean(x)
        sigma = sqrt(mean((x - mu) ** 2))

        return Normal(mu, sigma)

class InverseGaussianMLEstimator(AbstractEstimator):

    def estimate(self, context, data):

        x = array(data)

        # ML estimates: mu = sample mean, 1 / shape = mean(1/x - 1/mu)
        mu = mean(x)
        il = mean((1. / x) - (1. / mu))

        return InverseGaussian(mu, 1. / il)

class GammaMLEstimator(AbstractEstimator):

    def __init__(self):
        super(GammaMLEstimator, self).__init__()
        self.n_iter = 1000          # iteration cap (assumed default)

    def estimate(self, context, data):

        # ML shape via inverse-digamma fixed point (Minka-style update),
        # then the rate follows from the mean: beta = alpha / mean
        mu = mean(data)
        logmean = mean(log(data))

        a = 0.5 / (log(mu) - logmean)

        for dummy in range(self.n_iter):
            a = inv_psi(logmean - log(mu) + log(a))

        return Gamma(a, a / mu)

class GenNormalBruteForceEstimator(AbstractEstimator):

    def __init__(self, minbeta=0.5, maxbeta=8.0, step=0.1):    # grid defaults assumed

        self._minbeta = minbeta
        self._maxbeta = maxbeta
        self._step = step

        super(GenNormalBruteForceEstimator, self).__init__()

    def estimate(self, context, data):

        pdf = GeneralizedNormal(1, 1, 1)
        data = array(data)
        logl = []

        # brute force: scan the beta grid, scoring each candidate by log-likelihood
        for beta in numpy.arange(self._minbeta, self._maxbeta, self._step):

            self.update(pdf, data, beta)

            l = pdf.log_prob(data).sum()
            logl.append([beta, l])

        logl = numpy.array(logl)

        # optimal parameters:
        beta = logl[ numpy.argmax(logl[:, 1]) ][0]
        self.update(pdf, data, beta)

        return pdf

    def estimate_with_fixed_beta(self, data, beta):

        # for a fixed beta: mu = median, alpha from the variance identity
        # Var = alpha^2 * Gamma(3/beta) / Gamma(1/beta)
        mu = median(data)
        v = mean((data - mu) ** 2)
        alpha = sqrt(v * exp(gammaln(1. / beta) - gammaln(3. / beta)))

        return mu, alpha

    def update(self, pdf, data, beta):

        mu, alpha = self.estimate_with_fixed_beta(data, beta)

        pdf.mu = mu
        pdf.alpha = alpha
        pdf.beta = beta

        return pdf

class MultivariateGaussianMLEstimator(AbstractEstimator):

    def __init__(self):
        super(MultivariateGaussianMLEstimator, self).__init__()

    def estimate(self, context, data):
        return MultivariateGaussian(numpy.mean(data, 0), numpy.cov(data.T))

class DirichletEstimator(AbstractEstimator):

    def __init__(self):
        super(DirichletEstimator, self).__init__()
        self.n_iter = 1000      # iteration cap (assumed default)
        self.tol = 1e-5         # convergence threshold (assumed default)

    def estimate(self, context, data):

        log_p = numpy.mean(log(data), 0)

        # moment matching for the initial concentration guess
        e = numpy.mean(data, 0)
        v = numpy.mean(data ** 2, 0)
        q = (e[0] - v[0]) / (v[0] - e[0] ** 2)

        a = e * q
        y = a * 0
        k = 0
        # fixed-point iteration: a <- inv_psi(psi(sum(a)) + mean log data)
        while sum(abs(y - a)) > self.tol and k < self.n_iter:
            y = psi(sum(a)) + log_p
            a = numpy.array(list(map(inv_psi, y)))
            k += 1

        return Dirichlet(a)

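# Usage sketch for the fixed-point iteration above (Minka-style maximum
# likelihood for the Dirichlet; the synthetic sample is illustrative):
#
#     pdf = Dirichlet([1., 1., 1.])
#     pdf.estimate(numpy.random.dirichlet([2., 3., 4.], size=100))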
class GumbelMinMomentsEstimator(AbstractEstimator):

    def estimate(self, context, data):

        x = array(data)

        # moment matching: Var = (pi * beta)^2 / 6 and E[x] = mu - EULER * beta
        beta = sqrt(6 * numpy.var(x)) / pi
        mu = mean(x) + EULER_MASCHERONI * beta

        return GumbelMinimum(mu, beta)

270 """
271 Defines the interface and common operations for all probability density
272 functions. This is a generic class which can operate on parameters of
273 any type (e.g. simple floats or custom parameter objects).
274
275 Subclasses must complete the implementation by implementing the
276 L{AbstractDensity.log_prob} method. Subclasses could also consider--but
277 are not obliged to--override the L{AbstractDensity.random} method. If
278 any of the density parameters need validation, subclasses are expected to
279 override the L{AbstractDensity._validate} method and raise
280 L{ParameterValueError} on validation failure. Note that implementing
281 parameter validation in property setters has almost no effect and is
282 discouraged.
283 """
284
285 __metaclass__ = ABCMeta
286
287
289
290 self._params = OrderedDict()
291 self._estimator = None
292
293 self.estimator = NullEstimator()
294
    def __getitem__(self, param):

        if param in self._params:
            return self._params[param]
        else:
            raise ParameterNotFoundError(param)

    def __setitem__(self, param, value):

        if param in self._params:
            self._validate(param, value)
            self._set(param, value)
        else:
            raise ParameterNotFoundError(param)

    def __call__(self, x):
        return self.evaluate(x)

    @property
    def estimator(self):
        return self._estimator
    @estimator.setter
    def estimator(self, strategy):
        if not isinstance(strategy, AbstractEstimator):
            raise TypeError(strategy)
        self._estimator = strategy

    def __repr__(self):
        name = self.__class__.__name__
        params = ', '.join([ '{0}={1}'.format(p, v) for p, v in self._params.items() ])

        return '{0}({1})'.format(name, params)

    def _register(self, name):
        """
        Register a new parameter name.
        """
        if name not in self._params:
            self._params[name] = None

342 """
343 Parameter value validation hook.
344 @raise ParameterValueError: on failed validation (value not accepted)
345 """
346 pass
347
350
352
        for p, v in zip(self.parameters, values):
            self[p] = v

        for p in named_params:
            self[p] = named_params[p]

    @property
    def parameters(self):
        """
        Get a list of all distribution parameter names.
        """
        return tuple(self._params)

    @abstractmethod
    def log_prob(self, x):
        """
        Evaluate the logarithm of the probability of observing values C{x}.

        @param x: values
        @type x: array
        @rtype: array
        """
        pass

378 """
379 Evaluate the probability of observing values C{x}.
380
381 @param x: values
382 @type x: array
383 @rtype: array
384 """
385 x = numpy.array(x)
386 return exp(self.log_prob(x))
387
389 """
390 Generate random samples from the probability distribution.
391
392 @param size: number of values to sample
393 @type size: int
394 """
395 raise NotImplementedError()
396
398 """
399 Estimate and load the parameters of the distribution from sample C{data}
400 using the current L{AbstractEstimator} strategy.
401
402 @param data: sample values
403 @type data: array
404
405 @raise NotImplementedError: when no estimator is available for this
406 distribution
407 @raise IncompatibleEstimatorError: when the current estimator is not
408 compatible with this pdf
409 """
410
411 try:
412 pdf = self.estimator.estimate(self, data)
413
414 for param in pdf.parameters:
415 self[param] = pdf[param]
416
417 except ParameterNotFoundError as e:
418 raise IncompatibleEstimatorError(self.estimator)
419
420 except ParameterValueError as e:
421 raise EstimationFailureError(e.param, e.value)
422
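    # Illustrative failure mode: estimators are pluggable, but every estimated
    # parameter must exist on the target PDF, e.g. (sketch):
    #
    #     pdf = Laplace()
    #     pdf.estimator = GaussianMLEstimator()   # estimates mu and sigma
    #     pdf.estimate([1, 2, 3])                 # raises IncompatibleEstimatorError:
    #                                             # Laplace has no 'sigma' parameter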
425 """
426 Base abstract class for all PDFs, which operate on simple float
427 or array-of-float parameters. Parameters of any other type will trigger
428 TypeError-s.
429 """
430
442
class Laplace(BaseDensity):

    def __init__(self, mu=0, b=1):

        super(Laplace, self).__init__()

        self._register('mu')
        self._register('b')

        self.set_params(b=b, mu=mu)
        self.estimator = LaplaceMLEstimator()

    def _validate(self, param, value):

        if param == 'b' and value <= 0:
            raise ParameterValueError(param, value)

    @property
    def b(self):
        return self['b']
    @b.setter
    def b(self, value):
        self['b'] = value

    @property
    def mu(self):
        return self['mu']
    @mu.setter
    def mu(self, value):
        self['mu'] = value

    def log_prob(self, x):

        mu = self.mu
        b = self.b

        # Laplace log-density: log(1 / 2b) - |x - mu| / b
        return log(1 / (2. * b)) - fabs(x - mu) / b

    def random(self, size=None):

        loc = self.mu
        scale = self.b

        return numpy.random.laplace(loc, scale, size)

class Normal(BaseDensity):

    def __init__(self, mu=0, sigma=1):

        super(Normal, self).__init__()

        self._register('mu')
        self._register('sigma')

        self.set_params(mu=mu, sigma=sigma)
        self.estimator = GaussianMLEstimator()

    @property
    def mu(self):
        return self['mu']
    @mu.setter
    def mu(self, value):
        self['mu'] = value

    @property
    def sigma(self):
        return self['sigma']
    @sigma.setter
    def sigma(self, value):
        self['sigma'] = value

    def log_prob(self, x):

        mu = self.mu
        sigma = self.sigma

        return log(1.0 / sqrt(2 * pi * sigma ** 2)) - (x - mu) ** 2 / (2 * sigma ** 2)

    def random(self, size=None):

        mu = self.mu
        sigma = self.sigma

        return numpy.random.normal(mu, sigma, size)

class InverseGaussian(BaseDensity):

    def __init__(self, mu=1, shape=1):

        super(InverseGaussian, self).__init__()

        self._register('mu')
        self._register('shape')

        self.set_params(mu=mu, shape=shape)
        self.estimator = InverseGaussianMLEstimator()

    def _validate(self, param, value):

        if value <= 0:
            raise ParameterValueError(param, value)

    @property
    def mu(self):
        return self['mu']
    @mu.setter
    def mu(self, value):
        self['mu'] = value

    @property
    def shape(self):
        return self['shape']
    @shape.setter
    def shape(self, value):
        self['shape'] = value

    def log_prob(self, x):

        mu = self.mu
        scale = self.shape
        x = numpy.array(x)

        if numpy.min(x) <= 0:
            raise ValueError('InverseGaussian is defined for x > 0')

        y = -0.5 * scale * (x - mu) ** 2 / (mu ** 2 * x)
        z = 0.5 * (log(scale) - log(2 * pi * x ** 3))
        return z + y

    def random(self, size=None):

        # transformation method of Michael, Schucany and Haas
        mu = self.mu
        shape = self.shape

        mu_2l = mu / shape / 2.
        Y = numpy.random.standard_normal(size)
        Y = mu * Y ** 2
        X = mu + mu_2l * (Y - sqrt(4 * shape * Y + Y ** 2))
        U = numpy.random.random(size)

        # accept X with probability mu / (mu + X), else take mu^2 / X
        m = numpy.less_equal(U, mu / (mu + X))

        return m * X + (1 - m) * mu ** 2 / X

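    # Sanity-check sketch for the sampler above: the first sample moment
    # should approach mu for a large enough sample, e.g.:
    #
    #     ig = InverseGaussian(mu=2., shape=3.)
    #     numpy.mean(ig.random(100000))    # ~= 2.0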
class GeneralizedNormal(BaseDensity):

    def __init__(self, mu=0, alpha=1, beta=1):

        super(GeneralizedNormal, self).__init__()

        self._register('mu')
        self._register('alpha')
        self._register('beta')

        self.set_params(mu=mu, alpha=alpha, beta=beta)
        self.estimator = GenNormalBruteForceEstimator()

    def _validate(self, param, value):

        if param in ('alpha', 'beta') and value <= 0:
            raise ParameterValueError(param, value)

    @property
    def mu(self):
        return self['mu']
    @mu.setter
    def mu(self, value):
        self['mu'] = value

    @property
    def alpha(self):
        return self['alpha']
    @alpha.setter
    def alpha(self, value):
        self['alpha'] = value

    @property
    def beta(self):
        return self['beta']
    @beta.setter
    def beta(self, value):
        self['beta'] = value

    def log_prob(self, x):

        mu = self.mu
        alpha = self.alpha
        beta = self.beta

        # log of: beta / (2 alpha Gamma(1/beta)) * exp(-(|x - mu| / alpha)^beta)
        return log(beta / (2.0 * alpha)) - gammaln(1. / beta) - \
               power(fabs(x - mu) / alpha, beta)

class GeneralizedInverseGaussian(BaseDensity):

    def __init__(self, a=1, b=1, p=1):
        super(GeneralizedInverseGaussian, self).__init__()

        self._register('a')
        self._register('b')
        self._register('p')
        self.set_params(a=a, b=b, p=p)

        self.estimator = NullEstimator()

    def _validate(self, param, value):

        if value <= 0:
            raise ParameterValueError(param, value)

    @property
    def a(self):
        return self['a']
    @a.setter
    def a(self, value):
        self['a'] = value

    @property
    def b(self):
        return self['b']
    @b.setter
    def b(self, value):
        self['b'] = value

    @property
    def p(self):
        return self['p']
    @p.setter
    def p(self, value):
        self['p'] = value

    def log_prob(self, x):

        a = self['a']
        b = self['b']
        p = self['p']

        # normalizer: (a/b)^(p/2) / (2 K_p(sqrt(a b))), K = modified Bessel, 2nd kind
        lz = 0.5 * p * (log(a) - log(b)) - log(2 * scipy.special.kv(p, sqrt(a * b)))

        return lz + (p - 1) * log(x) - 0.5 * (a * x + b / x)

    def random(self, size=None):

        from csb.statistics.rand import inv_gaussian

        rvs = []
        burnin = 10
        a = self['a']
        b = self['b']
        p = self['p']

        s = a * 0. + 1.

        if p < 0:
            a, b = b, a

        if size is None:
            size = 1
        for i in range(int(size)):
            # short Gibbs chain: x | s is inverse Gaussian, s | x is gamma
            for j in range(burnin):

                l = b + 2 * s
                m = sqrt(l / a)

                x = inv_gaussian(m, l, shape=m.shape)
                s = numpy.random.gamma(abs(p) + 0.5, x)

            if p >= 0:
                rvs.append(x)
            else:
                rvs.append(1 / x)

        return numpy.array(rvs)

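    # Usage sketch: each returned variate comes from its own short burn-in
    # chain, so sampling is comparatively expensive but independent, e.g.:
    #
    #     gig = GeneralizedInverseGaussian(a=2., b=1., p=1.5)
    #     samples = gig.random(1000)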
class Gamma(BaseDensity):

    def __init__(self, alpha=1, beta=1):
        super(Gamma, self).__init__()

        self._register('alpha')
        self._register('beta')

        self.set_params(alpha=alpha, beta=beta)
        self.estimator = GammaMLEstimator()

    def _validate(self, param, value):

        if value <= 0:
            raise ParameterValueError(param, value)

    @property
    def alpha(self):
        return self['alpha']
    @alpha.setter
    def alpha(self, value):
        self['alpha'] = value

    @property
    def beta(self):
        return self['beta']
    @beta.setter
    def beta(self, value):
        self['beta'] = value

    def log_prob(self, x):

        a, b = self['alpha'], self['beta']

        # clip guards against overflow in gammaln / log at the support boundary
        return a * log(b) - gammaln(clip(a, 1e-308, 1e308)) + \
               (a - 1) * log(clip(x, 1e-308, 1e308)) - b * x

class InverseGamma(BaseDensity):

    def __init__(self, alpha=1, beta=1):
        super(InverseGamma, self).__init__()

        self._register('alpha')
        self._register('beta')

        self.set_params(alpha=alpha, beta=beta)
        self.estimator = NullEstimator()

    def _validate(self, param, value):

        if value <= 0:
            raise ParameterValueError(param, value)

    @property
    def alpha(self):
        return self['alpha']
    @alpha.setter
    def alpha(self, value):
        self['alpha'] = value

    @property
    def beta(self):
        return self['beta']
    @beta.setter
    def beta(self, value):
        self['beta'] = value

    def log_prob(self, x):
        a, b = self['alpha'], self['beta']
        return a * log(b) - gammaln(a) - (a + 1) * log(x) - b / x

class MultivariateGaussian(Normal):

    def __init__(self, mu=numpy.zeros(2), sigma=numpy.eye(2)):    # defaults assumed

        super(MultivariateGaussian, self).__init__(mu, sigma)
        self.estimator = MultivariateGaussianMLEstimator()

    def log_prob(self, x):

        from numpy.linalg import det

        mu = self.mu
        S = self.sigma
        D = len(mu)
        q = self.__q(x)
        return -0.5 * (D * log(2 * pi) + log(abs(det(S)))) - 0.5 * q ** 2

    def __q(self, x):
        from numpy import sum, dot, reshape
        from numpy.linalg import inv

        mu = self.mu
        S = self.sigma

        # Mahalanobis distance of x from mu under covariance S
        return sqrt(clip(sum(reshape((x - mu) * dot(x - mu, inv(S).T.squeeze()), (-1, len(mu))), -1), 0., 1e308))

823 """
824 Return the distribution along the dimensions
825 dims conditioned on x
826
827 @param x: conditional values
828 @param dims: new dimensions
829 """
830 from numpy import take, dot
831 from numpy.linalg import inv
832
833 dims2 = [i for i in range(self['mu'].shape[0]) if not i in dims]
834
835 mu1 = take(self['mu'], dims)
836 mu2 = take(self['mu'], dims2)
837
838 # x1 = take(x, dims)
839 x2 = take(x, dims2)
840
841 A = take(take(self['Sigma'], dims, 0), dims, 1)
842 B = take(take(self['Sigma'], dims2, 0), dims2, 1)
843 C = take(take(self['Sigma'], dims, 0), dims2, 1)
844
845 mu = mu1 + dot(C, dot(inv(B), x2 - mu2))
846 Sigma = A - dot(C, dot(inv(B), C.T))
847
848 return MultivariateGaussian((mu, Sigma))
849
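    # Conditioning sketch: keep dimension 0 of a 2D standard Gaussian,
    # condition on an observed value in dimension 1 (only the second
    # component of x is actually used):
    #
    #     mvn = MultivariateGaussian(numpy.zeros(2), numpy.eye(2))
    #     cond = mvn.conditional(numpy.array([0., 1.]), dims=[0])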
class Dirichlet(BaseDensity):

    def __init__(self, alpha):
        super(Dirichlet, self).__init__()

        self._register('alpha')

        self.set_params(alpha=alpha)
        self.estimator = DirichletEstimator()

    @property
    def alpha(self):
        return self['alpha']

    @alpha.setter
    def alpha(self, value):
        self['alpha'] = numpy.ravel(value)

    def log_prob(self, x):
        #TODO: check whether x is in the probability simplex
        alpha = self.alpha
        return gammaln(sum(alpha)) - sum(gammaln(alpha)) \
               + numpy.dot((alpha - 1).T, log(x).T)

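    # Usage sketch (x must lie on the probability simplex, see the TODO above):
    #
    #     d = Dirichlet([2., 3., 4.])
    #     d.log_prob(numpy.array([0.2, 0.3, 0.5]))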
class GumbelMinimum(BaseDensity):

    def __init__(self, mu=0, beta=1):
        super(GumbelMinimum, self).__init__()

        self._register('mu')
        self._register('beta')

        self.set_params(mu=mu, beta=beta)
        self.estimator = GumbelMinMomentsEstimator()

    def _validate(self, param, value):

        if param == 'beta' and value <= 0:
            raise ParameterValueError(param, value)

    @property
    def mu(self):
        return self['mu']
    @mu.setter
    def mu(self, value):
        self['mu'] = value

    @property
    def beta(self):
        return self['beta']
    @beta.setter
    def beta(self, value):
        self['beta'] = value

    def log_prob(self, x):

        mu = self.mu
        beta = self.beta

        # minimum-Gumbel log-density: log(1/beta) + z - exp(z)
        z = (x - mu) / beta
        return log(1. / beta) + z - exp(z)
