1 """
2 Probability density functions.
3
4 This module defines L{AbstractDensity}: a common interface for all PDFs.
5 Each L{AbstractDensity} describes a specific type of probability distribution,
6 for example L{Normal} is an implementation of the Gaussian distribution:
7
8 >>> pdf = Normal(mu=10, sigma=1.1)
9 >>> pdf.mu, pdf['sigma']
10 10.0, 1.1
11
12 Every PDF provides an implementation of the L{AbstractDensity.evaluate}
13 method, which evaluates the PDF for a list of input data points:
14
15 >>> pdf.evaluate([10, 9, 11, 12])
16 array([ 0.3626748 , 0.2399147 , 0.2399147 , 0.06945048])
17
18 PDF instances also behave like functions:
19
20 >>> pdf(data) # the same as pdf.evaluate(data)
21
22 Some L{AbstractDensity} implementations may support drawing random numbers from
23 the distribution (or raise an exception otherwise):
24
25 >>> pdf.random(2)
26 array([ 9.86257083, 9.73760515])
27
28 Each implementation of L{AbstractDensity} may support infinite number of estimators,
29 used to estimate and re-initialize the PDF parameters from a set of observed data
30 points:
31
32 >>> pdf.estimate([5, 5, 10, 10])
33 >>> pdf.mu, pdf.sigma
34 (7.5, 2.5)
35 >>> pdf.estimator
36 <csb.statistics.pdf.GaussianMLEstimator>
37
38 Estimators implement the L{AbstractEstimator} interface. They are treated as
39 pluggable tools, which can be exchanged through the L{AbstractDensity.estimator}
40 property (you could create, initialize and plug your own estimator as well).
41 This is a classic Strategy pattern.
42 """

import numpy.random
import scipy.special
import csb.core

from abc import ABCMeta, abstractmethod
from csb.core import OrderedDict

from csb.numeric import log, exp, psi, inv_psi, EULER_MASCHERONI
from scipy.special import gammaln
from numpy import array, fabs, power, sqrt, pi, mean, median, clip


class AbstractEstimator(object):
    """
    Density parameter estimation strategy.
    """

    __metaclass__ = ABCMeta

    @abstractmethod
    def estimate(self, context, data):
        """
        Estimate the parameters of the distribution from sample C{data}.

        @param context: context distribution
        @type context: L{AbstractDensity}
        @param data: sample values
        @type data: array

        @return: a new distribution, initialized with the estimated parameters
        @rtype: L{AbstractDensity}

        @raise EstimationFailureError: if estimation is not possible
        """
        pass

class NullEstimator(AbstractEstimator):
    """
    Does not estimate anything.
    """
    def estimate(self, context, data):
        raise NotImplementedError()

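# For illustration only: a concrete estimator is a small strategy class that
# maps observed data to a new, fitted density. The example below (a
# hypothetical class, not part of the library API) mimics what a maximum
# likelihood estimator for L{Normal}, such as the GaussianMLEstimator
# referenced in the module docstring, is expected to do.

class ExampleNormalMLEstimator(AbstractEstimator):
    """
    Illustrative maximum-likelihood estimator for the Normal density.
    """

    def estimate(self, context, data):

        x = array(data)

        mu = mean(x)
        sigma = sqrt(mean((x - mu) ** 2))

        return Normal(mu=mu, sigma=sigma)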

class GenNormalBruteForceEstimator(AbstractEstimator):

    def __init__(self, minbeta=0.5, maxbeta=8.0, step=0.1):

        self._minbeta = minbeta
        self._maxbeta = maxbeta
        self._step = step

    def estimate(self, context, data):

        pdf = GeneralizedNormal(1, 1, 1)
        data = array(data)
        logl = []

        # brute-force scan over the shape parameter beta: for each candidate
        # beta, fit the remaining parameters and record the total log-likelihood
        for beta in numpy.arange(self._minbeta, self._maxbeta, self._step):

            self.update(pdf, data, beta)

            l = pdf.log_prob(data).sum()
            logl.append([beta, l])

        logl = numpy.array(logl)

        # refit with the beta that maximizes the log-likelihood
        beta = logl[numpy.argmax(logl[:, 1])][0]
        self.update(pdf, data, beta)

        return pdf

    def update(self, pdf, data, beta):
        """
        Estimate C{mu} and C{alpha} for the given fixed C{beta} and update
        the parameters of C{pdf} in place.
        """


class DirichletEstimator(AbstractEstimator):

    def estimate(self, context, data):

        # sufficient statistics of the observed proportions
        log_p = numpy.mean(log(data), 0)

        e = numpy.mean(data, 0)
        v = numpy.mean(data ** 2, 0)
        q = (e[0] - v[0]) / (v[0] - e[0] ** 2)

        # moment-matching initial guess for alpha, followed by a fixed-point
        # iteration on the digamma equation psi(a_k) = psi(sum(a)) + E[log p_k];
        # self.tol and self.n_iter are the estimator's convergence settings
        a = e * q
        y = a * 0
        k = 0
        while sum(abs(y - a)) > self.tol and k < self.n_iter:
            y = psi(sum(a)) + log_p
            a = numpy.array(list(map(inv_psi, y)))
            k += 1

        return Dirichlet(a)


class AbstractDensity(object):
    """
    Defines the interface and common operations for all probability density
    functions. This is a generic class which can operate on parameters of
    any type (e.g. simple floats or custom parameter objects).

    Subclasses must complete the implementation by implementing the
    L{AbstractDensity.log_prob} method. Subclasses may also override the
    L{AbstractDensity.random} method, but are not obliged to. If any of the
    density parameters need validation, subclasses are expected to override
    the L{AbstractDensity._validate} method and raise L{ParameterValueError}
    on validation failure. Note that implementing parameter validation in
    property setters has almost no effect and is discouraged. A minimal
    example subclass is sketched at the end of this module.
    """

    __metaclass__ = ABCMeta

    def _set(self, param, value):
        """
        Update the C{value} of C{param}.
        """
        self._params[param] = value

    @property
    def estimator(self):
        return self._estimator
    @estimator.setter
    def estimator(self, strategy):
        if not isinstance(strategy, AbstractEstimator):
            raise TypeError(strategy)
        self._estimator = strategy

    def __call__(self, x):
        return self.evaluate(x)

    def _register(self, name):
        """
        Register a new parameter name.
        """
        if name not in self._params:
            self._params[name] = None

    def _validate(self, param, value):
        """
        Parameter value validation hook.
        @raise ParameterValueError: on failed validation (value not accepted)
        """
        pass

    def set_params(self, *values, **named_params):

        for p, v in zip(self.parameters, values):
            self[p] = v

        for p in named_params:
            self[p] = named_params[p]

    @property
    def parameters(self):
        """
        Get a list of all distribution parameter names.
        """
        return tuple(self._params)

    @abstractmethod
    def log_prob(self, x):
        """
        Evaluate the logarithm of the probability of observing values C{x}.

        @param x: values
        @type x: array
        @rtype: array
        """
        pass

    def evaluate(self, x):
        """
        Evaluate the probability of observing values C{x}.

        @param x: values
        @type x: array
        @rtype: array
        """
        x = numpy.array(x)
        return exp(self.log_prob(x))

    def random(self, size=None):
        """
        Generate random samples from the probability distribution.

        @param size: number of values to sample
        @type size: int
        """
        raise NotImplementedError()


class BaseDensity(AbstractDensity):
    """
    Base abstract class for all PDFs which operate on simple float
    or array-of-float parameters. Parameters of any other type will trigger
    a TypeError.
    """

    def _set(self, param, value):
        """
        Coerce C{value} to a float or an array of floats before storing it;
        any other parameter type triggers a TypeError.
        """


class Laplace(BaseDensity):

    @property
    def b(self):
        return self['b']
    @b.setter
    def b(self, value):
        self['b'] = value

    @property
    def mu(self):
        return self['mu']
    @mu.setter
    def mu(self, value):
        self['mu'] = value


class Normal(BaseDensity):

    @property
    def mu(self):
        return self['mu']
    @mu.setter
    def mu(self, value):
        self['mu'] = value

    @property
    def sigma(self):
        return self['sigma']
    @sigma.setter
    def sigma(self, value):
        self['sigma'] = value


class InverseGaussian(BaseDensity):

    @property
    def mu(self):
        return self['mu']
    @mu.setter
    def mu(self, value):
        self['mu'] = value

    @property
    def shape(self):
        return self['shape']
    @shape.setter
    def shape(self, value):
        self['shape'] = value

    def log_prob(self, x):

        mu = self.mu
        scale = self.shape
        x = numpy.array(x)

        if numpy.min(x) <= 0:
            raise ValueError('InverseGaussian is defined for x > 0')

        y = -0.5 * scale * (x - mu) ** 2 / (mu ** 2 * x)
        z = 0.5 * (log(scale) - log(2 * pi * x ** 3))
        return z + y


class GeneralizedNormal(BaseDensity):

    def __init__(self, mu=0, alpha=1, beta=1):

        super(GeneralizedNormal, self).__init__()

        self._register('mu')
        self._register('alpha')
        self._register('beta')
        self.set_params(mu=mu, alpha=alpha, beta=beta)

    @property
    def mu(self):
        return self['mu']
    @mu.setter
    def mu(self, value):
        self['mu'] = value

    @property
    def alpha(self):
        return self['alpha']
    @alpha.setter
    def alpha(self, value):
        self['alpha'] = value

    @property
    def beta(self):
        return self['beta']
    @beta.setter
    def beta(self, value):
        self['beta'] = value


class GeneralizedInverseGaussian(BaseDensity):

    @property
    def a(self):
        return self['a']
    @a.setter
    def a(self, value):
        self['a'] = value

    @property
    def b(self):
        return self['b']
    @b.setter
    def b(self, value):
        self['b'] = value

    @property
    def p(self):
        return self['p']
    @p.setter
    def p(self, value):
        self['p'] = value

    def log_prob(self, x):

        a = self['a']
        b = self['b']
        p = self['p']

        lz = 0.5 * p * (log(a) - log(b)) - log(2 * scipy.special.kv(p, sqrt(a * b)))

        return lz + (p - 1) * log(x) - 0.5 * (a * x + b / x)


class Gamma(BaseDensity):

    @property
    def alpha(self):
        return self['alpha']
    @alpha.setter
    def alpha(self, value):
        self['alpha'] = value

    @property
    def beta(self):
        return self['beta']
    @beta.setter
    def beta(self, value):
        self['beta'] = value

    def log_prob(self, x):

        a, b = self['alpha'], self['beta']

        return a * log(b) - gammaln(clip(a, 1e-308, 1e308)) + \
               (a - 1) * log(clip(x, 1e-308, 1e308)) - b * x


class InverseGamma(BaseDensity):

    @property
    def alpha(self):
        return self['alpha']
    @alpha.setter
    def alpha(self, value):
        self['alpha'] = value

    @property
    def beta(self):
        return self['beta']
    @beta.setter
    def beta(self, value):
        self['beta'] = value

    def log_prob(self, x):

        a, b = self['alpha'], self['beta']
        return a * log(b) - gammaln(a) - (a + 1) * log(x) - b / x


class MultivariateGaussian(Normal):

    def __init__(self, mu=numpy.zeros(2), sigma=numpy.eye(2)):

        super(MultivariateGaussian, self).__init__(mu, sigma)

    def log_prob(self, x):

        from numpy.linalg import det

        mu = self.mu
        S = self.sigma
        D = len(mu)
        q = self.__q(x)
        return -0.5 * (D * log(2 * pi) + log(abs(det(S)))) - 0.5 * q ** 2

    def __q(self, x):

        from numpy import sum, dot, reshape
        from numpy.linalg import inv

        mu = self.mu
        S = self.sigma

        return sqrt(clip(sum(reshape((x - mu) * dot(x - mu, inv(S).T.squeeze()), (-1, len(mu))), -1), 0., 1e308))

    def conditional(self, x, dims):
        """
        Return the distribution over the dimensions C{dims}, conditioned on
        the values in C{x} observed in the remaining dimensions.

        @param x: conditional values
        @param dims: dimensions of the new distribution
        """
        from numpy import take, dot
        from numpy.linalg import inv

        dims2 = [i for i in range(self['mu'].shape[0]) if i not in dims]

        mu1 = take(self['mu'], dims)
        mu2 = take(self['mu'], dims2)

        x2 = take(x, dims2)

        A = take(take(self['sigma'], dims, 0), dims, 1)
        B = take(take(self['sigma'], dims2, 0), dims2, 1)
        C = take(take(self['sigma'], dims, 0), dims2, 1)

        mu = mu1 + dot(C, dot(inv(B), x2 - mu2))
        Sigma = A - dot(C, dot(inv(B), C.T))

        return MultivariateGaussian(mu, Sigma)
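
    # Usage sketch (illustrative values): condition a standard 2D Gaussian on
    # the value 1.5 observed in dimension 1 and keep dimension 0:
    #
    #     >>> pdf = MultivariateGaussian(numpy.zeros(2), numpy.eye(2))
    #     >>> cond = pdf.conditional(numpy.array([0.0, 1.5]), [0])
    #
    # The result is a new MultivariateGaussian over the requested dimensions,
    # with mean mu1 + C B^-1 (x2 - mu2) and covariance A - C B^-1 C^T
    # (the standard Gaussian conditioning formulas).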

class Dirichlet(BaseDensity):

    @property
    def alpha(self):
        return self['alpha']
    @alpha.setter
    def alpha(self, value):
        self['alpha'] = numpy.ravel(value)


class GumbelMinimum(BaseDensity):

    @property
    def mu(self):
        return self['mu']
    @mu.setter
    def mu(self, value):
        self['mu'] = value

    @property
    def beta(self):
        return self['beta']
    @beta.setter
    def beta(self, value):
        self['beta'] = value
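
# Example: a minimal custom density (illustrative sketch, not part of the
# library API). As described in the AbstractDensity docstring, a subclass
# registers its parameters, implements log_prob, and may optionally override
# _validate and random. ExampleExponential below follows the same conventions
# as the densities defined above; it assumes the library's ParameterValueError
# exception, which the docstrings in this module refer to.

class ExampleExponential(BaseDensity):
    """
    Exponential distribution with rate C{lam}, defined for x >= 0
    (illustrative example).
    """

    def __init__(self, lam=1.0):

        super(ExampleExponential, self).__init__()

        self._register('lam')
        self.set_params(lam=lam)

    def _validate(self, param, value):

        if param == 'lam' and value <= 0:
            raise ParameterValueError(param, value)

    @property
    def lam(self):
        return self['lam']
    @lam.setter
    def lam(self, value):
        self['lam'] = value

    def log_prob(self, x):

        lam = self.lam
        return log(lam) - lam * x

    def random(self, size=None):

        return numpy.random.exponential(1.0 / self.lam, size)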