1  """ 
  2  Statistics root package. 
  3   
  4  This package contains a number of common statistical utilities. Sub-packages 
  5  provide more specialized APIs, for example L{csb.statistics.pdf} defines the 
  6  probability density object model. 
  7  """ 
  8   
 10   
 11      total_mem = 1e8 
 12   
 16   
 18   
 19          from numpy import greater, reshape, concatenate 
 20           
 21          c = [] 
 22          x = reshape(x, (-1,)) 
 23   
 24          if nchunks is None: 
 25   
 26              total_size = len(x) * len(self.data) 
 27              nchunks = total_size / self.total_mem + int(total_size % self.total_mem != 0) 
 28               
 29          size = int(len(x) / nchunks + int(len(x) % nchunks != 0)) 
 30           
 31          while len(x): 
 32   
 33              y = x[:size] 
 34              x = x[size:] 
 35   
 36              c = concatenate((c, greater.outer(y, self.data).sum(1) / float(len(self.data)))) 
 37   
 38          return c 
  39       
  42   
 44      """ 
 45      @param x: 
 46      @param axis: compute the geometric mean along this axis  
 47   
 48      @return: geometric mean of x 
 49      """ 
 50      from numpy import exp, log, clip, mean 
 51   
 52      return exp(mean(log(clip(x, 1e-300, 1e300)), axis)) 
  53   
 55      """ 
 56      @param x: 
 57      @param axis: compute the harmonic mean along this axis  
 58   
 59      @return: harmonic mean of x 
 60      """ 
 61      from numpy import mean 
 62   
 63      return 1 / mean(1 / x, axis) 
  64   
 66      """ 
 67      @param x: random variables 
 68      @param axis: compute the kurtosis along this axis 
 69   
 70      @return: Sample kurtosis of x 
 71      """ 
 72      from numpy import mean, std 
 73   
 74      m = x.mean(axis) 
 75      a = mean((x - m) ** 4, axis) 
 76      s = std(x, axis) 
 77   
 78      return a / s ** 4 - 3 
  79   
 81      """ 
 82      @param x: random variables 
 83      @param axis: compute the skewness along this axis 
 84   
 85      @return: Sample skewness of x 
 86      """ 
 87      from numpy import mean, std 
 88   
 89      s = std(x) 
 90      return mean((x - x.mean()) ** 3, axis) / s ** 3 
  91       
 93      """ 
 94      auto-correlation of a times series 
 95   
 96      @param x: time series 
 97      @type x: numpy.array 
 98      @param n: Maximal lag for which to compute the auto-correlation 
 99      @type n: int 
100      """ 
101      from numpy import array, mean, std 
102      x = x - x.mean() 
103      return array([mean(x[i:] * x[:len(x) - i]) for i in range(n)]) / std(x)**2 
 104   
106      """ 
107      Probabilistic version of AND 
108      """ 
109      from numpy import array, multiply 
110      return multiply.reduce(array(p), axis=axis) 
 111   
118   
137   
139      """ 
140      Reconstruction of a multidimensional configuration that 
141      optimally reproduces the input distance matrix. 
142   
143      See: Gower, J (1966) 
144      """ 
145      from numpy import clip, sqrt, take, argsort, sort 
146      from csb.numeric import reverse 
147      from scipy.linalg import eigh 
148       
149       
150       
151      B = -clip(D, 1e-150, 1e150) ** 2 / 2. 
152   
153      b = B.mean(0) 
154   
155      B = B - b 
156      B = (B.T - b).T 
157      B += b.mean() 
158   
159       
160   
161      v, U = eigh(B) 
162      v = v.real 
163      U = U.real 
164   
165      U = take(U, argsort(v), 1) 
166      v = sort(v) 
167   
168      U = reverse(U, 1) 
169      v = reverse(v) 
170       
171      if nd is None: nd = len(v) 
172   
173      X = U[:, :nd] * sqrt(clip(v[:nd], 0., 1e300)) 
174   
175      return X 
 176   
178      """ 
179      Calculate the entropy of p.  
180      @return: entropy of p 
181      """ 
182      from csb.numeric import log 
183      from numpy import sum 
184       
185      return -sum(p * log(p)) 
 186   
187 -def histogram2D(x, nbins=100, axes=None, nbatch=1000, normalize=True): 
 188      """ 
189      Non-greedy two-dimensional histogram. 
190   
191      @param x: input array of rank two 
192      @type x: numpy array 
193      @param nbins: number of bins 
194      @type nbins: integer 
195      @param axes: x- and y-axes used for binning the data (if provided this will be used instead of <nbins>) 
196      @type axes: tuple of two one-dimensional numpy arrays 
197      @param nbatch: size of batch that is used to sort the data into the 2D grid 
198      @type nbatch: integer 
199      @param normalize: specifies whether histogram should be normalized 
200      @type normalize: boolean 
201   
202      @return: 2-rank array storing histogram, tuple of x- and y-axis 
203      """ 
204      from numpy import linspace, zeros, argmin, fabs, subtract, transpose 
205       
206      if axes is None: 
207           
208          lower, upper = x.min(0), x.max(0) 
209          axes = [linspace(lower[i], upper[i], nbins) for i in range(lower.shape[0])] 
210   
211      H = zeros((len(axes[0]), len(axes[1]))) 
212   
213      while len(x): 
214   
215          y = x[:nbatch] 
216          x = x[nbatch:] 
217   
218          I = transpose([argmin(fabs(subtract.outer(y[:, i], axes[i])), 1) for i in range(2)]) 
219   
220          for i, j in I: H[i, j] += 1 
221   
222      if normalize: 
223          H = H / H.sum() / (axes[0][1] - axes[0][0]) / (axes[1][1] - axes[1][0]) 
224   
225      return H, axes 
 226   
227 -def histogram_nd(x, nbins=100, axes=None, nbatch=1000, normalize=True): 
 228      """ 
229      Non-greedy n-dimemsional histogram. 
230   
231      @param x: input array of rank (-1,n) 
232      @type x: numpy array 
233      @param nbins: number of bins 
234      @type nbins: integer 
235      @param axes: axes used for binning the data (if provided this will be used instead of <nbins>) 
236      @type axes: tuple of two one-dimensional numpy arrays 
237      @param nbatch: size of batch that is used to sort the data into the nD grid 
238      @type nbatch: integer 
239      @param normalize: specifies whether histogram should be normalized 
240      @type normalize: boolean 
241   
242      @return: n-rank array storing histogram, tuple of axes 
243      """ 
244      import numpy as np 
245   
246      if len(x.shape) == 1: 
247          x = np.reshape(x, (-1,1)) 
248   
249      d = x.shape[1] 
250       
251      if axes is None: 
252           
253          lower, upper = x.min(0), x.max(0) 
254          axes = [np.linspace(lower[i], upper[i], nbins) for i in range(d)] 
255   
256      shape = tuple(map(len, axes)) 
257   
258      H = np.zeros(shape) 
259       
260       
261      s = np.multiply.accumulate(np.array((1,) + H.shape[1:]))[::-1] 
262      H = H.flatten() 
263       
264      while len(x): 
265   
266          y = x[:nbatch] 
267          x = x[nbatch:] 
268   
269          I = np.transpose([np.argmin(np.fabs(np.subtract.outer(y[:, i], axes[i])), 1) 
270                            for i in range(d)]) 
271          I = np.dot(I, s) 
272          I = np.sort(I) 
273   
274          i = list(set(I.tolist())) 
275          n = np.equal.outer(I, i).sum(0) 
276           
277          H[i] += n 
278           
279      if normalize: 
280          H = H / H.sum() / np.multiply.reduce([axes[i][1] - axes[i][0] for i in range(d)]) 
281   
282      H = np.reshape(H, shape) 
283   
284      return H, axes 
 285   
286 -def density(x, nbins, normalize=True): 
 287      """ 
288      Histogram of univariate input data: basically calls numpy's histogram method and 
289      does a proper normalization. 
290   
291      @param x: input numpy array 
292      @param nbins: number of bins 
293      @type nbins: integer 
294      @param normalize: if true, histogram will be normalized 
295      """ 
296      from numpy import histogram 
297       
298      hy, hx = histogram(x, nbins) 
299      hx = 0.5 * (hx[1:] + hx[:-1]) 
300      hy = hy.astype('d') 
301      if normalize: 
302          hy /= (hx[1] - hx[0]) * hy.sum() 
303   
304      return hx, hy 
 305   
307      """ 
308      Calculate circular variance of a circular variable. 
309   
310      @param a: input array 
311      @param axis: axis along which mean is calculated 
312      @type axis: None or integer     
313      """ 
314      from numpy import average, cos, sin 
315      return 1 - average(cos(a), axis) ** 2 - average(sin(a), axis) ** 2 
 316   
318      """ 
319      Estimate mean of a circular variable 
320   
321      @param a: input array 
322      @param axis: axis along which mean is calculated 
323      @type axis: None or integer 
324      """ 
325      from numpy import sin, cos, arctan2, average 
326      return arctan2(average(sin(a), axis), average(cos(a), axis)) 
 327   
329      """ 
330      Calculates a running average for given window size 
331   
332      @param x: input array 
333      @param w: window size 
334      @type w: integer 
335      @param axis: axis along which mean is calculated 
336      """ 
337      from numpy import array, mean 
338      return array([mean(x[i:i + w], axis) for i in range(len(x) - w)]) 
 339   
356