Package csb :: Package bio :: Package utils
Source Code for Package csb.bio.utils

  1  """ 
  2  Computational utility functions.  
  3   
  4  This module defines a number of low-level, numerical, high-performance utility 
  5  functions like L{rmsd} for example. 
  6  """ 
  7   
  8  import numpy 
  9  import numpy.random 
 10   
 11  import csb.numeric 
 12   
 13 -def fit(X, Y): 
 14      """ 
 15      Return the translation vector and the rotation matrix 
 16      minimizing the RMSD between two sets of d-dimensional 
 17      vectors, i.e. if 
 18        
 19          >>> R,t = fit(X,Y) 
 20           
 21      then 
 22       
 23          >>> Y = dot(Y, transpose(R)) + t 
 24           
 25      will be the fitted configuration. 
 26   
 27      @param X: (n, d) input vector 
 28      @type X: numpy array 
 29   
 30      @param Y: (n, d) input vector 
 31      @type Y: numpy array 
 32   
 33      @return: (d, d) rotation matrix and (d,) translation vector 
 34      @rtype: tuple 
 35      """ 
 36   
 37      from numpy.linalg import svd, det 
 38      from numpy import dot 
 39   
 40      ## center configurations 
 41   
 42      x = X.mean(0) 
 43      y = Y.mean(0) 
 44   
 45      ## SVD of correlation matrix 
 46   
 47      V, _L, U = svd(dot((X - x).T, Y - y)) 
 48   
 49      ## calculate rotation and translation 
 50   
 51      R = dot(V, U) 
 52   
 53      if det(R) < 0.: 
 54          U[-1] *= -1 
 55          R = dot(V, U) 
 56   
 57      t = x - dot(R, y) 
 58   
 59      return R, t 
 60   
 61 -def wfit(X, Y, w): 
 62      """ 
 63      Return the translation vector and the rotation matrix 
 64      minimizing the weighted RMSD between two sets of d-dimensional 
 65      vectors, i.e. if 
 66        
 67          >>> R,t = fit(X,Y) 
 68           
 69      then 
 70       
 71          >>> Y = dot(Y, transpose(R)) + t 
 72           
 73      will be the fitted configuration. 
 74   
 75      @param X: (n, d) input vector 
 76      @type X: numpy array 
 77   
 78      @param Y: (n, d) input vector 
 79      @type Y: numpy array 
 80   
 81      @param w: input weights 
 82      @type w: numpy array 
 83   
 84      @return: (d, d) rotation matrix and (d,) translation vector 
 85      @rtype: tuple 
 86      """ 
 87   
 88      from numpy.linalg import svd, det 
 89      from numpy import dot, sum, average 
 90   
 91      ## center configurations 
 92   
 93      norm = sum(w) 
 94      x = dot(w, X) / norm 
 95      y = dot(w, Y) / norm 
 96   
 97      ## SVD of correlation matrix 
 98   
 99      V, _L, U = svd(dot((X - x).T * w, Y - y)) 
100   
101      ## calculate rotation and translation 
102   
103      R = dot(V, U) 
104   
105      if det(R) < 0.: 
106          U[2] *= -1 
107          R = dot(V, U) 
108   
109      t = x - dot(R, y) 
110   
111      return R, t 
112   
113 -def scale_and_fit(X, Y, check_mirror_image=False): 
114      """ 
115      Return the translation vector, the rotation matrix and a 
116      global scaling factor minimizing the RMSD between two sets 
117      of d-dimensional vectors, i.e. if 
118        
119          >>> R, t, s = scale_and_fit(X, Y) 
120           
121      then 
122       
123          >>> Y = s * (dot(Y, R.T) + t) 
124           
125      will be the fitted configuration. 
126   
127      @param X: (n, d) input vector 
128      @type X: numpy array 
129   
130      @param Y: (n, d) input vector 
131      @type Y: numpy array 
132   
133      @return: (d, d) rotation matrix and (d,) translation vector 
134      @rtype: tuple 
135      """ 
136      from numpy.linalg import svd, det 
137      from numpy import dot, trace 
138   
139      ## centers 
140   
141      x, y = X.mean(0), Y.mean(0) 
142   
143      ## SVD of correlation matrix 
144   
145      V, L, U = svd(dot((X - x).T, Y - y)) 
146   
147      ## calculate rotation, scale and translation 
148   
149      R = dot(V, U) 
150       
151      if check_mirror_image and det(R) < 0: 
152   
153          U[-1] *= -1 
154          L[-1] *= -1 
155          R = dot(V, U) 
156           
157      s = (L.sum() / ((Y-y)**2).sum()) 
158      t = x / s - dot(R, y) 
159   
160      return R, t, s 
161   
162 -def probabilistic_fit(X, Y, w=None, niter=10): 
163      """ 
164      Generate a superposition of X, Y where:: 
165       
166          R ~ exp(trace(dot(transpose(dot(transpose(X-t), Y)), R))) 
167          t ~ N(t_opt, 1 / sqrt(N)) 
168   
169      @rtype: tuple 
170      """ 
171           
172      from csb.statistics.rand import random_rotation 
173      from numpy import dot, transpose, average 
174       
175      if w is None: 
176          R, t = fit(X, Y) 
177      else: 
178          R, t = wfit(X, Y, w) 
179               
180      N = len(X) 
181   
182      for i in range(niter): 
183          ## sample rotation 
184          if w is None: 
185              A = dot(transpose(X - t), Y) 
186          else: 
187              A = dot(transpose(X - t) * w, Y) 
188   
189          R = random_rotation(A) 
190   
191          ## sample translation (without prior so far) 
192          if w is None: 
193              mu = average(X - dot(Y, transpose(R)), 0) 
194              t = numpy.random.standard_normal(len(mu)) / numpy.sqrt(N) + mu 
195          else: 
196              mu = dot(w, X - dot(Y, transpose(R))) / numpy.sum(w) 
197              t = numpy.random.standard_normal(len(mu)) / numpy.sqrt(numpy.sum(w)) + mu 
198   
199      return R, t 
200       
201 -def fit_wellordered(X, Y, n_iter=None, n_stdv=2, tol_rmsd=.5, 
202                      tol_stdv=0.05, full_output=False): 
203      """ 
204      Match two arrays onto each other by iteratively throwing out 
205      highly deviating entries. 
206       
207      (Reference: Nilges et al.: Delineating well-ordered regions in 
208      protein structure ensembles). 
209   
210      @param X: (n, d) input vector 
211      @type X: numpy array 
212   
213      @param Y: (n, d) input vector 
214      @type Y: numpy array 
215   
216      @param n_stdv: number of standard deviations above which points are considered to be outliers 
217      @param tol_rmsd: tolerance in rmsd 
218      @param tol_stdv: tolerance in standard deviations 
219      @param full_output: also return full history of values calculated by the algorithm 
220   
221      @rtype: tuple 
222      """ 
223   
224      from numpy import ones, compress, dot, sqrt, sum, nonzero, std, average 
225   
226      rmsd_list = [] 
227   
228      rmsd_old = 0. 
229      stdv_old = 0. 
230   
231      n = 0 
232      converged = False 
233   
234      mask = ones(X.shape[0]) 
235   
236      while not converged: 
237   
238          ## find transformation for best match 
239   
240          R, t = fit(compress(mask, X, 0), compress(mask, Y, 0)) 
241   
242          ## calculate RMSD profile 
243   
244          d = sqrt(sum((X - dot(Y, R.T) - t) ** 2, 1)) 
245   
246          ## calculate rmsd and stdv 
247   
248          rmsd = sqrt(average(compress(mask, d) ** 2, 0)) 
249          stdv = std(compress(mask, d)) 
250   
251          ## check conditions for convergence 
252   
253          if stdv < 1e-10: break 
254   
255          d_rmsd = abs(rmsd - rmsd_old) 
256          d_stdv = abs(1 - stdv_old / stdv) 
257   
258          if d_rmsd < tol_rmsd: 
259              if d_stdv < tol_stdv: 
260                  converged = 1 
261              else: 
262                  stdv_old = stdv 
263          else: 
264              rmsd_old = rmsd 
265              stdv_old = stdv 
266   
267          ## store result 
268   
269          perc = average(1. * mask) 
270   
271          ## throw out non-matching rows 
272   
273          new_mask = mask * (d < rmsd + n_stdv * stdv) 
274          outliers = nonzero(mask - new_mask) 
275          rmsd_list.append([perc, rmsd, outliers]) 
276   
277          mask = new_mask 
278   
279          if n_iter and n >= n_iter: break 
280   
281          n += 1 
282   
283      if full_output: 
284          return (R, t), rmsd_list 
285      else: 
286          return (R, t) 
287   
288 -def bfit(X, Y, n_iter=10, distribution='student', em=False, full_output=False): 
289      """ 
290      Robust superposition of two coordinate arrays. Models non-rigid 
291      displacements with outlier-tolerant probability distributions. 
292   
293      @param X: (n, 3) input vector 
294      @type X: numpy.array 
295      @param Y: (n, 3) input vector 
296      @type Y: numpy.array 
297      @param n_iter: number of iterations 
298      @type n_iter: int 
299      @param distribution: student or k 
300      @type distribution: str 
301      @param em: use maximum a posteriori probability (MAP) estimator 
302      @type em: bool 
303      @param full_output: if true, return ((R, t), scales) 
304      @type full_output: bool 
305      @rtype: tuple 
306      """ 
307      from csb.statistics import scalemixture as sm 
308   
309      if distribution == 'student': 
310          prior = sm.GammaPrior() 
311          if em: 
312              prior.estimator = sm.GammaPosteriorMAP() 
313      elif distribution == 'k': 
314          prior = sm.InvGammaPrior() 
315          if em: 
316              prior.estimator = sm.InvGammaPosteriorMAP() 
317      else: 
318          raise AttributeError('distribution') 
319   
320      mixture = sm.ScaleMixture(scales=X.shape[0], prior=prior, d=3) 
321   
322      R, t = fit(X, Y) 
323   
324      for _ in range(n_iter): 
325          data = distance(X, transform(Y, R, t)) 
326          mixture.estimate(data) 
327          R, t = probabilistic_fit(X, Y, mixture.scales) 
328   
329      if full_output: 
330          return (R, t), mixture.scales 
331      else: 
332          return (R, t) 
333   
334 -def xfit(X, Y, n_iter=10, seed=False, full_output=False): 
335      """ 
336      Maximum likelihood superposition of two coordinate arrays. Works similar 
337      to U{Theseus<http://theseus3d.org>} and to L{bfit}. 
338   
339      @param X: (n, 3) input vector 
340      @type X: numpy.array 
341      @param Y: (n, 3) input vector 
342      @type Y: numpy.array 
343      @param n_iter: number of EM iterations 
344      @type n_iter: int 
345      @type seed: bool 
346      @param full_output: if true, return ((R, t), scales) 
347      @type full_output: bool 
348      @rtype: tuple 
349      """ 
350      if seed: 
351          R, t = numpy.identity(3), numpy.zeros(3) 
352      else: 
353          R, t = fit(X, Y) 
354   
355      for _ in range(n_iter): 
356          data = distance_sq(X, transform(Y, R, t)) 
357          scales = 1.0 / data.clip(1e-9) 
358          R, t = wfit(X, Y, scales) 
359   
360      if full_output: 
361          return (R, t), scales 
362      else: 
363          return (R, t) 
364   
365 -def transform(Y, R, t, s=None, invert=False): 
366      """ 
367      Transform C{Y} by rotation C{R} and translation C{t}. Optionally scale by C{s}. 
368   
369          >>> R, t = fit(X, Y) 
370          >>> Y_fitted = transform(Y, R, t) 
371   
372      @param Y: (n, d) input vector 
373      @type Y: numpy.array 
374      @param R: (d, d) rotation matrix 
375      @type R: numpy.array 
376      @param t: (d,) translation vector 
377      @type t: numpy.array 
378      @param s: scaling factor 
379      @type s: float 
380      @param invert: if True, apply the inverse transformation 
381      @type invert: bool 
382      @return: transformed input vector 
383      @rtype: numpy.array 
384      """ 
385      if invert: 
386          x = numpy.dot(Y - t, R) 
387          if s is not None: 
388              s = 1. / s 
389      else: 
390          x = numpy.dot(Y, R.T) + t 
391      if s is not None: 
392          x *= s 
393      return x 
394   
395 -def fit_transform(X, Y, fit=fit, *args): 
396      """ 
397      Return Y superposed on X. 
398   
399      @type X: (n,3) numpy.array 
400      @type Y: (n,3) numpy.array 
401      @type fit: function 
402      @rtype: (n,3) numpy.array 
403      """ 
404      return transform(Y, *fit(X, Y, *args)) 
405   
406 -def rmsd(X, Y): 
407      """ 
408      Calculate the root mean squared deviation (RMSD) using Kabsch' formula. 
409   
410      @param X: (n, d) input vector 
411      @type X: numpy array 
412   
413      @param Y: (n, d) input vector 
414      @type Y: numpy array 
415   
416      @return: rmsd value between the input vectors 
417      @rtype: float 
418      """ 
419   
420      from numpy import sum, dot, sqrt, clip, average 
421      from numpy.linalg import svd, det 
422   
423      X = X - X.mean(0) 
424      Y = Y - Y.mean(0) 
425   
426      R_x = sum(X ** 2) 
427      R_y = sum(Y ** 2) 
428   
429      V, L, U = svd(dot(Y.T, X)) 
430   
431      if det(dot(V, U)) < 0.: 
432          L[-1] *= -1 
433   
434      return sqrt(clip(R_x + R_y - 2 * sum(L), 0., 1e300) / len(X)) 
435   
436 -def rmsd_cur(X, Y): 
437      """ 
438      Calculate the RMSD of two conformations as they are (no fitting is done). 
439      For details, see L{rmsd}. 
440   
441      @return: rmsd value between the input vectors 
442      @rtype: float 
443      """ 
444      return distance_sq(X, Y).mean() ** 0.5 
445   
446 -def wrmsd(X, Y, w): 
447      """ 
448      Calculate the weighted root mean squared deviation (wRMSD) using Kabsch' 
449      formula. 
450   
451      @param X: (n, d) input vector 
452      @type X: numpy array 
453   
454      @param Y: (n, d) input vector 
455      @type Y: numpy array 
456   
457      @param w: input weights 
458      @type w: numpy array 
459   
460      @return: rmsd value between the input vectors 
461      @rtype: float 
462      """ 
463   
464      from numpy import sum, dot, sqrt, clip, average 
465      from numpy.linalg import svd 
466   
467      ## normalize weights 
468   
469      w = w / w.sum() 
470   
471      X = X - dot(w, X) 
472      Y = Y - dot(w, Y) 
473   
474      R_x = sum(X.T ** 2 * w) 
475      R_y = sum(Y.T ** 2 * w) 
476   
477      L = svd(dot(Y.T * w, X))[1] 
478   
479      return sqrt(clip(R_x + R_y - 2 * sum(L), 0., 1e300)) 
480   
481 -def torsion_rmsd(x, y): 
482      """ 
483      Compute the circular RMSD of two phi/psi angle sets. 
484       
485      @param x: query phi/psi angles (Nx2 array, in radians) 
486      @type x: array 
487      @param y: subject phi/psi angles (Nx2 array, in radians) 
488      @type y: array 
489       
490      @rtype: float 
491      """ 
492      from numpy import array, sin, cos, sqrt 
493       
494      phi, psi = (x - y).T 
495      assert len(phi) == len(psi) 
496       
497      r = sin(phi).sum() ** 2 + cos(phi).sum() ** 2 + sin(psi).sum() ** 2 + cos(psi).sum() ** 2 
498      return 1 - (1.0 / len(phi)) * sqrt(r / 2.0) 
499   
500 -def _tm_d0(Lmin): 
501       
502      from numpy import power 
503        
504      if Lmin > 15: 
505          d0 = 1.24 * power(Lmin - 15.0, 1.0 / 3.0) - 1.8 
506      else: 
507          d0 = 0.5 
508   
509      return max(0.5, d0) 
510   
511 -def tm_score(x, y, L=None, d0=None): 
512      """ 
513      Evaluate the TM-score of two conformations as they are (no fitting is done). 
514       
515      @param x: 3 x n input array 
516      @type x: numpy array 
517      @param y: 3 x n input array 
518      @type y: numpy array 
519      @param L: length for normalization (default: C{len(x)}) 
520      @type L: int 
521      @param d0: d0 in Angstroms (default: calculate from C{L}) 
522      @type d0: float 
523       
524      @return: computed TM-score 
525      @rtype: float 
526      """ 
527      from numpy import sum 
528   
529      if not L: 
530          L = len(x) 
531      if not d0: 
532          d0 = _tm_d0(L) 
533      d = distance(x, y) 
534   
535      return sum(1 / (1 + (d / d0) ** 2)) / L 
536   
537 -def tm_superimpose(x, y, fit_method=fit, L=None, d0=None, L_ini_min=4, iL_step=1): 
538      """ 
539      Compute the TM-score of two protein coordinate vector sets.  
540      Reference:  http://zhanglab.ccmb.med.umich.edu/TM-score 
541       
542      @param x: 3 x n input vector 
543      @type x: numpy.array 
544      @param y: 3 x n  input vector 
545      @type y: numpy.array 
546      @param fit_method: a reference to a proper fitting function, e.g. fit 
547                         or fit_wellordered     
548      @type fit_method: function 
549      @param L: length for normalization (default: C{len(x)}) 
550      @type L: int 
551      @param d0: d0 in Angstroms (default: calculate from C{L}) 
552      @type d0: float 
553      @param L_ini_min: minimum length of initial alignment window (increase 
554      to speed up but loose precision, a value of 0 disables local alignment 
555      initialization) 
556      @type L_ini_min: int 
557      @param iL_step: initial alignment window shift (increase to speed up 
558      but loose precision) 
559      @type iL_step: int 
560       
561      @return: rotation matrix, translation vector, TM-score 
562      @rtype: tuple 
563      """ 
564      from numpy import asarray, sum, dot, zeros, clip 
565   
566      x, y = asarray(x), asarray(y) 
567      if not L: 
568          L = len(x) 
569      if not d0: 
570          d0 = _tm_d0(L) 
571      d0_search = clip(d0, 4.5, 8.0) 
572      best = None, None, 0.0 
573   
574      L_ini_min = min(L, L_ini_min) if L_ini_min else L 
575      L_ini = [L_ini_min] + list(filter(lambda x: x > L_ini_min, 
576              [L // (2 ** n_init) for n_init in range(6)])) 
577   
578      # the outer two loops define a sliding window of different sizes for the 
579      # initial local alignment (disabled with L_ini_min=0) 
580      for L_init in L_ini: 
581          for iL in range(0, L - L_init + 1, min(L_init, iL_step)): 
582              mask = zeros(L, bool) 
583              mask[iL:iL + L_init] = True 
584   
585              # refine mask until convergence, similar to fit_wellordered 
586              for i in range(20): 
587                  R, t = fit_method(x[mask], y[mask]) 
588   
589                  d = distance(x, dot(y, R.T) + t) 
590                  score = sum(1 / (1 + (d / d0) ** 2)) / L 
591   
592                  if score > best[-1]: 
593                      best = R, t, score 
594   
595                  mask_prev = mask 
596                  cutoff = d0_search + (-1 if i == 0 else 1) 
597                  while True: 
598                      mask = d < cutoff 
599                      if sum(mask) >= 3 or 3 >= len(mask): 
600                          break 
601                      cutoff += 0.5 
602   
603                  if (mask == mask_prev).all(): 
604                      break 
605   
606      return best 
607   
608 -def center_of_mass(x, m=None): 
609      """ 
610      Compute the mean of a set of (optionally weighted) points. 
611   
612      @param x: array of rank (n,d) where n is the number of points 
613                and d the dimension 
614      @type x: numpy.array 
615      @param m: rank (n,) array of masses / weights 
616      @type m: numpy.array 
617   
618      @return: center of mass 
619      @rtype: (d,) numpy.array 
620      """ 
621      if m is None: 
622          return x.mean(0) 
623      else: 
624          from numpy import dot 
625   
626          return dot(m, x) / m.sum() 
627       
628 -def radius_of_gyration(x, m=None): 
629      """ 
630      Compute the radius of gyration of a set of (optionally weighted) points. 
631   
632      @param x: array of rank (n,d) where n is the number of points 
633                and d the dimension 
634      @type x: numpy.array 
635      @param m: rank (n,) array of masses / weights 
636      @type m: numpy.array 
637   
638      @return: center of mass 
639      @rtype: (d,) numpy.array 
640      """ 
641      from numpy import sqrt, dot 
642       
643      x = x - center_of_mass(x, m) 
644      if m is None: 
645          return sqrt((x ** 2).sum() / len(x)) 
646      else: 
647          return sqrt(dot(m, (x ** 2).sum(1)) / m.sum()) 
648   
649 -def second_moments(x, m=None): 
650      """ 
651      Compute the tensor second moments of a set of (optionally weighted) points. 
652   
653      @param x: array of rank (n,d) where n is the number of points 
654                and d the dimension 
655      @type x: numpy.array 
656      @param m: rank (n,) array of masses / weights 
657      @type m: numpy.array 
658   
659      @return: second moments 
660      @rtype: (d,d) numpy.array 
661      """ 
662      from numpy import dot 
663   
664      x = (x - center_of_mass(x, m)).T 
665       
666      if m is not None: 
667          return dot(x * m, x.T) 
668      else: 
669          return dot(x, x.T) 
670   
671 -def inertia_tensor(x, m=None): 
672      """ 
673      Compute the inertia tensor of a set of (optionally weighted) points. 
674   
675      @param x: array of rank (n,d) where n is the number of points 
676                and d the dimension 
677      @type x: numpy.array 
678      @param m: rank (n,) array of masses / weights 
679      @type m: numpy.array 
680   
681      @return: inertia tensor 
682      @rtype: (d,d) numpy.array 
683      """ 
684      from numpy import dot, eye 
685   
686      x = (x - center_of_mass(x, m)).T 
687      r2 = (x ** 2).sum(0) 
688       
689      if m is not None: 
690          return eye(x.shape[0]) * dot(m, r2) - dot(x * m, x.T) 
691      else: 
692          return eye(x.shape[0]) * r2.sum() - dot(x, x.T) 
693   
694 -def find_pairs(cutoff, X, Y=None): 
695      """ 
696      Find pairs with euclidean distance below C{cutoff}. Either between 
697      C{X} and C{Y}, or within C{X} if C{Y} is C{None}. 
698   
699      Uses a KDTree and thus is memory efficient and reasonable fast. 
700   
701      @type cutoff: float 
702      @type X: (m,n) numpy.array 
703      @type Y: (k,n) numpy.array 
704      @return: set of index tuples 
705      @rtype: iterable 
706      """ 
707      try: 
708          from scipy.spatial import cKDTree as KDTree 
709          KDTree.query_pairs 
710          KDTree.query_ball_tree 
711      except (ImportError, AttributeError): 
712          from scipy.spatial import KDTree 
713   
714      tree = KDTree(X, len(X)) 
715      if Y is None: 
716          return tree.query_pairs(cutoff) 
717   
718      other = KDTree(Y, len(Y)) 
719      contacts = tree.query_ball_tree(other, cutoff) 
720      return ((i, j) for (i, js) in enumerate(contacts) for j in js) 
721   
722 -def distance_matrix(X, Y=None): 
723      """ 
724      Calculates a matrix of pairwise distances 
725   
726      @param X: m x n input vector 
727      @type X: numpy array 
728   
729      @param Y: k x n input vector or None, which defaults to Y=X 
730      @type Y: numpy array 
731   
732      @return: m x k distance matrix 
733      @rtype: numpy array 
734      """ 
735      from numpy import add, clip, sqrt, dot, transpose, sum 
736   
737      if Y is None: Y = X 
738   
739      if X.ndim < 2: X = X.reshape((1, -1)) 
740      if Y.ndim < 2: Y = Y.reshape((1, -1)) 
741   
742      C = dot(X, transpose(Y)) 
743      S = add.outer(sum(X ** 2, 1), sum(Y ** 2, 1)) 
744   
745      return sqrt(clip(S - 2 * C, 0., 1e300)) 
746   
747 -def distance_sq(X, Y): 
748      """ 
749      Squared distance between C{X} and C{Y} along the last axis. For details, see L{distance}. 
750   
751      @return: scalar or array of length m 
752      @rtype: (m,) numpy.array 
753      """ 
754      return ((numpy.asarray(X) - Y) ** 2).sum(-1) 
755   
756 -def distance(X, Y): 
757      """ 
758      Distance between C{X} and C{Y} along the last axis. 
759   
760      @param X: m x n input vector 
761      @type X: numpy array 
762      @param Y: m x n input vector 
763      @type Y: numpy array 
764      @return: scalar or array of length m 
765      @rtype: (m,) numpy.array 
766      """ 
767      return distance_sq(X, Y) ** 0.5 
768   
769 -def average_structure(X): 
770      """ 
771      Calculate an average structure from an ensemble of structures 
772      (i.e. X is a rank-3 tensor: X[i] is a (N,3) configuration matrix). 
773   
774      @param X: m x n x 3 input vector 
775      @type X: numpy array 
776       
777      @return: average structure 
778      @rtype: (n,3) numpy.array 
779      """ 
780      from numpy.linalg import eigh 
781   
782      B = csb.numeric.gower_matrix(X) 
783      v, U = eigh(B) 
784      if numpy.iscomplex(v).any(): 
785          v = v.real 
786      if numpy.iscomplex(U).any(): 
787          U = U.real 
788   
789      indices = numpy.argsort(v)[-3:] 
790      v = numpy.take(v, indices, 0) 
791      U = numpy.take(U, indices, 1) 
792           
793      x = U * numpy.sqrt(v) 
794      i = 0 
795      while is_mirror_image(x, X[0]) and i < 2: 
796          x[:, i] *= -1 
797          i += 1 
798      return x 
799   
800 -def is_mirror_image(X, Y): 
801      """ 
802      Check if two configurations X and Y are mirror images 
803      (i.e. their optimal superposition involves a reflection). 
804   
805      @param X: n x 3 input vector 
806      @type X: numpy array 
807      @param Y: n x 3 input vector 
808      @type Y: numpy array 
809    
810      @rtype: bool 
811      """ 
812      from numpy.linalg import det, svd 
813       
814      ## center configurations 
815   
816      X = X - numpy.mean(X, 0) 
817      Y = Y - numpy.mean(Y, 0) 
818   
819      ## SVD of correlation matrix 
820   
821      V, L, U = svd(numpy.dot(numpy.transpose(X), Y))             #@UnusedVariable 
822   
823      R = numpy.dot(V, U) 
824   
825      return det(R) < 0 
826   
827 -def deg(x): 
828      """ 
829      Convert an array of torsion angles in radians to torsion degrees 
830      ranging from -180 to 180. 
831   
832      @param x: array of angles 
833      @type x: numpy array 
834    
835      @rtype: numpy array      
836      """ 
837      from csb.bio.structure import TorsionAngles 
838       
839      func = numpy.vectorize(TorsionAngles.deg) 
840      return func(x) 
841   
842 -def rad(x): 
843      """ 
844      Convert an array of torsion angles in torsion degrees to radians. 
845   
846      @param x: array of angles 
847      @type x: numpy array 
848    
849      @rtype: numpy array      
850      """ 
851      from csb.bio.structure import TorsionAngles 
852       
853      func = numpy.vectorize(TorsionAngles.rad) 
854      return func(x) 
855   
856  # vi:expandtab:smarttab:sw=4 
857