1 """
2 Computational utility functions.
3
4 This module defines a number of low-level, numerical, high-performance utility
5 functions like L{rmsd} for example.
6 """
7
8 import numpy
9 import numpy.random
10
11 import csb.numeric
12
14 """
15 Return the translation vector and the rotation matrix
16 minimizing the RMSD between two sets of d-dimensional
17 vectors, i.e. if
18
19 >>> R,t = fit(X,Y)
20
21 then
22
23 >>> Y = dot(Y, transpose(R)) + t
24
25 will be the fitted configuration.
26
27 @param X: (n, d) input vector
28 @type X: numpy array
29
30 @param Y: (n, d) input vector
31 @type Y: numpy array
32
33 @return: (d, d) rotation matrix and (d,) translation vector
34 @rtype: tuple
35 """
36
37 from numpy.linalg import svd, det
38 from numpy import dot
39
40
41
42 x = X.mean(0)
43 y = Y.mean(0)
44
45
46
47 V, _L, U = svd(dot((X - x).T, Y - y))
48
49
50
51 R = dot(V, U)
52
53 if det(R) < 0.:
54 U[-1] *= -1
55 R = dot(V, U)
56
57 t = x - dot(R, y)
58
59 return R, t
60
62 """
63 Return the translation vector and the rotation matrix
64 minimizing the weighted RMSD between two sets of d-dimensional
65 vectors, i.e. if
66
67 >>> R,t = fit(X,Y)
68
69 then
70
71 >>> Y = dot(Y, transpose(R)) + t
72
73 will be the fitted configuration.
74
75 @param X: (n, d) input vector
76 @type X: numpy array
77
78 @param Y: (n, d) input vector
79 @type Y: numpy array
80
81 @param w: input weights
82 @type w: numpy array
83
84 @return: (d, d) rotation matrix and (d,) translation vector
85 @rtype: tuple
86 """
87
88 from numpy.linalg import svd, det
89 from numpy import dot, sum, average
90
91
92
93 norm = sum(w)
94 x = dot(w, X) / norm
95 y = dot(w, Y) / norm
96
97
98
99 V, _L, U = svd(dot((X - x).T * w, Y - y))
100
101
102
103 R = dot(V, U)
104
105 if det(R) < 0.:
106 U[2] *= -1
107 R = dot(V, U)
108
109 t = x - dot(R, y)
110
111 return R, t
112
114 """
115 Return the translation vector, the rotation matrix and a
116 global scaling factor minimizing the RMSD between two sets
117 of d-dimensional vectors, i.e. if
118
119 >>> R, t, s = scale_and_fit(X, Y)
120
121 then
122
123 >>> Y = s * (dot(Y, R.T) + t)
124
125 will be the fitted configuration.
126
127 @param X: (n, d) input vector
128 @type X: numpy array
129
130 @param Y: (n, d) input vector
131 @type Y: numpy array
132
133 @return: (d, d) rotation matrix and (d,) translation vector
134 @rtype: tuple
135 """
136 from numpy.linalg import svd, det
137 from numpy import dot, trace
138
139
140
141 x, y = X.mean(0), Y.mean(0)
142
143
144
145 V, L, U = svd(dot((X - x).T, Y - y))
146
147
148
149 R = dot(V, U)
150
151 if check_mirror_image and det(R) < 0:
152
153 U[-1] *= -1
154 L[-1] *= -1
155 R = dot(V, U)
156
157 s = (L.sum() / ((Y-y)**2).sum())
158 t = x / s - dot(R, y)
159
160 return R, t, s
161
163 """
164 Generate a superposition of X, Y where::
165
166 R ~ exp(trace(dot(transpose(dot(transpose(X-t), Y)), R)))
167 t ~ N(t_opt, 1 / sqrt(N))
168
169 @rtype: tuple
170 """
171
172 from csb.statistics.rand import random_rotation
173 from numpy import dot, transpose, average
174
175 if w is None:
176 R, t = fit(X, Y)
177 else:
178 R, t = wfit(X, Y, w)
179
180 N = len(X)
181
182 for i in range(niter):
183
184 if w is None:
185 A = dot(transpose(X - t), Y)
186 else:
187 A = dot(transpose(X - t) * w, Y)
188
189 R = random_rotation(A)
190
191
192 if w is None:
193 mu = average(X - dot(Y, transpose(R)), 0)
194 t = numpy.random.standard_normal(len(mu)) / numpy.sqrt(N) + mu
195 else:
196 mu = dot(w, X - dot(Y, transpose(R))) / numpy.sum(w)
197 t = numpy.random.standard_normal(len(mu)) / numpy.sqrt(numpy.sum(w)) + mu
198
199 return R, t
200
201 -def fit_wellordered(X, Y, n_iter=None, n_stdv=2, tol_rmsd=.5,
202 tol_stdv=0.05, full_output=False):
203 """
204 Match two arrays onto each other by iteratively throwing out
205 highly deviating entries.
206
207 (Reference: Nilges et al.: Delineating well-ordered regions in
208 protein structure ensembles).
209
210 @param X: (n, d) input vector
211 @type X: numpy array
212
213 @param Y: (n, d) input vector
214 @type Y: numpy array
215
216 @param n_stdv: number of standard deviations above which points are considered to be outliers
217 @param tol_rmsd: tolerance in rmsd
218 @param tol_stdv: tolerance in standard deviations
219 @param full_output: also return full history of values calculated by the algorithm
220
221 @rtype: tuple
222 """
223
224 from numpy import ones, compress, dot, sqrt, sum, nonzero, std, average
225
226 rmsd_list = []
227
228 rmsd_old = 0.
229 stdv_old = 0.
230
231 n = 0
232 converged = False
233
234 mask = ones(X.shape[0])
235
236 while not converged:
237
238
239
240 R, t = fit(compress(mask, X, 0), compress(mask, Y, 0))
241
242
243
244 d = sqrt(sum((X - dot(Y, R.T) - t) ** 2, 1))
245
246
247
248 rmsd = sqrt(average(compress(mask, d) ** 2, 0))
249 stdv = std(compress(mask, d))
250
251
252
253 if stdv < 1e-10: break
254
255 d_rmsd = abs(rmsd - rmsd_old)
256 d_stdv = abs(1 - stdv_old / stdv)
257
258 if d_rmsd < tol_rmsd:
259 if d_stdv < tol_stdv:
260 converged = 1
261 else:
262 stdv_old = stdv
263 else:
264 rmsd_old = rmsd
265 stdv_old = stdv
266
267
268
269 perc = average(1. * mask)
270
271
272
273 new_mask = mask * (d < rmsd + n_stdv * stdv)
274 outliers = nonzero(mask - new_mask)
275 rmsd_list.append([perc, rmsd, outliers])
276
277 mask = new_mask
278
279 if n_iter and n >= n_iter: break
280
281 n += 1
282
283 if full_output:
284 return (R, t), rmsd_list
285 else:
286 return (R, t)
287
288 -def bfit(X, Y, n_iter=10, distribution='student', em=False, full_output=False):
289 """
290 Robust superposition of two coordinate arrays. Models non-rigid
291 displacements with outlier-tolerant probability distributions.
292
293 @param X: (n, 3) input vector
294 @type X: numpy.array
295 @param Y: (n, 3) input vector
296 @type Y: numpy.array
297 @param n_iter: number of iterations
298 @type n_iter: int
299 @param distribution: student or k
300 @type distribution: str
301 @param em: use maximum a posteriori probability (MAP) estimator
302 @type em: bool
303 @param full_output: if true, return ((R, t), scales)
304 @type full_output: bool
305 @rtype: tuple
306 """
307 from csb.statistics import scalemixture as sm
308
309 if distribution == 'student':
310 prior = sm.GammaPrior()
311 if em:
312 prior.estimator = sm.GammaPosteriorMAP()
313 elif distribution == 'k':
314 prior = sm.InvGammaPrior()
315 if em:
316 prior.estimator = sm.InvGammaPosteriorMAP()
317 else:
318 raise AttributeError('distribution')
319
320 mixture = sm.ScaleMixture(scales=X.shape[0], prior=prior, d=3)
321
322 R, t = fit(X, Y)
323
324 for _ in range(n_iter):
325 data = distance(X, transform(Y, R, t))
326 mixture.estimate(data)
327 R, t = probabilistic_fit(X, Y, mixture.scales)
328
329 if full_output:
330 return (R, t), mixture.scales
331 else:
332 return (R, t)
333
334 -def xfit(X, Y, n_iter=10, seed=False, full_output=False):
335 """
336 Maximum likelihood superposition of two coordinate arrays. Works similar
337 to U{Theseus<http://theseus3d.org>} and to L{bfit}.
338
339 @param X: (n, 3) input vector
340 @type X: numpy.array
341 @param Y: (n, 3) input vector
342 @type Y: numpy.array
343 @param n_iter: number of EM iterations
344 @type n_iter: int
345 @type seed: bool
346 @param full_output: if true, return ((R, t), scales)
347 @type full_output: bool
348 @rtype: tuple
349 """
350 if seed:
351 R, t = numpy.identity(3), numpy.zeros(3)
352 else:
353 R, t = fit(X, Y)
354
355 for _ in range(n_iter):
356 data = distance_sq(X, transform(Y, R, t))
357 scales = 1.0 / data.clip(1e-9)
358 R, t = wfit(X, Y, scales)
359
360 if full_output:
361 return (R, t), scales
362 else:
363 return (R, t)
364
394
405
407 """
408 Calculate the root mean squared deviation (RMSD) using Kabsch' formula.
409
410 @param X: (n, d) input vector
411 @type X: numpy array
412
413 @param Y: (n, d) input vector
414 @type Y: numpy array
415
416 @return: rmsd value between the input vectors
417 @rtype: float
418 """
419
420 from numpy import sum, dot, sqrt, clip, average
421 from numpy.linalg import svd, det
422
423 X = X - X.mean(0)
424 Y = Y - Y.mean(0)
425
426 R_x = sum(X ** 2)
427 R_y = sum(Y ** 2)
428
429 V, L, U = svd(dot(Y.T, X))
430
431 if det(dot(V, U)) < 0.:
432 L[-1] *= -1
433
434 return sqrt(clip(R_x + R_y - 2 * sum(L), 0., 1e300) / len(X))
435
437 """
438 Calculate the RMSD of two conformations as they are (no fitting is done).
439 For details, see L{rmsd}.
440
441 @return: rmsd value between the input vectors
442 @rtype: float
443 """
444 return distance_sq(X, Y).mean() ** 0.5
445
447 """
448 Calculate the weighted root mean squared deviation (wRMSD) using Kabsch'
449 formula.
450
451 @param X: (n, d) input vector
452 @type X: numpy array
453
454 @param Y: (n, d) input vector
455 @type Y: numpy array
456
457 @param w: input weights
458 @type w: numpy array
459
460 @return: rmsd value between the input vectors
461 @rtype: float
462 """
463
464 from numpy import sum, dot, sqrt, clip, average
465 from numpy.linalg import svd
466
467
468
469 w = w / w.sum()
470
471 X = X - dot(w, X)
472 Y = Y - dot(w, Y)
473
474 R_x = sum(X.T ** 2 * w)
475 R_y = sum(Y.T ** 2 * w)
476
477 L = svd(dot(Y.T * w, X))[1]
478
479 return sqrt(clip(R_x + R_y - 2 * sum(L), 0., 1e300))
480
482 """
483 Compute the circular RMSD of two phi/psi angle sets.
484
485 @param x: query phi/psi angles (Nx2 array, in radians)
486 @type x: array
487 @param y: subject phi/psi angles (Nx2 array, in radians)
488 @type y: array
489
490 @rtype: float
491 """
492 from numpy import array, sin, cos, sqrt
493
494 phi, psi = (x - y).T
495 assert len(phi) == len(psi)
496
497 r = sin(phi).sum() ** 2 + cos(phi).sum() ** 2 + sin(psi).sum() ** 2 + cos(psi).sum() ** 2
498 return 1 - (1.0 / len(phi)) * sqrt(r / 2.0)
499
501
502 from numpy import power
503
504 if Lmin > 15:
505 d0 = 1.24 * power(Lmin - 15.0, 1.0 / 3.0) - 1.8
506 else:
507 d0 = 0.5
508
509 return max(0.5, d0)
510
512 """
513 Evaluate the TM-score of two conformations as they are (no fitting is done).
514
515 @param x: 3 x n input array
516 @type x: numpy array
517 @param y: 3 x n input array
518 @type y: numpy array
519 @param L: length for normalization (default: C{len(x)})
520 @type L: int
521 @param d0: d0 in Angstroms (default: calculate from C{L})
522 @type d0: float
523
524 @return: computed TM-score
525 @rtype: float
526 """
527 from numpy import sum
528
529 if not L:
530 L = len(x)
531 if not d0:
532 d0 = _tm_d0(L)
533 d = distance(x, y)
534
535 return sum(1 / (1 + (d / d0) ** 2)) / L
536
538 """
539 Compute the TM-score of two protein coordinate vector sets.
540 Reference: http://zhanglab.ccmb.med.umich.edu/TM-score
541
542 @param x: 3 x n input vector
543 @type x: numpy.array
544 @param y: 3 x n input vector
545 @type y: numpy.array
546 @param fit_method: a reference to a proper fitting function, e.g. fit
547 or fit_wellordered
548 @type fit_method: function
549 @param L: length for normalization (default: C{len(x)})
550 @type L: int
551 @param d0: d0 in Angstroms (default: calculate from C{L})
552 @type d0: float
553 @param L_ini_min: minimum length of initial alignment window (increase
554 to speed up but loose precision, a value of 0 disables local alignment
555 initialization)
556 @type L_ini_min: int
557 @param iL_step: initial alignment window shift (increase to speed up
558 but loose precision)
559 @type iL_step: int
560
561 @return: rotation matrix, translation vector, TM-score
562 @rtype: tuple
563 """
564 from numpy import asarray, sum, dot, zeros, clip
565
566 x, y = asarray(x), asarray(y)
567 if not L:
568 L = len(x)
569 if not d0:
570 d0 = _tm_d0(L)
571 d0_search = clip(d0, 4.5, 8.0)
572 best = None, None, 0.0
573
574 L_ini_min = min(L, L_ini_min) if L_ini_min else L
575 L_ini = [L_ini_min] + list(filter(lambda x: x > L_ini_min,
576 [L // (2 ** n_init) for n_init in range(6)]))
577
578
579
580 for L_init in L_ini:
581 for iL in range(0, L - L_init + 1, min(L_init, iL_step)):
582 mask = zeros(L, bool)
583 mask[iL:iL + L_init] = True
584
585
586 for i in range(20):
587 R, t = fit_method(x[mask], y[mask])
588
589 d = distance(x, dot(y, R.T) + t)
590 score = sum(1 / (1 + (d / d0) ** 2)) / L
591
592 if score > best[-1]:
593 best = R, t, score
594
595 mask_prev = mask
596 cutoff = d0_search + (-1 if i == 0 else 1)
597 while True:
598 mask = d < cutoff
599 if sum(mask) >= 3 or 3 >= len(mask):
600 break
601 cutoff += 0.5
602
603 if (mask == mask_prev).all():
604 break
605
606 return best
607
609 """
610 Compute the mean of a set of (optionally weighted) points.
611
612 @param x: array of rank (n,d) where n is the number of points
613 and d the dimension
614 @type x: numpy.array
615 @param m: rank (n,) array of masses / weights
616 @type m: numpy.array
617
618 @return: center of mass
619 @rtype: (d,) numpy.array
620 """
621 if m is None:
622 return x.mean(0)
623 else:
624 from numpy import dot
625
626 return dot(m, x) / m.sum()
627
629 """
630 Compute the radius of gyration of a set of (optionally weighted) points.
631
632 @param x: array of rank (n,d) where n is the number of points
633 and d the dimension
634 @type x: numpy.array
635 @param m: rank (n,) array of masses / weights
636 @type m: numpy.array
637
638 @return: center of mass
639 @rtype: (d,) numpy.array
640 """
641 from numpy import sqrt, dot
642
643 x = x - center_of_mass(x, m)
644 if m is None:
645 return sqrt((x ** 2).sum() / len(x))
646 else:
647 return sqrt(dot(m, (x ** 2).sum(1)) / m.sum())
648
650 """
651 Compute the tensor second moments of a set of (optionally weighted) points.
652
653 @param x: array of rank (n,d) where n is the number of points
654 and d the dimension
655 @type x: numpy.array
656 @param m: rank (n,) array of masses / weights
657 @type m: numpy.array
658
659 @return: second moments
660 @rtype: (d,d) numpy.array
661 """
662 from numpy import dot
663
664 x = (x - center_of_mass(x, m)).T
665
666 if m is not None:
667 return dot(x * m, x.T)
668 else:
669 return dot(x, x.T)
670
672 """
673 Compute the inertia tensor of a set of (optionally weighted) points.
674
675 @param x: array of rank (n,d) where n is the number of points
676 and d the dimension
677 @type x: numpy.array
678 @param m: rank (n,) array of masses / weights
679 @type m: numpy.array
680
681 @return: inertia tensor
682 @rtype: (d,d) numpy.array
683 """
684 from numpy import dot, eye
685
686 x = (x - center_of_mass(x, m)).T
687 r2 = (x ** 2).sum(0)
688
689 if m is not None:
690 return eye(x.shape[0]) * dot(m, r2) - dot(x * m, x.T)
691 else:
692 return eye(x.shape[0]) * r2.sum() - dot(x, x.T)
693
695 """
696 Find pairs with euclidean distance below C{cutoff}. Either between
697 C{X} and C{Y}, or within C{X} if C{Y} is C{None}.
698
699 Uses a KDTree and thus is memory efficient and reasonable fast.
700
701 @type cutoff: float
702 @type X: (m,n) numpy.array
703 @type Y: (k,n) numpy.array
704 @return: set of index tuples
705 @rtype: iterable
706 """
707 try:
708 from scipy.spatial import cKDTree as KDTree
709 KDTree.query_pairs
710 KDTree.query_ball_tree
711 except (ImportError, AttributeError):
712 from scipy.spatial import KDTree
713
714 tree = KDTree(X, len(X))
715 if Y is None:
716 return tree.query_pairs(cutoff)
717
718 other = KDTree(Y, len(Y))
719 contacts = tree.query_ball_tree(other, cutoff)
720 return ((i, j) for (i, js) in enumerate(contacts) for j in js)
721
723 """
724 Calculates a matrix of pairwise distances
725
726 @param X: m x n input vector
727 @type X: numpy array
728
729 @param Y: k x n input vector or None, which defaults to Y=X
730 @type Y: numpy array
731
732 @return: m x k distance matrix
733 @rtype: numpy array
734 """
735 from numpy import add, clip, sqrt, dot, transpose, sum
736
737 if Y is None: Y = X
738
739 if X.ndim < 2: X = X.reshape((1, -1))
740 if Y.ndim < 2: Y = Y.reshape((1, -1))
741
742 C = dot(X, transpose(Y))
743 S = add.outer(sum(X ** 2, 1), sum(Y ** 2, 1))
744
745 return sqrt(clip(S - 2 * C, 0., 1e300))
746
748 """
749 Squared distance between C{X} and C{Y} along the last axis. For details, see L{distance}.
750
751 @return: scalar or array of length m
752 @rtype: (m,) numpy.array
753 """
754 return ((numpy.asarray(X) - Y) ** 2).sum(-1)
755
757 """
758 Distance between C{X} and C{Y} along the last axis.
759
760 @param X: m x n input vector
761 @type X: numpy array
762 @param Y: m x n input vector
763 @type Y: numpy array
764 @return: scalar or array of length m
765 @rtype: (m,) numpy.array
766 """
767 return distance_sq(X, Y) ** 0.5
768
770 """
771 Calculate an average structure from an ensemble of structures
772 (i.e. X is a rank-3 tensor: X[i] is a (N,3) configuration matrix).
773
774 @param X: m x n x 3 input vector
775 @type X: numpy array
776
777 @return: average structure
778 @rtype: (n,3) numpy.array
779 """
780 from numpy.linalg import eigh
781
782 B = csb.numeric.gower_matrix(X)
783 v, U = eigh(B)
784 if numpy.iscomplex(v).any():
785 v = v.real
786 if numpy.iscomplex(U).any():
787 U = U.real
788
789 indices = numpy.argsort(v)[-3:]
790 v = numpy.take(v, indices, 0)
791 U = numpy.take(U, indices, 1)
792
793 x = U * numpy.sqrt(v)
794 i = 0
795 while is_mirror_image(x, X[0]) and i < 2:
796 x[:, i] *= -1
797 i += 1
798 return x
799
801 """
802 Check if two configurations X and Y are mirror images
803 (i.e. their optimal superposition involves a reflection).
804
805 @param X: n x 3 input vector
806 @type X: numpy array
807 @param Y: n x 3 input vector
808 @type Y: numpy array
809
810 @rtype: bool
811 """
812 from numpy.linalg import det, svd
813
814
815
816 X = X - numpy.mean(X, 0)
817 Y = Y - numpy.mean(Y, 0)
818
819
820
821 V, L, U = svd(numpy.dot(numpy.transpose(X), Y))
822
823 R = numpy.dot(V, U)
824
825 return det(R) < 0
826
828 """
829 Convert an array of torsion angles in radians to torsion degrees
830 ranging from -180 to 180.
831
832 @param x: array of angles
833 @type x: numpy array
834
835 @rtype: numpy array
836 """
837 from csb.bio.structure import TorsionAngles
838
839 func = numpy.vectorize(TorsionAngles.deg)
840 return func(x)
841
843 """
844 Convert an array of torsion angles in torsion degrees to radians.
845
846 @param x: array of angles
847 @type x: numpy array
848
849 @rtype: numpy array
850 """
851 from csb.bio.structure import TorsionAngles
852
853 func = numpy.vectorize(TorsionAngles.rad)
854 return func(x)
855
856
857