1 """
2 Python application for robust structure superposition of an ensemble of structures.
3 bfite models non-rigid displacements in protein ensembles with outlier-tolerant
4 probability distributions.
5 """
6 import numpy
7
8 import csb.apps
9 import csb.bio.structure
10
11 from csb.bio.io.wwpdb import LegacyStructureParser
12 from csb.bio.utils import average_structure, fit, wfit
13 from csb.statistics.scalemixture import ScaleMixture, GammaPrior
18
20
21 @property
24
26
27 cmd = csb.apps.ArgHandler(self.program, __doc__)
28
29
30 cmd.add_positional_argument('pdb', str,
31 'full path to the ensemble')
32
33
34 cmd.add_scalar_option('chain', 'c', str,
35 'Chain',
36 default='A')
37
38 cmd.add_scalar_option('scalemixture', 's', str,
39 'Scale mixture distribution',
40 default='student',
41 choices=['student', 'k'])
42
43 cmd.add_scalar_option('alignment', 'a', str,
44 'Alignment in fasta format defining equivalent positions\n'
45 + 'Assumes that chain1 is the first sequence of '
46 + 'the alignment and chain2 the second sequence')
47
48 cmd.add_scalar_option('outfile', 'o', str,
49 'file to which the rotated second ' +
50 'structure will be written',
51 default='bfit.pdb')
52
53 cmd.add_scalar_option('niter', 'n', int,
54 'Number of optimization steps',
55 default=200)
56
57 return cmd
58
59
60
61 -class BFitApp(csb.apps.Application):
62 """
63 Python application for robust structure superposition of two protein structures
64 """
65
67 try:
68 parser = LegacyStructureParser(self.args.pdb)
69 models = parser.models()
70
71 except IOError as e:
72 self.exit('PDB file parsing failed\n' + str(e.value), ExitCodes.IO_ERROR)
73
74 if len(models) < 2:
75 self.exit('PDB file contains only one model', ExitCodes.USAGE_ERROR)
76
77 ensemble = parser.parse_models(models)
78 X = numpy.array([model[self.args.chain].get_coordinates(['CA'], True) for model in ensemble])
79 x_mu = average_structure(X)
80
81 m = X.shape[0]
82 R = numpy.zeros((m, 3, 3))
83 t = numpy.ones((m, 3))
84
85
86 prior = GammaPrior()
87 mixture = ScaleMixture(scales=X.shape[1],
88 prior=prior, d=3)
89
90 for i in range(m):
91 R[i, :, :], t[i, :] = fit(x_mu, X[i])
92
93
94 for j in range(self.args.niter):
95
96 data = numpy.array([numpy.sum((x_mu - numpy.dot(X[i], numpy.transpose(R[i])) - t[i]) ** 2, -1) ** 0.5
97 for i in range(m)]).T
98
99 mixture.estimate(data)
100
101 for i in range(m):
102 R[i, :, :], t[i, :] = wfit(x_mu, X[i], mixture.scales)
103
104
105 out_ensemble = csb.bio.structure.Ensemble()
106
107 for i, model in enumerate(ensemble):
108 model.transform(R[i], t[i])
109 out_ensemble.models.append(model)
110
111 out_ensemble.to_pdb(self.args.outfile)
112
116
117
118 if __name__ == '__main__':
119 main()
120