1 """
2 ProMix: Take a protein structure ensemble and find a mixture of rigid
3 segments or a mixture of conformers. Writes K copies of the ensemble
4 (for segments) or K subsets of the ensemble (for conformers) as PDB
5 files, each superposed on different components.
6
7 Reference: Hirsch M, Habeck M. - Bioinformatics. 2008 Oct 1;24(19):2184-92
8 """
9
10 import numpy
11
12 import csb.apps
13 import csb.bio.structure
14
15 from csb.bio.io.wwpdb import LegacyStructureParser
16 from csb.statistics import mixtures
21
23
24 @property
27
29 cmd = csb.apps.ArgHandler(self.program, __doc__)
30
31 cmd.add_scalar_option('components', 'K', int, 'Number of components', -1)
32 cmd.add_scalar_option('type', 't', str, 'Type of mixture', 'segments', ('segments', 'conformers'))
33 cmd.add_positional_argument('infile', str, 'input PDB file')
34
35 return cmd
36
40
42
44 try:
45 parser = LegacyStructureParser(self.args.infile)
46 models = parser.models()
47 except:
48 self.exit('PDB file parsing failed', ExitCodes.IO_ERROR)
49
50 if len(models) < 2:
51 self.exit('PDB file contains only one model', ExitCodes.USAGE_ERROR)
52
53 ensemble = parser.parse_models(models)
54 X = numpy.array([model.get_coordinates(['CA'], True) for model in ensemble])
55
56 if self.args.type == 'segments':
57 self.main_segments(ensemble, X)
58 elif self.args.type == 'conformers':
59 self.main_conformers(ensemble, X)
60 else:
61 raise ValueError('type must be "segments" or "conformers"')
62
63 - def main_segments(self, ensemble, X):
64
65 mixture = mixtures.SegmentMixture.new(X, self.args.components)
66 self.log('Number of segments: {0}'.format(mixture.K))
67
68 for k,(sigma,w) in enumerate(zip(mixture.sigma, mixture.w)):
69 outfile = 'promix_segment_{0}.pdb'.format(k+1)
70 self.log(' {0}: sigma = {1:6.3f}, w = {2:.3f}, file = {3}'.format(k+1, sigma, w, outfile))
71
72 for model, R, t in zip(ensemble, mixture.R, mixture.t):
73 if k > 0:
74 model.transform(R[k-1], t[k-1])
75 R = R[k].T
76 t = -numpy.dot(R, t[k])
77 model.transform(R, t)
78
79 ensemble.to_pdb(outfile)
80
82
83 mixture = mixtures.ConformerMixture.new(X, self.args.components)
84 self.log('Number of conformers: {0}'.format(mixture.K))
85
86 membership = mixture.membership
87
88 for k,(sigma,w) in enumerate(zip(mixture.sigma, mixture.w)):
89 outfile = 'promix_conformer_{0}.pdb'.format(k+1)
90 self.log(' {0}: sigma = {1:6.3f}, w = {2:.3f}, file = {3}'.format(k+1, sigma, w, outfile))
91
92 ek = csb.bio.structure.Ensemble()
93
94 for model, R, t, mk in zip(ensemble, mixture.R, mixture.t, membership):
95 if mk != k:
96 continue
97 R = R[k].T
98 t = -numpy.dot(R, t[k])
99 model.transform(R, t)
100 ek.models.append(model)
101
102 ek.to_pdb(outfile)
103
107
108
109 if __name__ == '__main__':
110 main()
111