1 """
2 Python application for robust structure superposition of two structures.
3 bfit models non-rigid displacements in protein ensembles with outlier-tolerant
4 probability distributions.
5 """
6 import numpy
7
8 import csb.apps
9 import csb.bio.utils
10
11 from csb.bio.io.wwpdb import LegacyStructureParser
12 from csb.bio.sequence import SequenceAlignment
18
20
21 @property
24
26
27 cmd = csb.apps.ArgHandler(self.program, __doc__)
28
29
30 cmd.add_positional_argument('pdb1', str,
31 'full path to the first structure')
32
33 cmd.add_positional_argument('pdb2', str,
34 'full path to the second structure')
35
36
37 cmd.add_scalar_option('chain1', 'c', str,
38 'Chain of the first structure',
39 default='A')
40
41 cmd.add_scalar_option('chain2', 'd', str,
42 'Chain of the second structure',
43 default='A')
44
45 cmd.add_scalar_option('scalemixture', 's', str,
46 'Scale mixture distribution',
47 default='student',
48 choices=['student', 'k'])
49
50
51 cmd.add_scalar_option('alignment', 'a', str,
52 'Alignment in fasta format defining equivalent positions\n'
53 + 'Assumes that chain1 is the first sequence of '
54 + 'the alignment and chain2 the second sequence')
55
56 cmd.add_scalar_option('outfile', 'o', str,
57 'file to which the rotated second ' +
58 'structure will be written',
59 default='bfit.pdb')
60
61 cmd.add_scalar_option('niter', 'n', int,
62 'Number of optimization steps',
63 default=200)
64
65 cmd.add_boolean_option('em', None,
66 'Use the EM algorithm for optimsation',
67 default = False)
68
69 return cmd
70
71
72
73 -class BFitApp(csb.apps.Application):
74 """
75 Python application for robust structure superposition of two protein structures
76 """
77
79 try:
80 parser = LegacyStructureParser(self.args.pdb1)
81 r = parser.parse()
82
83 parser = LegacyStructureParser(self.args.pdb2)
84 m = parser.parse()
85 except IOError as e:
86 self.exit('PDB file parsing failed\n' + str(e.value), ExitCodes.IO_ERROR)
87
88 X = numpy.array(r[self.args.chain1].get_coordinates(['CA'], True))
89 Y = numpy.array(m[self.args.chain2].get_coordinates(['CA'], True))
90
91 if self.args.alignment is not None:
92 align = SequenceAlignment.parse(file(self.args.alignment).read())
93 align = align[:2, :]
94
95 matches = []
96 for i in range(1, align.length + 1):
97 if not align.gap_at(i):
98 matches.append([align.columns[i][0].rank - 1,
99 align.columns[i][1].rank - 1])
100 matches = numpy.array(matches)
101 X = X[matches[:, 0], :]
102 Y = Y[matches[:, 1], :]
103
104
105 if len(X) != len(Y):
106 self.exit('Structures are of different lengths,' +
107 ' please specify an alignment',
108 ExitCodes.INPUT_ERROR)
109
110 R, t = csb.bio.utils.bfit(X, Y, self.args.niter,
111 self.args.scalemixture, self.args.em)
112
113 m.transform(R, t)
114 m.to_pdb(self.args.outfile)
115
119
120
121 if __name__ == '__main__':
122 main()
123