csb.apps.bfite

1 """ 2 Python application for robust structure superposition of an ensemble of structures. 3 bfite models non-rigid displacements in protein ensembles with outlier-tolerant 4 probability distributions. 5 """ 6 import numpy 7 8 import csb.apps 9 import csb.bio.structure 10 11 from csb.bio.io.wwpdb import LegacyStructureParser 12 from csb.bio.utils import average_structure, fit, wfit 13 from csb.statistics.scalemixture import ScaleMixture, GammaPrior

14 15 16 -class ExitCodes(csb.apps.ExitCodes):

17 IO_ERROR = 2

18

19 -class AppRunner(csb.apps.AppRunner):

20 21 @property

22 - def target(self):

23 return BFitApp

24

25 - def command_line(self):

26 27 cmd = csb.apps.ArgHandler(self.program, __doc__) 28 29 # Input structures 30 cmd.add_positional_argument('pdb', str, 31 'full path to the ensemble') 32 33 # Optional arguments 34 cmd.add_scalar_option('chain', 'c', str, 35 'Chain', 36 default='A') 37 38 cmd.add_scalar_option('scalemixture', 's', str, 39 'Scale mixture distribution', 40 default='student', 41 choices=['student', 'k']) 42 43 cmd.add_scalar_option('alignment', 'a', str, 44 'Alignment in fasta format defining equivalent positions\n' 45 + 'Assumes that chain1 is the first sequence of ' 46 + 'the alignment and chain2 the second sequence') 47 48 cmd.add_scalar_option('outfile', 'o', str, 49 'file to which the rotated second ' + 50 'structure will be written', 51 default='bfit.pdb') 52 53 cmd.add_scalar_option('niter', 'n', int, 54 'Number of optimization steps', 55 default=200) 56 57 return cmd

58

59 60 61 -class BFitApp(csb.apps.Application):

62 """ 63 Python application for robust structure superposition of two protein structures 64 """ 65

66 - def main(self):

67 try: 68 parser = LegacyStructureParser(self.args.pdb) 69 models = parser.models() 70 71 except IOError as e: 72 self.exit('PDB file parsing failed\n' + str(e.value), ExitCodes.IO_ERROR) 73 74 if len(models) < 2: 75 self.exit('PDB file contains only one model', ExitCodes.USAGE_ERROR) 76 77 ensemble = parser.parse_models(models) 78 X = numpy.array([model[self.args.chain].get_coordinates(['CA'], True) for model in ensemble]) 79 x_mu = average_structure(X) 80 #n = X.shape[1] 81 m = X.shape[0] 82 R = numpy.zeros((m, 3, 3)) 83 t = numpy.ones((m, 3)) 84 85 86 prior = GammaPrior() 87 mixture = ScaleMixture(scales=X.shape[1], 88 prior=prior, d=3) 89 90 for i in range(m): 91 R[i, :, :], t[i, :] = fit(x_mu, X[i]) 92 93 # gibbs sampling cycle 94 for j in range(self.args.niter): 95 # apply rotation 96 data = numpy.array([numpy.sum((x_mu - numpy.dot(X[i], numpy.transpose(R[i])) - t[i]) ** 2, -1) ** 0.5 97 for i in range(m)]).T 98 # sample scales 99 mixture.estimate(data) 100 # sample rotations 101 for i in range(m): 102 R[i, :, :], t[i, :] = wfit(x_mu, X[i], mixture.scales) 103 104 105 out_ensemble = csb.bio.structure.Ensemble() 106 107 for i, model in enumerate(ensemble): 108 model.transform(R[i], t[i]) 109 out_ensemble.models.append(model) 110 111 out_ensemble.to_pdb(self.args.outfile)

112

113 114 -def main():

115 AppRunner().run()

116 117 118 if __name__ == '__main__': 119 main() 120

Source Code for Module csb.apps.bfite