Package csb :: Package apps :: Module bfite
[frames] | no frames]

Source Code for Module csb.apps.bfite

  1  """ 
  2  Python application for robust structure superposition of an ensemble of structures. 
  3  bfite models non-rigid displacements in protein ensembles with outlier-tolerant 
  4  probability distributions. 
  5  """ 
  6  import numpy 
  7   
  8  import csb.apps 
  9  import csb.bio.structure 
 10   
 11  from csb.bio.io.wwpdb import LegacyStructureParser 
 12  from csb.bio.utils import average_structure, fit, wfit 
 13  from csb.statistics.scalemixture import ScaleMixture, GammaPrior 
14 15 16 -class ExitCodes(csb.apps.ExitCodes):
17 IO_ERROR = 2
18
19 -class AppRunner(csb.apps.AppRunner):
20 21 @property
22 - def target(self):
23 return BFitApp
24
25 - def command_line(self):
26 27 cmd = csb.apps.ArgHandler(self.program, __doc__) 28 29 # Input structures 30 cmd.add_positional_argument('pdb', str, 31 'full path to the ensemble') 32 33 # Optional arguments 34 cmd.add_scalar_option('chain', 'c', str, 35 'Chain', 36 default='A') 37 38 cmd.add_scalar_option('scalemixture', 's', str, 39 'Scale mixture distribution', 40 default='student', 41 choices=['student', 'k']) 42 43 cmd.add_scalar_option('alignment', 'a', str, 44 'Alignment in fasta format defining equivalent positions\n' 45 + 'Assumes that chain1 is the first sequence of ' 46 + 'the alignment and chain2 the second sequence') 47 48 cmd.add_scalar_option('outfile', 'o', str, 49 'file to which the rotated second ' + 50 'structure will be written', 51 default='bfit.pdb') 52 53 cmd.add_scalar_option('niter', 'n', int, 54 'Number of optimization steps', 55 default=200) 56 57 return cmd
58
59 60 61 -class BFitApp(csb.apps.Application):
62 """ 63 Python application for robust structure superposition of two protein structures 64 """ 65
66 - def main(self):
67 try: 68 parser = LegacyStructureParser(self.args.pdb) 69 models = parser.models() 70 71 except IOError as e: 72 self.exit('PDB file parsing failed\n' + str(e.value), ExitCodes.IO_ERROR) 73 74 if len(models) < 2: 75 self.exit('PDB file contains only one model', ExitCodes.USAGE_ERROR) 76 77 ensemble = parser.parse_models(models) 78 X = numpy.array([model[self.args.chain].get_coordinates(['CA'], True) for model in ensemble]) 79 x_mu = average_structure(X) 80 #n = X.shape[1] 81 m = X.shape[0] 82 R = numpy.zeros((m, 3, 3)) 83 t = numpy.ones((m, 3)) 84 85 86 prior = GammaPrior() 87 mixture = ScaleMixture(scales=X.shape[1], 88 prior=prior, d=3) 89 90 for i in range(m): 91 R[i, :, :], t[i, :] = fit(x_mu, X[i]) 92 93 # gibbs sampling cycle 94 for j in range(self.args.niter): 95 # apply rotation 96 data = numpy.array([numpy.sum((x_mu - numpy.dot(X[i], numpy.transpose(R[i])) - t[i]) ** 2, -1) ** 0.5 97 for i in range(m)]).T 98 # sample scales 99 mixture.estimate(data) 100 # sample rotations 101 for i in range(m): 102 R[i, :, :], t[i, :] = wfit(x_mu, X[i], mixture.scales) 103 104 105 out_ensemble = csb.bio.structure.Ensemble() 106 107 for i, model in enumerate(ensemble): 108 model.transform(R[i], t[i]) 109 out_ensemble.models.append(model) 110 111 out_ensemble.to_pdb(self.args.outfile)
112
113 114 -def main():
115 AppRunner().run()
116 117 118 if __name__ == '__main__': 119 main() 120