1  """ 
  2  ProMix: Take a protein structure ensemble and find a mixture of rigid 
  3  segments or a mixture of conformers. Writes K copies of the ensemble 
  4  (for segments) or K subsets of the ensemble (for conformers) as PDB 
  5  files, each superposed on different components. 
  6   
  7  Reference: Hirsch M, Habeck M. - Bioinformatics. 2008 Oct 1;24(19):2184-92 
  8  """ 
  9   
 10  import numpy 
 11   
 12  import csb.apps 
 13  import csb.bio.structure 
 14   
 15  from csb.bio.io.wwpdb import LegacyStructureParser 
 16  from csb.statistics import mixtures 
 21   
 23   
 24      @property 
 27   
 29          cmd = csb.apps.ArgHandler(self.program, __doc__) 
 30   
 31          cmd.add_scalar_option('components', 'K', int, 'Number of components', -1) 
 32          cmd.add_scalar_option('type', 't', str, 'Type of mixture', 'segments', ('segments', 'conformers')) 
 33          cmd.add_positional_argument('infile', str, 'input PDB file') 
 34   
 35          return cmd 
  36   
  40   
 42   
 44          try: 
 45              parser = LegacyStructureParser(self.args.infile) 
 46              models = parser.models() 
 47          except: 
 48              self.exit('PDB file parsing failed', ExitCodes.IO_ERROR) 
 49   
 50          if len(models) < 2: 
 51              self.exit('PDB file contains only one model', ExitCodes.USAGE_ERROR) 
 52   
 53          ensemble = parser.parse_models(models) 
 54          X = numpy.array([model.get_coordinates(['CA'], True) for model in ensemble]) 
 55   
 56          if self.args.type == 'segments': 
 57              self.main_segments(ensemble, X) 
 58          elif self.args.type == 'conformers': 
 59              self.main_conformers(ensemble, X) 
 60          else: 
 61              raise ValueError('type must be "segments" or "conformers"') 
  62   
 63 -    def main_segments(self, ensemble, X): 
  64   
 65          mixture = mixtures.SegmentMixture.new(X, self.args.components) 
 66          self.log('Number of segments: {0}'.format(mixture.K)) 
 67   
 68          for k,(sigma,w) in enumerate(zip(mixture.sigma, mixture.w)): 
 69              outfile = 'promix_segment_{0}.pdb'.format(k+1) 
 70              self.log('  {0}: sigma = {1:6.3f}, w = {2:.3f}, file = {3}'.format(k+1, sigma, w, outfile)) 
 71   
 72              for model, R, t in zip(ensemble, mixture.R, mixture.t): 
 73                  if k > 0: 
 74                      model.transform(R[k-1], t[k-1]) 
 75                  R = R[k].T 
 76                  t = -numpy.dot(R, t[k]) 
 77                  model.transform(R, t) 
 78   
 79              ensemble.to_pdb(outfile) 
  80   
 82   
 83          mixture = mixtures.ConformerMixture.new(X, self.args.components) 
 84          self.log('Number of conformers: {0}'.format(mixture.K)) 
 85   
 86          membership = mixture.membership 
 87   
 88          for k,(sigma,w) in enumerate(zip(mixture.sigma, mixture.w)): 
 89              outfile = 'promix_conformer_{0}.pdb'.format(k+1) 
 90              self.log('  {0}: sigma = {1:6.3f}, w = {2:.3f}, file = {3}'.format(k+1, sigma, w, outfile)) 
 91   
 92              ek = csb.bio.structure.Ensemble() 
 93   
 94              for model, R, t, mk in zip(ensemble, mixture.R, mixture.t, membership): 
 95                  if mk != k: 
 96                      continue 
 97                  R = R[k].T 
 98                  t = -numpy.dot(R, t[k]) 
 99                  model.transform(R, t) 
100                  ek.models.append(model) 
101   
102              ek.to_pdb(outfile) 
  103   
107       
108       
109  if __name__ == '__main__': 
110      main() 
111