Package csb :: Package bio :: Package io :: Module procheck
[frames | no frames]

Source Code for Module csb.bio.io.procheck

 1  """ 
 2  Procheck parser 
 3  """ 
 4  import os 
 5  import re 
 6  import shutil 
 7   
 8  from csb.io import Shell 
 9  from csb.io import TempFolder 
10   
class ProcheckParser(object):
    """
    Simple Procheck summary parser.

    Extracts a fixed set of quality indicators from the text of a Procheck
    summary (``.sum``) file, and can optionally run the Procheck binary on
    a PDB file to produce that summary first.
    """

    # Patterns are compiled once at class-definition time and written as raw
    # strings so that regex escapes such as ``\s`` are not interpreted (and
    # warned about) as string escapes.
    _INPUT_FILE = re.compile(r'>>>-----.*?\n.*?\n\s*\|\s*(\S+)\s+')
    _RESIDUES = re.compile(r'(\d+)\s*residues\s\|')
    _RAMACHANDRAN = re.compile(r'Ramachandran\splot:\s*(\d+\.\d+)'
                               r'%\s*core\s*(\d+\.\d+)%\s*allow\s*(\d+\.\d+)'
                               r'%\s*gener\s*(\d+\.\d+)%\s*disall')
    _LABELLED_ALL = re.compile(r'Ramachandrans:\s*(\d+)\s*.*?out\sof\s*(\d+)')
    _LABELLED_CHI = re.compile(r'Chi1-chi2\splots:\s*(\d+)\s*.*?out\sof\s*(\d+)')
    _BAD_CONTACTS = re.compile(r'Bad\scontacts:\s*(\d+)')
    # Character class accepts digits, sign characters and the decimal point.
    _G_FACTORS = re.compile(r'G-factors\s*Dihedrals:\s*([0-9+.-]+)'
                            r'\s*Covalent:\s*([0-9+.-]+)\s*Overall:\s*([0-9+.-]+)')

    def __init__(self):
        # Name of the executable invoked by run(); it is not given a
        # directory here, so it has to be resolvable by the shell.
        self.binary = 'procheck.scr'
        # Passed to the binary as its second command-line argument
        # (presumably the resolution expected by Procheck -- confirm).
        self.acc = 2.0

    def parse(self, fn):
        """
        Parse a Procheck summary file.

        @param fn: source file to parse (a leading ``~`` is expanded)
        @type fn: str

        @return: dictionary of parsed quality indicators with the keys
                 C{input_file}, C{#residues}, C{rama_core}, C{rama_allow},
                 C{rama_gener}, C{rama_disall}, C{g_dihedrals}, C{g_bond},
                 C{g_overall}, C{badContacts}, C{labelledAll} and
                 C{labelledChi}; the last two are fractions
                 (labelled count divided by the "out of" total)
        @rtype: dict

        @raise AttributeError: if one of the expected sections is not
                               found in the file
        @raise ZeroDivisionError: if an "out of" total is zero
        """
        # The context manager guarantees the handle is closed even when a
        # section is missing and parsing fails below.
        with open(os.path.expanduser(fn)) as f_handler:
            text = f_handler.read()

        info = dict()

        info['input_file'] = self._INPUT_FILE.search(text).group(1)
        info['#residues'] = int(self._RESIDUES.search(text).group(1))

        (info['rama_core'], info['rama_allow'],
         info['rama_gener'], info['rama_disall']) = \
            [float(g) for g in self._RAMACHANDRAN.search(text).groups()]

        info['g_dihedrals'], info['g_bond'], info['g_overall'] = \
            [float(g) for g in self._G_FACTORS.search(text).groups()]

        info['badContacts'] = int(self._BAD_CONTACTS.search(text).group(1))

        labelled, total = self._LABELLED_ALL.search(text).groups()
        info['labelledAll'] = float(labelled) / float(total)

        # Divide by the "out of" total (second group); dividing the count
        # by itself always produced 1.0.
        labelled, total = self._LABELLED_CHI.search(text).groups()
        info['labelledChi'] = float(labelled) / float(total)

        return info

    def run(self, pdb_file):
        """
        Run procheck for the given PDB file and parse the output.
        Will fail if the procheck binary is not in the path.

        The file is copied into a temporary folder, which becomes the
        working directory while the binary runs; the previous working
        directory is always restored afterwards.

        @param pdb_file: file to parse
        @type pdb_file: str

        @return: dict of parsed values, see L{ProcheckParser.parse}
        @rtype: dict
        """
        wd = os.getcwd()
        base = os.path.basename(pdb_file)

        with TempFolder() as tmp:
            shutil.copy(os.path.expanduser(pdb_file), tmp.name)
            os.chdir(tmp.name)
            try:
                Shell.run('{0} {1} {2}'.format(self.binary,
                                               os.path.join(tmp.name, base),
                                               self.acc))
                # The summary is expected next to the input, named after
                # it with a ".sum" extension.
                summary = '.'.join([os.path.splitext(base)[0], 'sum'])
                out = self.parse(os.path.join(tmp.name, summary))
            finally:
                # Restore the caller's working directory even on failure,
                # and before the temporary folder is cleaned up.
                os.chdir(wd)

        return out
81