csb.bio.io.vasco

9

def __init__(self, residue_id, amino_acid, nucleus,
             shift, element, secondary_structure):
    """
    Store each argument on the instance under an attribute of the same name.

    @param residue_id: residue identifier
    @param amino_acid: amino acid label
    @param nucleus: nucleus name
    @param shift: chemical shift value
    @param element: element of the nucleus
    @param secondary_structure: secondary structure label
    """
    self.residue_id = residue_id
    self.nucleus = nucleus
    self.element = element
    self.amino_acid = amino_acid
    self.shift = shift
    self.secondary_structure = secondary_structure

19

def __str__(self):
    """
    Return the amino acid, nucleus and shift separated by single spaces.

    @rtype: str
    """
    return '{0.amino_acid} {0.nucleus} {0.shift}'.format(self)

# repr() returns the same text as str()
__repr__ = __str__

40 """ 41 Simple Vasco Parser 42 """ 43

def __init__(self):
    """
    Create a parser that has no input stream attached yet.
    """
    # Replaced by parse() with the file object being read.
    self._stream = None

46

def parse(self, file_name, ignore_outliers=True):
    """
    Open C{file_name}, parse its header and then its shift records.

    The file is closed before this method returns, including when parsing
    raises an exception.

    @param file_name: source file to parse
    @type file_name: str
    @param ignore_outliers: forwarded to the shift record parser
    @type ignore_outliers: bool
    @return: a L{ChemicalShiftContainer} of L{ShiftInfo} objects
    @rtype: L{ChemicalShiftContainer}
    """
    with open(file_name) as stream:
        # The helper methods read from self._stream, so publish the handle
        # before calling them.
        self._stream = stream
        shifts = self._parse_header()
        self._parse_shifts(shifts, ignore_outliers=ignore_outliers)

    return shifts

61

def _parse_header(self):
    """
    Rewind the stream, read its leading '#' lines and build the container.

    Recognised header lines are identified by the text starting at column 2;
    their value is taken from column 20 onwards, stripped of whitespace.
    Fields whose header line is absent stay empty strings.

    @return: a container constructed from the header values
    @rtype: L{ChemicalShiftContainer}
    @raise IndexError: if a 'SEQUENCE PDB' line is shorter than 18 characters
    """
    bmrb_id = ''
    pdb_id = ''
    sequence = ''
    chain = ''
    exptype = ''

    self._stream.seek(0)

    for line in self._stream:
        # NOTE(review): the first line that does not start with '#' ends the
        # header and is consumed here, so the shift record parser resumes on
        # the line after it -- confirm this matches the file format.
        if not line.startswith('#'):
            break

        tag = line[2:]
        value = line[20:].strip()

        if tag.startswith('BMRB ORIGIN'):
            bmrb_id = value
        elif tag.startswith('PDB ORIGIN'):
            pdb_id = value
        elif tag.startswith('SEQUENCE PDB'):
            sequence = value
            # the chain identifier is the single character at column 17
            chain = line[17]
        elif tag.startswith('PDB EXPTYPE'):
            exptype = value

    return ChemicalShiftContainer(bmrb_id, pdb_id, chain,
                                  sequence, exptype)

93 94

def _parse_shifts(self, data, ignore_outliers=True):
    """
    Read the remaining lines of the stream as fixed-width shift records and
    store them in C{data} as C{data[chain_id][residue_code][nucleus_name]}.

    @param data: container to fill; a chain entry is appended the first
                 time its chain identifier is seen
    @param ignore_outliers: if True, skip lines containing "Shift outlier"
    @type ignore_outliers: bool
    @raise IndexError: if a line is too short for the fixed columns
    @raise ValueError: if columns 43-51 do not hold a number
    """
    for line in self._stream:
        if ignore_outliers and "Shift outlier" in line:
            continue

        # fixed-width columns of one shift record
        chain_id = line[7]
        res_code = line[9:14].strip()
        res_label = line[16:19].strip()
        res_ss = line[21]
        nucleus_name = line[23:28].strip()
        nucleus_element = line[41]
        shift = float(line[43:52])

        info = ShiftInfo(res_code, res_label,
                         nucleus_name, shift,
                         nucleus_element, res_ss)

        if chain_id not in data:
            data.append(chain_id, csb.core.OrderedDict())

        if res_code not in data[chain_id]:
            data[chain_id][res_code] = {}

        # a repeated nucleus name within a residue overwrites the earlier entry
        data[chain_id][res_code][nucleus_name] = info

Source Code for Module csb.bio.io.vasco