1 """
2 HHpred-related format parsers.
3
4 This module defines two HHpred format parsers: L{HHProfileParser} and
5 L{HHOutputParser}. The first one is used to read HMM (*.hhm) files:
6
7 >>> HHProfileParser("profile.hhm", "profile.pdb").parse()
8 <ProfileHMM> # ProfileHMM object
9
10 while the latter parses HHsearch results files (*.hhr):
11
12 >>> HHOutputParser().parse_file("hits.hhr"):
13 <HHpredHitList> # collection of HHpredHit-s
14
15 See L{ProfileHMM}, L{HHpredHitList} and L{HHpredHit} from L{csb.bio.hmm}
16 for details. For text serialization of HMM profiles, see L{HHMFileBuilder}
17 in this module.
18 """
19
20 import os
21 import re
22
23 import csb.io
24 import csb.bio.io
25 import csb.bio.structure as structure
26
27 from csb.core import Enum, CollectionIndexError, ItemNotFoundError
28 from csb.bio.sequence import Sequence, ProteinAlphabet, A3MAlignment
29
30 from csb.bio.hmm import State, Transition, ProfileHMM, HMMLayer, ProfileLength
31 from csb.bio.hmm import HHpredHitList, HHpredHit, ScoreUnits, States, EVDParameters
32 from csb.bio.hmm import StateNotFoundError, TransitionNotFoundError
37
40
43
46 """
47 A class that is HHpred HMM format aware.
48 Produces a L{ProfileHMM} object representation of a given HHpred profile
49 HMM.
50
51 @param hhm_file: *.hhm file name to parse
52 @type hhm_file: str
53 @param pdb_file: an optional hhm structure file, containing the structural
54 data, associated with the HMM.
55 @type pdb_file: str
56 @note: This is NOT a regular PDB file! It must be specifically formatted
57 for use with HHpred.
58
59 @raise IOError: if any of the files does not exist
60 """
61
62 - def __init__(self, hhm_file, pdb_file=None):
63
64 if not os.path.exists(hhm_file):
65 raise IOError("Could not read HHM file {0}".format(hhm_file))
66
67 if pdb_file:
68 if not os.path.exists(pdb_file):
69 raise IOError("Could not read structure file {0}".format(pdb_file))
70
71 self._file = hhm_file
72 self._pdb = pdb_file
73 self._properties = None
74 self._sequences = None
75 self._profile = None
76
77 self._chopped = False
78 self._chop()
79
124
126 """
127 Parse the HMM profile.
128
129 @param units: also convert the profile score units to the specified
130 L{csb.bio.hmm.ScoreUnits} kind
131 @type units: L{csb.core.EnumItem}
132
133 @return: a L{ProfileHMM} instance
134 @rtype: L{ProfileHMM}
135
136 @raise HHProfileFormatError: when the hhm file is invalid/corrupt
137 """
138 assert self._chopped
139
140 hmm = ProfileHMM(units=units, scale=-1000.0, logbase=2)
141 if self._profile:
142 self._parse_profile(hmm, units)
143 else:
144 raise HHProfileFormatError('Missing HMM profile table.')
145
146 if self._properties:
147 self._parse_properties(hmm)
148 else:
149 raise HHProfileFormatError('No profile properties found.')
150
151 if self._sequences:
152 self._parse_sequences(hmm)
153 else:
154 raise HHProfileFormatError('No profile MSA and secondary structure found.')
155
156 if hmm.dssp:
157 hmm._assign_secstructure()
158
159 if self._pdb:
160 self._parse_structure(hmm)
161
162 return hmm
163
165 """
166 Chop the HHM file into pieces - HMM properties, secondary structure +
167 MSA, HMM.
168 """
169
170 try:
171 with open(self._file, 'r') as f:
172 pr = csb.io.EntryReader(f, 'HHsearch', 'SEQ')
173 self._properties = pr.readall()[0].splitlines()
174
175 with open(self._file, 'r') as f:
176 sr = csb.io.EntryReader(f, '>', '#')
177 self._sequences = sr.readall()
178
179 with open(self._file, 'r') as f:
180 hr = csb.io.EntryReader(f, '#', '//')
181 self._profile = hr.readall()[0].splitlines()
182
183 self._chopped = True
184
185 except IndexError:
186 raise HHProfileFormatError('Corrupt HHM file.')
187
189 """
190 Parse the profile properties
191
192 @param hmm: the hmm object being constructed
193 @type hmm: L{ProfileHMM}
194 @return: the updated hmm
195 @rtype: hmm
196
197 @raise NotImplementedError: if an unexpected data field is encountered
198 """
199 assert self._chopped
200
201 for line in self._properties:
202
203 if line.startswith('NAME '):
204 hmm.name = line[6:].strip()
205
206 elif line.startswith('FAM '):
207 hmm.family = line[6:].strip()
208
209 elif line.startswith('LENG '):
210 m = re.search('(\d+)\D+(\d+)', line).groups()
211 m = tuple(map(int, m))
212 hmm.length = ProfileLength(m[0], m[1])
213
214 elif line.startswith('FILE '):
215 hmm.id = line[6:].strip()
216
217 elif line.startswith('HHsearch'):
218 hmm.version = float(line[9:])
219
220 elif line.startswith('NEFF '):
221 hmm.effective_matches = float(line[6:])
222
223 elif line.startswith('PCT '):
224 if line[6:].strip().lower() == 'true':
225 hmm.pseudocounts = True
226
227 elif line.startswith('EVD '):
228 lamda, mu = map(float, line[6:].split())
229 hmm.evd = EVDParameters(lamda, mu)
230
231 elif line[:5] in ('COM ', 'DATE ', 'FILT '):
232 pass
233 else:
234 raise NotImplementedError("Unexpected line: {0}.".format(line[0:5]))
235
236 if not hmm.id and hmm.name:
237 hmm.id = hmm.name.split()[0]
238 return hmm
239
241 """
242 Parse secondary structure and multiple alignment sections.
243
244 @param hmm: the hmm object being constructed
245 @type hmm: L{ProfileHMM}
246 @return: the updated hmm
247 @rtype: hmm
248 """
249 assert self._chopped
250
251 psipred = None
252 msa_entries = []
253
254 for entry in self._sequences:
255
256 header_token = entry[:8]
257 if header_token in ['>ss_dssp', '>sa_dssp', '>ss_pred', '>ss_conf', '>Consens']:
258
259 lines = entry.strip().splitlines()
260 seq = re.sub('\s+', '', ''.join(lines[1:]))
261
262 if header_token == '>ss_dssp':
263 hmm.dssp = structure.SecondaryStructure(seq)
264 elif header_token == '>sa_dssp':
265 hmm.dssp_solvent = seq
266 elif header_token == '>ss_pred':
267 psipred = seq
268 elif header_token == '>ss_conf':
269 conf = seq
270 hmm.psipred = structure.SecondaryStructure(psipred, conf)
271 elif header_token == '>Consens':
272 hmm.consensus = Sequence('Consensus', 'Consensus', seq)
273 else:
274 msa_entries.append(entry)
275
276 if msa_entries:
277 msa = '\n'.join(msa_entries)
278 hmm.alignment = A3MAlignment.parse(msa, strict=False)
279
280 return hmm
281
283 """
284 Parse the HMM profile.
285
286 @param hmm: the hmm object being constructed
287 @type hmm: L{ProfileHMM}
288 @return: the updated hmm
289 @rtype: L{ProfileHMM}
290
291 @raise NotImplementedError: when an unknown transition string is
292 encountered
293 """
294 assert self._chopped
295
296
297 hmm.start = State(States.Start)
298 hmm.end = State(States.End)
299
300 residues = None
301 background = {}
302 tran_types = None
303 tran_lines = []
304 start_probs = None
305
306 lines = iter(self._profile)
307 pattern = re.compile('^[A-Z\-]\s[0-9]+\s+')
308
309 if units == ScoreUnits.LogScales:
310
311 def parse_probability(v):
312 if v.strip() == '*':
313 return None
314 else:
315 return float(v)
316 else:
317
318 def parse_probability(v):
319 if v.strip() == '*':
320 return None
321 else:
322 return hmm._convert(units, float(v),
323 hmm.scale, hmm.logbase)
324
325
326
327 while True:
328 try:
329 line = next(lines)
330 except StopIteration:
331 break
332
333 if line.startswith('NULL'):
334 try:
335 backprobs = tuple(map(parse_probability, line.split()[1:]))
336
337 line = next(lines)
338 residues = line.split()[1:]
339 residues = [Enum.parse(ProteinAlphabet, aa) for aa in residues]
340
341 for pos, aa in enumerate(residues):
342 background[aa] = backprobs[pos]
343
344 line = next(lines)
345 tran_types = line.split()
346
347 line = next(lines)
348 start_probs = list(map(parse_probability, line.split()))
349 except StopIteration:
350 break
351
352 elif re.match(pattern, line):
353 emrow = line
354 try:
355 tran_lines.append(next(lines))
356
357 except StopIteration:
358 break
359
360 emprobs = emrow.split()
361 if len(emprobs) != 23:
362 raise HHProfileFormatError(
363 "Unexpected number of data fields: {0}".format(len(emprobs)))
364
365 rank = int(emprobs[1])
366 residue = structure.ProteinResidue(
367 rank=rank, type=emprobs[0], sequence_number=rank, insertion_code=None)
368 if residue.type == ProteinAlphabet.GAP:
369 raise HHProfileFormatError("Layer {0} can't be represented by a gap".format(rank))
370
371 new_layer = hmm.layers.append(HMMLayer(rank, residue))
372 if new_layer != rank:
373 raise HHProfileFormatError('Layer {0} defined as {1}'.format(new_layer, rank))
374
375 match = State(States.Match, emit=Enum.members(ProteinAlphabet))
376
377 match.rank = rank
378 match.background.set(background)
379
380 for col, aa in enumerate(residues):
381 prob = parse_probability(emprobs[col + 2])
382 match.emission.append(aa, prob)
383
384 hmm.layers[new_layer].append(match)
385 assert hmm.layers.last_index == match.rank
386
387
388
389
390 if len(hmm.layers) > 0:
391
392 first_match = hmm.layers[hmm.layers.start_index]
393
394 if start_probs[0] is None:
395 raise HHProfileFormatError("Transition Start > Match[1] is undefined")
396
397 start_tran = Transition(hmm.start, first_match[States.Match], start_probs[0])
398 hmm.start.transitions.append(start_tran)
399
400 if start_probs[1] is not None and start_probs[3] is not None:
401 start_ins = State(States.Insertion, emit=Enum.members(ProteinAlphabet))
402 start_ins.rank = 0
403 start_ins.background.set(background)
404 start_ins.emission = start_ins.background
405
406 hmm.start_insertion = start_ins
407
408 hmm.start.transitions.append(
409 Transition(hmm.start, hmm.start_insertion, start_probs[1]))
410
411 hmm.start_insertion.transitions.append(
412 Transition(hmm.start_insertion, first_match[States.Match], start_probs[3]))
413
414 if start_probs[4]:
415 hmm.start_insertion.transitions.append(
416 Transition(hmm.start_insertion, hmm.start_insertion, start_probs[4]))
417
418
419 if start_probs[2] is None and start_probs[6] is not None:
420
421 start_probs[2] = start_probs[6]
422
423 if start_probs[2] is not None:
424 start_del = State(States.Deletion)
425 start_del.rank = 1
426 hmm.layers[1].append(start_del)
427 start_tran = Transition(hmm.start, first_match[States.Deletion], start_probs[2])
428 hmm.start.transitions.append(start_tran)
429 else:
430 start_tran = Transition(hmm.start, hmm.end, start_probs[0])
431 hmm.start.transitions.append(start_tran)
432
433
434
435
436 for rank, fields in enumerate(tran_lines, start=hmm.layers.start_index):
437 assert hmm.layers[rank][States.Match].rank == rank
438
439 ofields = fields.split()
440 fields = tuple(map(parse_probability, ofields))
441
442
443 for col, neff in enumerate(tran_types[7:10], start=7):
444
445 if fields[col] is not None:
446 neff_value = float(ofields[col]) / abs(hmm.scale)
447
448 if neff == 'Neff':
449 hmm.layers[rank].effective_matches = neff_value
450
451 elif neff == 'Neff_I':
452 hmm.layers[rank].effective_insertions = neff_value
453
454 if States.Insertion not in hmm.layers[rank]:
455 insertion = State(States.Insertion, emit=Enum.members(ProteinAlphabet))
456 insertion.background.set(background)
457 insertion.emission.set(background)
458 insertion.rank = rank
459 hmm.layers[rank].append(insertion)
460
461 elif neff == 'Neff_D':
462 hmm.layers[rank].effective_deletions = neff_value
463
464 if States.Deletion not in hmm.layers[rank] and neff_value > 0:
465 deletion = State(States.Deletion)
466 deletion.rank = rank
467 hmm.layers[rank].append(deletion)
468
469
470 for col, tran in enumerate(tran_types):
471 probability = fields[col]
472
473 if probability is not None:
474 try:
475 self._add_transition(hmm, rank, tran, probability)
476 except (CollectionIndexError, ItemNotFoundError) as ex:
477 msg = "Can't add transition {0} at {1}: {2.__class__.__name__}, {2!s}"
478 raise HHProfileFormatError(msg.format(tran, rank, ex))
479
480 return hmm
481
483
484 match = hmm.layers[rank][States.Match]
485 nextmatch = None
486 if rank < hmm.layers.last_index:
487 nextmatch = hmm.layers[rank + 1][States.Match]
488 else:
489 nextmatch = hmm.end
490
491 if tran == 'M->M':
492 transition = Transition(match, nextmatch, probability)
493 match.transitions.append(transition)
494
495 elif tran == 'M->I':
496 insertion = hmm.layers[rank][States.Insertion]
497 transition = Transition(match, insertion, probability)
498 match.transitions.append(transition)
499
500 elif tran == 'M->D':
501 deletion = State(States.Deletion)
502 deletion.rank = rank + 1
503 hmm.layers[rank + 1].append(deletion)
504 transition = Transition(match, deletion, probability)
505 match.transitions.append(transition)
506
507 elif tran == 'I->M':
508 insertion = hmm.layers[rank][States.Insertion]
509 transition = Transition(insertion, nextmatch, probability)
510 insertion.transitions.append(transition)
511
512 elif tran == 'I->I':
513 insertion = hmm.layers[rank][States.Insertion]
514 selfloop = Transition(insertion, insertion, probability)
515 insertion.transitions.append(selfloop)
516
517 elif tran == 'D->M':
518 deletion = hmm.layers[rank][States.Deletion]
519 transition = Transition(deletion, nextmatch, probability)
520 deletion.transitions.append(transition)
521
522 elif tran == 'D->D':
523 deletion = hmm.layers[rank][States.Deletion]
524
525 if States.Deletion not in hmm.layers[rank + 1]:
526 nextdeletion = State(States.Deletion)
527 nextdeletion.rank = rank + 1
528 hmm.layers[rank + 1].append(nextdeletion)
529
530 else:
531 nextdeletion = hmm.layers[rank + 1][States.Deletion]
532 assert match.transitions[States.Deletion].successor == nextdeletion
533
534 transition = Transition(deletion, nextdeletion, probability)
535 deletion.transitions.append(transition)
536
537 else:
538 if not tran.startswith('Neff'):
539 raise NotImplementedError('Unknown transition: {0}'.format(tran))
540
581
582 HHpredProfileParser = HHProfileParser
586 """
587 Builder for HHpred's hhm files.
588
589 @param output: destination stream
590 @type output: file
591 """
592
594
595 if not hasattr(output, 'write'):
596 raise TypeError(output)
597
598 self._out = output
599
600 @property
602 """
603 Destination stream
604 @rtype: stream
605 """
606 return self._out
607
610
614
616 """
617 Append a new HMM to the destination stream.
618
619 @param hmm: profile HMM to serialize
620 @type hmm: L{ProfileHMM}
621 """
622
623 if hmm.score_units != ScoreUnits.LogScales:
624 raise ValueError('Scores must be converted to LogScales first.')
625
626 self.writeline('''HHsearch {0.version}
627 NAME {0.name}
628 FAM {0.family}
629 LENG {0.length.matches} match states, {0.length.layers} columns in multiple alignment
630 NEFF {0.effective_matches}
631 PCT {0.pseudocounts}'''.format(hmm))
632 if hmm.evd:
633 self.writeline('EVD {0.lamda} {0.mu}'.format(hmm.evd))
634
635 self.writeline('SEQ')
636 if hmm.dssp:
637 self.writeline('>ss_dssp')
638 self.writeline(hmm.dssp.to_string())
639 if hmm.dssp_solvent:
640 self.writeline('>sa_dssp')
641 self.writeline(hmm.dssp_solvent)
642 if hmm.psipred:
643 self.writeline('>ss_pred')
644 self.writeline(hmm.psipred.to_string())
645 self.writeline('>ss_conf')
646 confidence = [''.join(map(str, m.score)) for m in hmm.psipred]
647 self.writeline(''.join(confidence))
648
649 if hmm.alignment:
650 if hmm.consensus:
651 self.writeline(str(hmm.consensus))
652 self.writeline(hmm.alignment.format().rstrip('\r\n'))
653
654 self.writeline('#')
655
656 first_match = hmm.layers[1][States.Match]
657 null = [int(first_match.background[aa])
658 for aa in sorted(map(str, first_match.background))]
659 self.writeline('NULL {0}'.format('\t'.join(map(str, null))))
660 self.writeline('HMM {0}'.format(
661 '\t'.join(sorted(map(str, first_match.emission)))))
662
663 tran_types = 'M->M M->I M->D I->M I->I D->M D->D'.split()
664 self.writeline(' {0}'.format(
665 '\t'.join(tran_types + 'Neff Neff_I Neff_D'.split())))
666
667 self.write(" ")
668 for tran_type in tran_types:
669 source_statekind = Enum.parse(States, tran_type[0])
670 target_statekind = Enum.parse(States, tran_type[3])
671 if source_statekind == States.Match:
672 try:
673 self.write("{0:<7}\t".format(
674 int(hmm.start.transitions[target_statekind].probability)))
675 except TransitionNotFoundError:
676 self.write("*\t")
677 else:
678 self.write("*\t")
679 self.writeline('*\t' * 3)
680
681 for layer in hmm.layers:
682
683 self.write("{0} {1:<5}".format(layer.residue.type, layer.rank))
684 for aa in sorted(layer[States.Match].emission):
685 emission = layer[States.Match].emission[aa]
686 if emission is None:
687 emission = '*'
688 else:
689 emission = int(emission)
690 self.write("{0:<7}\t".format(emission))
691 self.writeline("{0}".format(layer.rank))
692
693 self.write(" ")
694 for tran_type in tran_types:
695 source_statekind = Enum.parse(States, tran_type[0])
696 target_statekind = Enum.parse(States, tran_type[3])
697
698 if target_statekind == States.Match and layer.rank == hmm.layers.last_index:
699 target_statekind = States.End
700
701 try:
702 state = layer[source_statekind]
703 self.write("{0:<7}\t".format(
704 int(state.transitions[target_statekind].probability)))
705 except StateNotFoundError:
706 self.write("*\t")
707
708 for data in (layer.effective_matches, layer.effective_insertions,
709 layer.effective_deletions):
710 if data is None:
711 data = '*'
712 else:
713 data = int(data * abs(hmm.scale))
714 self.write("{0:<7}\t".format(data))
715
716 self.writeline("\n")
717
718 self.writeline('//')
719
722 """
723 Parser for HHsearch result (*.hhr) files.
724
725 @param alignments: if set to False, the parser will skip the
726 alignments section of the file
727 @type alignments: bool
728 """
729
734
736 return "<HHsearch Result Parser>"
737
738 @property
740 """
741 True if hit alignments will be parsed
742 @rtype: bool
743 """
744 return self._alignments
745 @alignments.setter
747 self._alignments = bool(value)
748
749 - def parse_file(self, hhr_file, header_only=False):
750 """
751 Parse all hits from this HHpred result file.
752
753 @param hhr_file: input HHR file name
754 @type hhr_file: str
755
756 @return: parsed hits
757 @rtype: HHpredHitList
758
759 @raise HHOutputFormatError: if the hhr file is corrupt
760 """
761
762 with open(os.path.expanduser(hhr_file)) as stream:
763 return self._parse(stream, header_only)
764
766 """
767 Get all hits from an C{output} string.
768
769 @param output: HHpred standard output
770 @type output: str
771
772 @return: parsed hits
773 @rtype: HHpredHitList
774
775 @raise HHOutputFormatError: if the output is corrupt
776 """
777 stream = csb.io.MemoryStream()
778 stream.write(output)
779 stream.seek(0)
780
781 return self._parse(stream, header_only)
782
783 - def _parse(self, stream, header_only):
784
785 qlen = None
786 in_hits = False
787 in_alis = False
788 has_alis = False
789 c_rank = 0
790 header = {}
791 hits = {}
792 alis = {}
793
794 for line in stream:
795
796 if not in_hits and not in_alis:
797
798 if line.replace(' ', '').startswith('NoHitProbE-value'):
799 in_hits = True
800 continue
801 elif line.strip() == '':
802 continue
803 else:
804 columns = line.strip().split(None, 1)
805 if len(columns) == 2:
806
807 identifier, data = columns
808 if identifier in ('Query', 'Command'):
809 data = data.strip()
810 elif identifier == 'Neff':
811 data = float(data)
812 elif identifier in ('Searched_HMMs', 'Match_columns'):
813 data = int(data)
814
815 header[identifier] = data
816
817 if identifier == 'Match_columns':
818 qlen = data
819
820 if in_hits and not header_only:
821 if not line.strip():
822 in_hits = False
823 in_alis = True
824 if self.alignments:
825 continue
826 else:
827 break
828 elif line.strip() == 'Done':
829 in_hits = False
830 in_alis = False
831 break
832
833 description = line[:34].split()
834 rank = int(description[0])
835 id = description[1]
836
837 pos = line[85:94].strip()
838 start, end = map(int, pos.split('-'))
839
840 qpos = line[75:84].strip()
841 qstart, qend = map(int, qpos.split('-'))
842
843 probability = float(line[35:40]) / 100.0
844
845 hit = HHpredHit(rank, id, start, end, qstart, qend, probability, qlen)
846
847 hit.evalue = float(line[41:48])
848 hit.pvalue = float(line[49:56])
849 hit.score = float(line[57:63])
850 hit.ss_score = float(line[64:69])
851
852 hit.slength = int(line[94:].replace('(', '').replace(')', ''))
853
854 hits[hit.rank] = hit
855 alis[hit.rank] = {'q': [], 's': []}
856
857 elif in_alis and not header_only:
858 if line.startswith('Done'):
859 in_alis = False
860 break
861
862 elif line.startswith('No '):
863 c_rank = int(line[3:])
864 if c_rank not in hits:
865 raise HHOutputFormatError('Alignment {0}. refers to a non-existing hit'.format(c_rank))
866
867 elif line.startswith('>'):
868 hits[c_rank].name = line[1:].strip()
869
870 elif line.startswith('Probab='):
871 for pair in line.split():
872 key, value = pair.split('=')
873 if key == 'Identities':
874 hits[c_rank].identity = float(
875 value.replace('%', ''))
876 elif key == 'Similarity':
877 hits[c_rank].similarity = float(value)
878 elif key == 'Sum_probs':
879 hits[c_rank].prob_sum = float(value)
880
881 elif line.startswith('Q ') and not line[:11].rstrip() in ('Q Consensus', 'Q ss_pred','Q ss_conf', 'Q ss_dssp'):
882 for residue in line[22:]:
883 if residue.isspace() or residue.isdigit():
884 break
885 else:
886 alis[c_rank]['q'].append(residue)
887 has_alis = True
888
889 elif line.startswith('T ') and not line[:11].rstrip() in ('T Consensus', 'T ss_pred','T ss_conf', 'T ss_dssp'):
890 for residue in line[22:]:
891 if residue.isspace() or residue.isdigit():
892 break
893 else:
894 alis[c_rank]['s'].append(residue)
895
896 if self.alignments and has_alis:
897 for rank in alis:
898 try:
899 hits[rank].add_alignment(alis[rank]['q'], alis[rank]['s'])
900
901 except (KeyError, ValueError) as er:
902 raise HHOutputFormatError('Corrupt alignment at hit No {0}.\n {1}'.format(rank, er))
903
904 del alis
905
906 hits = HHpredHitList(hits.values())
907
908 hits.sort()
909
910
911 for identifier, data in header.items():
912 if identifier == 'Query':
913 hits.query_name = data
914 elif identifier == 'Match_columns':
915 hits.match_columns = data
916 elif identifier == 'No_of_seqs':
917 hits.no_of_seqs = data
918 elif identifier == 'Neff':
919 hits.neff = data
920 elif identifier == 'Searched_HMMs':
921 hits.searched_hmms = data
922 elif identifier == 'Date':
923 hits.date = data
924 elif identifier == 'Command':
925 hits.command = data
926
927 return hits
928
929 HHpredOutputParser = HHOutputParser
930