1  """ 
   2  Sequence and sequence alignment APIs. 
   3   
   4  This module defines the base interfaces for biological sequences and alignments: 
   5  L{AbstractSequence} and L{AbstractAlignment}. These are the central abstractions 
   6  here. This module provides also a number of useful enumerations, like L{SequenceTypes} 
   7  and L{SequenceAlphabets}. 
   8   
   9  Sequences 
  10  ========= 
  11  L{AbstractSequence} has a number of implementations. These are of course interchangeable, 
  12  but have different intents and may differ significantly in performance. The standard 
  13  L{Sequence} implementation is what you are after if all you need is high performance 
  14  and efficient storage (e.g. when you are parsing big files). L{Sequence} objects store 
  15  their underlying sequences as strings. L{RichSequence}s on the other hand will store 
  16  their residues as L{ResidueInfo} objects, which have the same basic interface as the  
  17  L{csb.bio.structure.Residue} objects. This of course comes at the expense of degraded 
  18  performance. A L{ChainSequence} is a special case of a rich sequence, whose residue 
  19  objects are I{actually} real L{csb.bio.structure.Residue}s. 
  20   
  21  Basic usage: 
  22   
  23      >>> seq = RichSequence('id', 'desc', 'sequence', SequenceTypes.Protein) 
  24      >>> seq.residues[1] 
  25      <ResidueInfo [1]: SER> 
  26      >>> seq.dump(sys.stdout) 
  27      >desc 
  28      SEQUENCE 
  29   
  30  See L{AbstractSequence} for details.     
  31   
  32  Alignments 
  33  ========== 
  34  L{AbstractAlignment} defines a table-like interface to access the data in an 
  35  alignment: 
  36   
  37      >>> ali = SequenceAlignment.parse(">a\\nABC\\n>b\\nA-C") 
  38      >>> ali[0, 0] 
  39      <SequenceAlignment>   # a new alignment, constructed from row #1, column #1 
  40      >>> ali[0, 1:3] 
  41      <SequenceAlignment>   # a new alignment, constructed from row #1, columns #2..#3 
  42   
  43  which is just a shorthand for using the standard 1-based interface: 
  44   
  45      >>> ali.rows[1] 
  46      <AlignedSequenceAdapter: a, 3>                        # row #1 (first sequence) 
  47      >>> ali.columns[1] 
  48      (<ColumnInfo a [1]: ALA>, <ColumnInfo b [1]: ALA>)    # residues at column #1 
  49   
  50  See L{AbstractAlignment} for all details and more examples. 
  51   
  52  There are a number of L{AbstractAlignment} implementations defined here. 
  53  L{SequenceAlignment} is the default one, nothing surprising. L{A3MAlignment} 
  54  is a more special one: the first sequence in the alignment is a master sequence. 
  55  This alignment is usually used in the context of HHpred. More important is the 
  56  L{StructureAlignment}, which is an alignment of L{csb.bio.structure.Chain} objects. 
  57  The residues in every aligned sequence are really the L{csb.bio.structure.Residue} 
  58  objects taken from those chains. 
  59  """ 
  60   
  61  import re 
  62  import csb.core 
  63  import csb.io 
  64   
  65  from abc import ABCMeta, abstractmethod, abstractproperty 
  73   
  79   
  81      """ 
  82      Enumeration of alignment strategies 
  83      """ 
  84      Global='global'; Local='local' 
   85   
  92       
 100                                       
 107       
 109      """ 
 110      Unknown sequence alphabet 
 111      """   
 112      UNK='X'; GAP='-'; INSERTION='.' 
  113      
 155   
 159   
 161       
 162 -    def __init__(self, index=None, start=1, end=None): 
  172           
 174           
 175          if self.index is not None: 
 176              s = 'Position {0.index} is out of range [{0.start}, {0.end}]' 
 177          else: 
 178              s = 'Out of range [{0.start}, {0.end}]' 
 179               
 180          return s.format(self)             
   181           
 184   
 187        
 190   
 193                   
 195           
 197           
 198          self._type = None     
 199          self._rank = rank 
 200           
 201          self.type = type 
  202                   
 203      @property 
 205          """ 
 206          Residue type - a member of any sequence alphabet 
 207          @rtype: enum item 
 208          """ 
 209          return self._type 
  210      @type.setter 
 211 -    def type(self, type): 
  215           
 216      @property 
 218          """ 
 219          Residue position (1-based) 
 220          @rtype: int 
 221          """ 
 222          return self._rank 
  223       
 225          return '<{1} [{0.rank}]: {0.type!r}>'.format(self, self.__class__.__name__) 
   226   
 229       
 230 -    def __init__(self, column, id, rank, residue): 
  236   
 238          return '<{0.__class__.__name__} {0.id} [{0.column}]: {0.residue.type!r}>'.format(self)                 
   239       
 241       
 243          self._container = container 
  244   
 251       
 253          return iter(self._container) 
   254               
 267   
 276       
 279      """ 
 280      Represents a list of L{AbstractSequence}s. 
 281      """ 
 282   
 285   
  300   
 303      """ 
 304      Base abstract class for all Sequence objects. 
 305       
 306      Provides 1-based access to the residues in the sequence via the 
 307      sequence.residues property. The sequence object itself also behaves like 
 308      a collection and provides 0-based access to its elements (residues).    
 309           
 310      @param id: FASTA ID of this sequence (e.g. accession number) 
 311      @type id: str 
 312      @param header: FASTA sequence header 
 313      @type header: str 
 314      @param residues: sequence residues 
 315      @type residues: str or collection of L{ResidueInfo} 
 316      @param type: a L{SequenceTypes} member (defaults to protein) 
 317      @type type: L{EnumItem} 
 318      """ 
 319        
 320      __metaclass__ = ABCMeta 
 321       
 322      DELIMITER = '>' 
 323   
 337       
 339           
 340          if isinstance(spec, slice): 
 341              spec = SliceHelper(spec, 0, self.length) 
 342              return self.subregion(spec.start + 1, spec.stop) 
 343          else: 
 344              if not 0 <= spec < self.length: 
 345                  raise IndexError(spec)             
 346              return self._get(spec + 1) 
  347       
 349          for index in range(self.length): 
 350              yield self[index] 
  351           
 352      @abstractmethod 
 353 -    def _add(self, residue): 
  354          """ 
 355          Append a C{residue} to the sequence. 
 356           
 357          This is a hook method invoked internally for each residue during object 
 358          construction. By implementing this method, sub-classes define how 
 359          residues are attached to the sequence object.    
 360          """ 
 361          pass 
  362   
 363      @abstractmethod 
 364 -    def _get(self, rank): 
  365          """ 
 366          Retrieve the sequence residue at the specified position (1-based, positive). 
 367           
 368          This is a hook method which defines the actual behavior of the sequence 
 369          residue indexer. 
 370             
 371          @rtype: L{ResidueInfo} 
 372          @raise SequencePositionError: when the supplied rank is out of range 
 373          """ 
 374          pass 
  375       
 377          """ 
 378          Return a new sequence of the current L{AbstractSequence} sub-class. 
 379          """ 
 380          return self.__class__(*a, **k)     
  381   
 383          """ 
 384          Remove all gaps and insertions from the sequence. 
 385           
 386          @return: a new sequence instance, containing no gaps 
 387          @rtype: L{AbstractSequence} 
 388          """ 
 389          residues = [r for r in self._residues  
 390                      if r.type not in (self.alphabet.GAP, self.alphabet.INSERTION)] 
 391           
 392          return self._factory(self.id, self.header, residues, self.type) 
  393               
 395          """ 
 396          Extract a subsequence, defined by [start, end]. The start and end 
 397          positions are 1-based, inclusive. 
 398           
 399          @param start: start position 
 400          @type start: int 
 401          @param end: end position 
 402          @type end: int 
 403           
 404          @return: a new sequence 
 405          @rtype: L{AbstractSequence} 
 406           
 407          @raise SequencePositionError: if start/end positions are out of range 
 408          """ 
 409          positions = range(start, end + 1) 
 410          return self.extract(positions) 
  411       
 413          """ 
 414          Extract a subsequence, defined by a list of 1-based positions. 
 415           
 416          @param positions: positions to extract 
 417          @type positions: tuple of int 
 418           
 419          @return: a new sequence 
 420          @rtype: L{AbstractSequence} 
 421           
 422          @raise SequencePositionError: if any position is out of range 
 423          """ 
 424   
 425          end = self.length 
 426          residues = [] 
 427           
 428          for rank in sorted(set(positions)): 
 429              if 1 <= rank <= end: 
 430                  residues.append(self._get(rank)) 
 431              else: 
 432                  raise SequencePositionError(rank, 1, end) 
 433               
 434          return self._factory(self.id, self.header, residues, self.type) 
  435       
 436 -    def dump(self, output_file): 
  447           
 448      @property 
 450          """ 
 451          Number of residues 
 452          @rtype: int 
 453          """ 
 454          return len(self._residues) 
  455       
 456      @property 
 458          """ 
 459          Sequence identifier 
 460          @rtype: str 
 461          """         
 462          return self._id 
  463      @id.setter 
 464 -    def id(self, value): 
  468               
 469      @property 
 471          """ 
 472          Sequence description 
 473          @rtype: str 
 474          """         
 475          return self._header 
  476      @header.setter 
 483       
 484      @property   
 486          """ 
 487          Sequence type - a member of L{SequenceTypes} 
 488          @rtype: enum item 
 489          """ 
 490          return self._type 
  491      @type.setter 
 492 -    def type(self, value): 
  498   
 499      @property 
 501          """ 
 502          The actual sequence 
 503          @rtype: str 
 504          """ 
 505          return ''.join([str(r.type) for r in self._residues]) 
  506       
 507      @property 
 509          """ 
 510          The sequence alphabet corresponding to the current sequence type 
 511          @rtype: L{csb.core.enum} 
 512          """ 
 513          return SequenceAlphabets.get(self._type) 
  514       
 515      @property 
 517          """ 
 518          Rank-based access to the underlying L{residues<csb.bio.sequence.ResidueInfo>} 
 519          @rtype: L{SequenceIndexer} 
 520          """ 
 521          return SequenceIndexer(self) 
  522   
 525       
 527          return '<{0.__class__.__name__}: {0.id}, {0.length} residues>'.format(self) 
  528       
 531           
 533      """ 
 534      High-performance sequence object. The actual sequence is stored internally 
 535      as a string. The indexer acts as a residue factory, which creates a new 
 536      L{ResidueInfo} instance each time.  
 537       
 538      @note: This class was created with parsing large volumes of data in mind. This 
 539             comes at the expense of degraded performance of the sequence indexer. 
 540       
 541      @param id: FASTA ID of this sequence (e.g. accession number) 
 542      @type id: str 
 543      @param header: FASTA sequence header 
 544      @type header: str 
 545      @param residues: sequence string 
 546      @type residues: str 
 547      @param type: a L{SequenceTypes} member (defaults to protein) 
 548      @type type: L{EnumItem} 
 549      """ 
 550       
 552   
 553          self._id = None 
 554          self._header = None 
 555          self._residues = '' 
 556          self._type = None 
 557             
 558          self.id = id 
 559          self.header = header 
 560          self.type = type         
 561   
 562          self._append(residues) 
  563       
 565           
 566          self._residues += re.sub('([^\w\-\.])+', '', string) 
  567           
 568 -    def _add(self, char): 
  570               
 571 -    def _get(self, rank):         
  572           
 573          type = csb.core.Enum.parse(self.alphabet, self._residues[rank - 1]) 
 574          return ResidueInfo(rank, type) 
  575       
 581   
 589   
 602               
 603      @property 
 605          return self._residues 
  606   
 608      """ 
 609      Sequence implementation, which converts the sequence into a list of 
 610      L{ResidueInfo} objects. See L{AbstractSequence} for details. 
 611      """ 
 612           
 613 -    def _add(self, residue): 
  624               
 625 -    def _get(self, rank): 
  626          return self._residues[rank - 1] 
  627   
 628      @staticmethod 
  638   
 640      """ 
 641      Sequence view for L{csb.bio.structure.Chain} objects. 
 642      See L{AbstractSequence} for details. 
 643      """ 
 644               
 645 -    def _add(self, residue): 
  651               
 652 -    def _get(self, rank): 
  653          return self._residues[rank - 1] 
  654       
 655      @staticmethod 
  665   
 668      """ 
 669      Base wrapper class for L{AbstractSequence} objects. 
 670      Needs to be sub-classed (does not do anything special on its own). 
 671       
 672      @param sequence: adaptee 
 673      @type sequence: L{AbstractSequence} 
 674      """ 
 675       
 682   
 684          return self._subject[i] 
  685       
 687          return iter(self._subject) 
  688                   
 690          return '<{0.__class__.__name__}: {0.id}, {0.length}>'.format(self)         
  691       
 693          return str(self._subject) 
  694       
 696          raise NotImplementedError() 
  697       
 698 -    def _get(self, rank): 
  699          return self._subject._get(rank) 
  700       
 702          return self.__class__(self._subject._factory(*a, **k)) 
  703       
 705          return self._subject.strip() 
  706               
 709       
 711          return self._subject.extract(positions)     
  712   
 713      @property 
 715          return self._subject.id 
  716   
 717      @property 
 719          return self._subject.length 
  720   
 721      @property 
 723          return self._subject.type 
  724   
 725      @property 
 727          return self._subject.header 
  728       
 729      @property 
 732                       
 733      @property 
 736   
 738      """ 
 739      Adapter, which wraps a gapped L{AbstractSequence} object and makes it 
 740      compatible with the MSA row/entry interface, expected by L{AbstractAlignment}. 
 741       
 742      The C{adapter.residues} property operates with an L{UngappedSequenceIndexer}, 
 743      which provides a gap-free view of the underlying sequence. 
 744       
 745      The C{adapter.columns} property operates with a standard L{ColumnIndexer}, 
 746      the same indexer which is used to provide the column view in multiple  
 747      alignments. Adapted sequences therefore act as alignment rows and allow for 
 748      MSA-column-oriented indexing. 
 749       
 750      @param sequence: adaptee 
 751      @type sequence: L{AbstractSequence}     
 752      """ 
 753   
 770   
 772          if not 0 <= index < self.length: 
 773              raise IndexError(index) 
 774          return self._get_column(index + 1) 
  775       
 777          for c in sorted(self._fmap): 
 778              yield self._get_column(c) 
  779                       
 780      @property 
 782          """ 
 783          Provides 1-based access to the respective columns in the MSA. 
 784          @rtype: L{ColumnIndexer} 
 785          """         
 786          return ColumnIndexer(self) 
  787   
 788      @property 
 790          """ 
 791          Provides 1-based access to the residues of the unaligned (ungapped) 
 792          sequence. 
 793          @rtype: L{UngappedSequenceIndexer}  
 794          """ 
 795          return UngappedSequenceIndexer(self) 
  796   
 800               
 803       
 805          """ 
 806          Return the MSA column number corresponding to the specified ungapped 
 807          sequence C{rank}. 
 808           
 809          @param rank: 1-based residue rank 
 810          @type rank: int 
 811          @rtype: int 
 812          """ 
 813          return self._rmap[rank] 
  814       
 816          """ 
 817          Return the ungapped sequence rank corresponding to the specified MSA 
 818          C{column} number. 
 819           
 820          @param column: 1-based alignment column number 
 821          @type column: int 
 822          @rtype: int 
 823          """         
 824          return self._fmap[column]     
   825       
 827       
 828 -    def __init__(self, slice, start=0, stop=0): 
  829           
 830          s, e, t = slice.start, slice.stop, slice.step 
 831           
 832          if s is None: 
 833              s = start 
 834          if e is None: 
 835              e = stop 
 836          if t is None: 
 837              t = 1 
 838               
 839          for value in [s, e, t]: 
 840              if value < 0: 
 841                  raise IndexError(value) 
 842               
 843          self.start = s 
 844          self.stop = e 
 845          self.step = t             
   846   
 884           
 887      """ 
 888      Base class for all alignment objects. 
 889       
 890      Provides 1-based access to the alignment.rows and alignment.columns. 
 891      Alignment rows can also be accessed by sequence ID. In addition, all 
 892      alignments support 0-based slicing: 
 893       
 894          >>> alignment[rows, columns] 
 895          AbstractAlignment (sub-alignment) 
 896           
 897      where 
 898          - C{rows} can be a slice, tuple of row indexes or tuple of sequence IDs 
 899          - columns can be a slice or tuple of column indexes 
 900           
 901      For example: 
 902       
 903          >>> alignment[:, 2:] 
 904          AbstractAlignment     # all rows, columns [3, alignment.length] 
 905          >>> alignment[(0, 'seqx'), (3, 5)] 
 906          AbstractAlignment     # rows #1 and 'seq3', columns #4 and #5 
 907           
 908      @param sequences: alignment entries (must have equal length) 
 909      @type sequences: list of L{AbstractSequence}s 
 910      @param strict: if True, raise {DuplicateSequenceError} when a duplicate ID 
 911                     is found (default=True) 
 912      @type strict: bool 
 913       
 914      @note: if C{strict} is False and there are C{sequences} with redundant identifiers, 
 915             those sequences will be added to the C{rows} collection with :An suffix, 
 916             where n is a serial number. Therefore, rows['ID'] will return only one sequence, 
 917             the first sequence with id=ID. All remaining sequences can be retrieved 
 918             with C{rows['ID:A1']}, {rows['ID:A2']}, etc. However, the sequence objects will 
 919             remain intact, e.g. {rows['ID:A1'].id} still returns 'ID' and not 'ID:A1'.  
 920      """ 
 921       
 922      __metaclass__ = ABCMeta 
 923       
 924 -    def __init__(self, sequences, strict=True): 
  925           
 926          self._length = None 
 927          self._msa = AlignmentRowsTable(self) 
 928          self._colview = ColumnIndexer(self) 
 929          self._map = {} 
 930          self._strict = bool(strict) 
 931           
 932          self._construct(sequences) 
  933               
 935           
 936           
 937           
 938           
 939           
 940          if not isinstance(spec, tuple) or len(spec) not in (1, 2): 
 941              raise TypeError('Invalid alignment slice expression') 
 942           
 943          if len(spec) == 2: 
 944              rowspec, colspec = spec 
 945          else: 
 946              rowspec, colspec = [spec, slice(None)] 
 947   
 948           
 949          if isinstance(rowspec, slice): 
 950              if isinstance(rowspec.start, csb.core.string) or isinstance(rowspec.stop, csb.core.string): 
 951                  raise TypeError("Invalid row slice: only indexes are supported") 
 952              rowspec = SliceHelper(rowspec, 0, self.size) 
 953              rows = range(rowspec.start + 1, rowspec.stop + 1) 
 954          elif isinstance(rowspec, int): 
 955              rows = [rowspec + 1]      
 956          elif csb.core.iterable(rowspec): 
 957              try: 
 958                  rows = [] 
 959                  for r in rowspec: 
 960                      if isinstance(r, int): 
 961                          rows.append(r + 1) 
 962                      else: 
 963                          rows.append(self._map[r]) 
 964              except KeyError as ke: 
 965                  raise KeyError('No such Sequence ID: {0!s}'.format(ke)) 
 966          else: 
 967                  raise TypeError('Unsupported row expression')             
 968   
 969           
 970          if isinstance(colspec, slice): 
 971              colspec = SliceHelper(colspec, 0, self._length or 0) 
 972              cols = range(colspec.start + 1, colspec.stop + 1) 
 973          elif isinstance(colspec, int): 
 974              cols = [colspec + 1]  
 975          elif csb.core.iterable(colspec): 
 976              try: 
 977                  cols = [ c + 1 for c in colspec ] 
 978              except:             
 979                  raise TypeError('Unsupported column expression') 
 980          else: 
 981              raise TypeError('Unsupported column expression') 
 982           
 983           
 984          if len(rows) == 0: 
 985              raise ValueError("The expression returns zero rows") 
 986          if len(cols) == 0: 
 987              raise ValueError("The expression returns zero columns") 
 988                   
 989           
 990          return self._extract(rows, cols) 
  991       
 992 -    def _range(self, slice, start, end): 
  993           
 994          s, e, t = slice.start, slice.end, slice.step 
 995           
 996          if s is None: 
 997              s = start 
 998          if e is None: 
 999              e = end 
1000          if t is None: 
1001              t = 1 
1002               
1003          return range(s, e, t) 
 1004       
1006          for cn in range(1, self.length + 1): 
1007              yield self._get_column(cn) 
 1008               
1009      @abstractmethod 
1011          """ 
1012          Hook method, called internally upon object construction. Subclasses 
1013          define how the source alignment sequences are handled during alignment 
1014          construction. 
1015           
1016          @param sequences: alignment entries 
1017          @type sequences: list of L{AbstractSequence}s 
1018          """ 
1019          pass 
 1020       
1022          """ 
1023          Hook method, which is used to initialize various alignment properties 
1024          (such as length) from the first alignned sequence. 
1025          """ 
1026          if rep_sequence.length == 0: 
1027              raise SequenceError("Sequence '{0}' is empty".format(rep_sequence.id)) 
1028                   
1029          assert self._length is None 
1030          self._length = rep_sequence.length  
 1031           
1033          """ 
1034          Return a new sequence of the current L{AbstractAlignment} sub-class. 
1035          """  
1036          return self.__class__(*a, **k) 
 1037         
1038 -    def add(self, sequence): 
 1058   
1059      @property 
1061          """ 
1062          Number of columns in the alignment 
1063          @rtype: int 
1064          """ 
1065          return self._length or 0 
 1066   
1067      @property 
1069          """ 
1070          Number of rows (sequences) in the alignment 
1071          @rtype: int 
1072          """         
1073          return self._msa.length     
 1074               
1075      @property 
1077          """ 
1078          1-based access to the alignment entries (sequences) 
1079          @rtype: L{AlignmentRowsTable} 
1080          """ 
1081          return self._msa  
 1082           
1083      @property 
1085          """ 
1086          1-based access to the alignment columns 
1087          @rtype: L{ColumnIndexer} 
1088          """ 
1089          return self._colview 
 1090       
1092          """ 
1093          Return True of C{column} contains at least one gap. 
1094          @param column: column number, 1-based 
1095          @type column: int 
1096           
1097          @rtype: bool 
1098          """ 
1099           
1100          for row in self._msa: 
1101              if row.columns[column].residue.type == row.alphabet.GAP: 
1102                  return True 
1103               
1104          return False         
 1105       
1107          return tuple(row._get_column(column) for row in self.rows) 
 1108       
1110           
1111          rows = set(rows) 
1112          cols = set(cols) 
1113                   
1114          if not 1 <= min(rows) <= max(rows) <= self.size: 
1115              raise IndexError('Row specification out of range') 
1116                   
1117          if not 1 <= min(cols) <= max(cols) <= self.length: 
1118              raise IndexError('Column specification out of range') 
1119           
1120          sequences = [] 
1121           
1122          for rn, row in enumerate(self.rows, start=1): 
1123              if rn in rows: 
1124                  sequences.append(row.extract(cols)) 
1125                   
1126          return self._factory(sequences, strict=self._strict) 
 1127       
1129          """ 
1130          Extract a sub-alignment, ranging from C{start} to C{end} columns. 
1131           
1132          @param start: starting column, 1-based 
1133          @type start: int 
1134          @param end: ending column, 1-based 
1135          @type end: int 
1136           
1137          @return: a new alignment of the current type 
1138          @rtype: L{AbstractAlignment} 
1139           
1140          @raise ColumnPositionError: if start/end is out of range  
1141          """ 
1142          if not 1 <= start <= end <= self.length: 
1143              raise ColumnPositionError(None, 1, self.length) 
1144           
1145          sequences = [] 
1146           
1147          for row in self.rows: 
1148              sequences.append(row.subregion(start, end)) 
1149                   
1150          return self._factory(sequences, strict=self._strict) 
 1151       
 1175   
1177      """ 
1178      Multiple sequence alignment. See L{AbstractAlignment} for details. 
1179      """ 
1180           
1185               
1186      @staticmethod 
1187 -    def parse(string, strict=True): 
  1200           
1202      """ 
1203      Multiple structure alignment. Similar to a L{SequenceAlignment}, but 
1204      the alignment holds the actual L{csb.bio.structure.ProteinResidue} objects, 
1205      taken from the corresponding source L{csb.bio.structure.Chain}s. 
1206       
1207      See L{AbstractAlignment} for details. 
1208      """ 
1209           
1214               
1215      @staticmethod 
1216 -    def parse(string, provider, id_factory=None, strict=True): 
 1217          """ 
1218          Create a new L{StructureAlignment} from an mFASTA string. See  
1219          L{csb.bio.io.fasta.StructureAlignmentFactory} for details.  
1220           
1221          @param string: MSA-formatted string 
1222          @type string: str 
1223          @param provider: data source for all structures found in the alignment 
1224          @type provider: L{csb.bio.io.wwpdb.StructureProvider} 
1225          @param strict: see L{AbstractAlignment} 
1226          @type strict: bool 
1227          @param id_factory: callable factory, which transforms a sequence ID into 
1228                             a L{csb.bio.io.wwpdb.EntryID} object. By default 
1229                             this is L{csb.bio.io.wwpdb.EntryID.create}.  
1230          @type id_factory: callable         
1231          @rtype: L{StructureAlignment} 
1232          """ 
1233          from csb.bio.io.fasta import StructureAlignmentFactory 
1234           
1235          factory = StructureAlignmentFactory( 
1236                          provider, id_factory=id_factory, strict=strict) 
1237          return factory.make_alignment(string) 
  1238       
1240      """ 
1241      A specific type of multiple alignment, which provides some operations 
1242      relative to a master sequence (the first entry in the alignment).  
1243      """ 
1244       
1245 -    def __init__(self, sequences, strict=True): 
 1252   
1254           
1255          super(A3MAlignment, self)._initialize(rep_sequence) 
1256          self._alphabet = rep_sequence.alphabet         
 1257               
1273       
1274      @property 
1276          """ 
1277          The master sequence 
1278          @rtype: L{AbstractSequence} 
1279          """ 
1280          return self._master 
 1281       
1283          """ 
1284          Return True of C{column} contains at least one insertion. 
1285           
1286          @param column: column number, 1-based 
1287          @type column: int 
1288          @rtype: bool 
1289          """         
1290          return column in self._insertions 
 1291       
1305   
1308       
1309      @property 
1311          """ 
1312          Number of match states (residues in the ungapped master). 
1313          @rtype: int 
1314          """ 
1315          return self._matches 
 1316       
1317      @staticmethod 
1318 -    def parse(string, strict=True): 
  1331