Package csb :: Package bio :: Package sequence :: Module alignment
[frames] | no frames]

Source Code for Module csb.bio.sequence.alignment

  1  """ 
  2  Collection of sequence alignment algorithms. 
  3   
  4  @note: The classes in this module have been optimized for performance. 
  5         Think twice before switching a field to a generally nicer property 
  6         access, because it turns out that these things often add significant 
  7         constants to the running time of a dynamic programming algorithm. 
  8  """ 
  9   
 10  from csb.bio.sequence import AbstractSequence, SequenceAlignment, RichSequence, ResidueInfo 
 11  from abc import ABCMeta, abstractmethod 
12 13 14 -class ResidueNotFoundError(KeyError):
15 pass
16
17 18 -class AbstractScoringMatrix(object):
19 """ 20 Defines a pairwise sequence alignment scoring function. 21 """ 22 23 __metaclass__ = ABCMeta 24 25 @abstractmethod
26 - def score(self, x, y):
27 """ 28 Return the pairwise score of residues C{x} and C{y}. 29 C{x} and C{y} must be comparable (e.g. implement __eq__ and __hash__). 30 31 @param x: first residue 32 @type x: object 33 @param y: second residue 34 @type y: object 35 36 @rtype: float 37 38 @raise ResidueNotFoundError: if C{x} or C{y} cannot be handled by this 39 scoring matrix 40 """ 41 pass
42
43 -class IdentityMatrix(AbstractScoringMatrix):
44 """ 45 Simple identity-based scoring matrix. 46 47 @param match: score for a match 48 @type match: float 49 @param mismatch: score for a mismatch 50 @type mismatch: float 51 """ 52
53 - def __init__(self, match=1, mismatch=-1):
54 55 self._match = float(match) 56 self._mismatch = float(mismatch)
57 58 @property
59 - def match(self):
60 """ 61 Score for a match 62 @rtype: float 63 """ 64 return self._match
65 66 @property
67 - def mismatch(self):
68 """ 69 Score for a mismatch 70 @rtype: float 71 """ 72 return self._mismatch
73
74 - def score(self, x, y):
75 76 if x == y: 77 return self._match 78 else: 79 return self._mismatch
80
81 -class SimilarityMatrix(AbstractScoringMatrix):
82 """ 83 Similarity scoring matrix. 84 85 @param matrix: 86 """ 87 88 BLOSUM62 = { 89 'A': { 'A': 4.0, 'R':-1.0, 'N':-2.0, 'D':-2.0, 'C': 0.0, 'Q':-1.0, 'E':-1.0, 'G': 0.0, 'H':-2.0, 'I':-1.0, 'L':-1.0, 'K':-1.0, 'M':-1.0, 'F':-2.0, 'P':-1.0, 'S': 1.0, 'T': 0.0, 'W':-3.0, 'Y':-2.0, 'V': 0.0, 'B':-2.0, 'Z':-1.0, 'X': 0.0, '*':-4.0 }, 90 'R': { 'A':-1.0, 'R': 5.0, 'N': 0.0, 'D':-2.0, 'C':-3.0, 'Q': 1.0, 'E': 0.0, 'G':-2.0, 'H': 0.0, 'I':-3.0, 'L':-2.0, 'K': 2.0, 'M':-1.0, 'F':-3.0, 'P':-2.0, 'S':-1.0, 'T':-1.0, 'W':-3.0, 'Y':-2.0, 'V':-3.0, 'B':-1.0, 'Z': 0.0, 'X':-1.0, '*':-4.0 }, 91 'N': { 'A':-2.0, 'R': 0.0, 'N': 6.0, 'D': 1.0, 'C':-3.0, 'Q': 0.0, 'E': 0.0, 'G': 0.0, 'H': 1.0, 'I':-3.0, 'L':-3.0, 'K': 0.0, 'M':-2.0, 'F':-3.0, 'P':-2.0, 'S': 1.0, 'T': 0.0, 'W':-4.0, 'Y':-2.0, 'V':-3.0, 'B': 3.0, 'Z': 0.0, 'X':-1.0, '*':-4.0 }, 92 'D': { 'A':-2.0, 'R':-2.0, 'N': 1.0, 'D': 6.0, 'C':-3.0, 'Q': 0.0, 'E': 2.0, 'G':-1.0, 'H':-1.0, 'I':-3.0, 'L':-4.0, 'K':-1.0, 'M':-3.0, 'F':-3.0, 'P':-1.0, 'S': 0.0, 'T':-1.0, 'W':-4.0, 'Y':-3.0, 'V':-3.0, 'B': 4.0, 'Z': 1.0, 'X':-1.0, '*':-4.0 }, 93 'C': { 'A': 0.0, 'R':-3.0, 'N':-3.0, 'D':-3.0, 'C': 9.0, 'Q':-3.0, 'E':-4.0, 'G':-3.0, 'H':-3.0, 'I':-1.0, 'L':-1.0, 'K':-3.0, 'M':-1.0, 'F':-2.0, 'P':-3.0, 'S':-1.0, 'T':-1.0, 'W':-2.0, 'Y':-2.0, 'V':-1.0, 'B':-3.0, 'Z':-3.0, 'X':-2.0, '*':-4.0 }, 94 'Q': { 'A':-1.0, 'R': 1.0, 'N': 0.0, 'D': 0.0, 'C':-3.0, 'Q': 5.0, 'E': 2.0, 'G':-2.0, 'H': 0.0, 'I':-3.0, 'L':-2.0, 'K': 1.0, 'M': 0.0, 'F':-3.0, 'P':-1.0, 'S': 0.0, 'T':-1.0, 'W':-2.0, 'Y':-1.0, 'V':-2.0, 'B': 0.0, 'Z': 3.0, 'X':-1.0, '*':-4.0 }, 95 'E': { 'A':-1.0, 'R': 0.0, 'N': 0.0, 'D': 2.0, 'C':-4.0, 'Q': 2.0, 'E': 5.0, 'G':-2.0, 'H': 0.0, 'I':-3.0, 'L':-3.0, 'K': 1.0, 'M':-2.0, 'F':-3.0, 'P':-1.0, 'S': 0.0, 'T':-1.0, 'W':-3.0, 'Y':-2.0, 'V':-2.0, 'B': 1.0, 'Z': 4.0, 'X':-1.0, '*':-4.0 }, 96 'G': { 'A': 0.0, 'R':-2.0, 'N': 0.0, 'D':-1.0, 'C':-3.0, 'Q':-2.0, 'E':-2.0, 'G': 6.0, 'H':-2.0, 'I':-4.0, 'L':-4.0, 'K':-2.0, 'M':-3.0, 'F':-3.0, 'P':-2.0, 'S': 0.0, 'T':-2.0, 'W':-2.0, 'Y':-3.0, 'V':-3.0, 'B':-1.0, 'Z':-2.0, 'X':-1.0, '*':-4.0 }, 97 'H': { 'A':-2.0, 'R': 0.0, 'N': 1.0, 'D':-1.0, 'C':-3.0, 'Q': 0.0, 'E': 0.0, 'G':-2.0, 'H': 8.0, 'I':-3.0, 'L':-3.0, 'K':-1.0, 'M':-2.0, 'F':-1.0, 'P':-2.0, 'S':-1.0, 'T':-2.0, 'W':-2.0, 'Y': 2.0, 'V':-3.0, 'B': 0.0, 'Z': 0.0, 'X':-1.0, '*':-4.0 }, 98 'I': { 'A':-1.0, 'R':-3.0, 'N':-3.0, 'D':-3.0, 'C':-1.0, 'Q':-3.0, 'E':-3.0, 'G':-4.0, 'H':-3.0, 'I': 4.0, 'L': 2.0, 'K':-3.0, 'M': 1.0, 'F': 0.0, 'P':-3.0, 'S':-2.0, 'T':-1.0, 'W':-3.0, 'Y':-1.0, 'V': 3.0, 'B':-3.0, 'Z':-3.0, 'X':-1.0, '*':-4.0 }, 99 'L': { 'A':-1.0, 'R':-2.0, 'N':-3.0, 'D':-4.0, 'C':-1.0, 'Q':-2.0, 'E':-3.0, 'G':-4.0, 'H':-3.0, 'I': 2.0, 'L': 4.0, 'K':-2.0, 'M': 2.0, 'F': 0.0, 'P':-3.0, 'S':-2.0, 'T':-1.0, 'W':-2.0, 'Y':-1.0, 'V': 1.0, 'B':-4.0, 'Z':-3.0, 'X':-1.0, '*':-4.0 }, 100 'K': { 'A':-1.0, 'R': 2.0, 'N': 0.0, 'D':-1.0, 'C':-3.0, 'Q': 1.0, 'E': 1.0, 'G':-2.0, 'H':-1.0, 'I':-3.0, 'L':-2.0, 'K': 5.0, 'M':-1.0, 'F':-3.0, 'P':-1.0, 'S': 0.0, 'T':-1.0, 'W':-3.0, 'Y':-2.0, 'V':-2.0, 'B': 0.0, 'Z': 1.0, 'X':-1.0, '*':-4.0 }, 101 'M': { 'A':-1.0, 'R':-1.0, 'N':-2.0, 'D':-3.0, 'C':-1.0, 'Q': 0.0, 'E':-2.0, 'G':-3.0, 'H':-2.0, 'I': 1.0, 'L': 2.0, 'K':-1.0, 'M': 5.0, 'F': 0.0, 'P':-2.0, 'S':-1.0, 'T':-1.0, 'W':-1.0, 'Y':-1.0, 'V': 1.0, 'B':-3.0, 'Z':-1.0, 'X':-1.0, '*':-4.0 }, 102 'F': { 'A':-2.0, 'R':-3.0, 'N':-3.0, 'D':-3.0, 'C':-2.0, 'Q':-3.0, 'E':-3.0, 'G':-3.0, 'H':-1.0, 'I': 0.0, 'L': 0.0, 'K':-3.0, 'M': 0.0, 'F': 6.0, 'P':-4.0, 'S':-2.0, 'T':-2.0, 'W': 1.0, 'Y': 3.0, 'V':-1.0, 'B':-3.0, 'Z':-3.0, 'X':-1.0, '*':-4.0 }, 103 'P': { 'A':-1.0, 'R':-2.0, 'N':-2.0, 'D':-1.0, 'C':-3.0, 'Q':-1.0, 'E':-1.0, 'G':-2.0, 'H':-2.0, 'I':-3.0, 'L':-3.0, 'K':-1.0, 'M':-2.0, 'F':-4.0, 'P': 7.0, 'S':-1.0, 'T':-1.0, 'W':-4.0, 'Y':-3.0, 'V':-2.0, 'B':-2.0, 'Z':-1.0, 'X':-2.0, '*':-4.0 }, 104 'S': { 'A': 1.0, 'R':-1.0, 'N': 1.0, 'D': 0.0, 'C':-1.0, 'Q': 0.0, 'E': 0.0, 'G': 0.0, 'H':-1.0, 'I':-2.0, 'L':-2.0, 'K': 0.0, 'M':-1.0, 'F':-2.0, 'P':-1.0, 'S': 4.0, 'T': 1.0, 'W':-3.0, 'Y':-2.0, 'V':-2.0, 'B': 0.0, 'Z': 0.0, 'X': 0.0, '*':-4.0 }, 105 'T': { 'A': 0.0, 'R':-1.0, 'N': 0.0, 'D':-1.0, 'C':-1.0, 'Q':-1.0, 'E':-1.0, 'G':-2.0, 'H':-2.0, 'I':-1.0, 'L':-1.0, 'K':-1.0, 'M':-1.0, 'F':-2.0, 'P':-1.0, 'S': 1.0, 'T': 5.0, 'W':-2.0, 'Y':-2.0, 'V': 0.0, 'B':-1.0, 'Z':-1.0, 'X': 0.0, '*':-4.0 }, 106 'W': { 'A':-3.0, 'R':-3.0, 'N':-4.0, 'D':-4.0, 'C':-2.0, 'Q':-2.0, 'E':-3.0, 'G':-2.0, 'H':-2.0, 'I':-3.0, 'L':-2.0, 'K':-3.0, 'M':-1.0, 'F': 1.0, 'P':-4.0, 'S':-3.0, 'T':-2.0, 'W': 11.0,'Y': 2.0, 'V':-3.0, 'B':-4.0, 'Z':-3.0, 'X':-2.0, '*':-4.0 }, 107 'Y': { 'A':-2.0, 'R':-2.0, 'N':-2.0, 'D':-3.0, 'C':-2.0, 'Q':-1.0, 'E':-2.0, 'G':-3.0, 'H': 2.0, 'I':-1.0, 'L':-1.0, 'K':-2.0, 'M':-1.0, 'F': 3.0, 'P':-3.0, 'S':-2.0, 'T':-2.0, 'W': 2.0, 'Y': 7.0, 'V':-1.0, 'B':-3.0, 'Z':-2.0, 'X':-1.0, '*':-4.0 }, 108 'V': { 'A': 0.0, 'R':-3.0, 'N':-3.0, 'D':-3.0, 'C':-1.0, 'Q':-2.0, 'E':-2.0, 'G':-3.0, 'H':-3.0, 'I': 3.0, 'L': 1.0, 'K':-2.0, 'M': 1.0, 'F':-1.0, 'P':-2.0, 'S':-2.0, 'T': 0.0, 'W':-3.0, 'Y':-1.0, 'V': 4.0, 'B':-3.0, 'Z':-2.0, 'X':-1.0, '*':-4.0 }, 109 'B': { 'A':-2.0, 'R':-1.0, 'N': 3.0, 'D': 4.0, 'C':-3.0, 'Q': 0.0, 'E': 1.0, 'G':-1.0, 'H': 0.0, 'I':-3.0, 'L':-4.0, 'K': 0.0, 'M':-3.0, 'F':-3.0, 'P':-2.0, 'S': 0.0, 'T':-1.0, 'W':-4.0, 'Y':-3.0, 'V':-3.0, 'B': 4.0, 'Z': 1.0, 'X':-1.0, '*':-4.0 }, 110 'Z': { 'A':-1.0, 'R': 0.0, 'N': 0.0, 'D': 1.0, 'C':-3.0, 'Q': 3.0, 'E': 4.0, 'G':-2.0, 'H': 0.0, 'I':-3.0, 'L':-3.0, 'K': 1.0, 'M':-1.0, 'F':-3.0, 'P':-1.0, 'S': 0.0, 'T':-1.0, 'W':-3.0, 'Y':-2.0, 'V':-2.0, 'B': 1.0, 'Z': 4.0, 'X':-1.0, '*':-4.0 }, 111 'X': { 'A': 0.0, 'R':-1.0, 'N':-1.0, 'D':-1.0, 'C':-2.0, 'Q':-1.0, 'E':-1.0, 'G':-1.0, 'H':-1.0, 'I':-1.0, 'L':-1.0, 'K':-1.0, 'M':-1.0, 'F':-1.0, 'P':-2.0, 'S': 0.0, 'T': 0.0, 'W':-2.0, 'Y':-1.0, 'V':-1.0, 'B':-1.0, 'Z':-1.0, 'X':-1.0, '*':-4.0 }, 112 '*': { 'A':-4.0, 'R':-4.0, 'N':-4.0, 'D':-4.0, 'C':-4.0, 'Q':-4.0, 'E':-4.0, 'G':-4.0, 'H':-4.0, 'I':-4.0, 'L':-4.0, 'K':-4.0, 'M':-4.0, 'F':-4.0, 'P':-4.0, 'S':-4.0, 'T':-4.0, 'W':-4.0, 'Y':-4.0, 'V':-4.0, 'B':-4.0, 'Z':-4.0, 'X':-4.0, '*': 1.0 } 113 } 114
115 - def __init__(self, matrix=BLOSUM62):
116 self._matrix = matrix
117
118 - def score(self, x, y):
119 try: 120 return self._matrix[x][y] 121 except KeyError as ke: 122 raise ResidueNotFoundError(ke.message)
123 124 @staticmethod
125 - def parse(string):
126 """ 127 Parse a standard scoring matrix file, where the first row and 128 column are residue labels. 129 130 @param string: scoring matrix string 131 @type string: str 132 133 @rtype: L{SimilarityMatrix} 134 """ 135 136 residues = {} 137 matrix = {} 138 139 for line in string.splitlines(): 140 if not line.strip() or line.startswith("#"): 141 continue 142 143 if not residues: 144 residues = line.split() 145 146 else: 147 items = line.split() 148 if len(items) != len(residues) + 1: 149 raise ValueError("{0} scoring columns expected".format(len(residues))) 150 151 try: 152 aa, scores = items[0], map(float, items[1:]) 153 matrix[aa] = dict((residues[i], s) for i, s in enumerate(scores)) 154 except (KeyError, ValueError): 155 raise ValueError("Corrupt scoring matrix") 156 157 return SimilarityMatrix(matrix)
158
159 160 -class AbstractAlignmentAlgorithm(object):
161 """ 162 Base class for all sequence alignment algorithms. 163 164 This class was designed with simple sequence alignment algorithms in mind. 165 Implementors have full control over the behavior of the scoring function and 166 the dynamic programming matrix, but implementing things that require 167 additional matrices (such as affine gap penalties) might be trickier. 168 169 @param scoring: scoring matrix 170 @type scoring: L{AbstractScoringMatrix} 171 @param gap: simple gap penalty 172 @type gap: float 173 """ 174 175 __metaclass__ = ABCMeta 176
177 - def __init__(self, scoring=IdentityMatrix(), gap=0):
178 179 if not isinstance(scoring, AbstractScoringMatrix): 180 raise TypeError(scoring) 181 182 self._gap = float(gap) 183 self._scoring = scoring
184 185 @property
186 - def gap(self):
187 """ 188 Simple gap penalty 189 @rtype: float 190 """ 191 return self._gap
192 193 @property
194 - def scoring_matrix(self):
195 """ 196 Scoring matrix 197 @rtype: L{AbstractScoringMatrix} 198 """ 199 return self._scoring
200
201 - def align(self, query, subject):
202 """ 203 Align two sequences and return the optimal alignment. 204 205 @param query: first sequence 206 @type query: L{AbstractSequence} 207 @param subject: second sequence 208 @type subject: L{AbstractSequence} 209 210 @rtype: L{AlignmentResult} 211 """ 212 if query.length == 0 or subject.length == 0: 213 raise ValueError("Can't align zero length sequence") 214 215 # working with string sequences results in a massive speed-up 216 qseq = ["*"] + self._sequence(query) 217 sseq = ["*"] + self._sequence(subject) 218 219 # 1. create a dynamic programming matrix 220 matrix = [] 221 rows, cols = len(query), len(subject) 222 223 for i in range(rows + 1): 224 matrix.append([]) 225 for j in range(cols + 1): 226 matrix[i].append(None) 227 228 # 2. initialize the first row and column 229 self._initialize(matrix) 230 231 # fill 232 for i in range(1, rows + 1): 233 for j in range(1, cols + 1): 234 score = self._score(qseq[i], sseq[j]) 235 self._fill(matrix, i, j, score) 236 237 # 3. compute alignment 238 return self._traceback(matrix, query, subject)
239
240 - def _sequence(self, s):
241 """ 242 Extract and return the string sequence of {s}. 243 244 @param s: sequence object 245 @type s: L{AbstractSequence} 246 247 @rtype: list of str 248 """ 249 return list(s.sequence)
250 251 @abstractmethod
252 - def _initialize(self, matrix):
253 """ 254 Initialize (typically the first row and column) of the dynamic 255 programming matrix. 256 257 @param matrix: list (2D) 258 @type matrix: list 259 """ 260 pass
261
262 - def _score(self, residue1, residue2):
263 """ 264 Retrieve the pairwise score of two residues using the current 265 scoring matrix. 266 267 @rtype: float 268 """ 269 return self._scoring.score(residue1, residue2)
270
271 - def _fill(self, matrix, i, j, score):
272 """ 273 Compute and set the best score that leads to cell i,j in the dynamic 274 programming matrix: right, down or diagonal. 275 276 See also L{AbstractAlignmentAlgorithm._max}. 277 278 @param score: pairwise score at matrix[i][j] 279 @type score: float 280 @return: the best score 281 @rtype: float 282 """ 283 284 match = matrix[i-1][j-1] + score 285 insertion = matrix[i][j-1] + self._gap 286 deletion = matrix[i-1][j] + self._gap 287 288 best = self._max(match, insertion, deletion) 289 matrix[i][j] = best 290 291 return best
292 293 @abstractmethod
294 - def _max(self, match, insertion, deletion):
295 """ 296 Choose the best score among all given possibilities: 297 scores for match, insertion and deletion. This will determine 298 the direction taken at each step while building the dynamic programming 299 matrix (diagonal, down or right). 300 301 This is an expected notable point of divergence for most sequence 302 alignment algorithms. 303 """ 304 pass
305
306 - def _traceback(self, m, seq1, seq2):
307 """ 308 Trace back and return the optimal alignment. 309 """ 310 311 query = [] 312 subject = [] 313 314 # working with string sequences results in a massive speed-up 315 qseq = ["*"] + self._sequence(seq1) 316 sseq = ["*"] + self._sequence(seq2) 317 318 i, j = self._terminus(m) 319 qstart, start = i, j 320 qend, end = i, j 321 score = m[i][j] 322 323 while self._expandable(m, i, j): 324 325 if i > 0 and j > 0 and m[i][j] == (m[i-1][j-1] + self._score(qseq[i], sseq[j])): 326 query.append(seq1.residues[i]) 327 subject.append(seq2.residues[j]) 328 qstart, start = i, j 329 i, j = i - 1, j - 1 330 331 elif i > 0 and m[i][j] == (m[i-1][j] + self._gap): 332 query.append(seq1.residues[i]) 333 subject.append(ResidueInfo(-1, seq2.alphabet.GAP)) 334 qstart = i 335 i = i - 1 336 337 elif j > 0 and m[i][j] == (m[i][j-1] + self._gap): 338 query.append(ResidueInfo(-1, seq1.alphabet.GAP)) 339 subject.append(seq2.residues[j]) 340 start = j 341 j = j - 1 342 343 else: 344 assert False 345 346 query.reverse() 347 subject.reverse() 348 349 aligned_query = RichSequence(seq1.id, seq1.header, query, seq1.type) 350 aligned_subject = RichSequence(seq2.id, seq2.header, subject, seq2.type) 351 352 return AlignmentResult(score, aligned_query, aligned_subject, qstart, qend, start, end)
353 354 @abstractmethod
355 - def _terminus(self, matrix):
356 """ 357 Find the coordinates of the optimal alignment's right endpoint in the 358 dynamic programming matrix. This is the starting point of a traceback. 359 360 @param matrix: the complete dynamic programming matrix 361 @type matrix: 2D list 362 363 @rtype: 2-tuple (i, j) 364 """ 365 pass
366 367 @abstractmethod
368 - def _expandable(self, i, j):
369 """ 370 Return True if the traceback procedure must not terminate at 371 position C{i,j} in the dynamic programming matrix. 372 373 @rtype: bool 374 """ 375 pass
376
377 378 -class GlobalAlignmentAlgorithm(AbstractAlignmentAlgorithm):
379 """ 380 Needleman-Wunsch global sequence alignment. 381 """ 382
383 - def __init__(self, scoring=IdentityMatrix(), gap=0):
384 super(GlobalAlignmentAlgorithm, self).__init__(scoring, gap)
385
386 - def _initialize(self, matrix):
387 388 for i in range(len(matrix)): 389 matrix[i][0] = self._gap * i 390 for j in range(len(matrix[0])): 391 matrix[0][j] = self._gap * j
392
393 - def _max(self, match, insertion, deletion):
394 return max(match, insertion, deletion)
395
396 - def _terminus(self, matrix):
397 398 i = len(matrix) - 1 399 j = len(matrix[0]) - 1 400 401 return (i, j)
402
403 - def _expandable(self, matrix, i, j):
404 return i > 0 or j > 0
405
406 -class LocalAlignmentAlgorithm(AbstractAlignmentAlgorithm):
407 """ 408 Smith-Waterman local sequence alignment. 409 """ 410 411 START = 0 412 """ 413 Score for initiation of a new local alignment 414 """ 415
416 - def __init__(self, scoring=IdentityMatrix(), gap=-1):
417 super(LocalAlignmentAlgorithm, self).__init__(scoring, gap)
418
419 - def _initialize(self, matrix):
420 421 for i in range(len(matrix)): 422 matrix[i][0] = LocalAlignmentAlgorithm.START 423 for j in range(len(matrix[0])): 424 matrix[0][j] = LocalAlignmentAlgorithm.START
425
426 - def _max(self, match, insertion, deletion):
427 return max(match, insertion, deletion, LocalAlignmentAlgorithm.START)
428
429 - def _terminus(self, matrix):
430 431 maxi, maxj = 0, 0 432 433 for i in range(len(matrix)): 434 for j in range(len(matrix[i])): 435 if matrix[i][j] > matrix[maxi][maxj]: 436 maxi, maxj = i, j 437 438 return (maxi, maxj)
439
440 - def _expandable(self, matrix, i, j):
441 return matrix[i][j] != LocalAlignmentAlgorithm.START
442
443 444 -class AlignmentResult(object):
445 """ 446 Represents a pairwise sequence alignment result. 447 448 @param score: raw alignment score 449 @type score: float 450 @param query: aligned query sequence (with gaps) 451 @type query: L{AbstractSequence} 452 @param subject: aligned subject sequence (with gaps) 453 @type subject: L{AbstractSequence} 454 @param qstart: query start position 455 @type qstart: int 456 @param qend: query end position 457 @type qend: int 458 @param start: subject start position 459 @type start: int 460 @param end: subject end position 461 @type end: int 462 """ 463
464 - def __init__(self, score, query, subject, qstart, qend, start, end):
465 466 if not isinstance(query, AbstractSequence): 467 raise TypeError(query) 468 if not isinstance(subject, AbstractSequence): 469 raise TypeError(query) 470 471 if not (len(query) == len(subject)): 472 raise ValueError("Corrupt alignment") 473 474 self._score = float(score) 475 self._query = query 476 self._subject = subject 477 self._qstart = int(qstart) 478 self._qend = int(qend) 479 self._start = int(start) 480 self._end = int(end) 481 self._identicals = 0 482 self._gaps = 0 483 self._length = 0 484 485 if query.length > 0 and subject.length > 0: 486 487 if not 1 <= qstart <= qend: 488 raise ValueError("Invalid query start/end positions") 489 if not 1 <= start <= end: 490 raise ValueError("Invalid subject start/end positions") 491 492 qgap = query.alphabet.GAP 493 sgap = subject.alphabet.GAP 494 495 for q, s in zip(query, subject): 496 if q.type == qgap or s.type == sgap: 497 self._gaps += 1 498 elif q.type == s.type: 499 self._identicals += 1 500 501 self._length = (self._gaps + (qend - qstart + 1) + (end - start + 1)) / 2 502 503 else: 504 if (score + qstart + qend + start + end) != 0: 505 raise ValueError("Corrupt alignment") 506 self._length = 0
507 508
509 - def __str__(self):
510 string = "{0.qstart:5} {0.query.sequence:} {0.qend:<5}\n" 511 string += "{0.start:5} {0.subject.sequence:} {0.end:<5}" 512 return string.format(self)
513 514 @property
515 - def is_empty(self):
516 """ 517 Return True if this is an empty alignment (i.e. no matches) 518 @rtype: bool 519 """ 520 return self.length == 0 or self.gaps == self.length
521 522 @property
523 - def score(self):
524 """ 525 Raw alignment score 526 @rtype: float 527 """ 528 return self._score
529 530 @property
531 - def query(self):
532 """ 533 Aligned query sequence (with gaps) 534 @rtype: L{AbstractSequence} 535 """ 536 return self._query
537 538 @property
539 - def subject(self):
540 """ 541 Aligned subject sequence (with gaps) 542 @rtype: L{AbstractSequence} 543 """ 544 return self._subject
545 546 @property
547 - def qstart(self):
548 """ 549 Query start position 550 @rtype: int 551 """ 552 return self._qstart
553 554 @property
555 - def qend(self):
556 """ 557 Query end position 558 @rtype: int 559 """ 560 return self._qend
561 562 @property
563 - def start(self):
564 """ 565 Subject start position 566 @rtype: int 567 """ 568 return self._start
569 570 @property
571 - def end(self):
572 """ 573 Subject end position 574 @rtype: int 575 """ 576 return self._end
577 578 @property
579 - def identicals(self):
580 """ 581 Number of identical residues 582 @rtype: int 583 """ 584 return self._identicals
585 586 @property
587 - def identity(self):
588 """ 589 Percentage of identical residues 590 @rtype: int 591 """ 592 return float(self._identicals) / self._length
593 594 @property
595 - def gaps(self):
596 """ 597 Total number of gaps (query + subject) 598 @rtype: int 599 """ 600 return self._gaps
601 602 @property
603 - def length(self):
604 """ 605 Alignment length (query + subject + gaps / 2) 606 @rtype: int 607 """ 608 return self._length
609
610 - def alignment(self):
611 """ 612 @return: as a sequence alignment object 613 @rtype: L{SequenceAlignment} 614 """ 615 return SequenceAlignment([self.query, self.subject])
616