Package csb :: Package bio :: Package io :: Module clans
[frames] | no frames]

Source Code for Module csb.bio.io.clans

   1  """ 
   2  Classes for parsing/manipulating/writing CLANS (by Tancred Frickey) files 
   3   
   4  This module defines L{ClansParser} and L{ClansFileWriter} for parsing and writing CLANS format files, respectively. 
   5  Further, class L{Clans} and several helper classes are used to hold and handle the parsed data. 
   6   
   7  The most commonly used CLANS data can be accessed in a L{Clans} instance via 
   8   - .entries <L{ClansEntryCollection} containing L{ClansEntry} instances> 
   9    - .name 
  10    - .seq <the amino acid sequence> 
  11    - .hsps <connections of this L{ClansEntry} to others> 
  12    - .groups <L{ClansSeqgroup}s the entry belongs to> 
  13   - .seqgroups <L{ClansSeqgroupCollection} containing L{ClansSeqgroup} instances> 
  14   - .params <L{ClansParams}> 
  15   
  16  Parse a file into L{Clans} instance C{clans_instance} by 
  17      >>> clans_instance = ClansParser().parse_file('input.clans') 
  18   
  19  Create a new entry C{e} with name \"C{my entry}\", sequence C{AAAA} and coordinates C{(x=1, y=1, z=1)} 
  20      >>> e = ClansEntry(name='my entry', seq='AAAA', coords=(1, 1, 1)) 
  21   
  22  and add it to an existing L{Clans} instance C{clans_instance} 
  23      >>> clans_instance.add_entry(e) 
  24   
  25  Entries can be accessed using indices of C{Clans} instances 
  26      >>> clans_instance[0]  # access to first entry 
  27   
  28  and deleted by 
  29      >>> clans_instance.remove_entry(e) 
  30   
  31  Equivalent functions exist for ClansSeqgroups. 
  32   
  33  Author: Klaus Kopec 
  34  MPI fuer Entwicklungsbiologie, Tuebingen 
  35  """ 
  36   
  37  import os 
  38  import re 
  39  import operator 
  40  import csb.core 
  41   
  42  from abc import ABCMeta, abstractmethod 
  43  from numpy import array, float64, eye, random 
class DuplicateEntryNameError(Exception):
    """
    Signals that two entries share the same name; raised during
    L{Clans.get_entry}.
    """
51
class DuplicateEntryError(Exception):
    """
    Signals that two entries are identical in name, sequence, and
    coordinates; raised during L{Clans._update_index}.
    """
58
class MissingBlockError(Exception):
    """
    Signals that an expected tag was not found while parsing a CLANS file.
    """
65
class UnknownTagError(ValueError):
    """
    Signals that an unknown tag was encountered while parsing a CLANS file.
    """
72
class Color(object):
    """
    RGB color handling class.
    Color is stored as r, g, b, and a (i.e. alpha) attributes.
    Default color is C{r}=C{g}=C{b}=0 (i.e. black) with a=255

    @param r: the red value
    @type r: int

    @param g: the green value
    @type g: int

    @param b: the blue value
    @type b: int

    @param a: the alpha value
    @type a: int

    @raise ValueError: if any value is outside of range(256)
    """

    def __init__(self, r=0, g=0, b=0, a=255):
        # create the backing fields first, then route every value through
        # the validating property setters
        self._r = self._g = self._b = self._a = None
        self.r = r
        self.g = g
        self.b = b
        self.a = a

    def __repr__(self):
        return 'Color {0}'.format(self.to_clans_color())

    __str__ = __repr__

    @staticmethod
    def from_string(color_string, separator=';'):
        """
        Factory for a Color instance created from a string formatted as
        r{separator}g{separator}b{separator}a, where the final
        '{separator}a' is optional.

        @param color_string: the color string
        @type color_string: str

        @param separator: the string that separates the color values
        @type separator: str

        @rtype: L{Color}
        @return: the color described by C{color_string}

        @raises TypeError: if C{color_string} is not a string
        @raises ValueError: if C{color_string} does not hold 3 or 4 values or
                            if any value in color is outside of range(256)
        """
        if not isinstance(color_string, csb.core.string):
            raise TypeError('{0} is no string'.format(color_string))

        if color_string.count(separator) not in (2, 3):
            raise ValueError(
                ('format needs to be \'r{0}g{0}b\' but color_string was ' +
                 '{1} [optionally with alpha value: \'r{0}g{0}b{0}a\']').format(
                     separator, color_string))

        # split on the requested separator (not on a hard-coded ';'); with
        # three values the alpha channel falls back to the default of 255
        channels = [int(value) for value in color_string.split(separator)]

        return Color(*channels)

    @property
    def r(self):
        """
        the red value of the RGB color.

        raises ValueError if C{value} is outside of range(256)

        @rtype: int
        """
        return self._r

    @r.setter
    def r(self, value):
        """
        Set the red value of the RGB color.
        """
        if value < 0 or value > 255:
            raise ValueError(
                'valid color values are in range(256), was \'{0}\''.format(
                    value))

        self._r = value

    @property
    def g(self):
        """
        the green value of the RGB color.

        raises ValueError if C{value} is outside of range(256)

        @rtype: int
        """
        return self._g

    @g.setter
    def g(self, value):
        if value < 0 or value > 255:
            raise ValueError('valid color values are in range(256).')

        self._g = value

    @property
    def b(self):
        """
        the blue value of the RGB color.

        raises ValueError if C{value} is outside of range(256)

        @rtype: int
        """
        return self._b

    @b.setter
    def b(self, value):
        if value < 0 or value > 255:
            raise ValueError('valid color values are in range(256).')

        self._b = value

    @property
    def a(self):
        """
        the alpha value of the RGB color.

        raises ValueError if C{value} is outside of range(256)

        @rtype: int
        """
        return self._a

    @a.setter
    def a(self, value):
        if value < 0 or value > 255:
            raise ValueError('valid color values are in range(256).')

        self._a = value

    def to_clans_color(self):
        """
        Formats the color for use in CLANS files.

        @return: the color formatted for use in CLANS files; format: r;g;b;a
        @rtype: str
        """
        return '{0.r};{0.g};{0.b};{0.a}'.format(self)
223
class ClansParser(object):
    """
    CLANS file format aware parser.
    """

    def __init__(self):
        self._clans_instance = None
        self._data_block_dict = {}

    def __repr__(self):
        return 'ClansParser instance'

    __str__ = __repr__

    @property
    def clans_instance(self):
        """
        the L{Clans} instance that resulted from parsing a CLANS file.

        raises a ValueError if no CLANS file has been parsed yet

        @rtype: L{Clans} instance
        """
        if self._clans_instance is None:
            raise ValueError('you need to parse a CLANS file first')

        return self._clans_instance

    def parse_file(self, filename, permissive=True):
        """
        Create a L{Clans} instance by parsing the CLANS format file C{filename}

        @param filename: name of the CLANS file.
        @type filename: str

        @param permissive: if True, tolerate missing non-essential or unknown
        blocks.
        @type permissive: bool

        @rtype: L{Clans} instance
        @return: a L{Clans} instance containing the parsed data

        @raise MissingBlockError: if C{permissive == False} and one of the
        param, rotmtx, seq or pos blocks is missing
        @raise UnknownTagError: if C{permissive == False} and an unknown tag/
        data block is encountered
        """
        self._clans_instance = Clans()
        self._clans_instance._filename = filename

        self._read_block_dict()  # read and preprocess the CLANS file

        try:  # param and rotmtx are non-essential blocks
            self._parse_param()
            self._parse_rotmtx()
        except MissingBlockError:
            if not permissive:
                raise

        seq = {}
        try:
            seq = self._parse_seq()
        except MissingBlockError:
            if not permissive:
                raise

        seqgroups = self._parse_seqgroups()

        pos = {}
        try:
            pos = self._parse_pos()
        except MissingBlockError:
            if not permissive:
                raise

        ## connections are stored in exactly one of <hsp>, <att> or <mtx>;
        ## a file without any of them simply has no connections
        hsp_att_mode = "hsp"
        hsp = {}
        try:
            if 'hsp' in self._data_block_dict:
                hsp = self._parse_hsp_att('hsp')

            elif 'att' in self._data_block_dict:
                hsp_att_mode = "att"
                hsp = self._parse_hsp_att('att')

            elif 'mtx' in self._data_block_dict:
                hsp = self._parse_mtx()

        except MissingBlockError:
            if not permissive:
                raise

        ## raise UnknownTagError for unknown blocks
        known_block_tags = set(('param', 'rotmtx', 'seq', 'seqgroups', 'pos',
                                'hsp', 'mtx', 'att'))
        unprocessed_block_tags = set(self._data_block_dict.keys()).difference(
            known_block_tags)

        if len(unprocessed_block_tags) > 0 and not permissive:
            raise UnknownTagError(
                ('tags unknown: {0}. File corrupt or further implementations '
                 + 'needed!').format(', '.join(sorted(unprocessed_block_tags))))

        ## if no entries exist, we cannot add pos, seqgroup and hsp data
        if len(seq) > 0:
            clans = self._clans_instance

            ## add Entries in ascending order of their running number:
            ## seqgroup members and hsp partners are looked up by their
            ## position in clans.entries, so the order must not depend on
            ## dict iteration order or on the line order of the <pos> block
            for i in sorted(pos):
                clans.add_entry(ClansEntry(seq[i][0], seq[i][1],
                                           pos[i], parent=clans))

            ## add groups
            clans._seqgroups = ClansSeqgroupCollection()
            for group_raw_data in seqgroups:

                group = ClansSeqgroup(name=group_raw_data['name'],
                                      type=group_raw_data['type'],
                                      size=group_raw_data['size'],
                                      hide=group_raw_data['hide'] == '1',
                                      color=group_raw_data['color'])

                ## get members corresponding to the IDs in this group
                ## NOTE: this silently corrects files where a seqgroup
                ##       contains the same entry multiple times
                members = [clans.entries[number]
                           for number in set(group_raw_data['numbers'])]

                clans.add_group(group, members)

            ## add hsp values
            for (a, b), value in hsp.items():
                clans.entries[a].add_hsp(clans.entries[b], value)

        self._clans_instance._hsp_att_mode = hsp_att_mode

        return self._clans_instance

    def _read_block_dict(self):
        """
        Extracts all <tag>DATA</tag> blocks from file
        self.clans_instance.filename and stores them in
        C{self._data_block_dict} in the form: dict[tag] = list of the lines
        of DATA.
        """
        path = os.path.expanduser(self._clans_instance.filename)

        ## the context manager closes the file even if reading fails
        with open(path) as stream:
            content = stream.read()

        # remove the first line, i.e. sequence=SEQUENCE_COUNT
        data_blocks = content.split('\n', 1)[1]

        ## flag re.DOTALL is necessary to make . match newlines
        data = re.findall(r'(<(\w+)>(.+)</\2>)', data_blocks,
                          flags=re.DOTALL)
        self._data_block_dict = dict(
            (tag, datum.strip().split('\n'))
            for _tag_plus_data, tag, datum in data)

    def _parse_param(self):
        """
        Parse a list of lines in the CLANS <param> format:

        parameter1=data1
        parameter2=data2
        ...

        The result is stored as L{ClansParams} in the clans_instance.

        @raise MissingBlockError: if the file contains no <param> block
        """
        if 'param' not in self._data_block_dict:
            raise MissingBlockError('file contains no <param> block.')

        block = self._data_block_dict['param']

        ## split at the first '=' only, so values may contain '=' themselves
        tmp_params = dict(line.split('=', 1) for line in block)

        ## create colors entry from colorcutoffs and colorarr
        colorcutoffs = [float(val) for val in
                        tmp_params.pop('colorcutoffs').strip(';').split(';')]
        colors = tmp_params.pop('colorarr').strip(':')
        colors = colors.replace('(', '').replace(')', '').split(':')
        colorarr = [Color(*map(int, color.split(';'))) for color in colors]

        tmp_params['colors'] = tuple(zip(colorcutoffs, colorarr))

        ## convert 'true' and 'false' into Python bools
        for name in list(tmp_params):
            if tmp_params[name] == 'true':
                tmp_params[name] = True
            elif tmp_params[name] == 'false':
                tmp_params[name] = False

        self._clans_instance._params = ClansParams(strict=False, **tmp_params)

    def _parse_rotmtx(self):
        """
        Parse a list of lines in the CLANS <rotmtx> format. The data is stored
        in the clans_instance as a 3x3 numpy.array.

        @raise MissingBlockError: if the file contains no <rotmtx> block
        @raise ValueError: if the rotmtx block does not contain exactly 3 lines
        """
        if 'rotmtx' not in self._data_block_dict:
            raise MissingBlockError('file contains no <rotmtx> block.')

        block = self._data_block_dict['rotmtx']

        if len(block) != 3:
            raise ValueError('CLANS <rotmtx> blocks comprise exactly 3 lines.')

        ## only the first three ';'-separated fields of each line are used
        self._clans_instance.rotmtx = array(
            [[float64(val) for val in line.split(';')[:3]] for line in block])

    def _parse_seq(self):
        """
        Parse a list of lines in the CLANS <seq> format, which are in FASTA
        format.

        @rtype: dict
        @return: dict with running numbers as key and 2-tuples (id, sequence)
                 as values

        @raise MissingBlockError: if the file contains no <seq> block
        """
        if 'seq' not in self._data_block_dict:
            raise MissingBlockError(
                'file contains no <seq> block. This is OK if the file does '
                + 'not contain any sequences.')

        block = self._data_block_dict['seq']
        if len(block) % 2 == 1:
            ## pad a trailing header line with an empty sequence (on a copy,
            ## so the stored block is left untouched)
            block = block + ['']

        ## line 2i is the header (leading '>' removed), line 2i+1 the sequence
        return dict((i, (block[2 * i][1:], block[2 * i + 1].strip()))
                    for i in range(len(block) // 2))

    def _parse_seqgroups(self):
        """
        Parse a list of lines in the CLANS <seqgroup> format:

        name=name of the group
        type=0
        size=12
        hide=0
        color=255;204;51
        numbers=0;1;2;3;4;5;6;10;13
        ...

        @rtype: list
        @return: list of dicts (one for each group) with the tags (name, type,
                 size, hide, ...) as keys. C{numbers} is a list of integers,
                 all other values are kept as strings. The list is empty if
                 the file contains no <seqgroups> block.
        """
        if 'seqgroups' not in self._data_block_dict:
            return []

        block = self._data_block_dict['seqgroups']

        groups = []
        for line in block:
            p, v = line.split('=', 1)
            if p == 'name':
                ## every 'name' line starts a new group
                groups.append({'name': v})
            elif p == 'numbers':
                ## skip empty fields instead of blindly dropping the last one:
                ## this handles a terminal semicolon ("numbers=0;1;"), empty
                ## groups ("numbers=;") and lines without a terminal semicolon
                groups[-1][p] = [int(member) for member in v.split(';')
                                 if member != '']
            else:
                groups[-1][p] = v
        return groups

    def _parse_pos(self):
        """
        Parse a list of lines in the CLANS <pos> format 'INT FLOAT FLOAT
        FLOAT'.

        @rtype: dict
        @return: a dict using the integers as keys and a numpy array created
                 from the remaining floats of the line as values.

        @raise MissingBlockError: if the file contains no <pos> block
        """
        if 'pos' not in self._data_block_dict:
            raise MissingBlockError(
                'file contains no <pos> block. This is OK if the file does '
                + 'not contain any sequences.')

        block = self._data_block_dict['pos']

        positions = {}
        for line in block:
            fields = line.split()
            positions[int(fields[0])] = array(
                [float64(val) for val in fields[1:]])
        return positions

    def _parse_hsp_att(self, mode):
        """
        Parse a list of lines in the CLANS <hsp> format 'INT INT:FLOAT' or in
        the CLANS <att> format 'INT INT FLOAT'.

        NOTE: some CLANS <hsp> lines contain more than one float; we omit the
        additional numbers

        @param mode: either "hsp" or "att" depending on the type of tag to be
                     parsed
        @type mode: str

        @rtype: dict
        @return: a dict using 2-tuples of the two integers as keys and the
                 float as values

        @raise ValueError: if C{mode} is neither "hsp" nor "att"
        @raise MissingBlockError: if the file contains no block named C{mode}
        """
        if mode not in ("hsp", "att"):
            raise ValueError('mode must be either "hsp" or "att"')

        if mode not in self._data_block_dict:
            raise MissingBlockError(
                ('file contains no <{0}> block. This is OK if the file does '
                 + 'not contain any sequences or if none of the contained '
                 + 'sequences have any connections.').format(mode))

        block = self._data_block_dict[mode]

        connections = {}
        for line in block:
            if mode == "hsp":
                parts = line.split(':')
                key = tuple([int(val) for val in parts[0].split()])
                ## whitespace-tolerant: take the first float after the colon
                value = float(parts[1].split()[0])
            else:
                fields = line.split()
                key = tuple([int(val) for val in fields[:2]])
                value = float(fields[2])

            connections[key] = value

        return connections

    def _parse_mtx(self):
        """
        Parse a list of lines in the CLANS <mtx> format.

        @rtype: dict
        @return: a dict using 2-tuples of (row, column) as keys and the
                 float found there as values; zero entries are omitted

        @raise MissingBlockError: if the file contains no <mtx> block
        """
        if 'mtx' not in self._data_block_dict:
            raise MissingBlockError(
                'file contains no <mtx> block. This is OK if the file does '
                + 'not contain any sequences or if none of the contained '
                + 'sequences have any connections.')

        block = self._data_block_dict['mtx']

        matrix = {}
        for i, line in enumerate(block):
            ## the field after the terminal semicolon is dropped
            for j, entry in enumerate(line.split(';')[:-1]):
                value = float(entry)
                if value != 0:
                    matrix[(i, j)] = value
        return matrix
567
class ClansFileBuilder(object):
    """
    Abstract base class for building a file in CLANS format.
    Defines a common step-wise interface according to the Builder pattern.

    @param output: output stream (this is where the product is constructed)
    @type output: stream

    @raise TypeError: if C{output} has no C{write} attribute
    """

    __metaclass__ = ABCMeta

    def __init__(self, output):
        # anything that offers a 'write' attribute is accepted as a stream
        if hasattr(output, 'write'):
            self._out = output
        else:
            raise TypeError(output)

    @property
    def output(self):
        """
        Destination stream
        @rtype: stream
        """
        return self._out

    def write(self, text):
        """
        Write a chunk of text
        """
        stream = self._out
        stream.write(text)

    def writeline(self, text):
        """
        Write a chunk of text and append a new line terminator
        """
        stream = self._out
        for chunk in (text, '\n'):
            stream.write(chunk)

    @abstractmethod
    def add_param_block(self, block_data):
        """
        Append the <param> block; to be implemented by subclasses.
        """

    @abstractmethod
    def add_rotmtx_block(self, block_data):
        """
        Append the <rotmtx> block; to be implemented by subclasses.
        """

    @abstractmethod
    def add_seq_block(self, block_data):
        """
        Append the <seq> block; to be implemented by subclasses.
        """

    @abstractmethod
    def add_seqgroups_block(self, block_data):
        """
        Append the <seqgroups> block; to be implemented by subclasses.
        """

    @abstractmethod
    def add_pos_block(self, block_data):
        """
        Append the <pos> block; to be implemented by subclasses.
        """

    @abstractmethod
    def add_hsp_block(self, block_data):
        """
        Append the <hsp> block; to be implemented by subclasses.
        """
630
class ClansFileWriter(ClansFileBuilder):
    """
    Class for serializing a L{Clans} instance to a file in CLANS format.

    @param output: the output stream
    @type output: stream
    """

    def __init__(self, output):
        super(ClansFileWriter, self).__init__(output)

    def serialize(self, clans_instance):
        """
        Creates a CLANS file containing all data from C{clans_instance}

        @param clans_instance: the source of the data to be serialized
        @type clans_instance: a L{Clans} instance
        """
        self.add_sequences_line(clans_instance)
        self.add_param_block(clans_instance)
        self.add_rotmtx_block(clans_instance)
        self.add_seq_block(clans_instance)
        self.add_seqgroups_block(clans_instance)
        self.add_pos_block(clans_instance)
        self.add_hsp_block(clans_instance)

    def add_sequences_line(self, clans_instance):
        """
        Appends the 'sequences=<#sequences>' line to the output.

        @param clans_instance: the source of the data to be serialized
        @type clans_instance: a L{Clans} instance
        """
        self.writeline('sequences={0}'.format(len(clans_instance.entries)))

    def add_param_block(self, clans_instance):
        """
        Appends a <param>data</param> CLANS file block to the output.

        @param clans_instance: the source of the data to be serialized
        @type clans_instance: a L{Clans} instance
        """
        param_block = clans_instance.params._to_clans_param_block()
        self.write(param_block)

    def add_rotmtx_block(self, clans_instance):
        """
        Appends a <rotmtx>data</rotmtx> CLANS file block to the output.
        Nothing is written if C{clans_instance.rotmtx} is None.

        @param clans_instance: the source of the data to be serialized
        @type clans_instance: a L{Clans} instance

        @raise ValueError: if clans_instance.rotmtx is no 3x3 numpy.array
        """
        rotmtx = clans_instance.rotmtx

        if rotmtx is None:
            return

        if rotmtx.shape != (3, 3):
            raise ValueError('rotmtx must be a 3x3 array')

        self.writeline('<rotmtx>')

        ## one line per matrix row, every value followed by a semicolon
        self.write('\n'.join(
            ['{0};{1};{2};'.format(*tuple(rotmtx[i])) for i in range(3)]))
        self.write('\n')

        self.writeline('</rotmtx>')

    def add_seq_block(self, clans_instance):
        """
        Appends a <seq>data</seq> CLANS file block to the output.

        @param clans_instance: the source of the data to be serialized
        @type clans_instance: a L{Clans} instance
        """
        self.writeline('<seq>')

        self.write(''.join([e.output_string_seq()
                            for e in clans_instance.entries]))

        self.writeline('</seq>')

    def add_seqgroups_block(self, clans_instance):
        """
        Appends a <seqgroups>data</seqgroups> CLANS file block to the output.
        Nothing is written if the instance has no seqgroups.

        @param clans_instance: the source of the data to be serialized
        @type clans_instance: a L{Clans} instance
        """
        seqgroups = clans_instance.seqgroups

        if seqgroups is not None and len(seqgroups) > 0:

            self.writeline('<seqgroups>')

            self.write('\n'.join([s.output_string() for s in seqgroups]))
            self.write('\n')

            self.writeline('</seqgroups>')

    def add_pos_block(self, clans_instance):
        """
        Appends a <pos>data</pos> CLANS file block to the output.

        @param clans_instance: the source of the data to be serialized
        @type clans_instance: a L{Clans} instance
        """
        self.writeline('<pos>')

        self.write('\n'.join([e.output_string_pos()
                              for e in clans_instance.entries]))
        self.write('\n')

        self.writeline('</pos>')

    def add_hsp_block(self, clans_instance):
        """
        Appends a <hsp>data</hsp> CLANS file block to the output.
        If the CLANS instance has hsp_att_mode=="att" we add a
        <att>data</att> block instead, which differs only in the separator
        in front of the value.

        Every connection is written once, from the entry with the lower id
        to the entry with the higher id.

        @param clans_instance: the source of the data to be serialized
        @type clans_instance: a L{Clans} instance
        """
        mode = clans_instance._hsp_att_mode

        ## the line layout only depends on the mode, so pick it once
        if mode == "att":
            line_format = '{0} {1} {2}\n'
        else:
            line_format = '{0} {1}:{2}\n'

        self.writeline('<{0}>'.format(mode))

        ## sorting is not necessary, but makes a nicer looking clans file
        id_to_entry = [(e.get_id(), e) for e in clans_instance.entries]
        id_to_entry.sort(key=operator.itemgetter(0))
        entry_to_id = dict((entry, identifier)
                           for (identifier, entry) in id_to_entry)

        for entry1_id, entry1 in id_to_entry:

            ## sort list of hsp targets by id
            hsp_targets = [(entry_to_id[entry2], pvalue)
                           for (entry2, pvalue) in entry1.hsp.items()]
            hsp_targets.sort(key=operator.itemgetter(0))

            for entry2_id, pvalue in hsp_targets:
                ## skip self-hits and the mirrored direction of a pair
                if entry1_id >= entry2_id:
                    continue

                self.write(
                    line_format.format(entry1_id, entry2_id, repr(pvalue)))

        self.writeline('</{0}>'.format(mode))
789
790 791 -class ClansParams(object):
792 """ 793 Class for handling L{Clans} parameters. 794 See L{ClansParams}._DEFAULTS for accepted parameter names. 795 796 @kwparam **kw: parameters as C{kw[parameter_name] = parameter_value} 797 798 @raise KeyError: if a supplied parameter name is not known 799 (i.e. it is not a key in _DEFAULTS) 800 """ 801 802 _DEFAULTS = {'attfactor': 10.0, 803 'attvalpow': 1, 804 'avgfoldchange': False, 805 'blastpath': 'blastall -p blastp', 806 'cluster2d': False, 807 'colors': ((0.0, (230, 230, 230)), 808 (0.1, (207, 207, 207)), 809 (0.2, (184, 184, 184)), 810 (0.3, (161, 161, 161)), 811 (0.4, (138, 138, 138)), 812 (0.5, (115, 115, 115)), 813 (0.6, (92, 92, 92)), 814 (0.7, (69, 69, 69)), 815 (0.8, (46, 46, 46)), 816 (0.9, (23, 23, 23))), 817 'complexatt': True, 818 'cooling': 1.0, 819 'currcool': 1.0, 820 'dampening': 0.2, 821 'dotsize': 2, 822 'formatdbpath': 'formatdb', 823 'groupsize': 4, 824 'maxmove': 0.1, 825 'minattract': 1.0, 826 'ovalsize': 10, 827 'pval': 1.0, 828 'repfactor': 5.0, 829 'repvalpow': 1, 830 'showinfo': True, 831 'usefoldchange': False, 832 'usescval': False, 833 'zoom': 1.0} 834
835 - def __init__(self, strict=True, **kw):
836 self.set_default_params() 837 838 for param_name, param_value in kw.items(): 839 if param_name not in self._DEFAULTS and strict: 840 raise KeyError('parameter {0} (value: {1}) unknown'.format( 841 param_name, param_value)) 842 self.__setattr__(param_name, param_value)
843 844 @property
845 - def complexatt(self):
846 """ 847 if True, complex attraction computations are used. 848 849 raises ValueError if set to non-boolean value 850 851 @rtype: bool 852 """ 853 return self._complexatt
854 855 @complexatt.setter
856 - def complexatt(self, value):
857 if not isinstance(value, bool): 858 raise ValueError(('complexatt cannot be {0} (accepted values: True' 859 + '/False)').format(value)) 860 self._complexatt = value
861 862 @property
863 - def attfactor(self):
864 """ 865 factor in the attractive force 866 867 raises ValueError if C{value} is not castable to float 868 869 @rtype: float 870 """ 871 return self._attfactor
872 873 @attfactor.setter
874 - def attfactor(self, value):
875 self._attfactor = float(value)
876 877 @property
878 - def attvalpow(self):
879 """ 880 exponent in the attractive force 881 882 raises ValueError if C{value} is not castable to float 883 884 @rtype: float 885 """ 886 return self._attvalpow
887 888 @attvalpow.setter
889 - def attvalpow(self, value):
890 self._attvalpow = float(value)
891 892 @property
893 - def repfactor(self):
894 """ 895 factor in the repulsive force 896 897 raises ValueError if C{value} is not castable to float 898 899 @rtype: float 900 """ 901 return self._repfactor
902 903 @repfactor.setter
904 - def repfactor(self, value):
905 self._repfactor = float(value)
906 907 @property
908 - def repvalpow(self):
909 """ 910 exponent in the repulsive force 911 912 raises ValueError if C{value} is not castable to float 913 914 @rtype: float 915 """ 916 return self._repvalpow
917 918 @repvalpow.setter
919 - def repvalpow(self, value):
920 self._repvalpow = float(value)
921 922 @property
923 - def cluster2d(self):
924 """ 925 if True, clustering is done in 2D. Else in 3D. 926 927 raises ValueError if set to non-boolean value 928 929 @rtype: bool 930 """ 931 return self._cluster2d
932 933 934 @cluster2d.setter
935 - def cluster2d(self, value):
936 if not isinstance(value, bool): 937 raise ValueError(('cluster2d cannot be {0} (accepted values: True' 938 + '/False)').format(value)) 939 940 self._cluster2d = value
941 942 @property
943 - def pval(self):
944 """ 945 p-value cutoff that determines which connections are considered for 946 the attractive force 947 948 raises ValueError if C{value} is not castable to float 949 950 @rtype: float 951 """ 952 return self._pval
953 954 @pval.setter
955 - def pval(self, value):
956 self._pval = float(value)
957 958 @property
959 - def maxmove(self):
960 """ 961 maximal sequence (i.e. dot in the clustermap) movement per round 962 963 raises ValueError if C{value} is not castable to float 964 965 @rtype: float 966 """ 967 return self._maxmove
968 969 @maxmove.setter
970 - def maxmove(self, value):
971 self._maxmove = float(value)
972 973 @property
974 - def usescval(self):
975 """ 976 parameter with unclear function. Check in Clans. 977 978 raises ValueError if set to non-boolean value 979 980 @rtype: bool 981 """ 982 return self._usescval
983 984 @usescval.setter
985 - def usescval(self, value):
986 if not isinstance(value, bool): 987 raise ValueError(('usescval cannot be {0} (accepted values: True' 988 + '/False)').format(value)) 989 990 self._usescval = value
991 992 @property
993 - def cooling(self):
994 """ 995 parameter with unclear function. Check in Clans. 996 997 raises ValueError if C{value} is not castable to float 998 999 @rtype: float 1000 """ 1001 return self._cooling
1002 1003 @cooling.setter
1004 - def cooling(self, value):
1005 self._cooling = float(value)
1006 1007 @property
1008 - def currcool(self):
1009 """ 1010 parameter with unclear function. Check in Clans. 1011 1012 raises ValueError if C{value} is not castable to float 1013 1014 @rtype: float 1015 """ 1016 return self._currcool
1017 1018 @currcool.setter
1019 - def currcool(self, value):
1020 self._currcool = float(value)
1021 1022 @property
1023 - def dampening(self):
1024 """ 1025 parameter with unclear function. Check in Clans. 1026 1027 raises ValueError if C{value} is not castable to float 1028 1029 @rtype: float 1030 """ 1031 return self._dampening
1032 1033 @dampening.setter
1034 - def dampening(self, value):
1035 self._dampening = float(value)
1036 1037 @property
1038 - def minattract(self):
1039 """ 1040 parameter with unclear function. Check in Clans. 1041 1042 raises ValueError if C{value} is not castable to float 1043 1044 @rtype: float 1045 """ 1046 return self._minattract
1047 1048 @minattract.setter
1049 - def minattract(self, value):
1050 self._minattract = float(value)
1051 1052 @property
1053 - def blastpath(self):
1054 """ 1055 path to the BLAST executable for protein-protein comparisons. BLAST+ is 1056 currently not supported by Clans. 1057 1058 raises ValueError if C{value} is not a string 1059 1060 @rtype: str 1061 """ 1062 return self._blastpath
1063 1064 @blastpath.setter
1065 - def blastpath(self, value):
1066 if not isinstance(value, csb.core.string): 1067 raise ValueError(('blastpath cannot be {0} (accepted values: ' 1068 + 'strings)').format(value)) 1069 1070 self._blastpath = value
1071 1072 @property
1073 - def formatdbpath(self):
1074 """ 1075 path to the formatdb executable of BLAST. 1076 1077 raises ValueError if C{value} is not a string 1078 1079 @rtype: str 1080 """ 1081 return self._formatdbpath
1082 1083 @formatdbpath.setter
1084 - def formatdbpath(self, value):
1085 if not isinstance(value, csb.core.string): 1086 raise ValueError(('formatdbpath cannot be {0} (accepted values: ' 1087 + 'strings)').format(value)) 1088 1089 self._formatdbpath = value
1090 1091 @property
1092 - def showinfo(self):
1093 """ 1094 if True, additional data (rotation matrix) is shown in the clustring 1095 window) 1096 1097 raises ValueError if set to non-boolean value 1098 1099 @rtype: bool 1100 """ 1101 return self._showinfo
1102 1103 @showinfo.setter
1104 - def showinfo(self, value):
1105 if not isinstance(value, bool): 1106 raise ValueError(('showinfo cannot be {0} (accepted values: True' 1107 + '/False)').format(value)) 1108 1109 self._showinfo = value
1110 1111 @property
1112 - def zoom(self):
1113 """ 1114 zoom value (1.0 == not zoomed) 1115 1116 raises ValueError if C{value} is not castable to float 1117 1118 @rtype: float 1119 """ 1120 return self._zoom
1121 1122 @zoom.setter
1123 - def zoom(self, value):
1124 self._zoom = float(value)
1125 1126 @property
1127 - def dotsize(self):
1128 """ 1129 size of the central dot representing each sequence in the clustermap 1130 1131 raises ValueError if C{value} is not castable to int 1132 1133 @rtype: int 1134 """ 1135 return self._dotsize
1136 1137 @dotsize.setter
1138 - def dotsize(self, value):
1139 self._dotsize = int(value)
1140 1141 @property
1142 - def ovalsize(self):
1143 """ 1144 size of the circle around selected sequences 1145 1146 raises ValueError if value not castable to int 1147 1148 @rtype: int 1149 """ 1150 return self._ovalsize
1151 1152 @ovalsize.setter
1153 - def ovalsize(self, value):
1154 self._ovalsize = int(value)
1155 1156 @property
1157 - def groupsize(self):
1158 """ 1159 default for the size of circles that mark newly created groups 1160 1161 raises ValueError if C{value} is not castable to int 1162 1163 @rtype: int 1164 """ 1165 return self._groupsize
1166 1167 @groupsize.setter
1168 - def groupsize(self, value):
1169 self._groupsize = int(value)
1170 1171 @property
1172 - def usefoldchange(self):
1173 """ 1174 parameter with unclear function. Check in Clans. 1175 1176 raises ValueError if set to non-boolean value 1177 1178 @rtype: bool 1179 """ 1180 return self._usefoldchange
1181 1182 @usefoldchange.setter
1183 - def usefoldchange(self, value):
1184 if not isinstance(value, bool): 1185 raise ValueError(('usefoldchange cannot be {0} (accepted values: ' 1186 + 'True/False)').format(value)) 1187 1188 self._usefoldchange = value
@property
def avgfoldchange(self):
    """
    Boolean CLANS parameter whose function is unclear. Check in Clans.

    Raises ValueError when set to anything that is not a C{bool}.

    @rtype: bool
    """
    return self._avgfoldchange

@avgfoldchange.setter
def avgfoldchange(self, value):
    # Only genuine booleans are stored; stand-ins such as 0/1 are rejected.
    if isinstance(value, bool):
        self._avgfoldchange = value
        return

    message = 'avgfoldchange cannot be {0} (accepted values: True/False)'
    raise ValueError(message.format(value))
@property
def colors(self):
    """
    Colors that define the coloring for different p-values/attractions.

    The value is a tuple of exactly 10 items, each of which must have
    length 2. Raises ValueError if set to anything else.

    @rtype: tuple
    """
    return self._colors

@colors.setter
def colors(self, value):
    if not isinstance(value, tuple):
        raise ValueError('colors must be a tuple')
    if len(value) != 10:
        raise ValueError('colors must be a 10-tuple')

    for item in value:
        # Items without a length (e.g. plain numbers) make len() raise
        # TypeError; report them with the documented ValueError instead.
        try:
            is_pair = len(item) == 2
        except TypeError:
            is_pair = False

        if not is_pair:
            raise ValueError('each item of colors must be a 2-tuple')

    self._colors = value
1230
def set_default_params(self):
    """
    Resets every parameter to its CLANS default value.
    See L{ClansParams}._DEFAULTS.

    All defaults except 'colors' are assigned as stored. The 'colors'
    default is a sequence of (cutoff, color arguments) pairs; each one is
    turned into a (cutoff, L{Color}) pair before being assigned.
    """
    for param_name, default_value in self._DEFAULTS.items():
        if param_name != 'colors':
            setattr(self, param_name, default_value)

    # The stored color defaults hold constructor arguments rather than
    # Color instances, so they are built here.
    self.colors = tuple(
        (cutoff, Color(*color_args))
        for cutoff, color_args in ClansParams._DEFAULTS['colors'])
1246
def _to_clans_param_block(self):
    """
    Creates a param block for a CLANS file from the L{ClansParams} values.

    Every key of L{ClansParams}._DEFAULTS is written as one C{name=value}
    line, sorted by name. 'colors' is written as the two entries
    'colorcutoffs' and 'colorarr'; the boolean parameters are written as
    'true'/'false'.

    @return: a CLANS file format <param>[data]</param> block
    @rtype: str
    """
    boolean_params = ('avgfoldchange', 'cluster2d', 'complexatt',
                      'showinfo', 'usefoldchange', 'usescval')
    rendered = {}

    for name in sorted(ClansParams._DEFAULTS):
        if name == 'colors':
            ## divide 'colors' into 'colorcutoffs' and 'colorarr'
            cutoff_parts = ['{0:.2f};'.format(cutoff)
                            for cutoff, _ in self.colors]
            color_parts = ['({0}):'.format(color.to_clans_color())
                           for _, color in self.colors]
            rendered['colorcutoffs'] = ''.join(cutoff_parts)
            rendered['colorarr'] = ''.join(color_parts)
        elif name in boolean_params:
            # a bool indexes the pair directly: False -> 0, True -> 1
            rendered[name] = ('false', 'true')[getattr(self, name)]
        else:
            rendered[name] = getattr(self, name)

    lines = ['{0}={1}'.format(name, rendered[name])
             for name in sorted(rendered)]

    return '<param>\n' + '\n'.join(lines) + '\n</param>\n'
1286
class ClansEntryCollection(csb.core.ReadOnlyCollectionContainer):
    """
    Read-only container for holding (and internally managing) L{ClansEntry}
    instances.
    """

    def __init__(self):
        # restrict the container to ClansEntry items
        super(ClansEntryCollection, self).__init__(type=ClansEntry)

    def _remove_item(self, item):
        """
        Removes {item} from the collection.

        @param item: the item to be removed
        @type item: a L{ClansEntry} instance

        @raises TypeError: if {item} is not a L{ClansEntry} instance
        """
        required_type = self._type

        # the type check only applies when the container has a type set
        if required_type and not isinstance(item, required_type):
            raise TypeError("Item {0} is not of the required {1} type.".format(
                item, required_type.__name__))

        self._items.remove(item)

    def _sort(self, key=None):
        """
        Sorts the entries in place, by their {name} unless a custom key
        function is given.

        @Note: If the L{ClansEntryCollection} is part of a L{Clans} instance,
        use L{Clans.sort} instead to avoid silently corrupting the index in
        L{Clans._idx}.

        @param key: None to sort by name, a custom key function else
        @type key: function
        """
        by_name = lambda entry: entry.name

        self._items.sort(key=by_name if key is None else key)
1329
1330 1331 -class ClansSeqgroupCollection(csb.core.ReadOnlyCollectionContainer):
1332 """ 1333 Read-only container for holding (and internally managing) L{ClansSeqgroup} 1334 instances. 1335 """ 1336
1337 - def __init__(self):
1340
1341 - def _remove_item(self, item):
1342 """ 1343 Removes {item} from the collection. 1344 1345 @param item: the item to be removed 1346 @type item: a L{ClansSeqgroup} instance 1347 1348 @raises TypeError: if {item} is not a L{ClansSeqgroup} instance 1349 """ 1350 1351 if self._type: 1352 if not isinstance(item, self._type): 1353 raise TypeError("Item {0} is not of the required {1} type.".format( 1354 item, self._type.__name__)) 1355 self._items.remove(item)
1356
class Clans(object):
    """
    Class for holding and manipulating data from one CLANS file.
    Initialization is always done as empty clustermap with default parameters.
    """

    def __init__(self):
        self._filename = None

        self._params = ClansParams()

        self._rotmtx = None
        self.set_default_rotmtx()

        self._hsp_att_mode = "hsp"

        self._entries = ClansEntryCollection()
        self._seqgroups = ClansSeqgroupCollection()

        self._idx = None
        '''Index dict for fast access to entry positions'''
        # Cleared by every operation that changes the entries or their
        # order; L{_update_index} sets it again after rebuilding the index.
        self._has_good_index = False

    def __repr__(self):
        return 'Clans object: {0} sequences; {1} seqgroups'.format(
            len(self), len(self.seqgroups))

    __str__ = __repr__

    def __len__(self):
        return len(self.entries)

    def __getitem__(self, index):
        return self.entries[index]

    def __setitem__(self, index, data):
        self.entries[index] = data
        self._has_good_index = False

    @property
    def filename(self):
        """
        file from which the data was parsed

        @rtype: str or None
        """
        return self._filename

    @property
    def params(self):
        """
        L{ClansParams} that contains the parameters set for this L{Clans}
        instance.

        @rtype: L{ClansParams}
        """
        return self._params

    @property
    def rotmtx(self):
        """
        3x3 rotation matrix that indicates the rotation state of the clustermap

        raises ValueError if rotation matrix shape is not 3x3

        @rtype: numpy.array
        """
        return self._rotmtx

    @rotmtx.setter
    def rotmtx(self, value):
        if value.shape != (3, 3):
            raise ValueError('rotation matrix needs to be a 3x3 numpy array')
        self._rotmtx = value

    @property
    def entries(self):
        """
        collection of the clustermap's L{ClansEntry}s.

        @rtype: L{ClansEntryCollection}
        """
        return self._entries

    @property
    def seqgroups(self):
        """
        collection of the L{ClansSeqgroup}s defined in the clustermap.

        @rtype: L{ClansSeqgroupCollection}
        """
        return self._seqgroups

    def set_default_rotmtx(self):
        """
        Resets the rotation matrix (rotmtx) to no rotation.
        """
        self.rotmtx = eye(3)

    def _update_index(self):
        """
        Creates an index of L{ClansEntry}s to their position in the L{Clans}
        instance.

        The index is used to allow for fast access via L{ClansEntry.get_id} and
        was introduced to get a better L{Clans}.write() performance, which
        suffered from excessive entry.get_id() calls during HSP block generation
        (see L{ClansFileWriter.add_hsp_block}).

        @raises DuplicateEntryError: if two entries have the same name, sequence, and coordinates
        """
        unique_ids = [e._get_unique_id() for e in self]

        # The scan below is quadratic, but it only runs on the error path.
        if len(unique_ids) != len(set(unique_ids)):
            for position, unique_id in enumerate(unique_ids):
                if unique_ids.count(unique_id) != 1:
                    raise DuplicateEntryError(
                        '{0} is contained multiple times in this Clans instance'.format(
                            repr(self.entries[position])))

        self._idx = dict(zip(unique_ids, range(len(self))))
        self._has_good_index = True

    def initialize(self):
        """Initializes the coordinates of all entries with random numbers in [-1, 1]."""
        for entry in self:
            entry.initialize_coordinates()

    def sort(self, key=None):
        """
        Sorts the L{ClansEntry}s by their name or by a custom key function.

        @param key: a custom key function; None sorts by entry name
        @type key: function
        """
        self._entries._sort(key)

        self._has_good_index = False

    def add_group(self, group, members=None):
        """
        Adds a new group.

        The group is handed to the seqgroup collection's C{_append_item};
        any type validation of C{group} happens there.

        @param group: the new group
        @type group: L{ClansSeqgroup} instance

        @param members: L{ClansEntry} instances to be in the new group
        @type members: list
        """
        self.seqgroups._append_item(group)

        if members is not None:
            for member in members:
                group.add(member)

    def remove_group(self, group):
        """
        Removes a group and detaches all of its members from it.

        @param group: the group to be removed
        @type group: L{ClansSeqgroup} instance
        """
        self.seqgroups._remove_item(group)

        # iterate over a copy: group.remove() shrinks group.members
        for member in group.members[:]:
            group.remove(member)

    def append_groups_from(self, other):
        '''
        Append the L{ClansSeqgroup}-s of C{other} that contain at least one entry
        that corresponds to an entry in this instance. Entries are compared by
        their name only! Groups without any matching members in this instance are
        not created in the local instance.

        @param other: the source of the new group definitions
        @type other: L{Clans} instance

        @raise DuplicateEntryNameError: if a member name of C{other} matches
                                        several entries of this instance
        '''

        for group in other.seqgroups:
            new_group = ClansSeqgroup(name=group.name,
                                      type=group.type,
                                      size=group.size,
                                      hide=group.hide,
                                      color=group.color)

            for member in group.members:
                try:
                    new_member = self.get_entry(member.name, pedantic=True)

                except ValueError:  # no entry with this name found
                    continue

                except DuplicateEntryNameError:
                    raise DuplicateEntryNameError('multiple entries with identical name: {0}'.format(member.name))

                new_group.add(new_member)

            if len(new_group.members) > 0:
                self.add_group(new_group)

    def add_entry(self, entry):
        """
        Adds an new entry and makes this instance its parent.

        @param entry: the new entry
        @type entry: L{ClansEntry} instance

        @raise ValueError: if C{entry} is no L{ClansEntry} instance
        """
        if not isinstance(entry, ClansEntry):
            raise ValueError('entries need to be L{ClansEntry} instances')

        self.entries._append_item(entry)
        entry._parent = self

        self._has_good_index = False

    def remove_entry_by_name(self, entry_name):
        """
        Removes an entry fetched by its name.

        The lookup is pedantic, see L{Clans.get_entry} for the exceptions
        raised when the name is unknown or ambiguous.

        @param entry_name: name of the entry that shall be removed
        @type entry_name: string
        """
        entry = self.get_entry(entry_name, True)

        self.remove_entry(entry)

    def remove_entry(self, entry):
        """
        Removes an entry together with all of its HSPs and group
        memberships. Groups that are empty afterwards are removed as well.

        @param entry: the entry that shall be removed
        @type entry: L{ClansEntry} instance
        """
        # remove_hsp() also deletes from entry.hsp, so iterate over a
        # snapshot of the keys; iterating the live dict raises RuntimeError
        # on Python 3.
        for other_entry in list(entry.hsp):
            other_entry.remove_hsp(entry)

        # same reasoning: g.remove() mutates entry.groups
        for g in list(entry.groups):
            g.remove(entry)

        remove_groups = [g for g in self.seqgroups if g.is_empty()]
        for g in remove_groups:
            self.seqgroups._remove_item(g)

        self.entries._remove_item(entry)
        self._has_good_index = False

    def get_entry(self, name, pedantic=True):
        """
        Checks if an entry with name C{name} exists and returns it.

        @param name: name of the sought entry
        @type name: str

        @param pedantic: If True, a DuplicateEntryNameError is raised if
                         multiple entries with name C{name} are found. If
                         False, returns the first one.
        @type pedantic: bool

        @raise ValueError: if no entry with name C{name} is found
        @raise DuplicateEntryNameError: if multiple entries with name C{name}
        are found and C{pedantic == True}

        @rtype: L{ClansEntry}
        @return: entry with name C{name}
        """

        hits = [e for e in self.entries if e.name == name]

        if not hits:
            raise ValueError('ClansEntry {0} does not exist.'.format(name))

        if len(hits) > 1 and pedantic:
            raise DuplicateEntryNameError(
                'multiple entries have name \'{0}\''.format(name))

        return hits[0]

    def restrict_to_max_pvalue(self, cutoff):
        """
        Removes all L{ClansEntry}s that have no HSP with a value below
        C{cutoff}, i.e. entries without any HSP and entries whose smallest
        HSP value is >= C{cutoff}.

        Removal is repeated until a pass removes nothing, because removing
        an entry also removes its HSPs from the remaining entries.

        @param cutoff: the cutoff
        @type cutoff: float

        @rtype: list
        @return: all removed L{ClansEntry}s
        """
        removed_entries = []  # all removed entries go here

        ## loop to hit entries that have no HSPs left after the previous round
        while True:
            remove_us = [entry for entry in self.entries
                         if len(entry.hsp) == 0
                         or min(entry.hsp.values()) >= cutoff]

            if len(remove_us) == 0:
                break

            removed_entries.extend(remove_us)

            for e in remove_us:
                if e in self:
                    self.remove_entry(e)

        return removed_entries

    def restrict(self, keep_names):
        """
        Removes all entries whose name is not in keep_names

        @param keep_names: names of entries that shall be kept; tested with
                           C{in} once per entry, so it should be a real
                           container rather than a one-shot iterator
        @type keep_names: iterable
        """
        # collect first, remove afterwards: never mutate while iterating
        doomed = [e for e in self.entries if e.name not in keep_names]

        for entry in doomed:
            self.remove_entry(entry)

    def write(self, filename):
        """
        writes the L{Clans} instance to a file in CLANS format

        @param filename: the target file's name
        @type filename: str
        """

        with open(filename, 'w') as stream:
            writer = ClansFileWriter(stream)
            writer.serialize(self)
1685
class ClansEntry(object):
    """
    Class holding the data of one CLANS sequence entry.

    @param name: the entry name
    @type name: str

    @param seq: the entry's amino acid sequence
    @type seq: str

    @param coords: coordinates in 3D space; random coordinates are
                   generated if None
    @type coords: iterable with 3 items

    @param parent: parent of this entry
    @type parent: L{Clans} instance
    """

    def __init__(self, name=None, seq='', coords=None, parent=None):
        self._name = name
        self._seq = seq
        self._parent = parent

        self._groups = []
        self._hsp = {}

        # Explicitly passed coordinates are stored as given; only missing
        # ones are generated.
        if coords is not None:
            self._coords = coords
        else:
            self.initialize_coordinates()

    def __repr__(self):
        position = self.coords
        if position is None:
            coords_string = 'NoCoordsSet'
        else:
            coords_string = '({0:.2f}, {1:.2f}, {2:.2f})'.format(
                *tuple(position))

        if len(self.groups) > 0:
            groups = 'groups: {0}'.format(
                ', '.join([g.name for g in self.groups]))
        else:
            groups = 'not in a group'

        description = '; '.join((coords_string, groups))
        return 'ClansEntry "{0}": {1} '.format(self.name, description)

    @property
    def name(self):
        """
        name of the entry

        raises ValueError if set to a value that is not a string

        @rtype: string
        """
        return self._name

    @name.setter
    def name(self, value):
        if isinstance(value, csb.core.string):
            self._name = value
            return

        message = 'name cannot be {0} (accepted values: strings)'
        raise ValueError(message.format(value))

    @property
    def seq(self):
        """
        protein sequence of the entry

        raises ValueError if set to a value that is not a string

        @rtype: string
        """
        return self._seq

    @seq.setter
    def seq(self, value):
        if isinstance(value, csb.core.string):
            self._seq = value
            return

        message = 'seq cannot be {0} (accepted values: strings)'
        raise ValueError(message.format(value))

    @property
    def coords(self):
        """
        entry coordinates in 3D space

        raises ValueError if set to a value whose length is not 3

        @rtype: the 3-item sequence that was stored
        """
        return self._coords

    @coords.setter
    def coords(self, value):
        if len(value) == 3:
            self._coords = value
            return

        message = 'coords cannot be {0} (accepted values: iteratables with 3 items)'
        raise ValueError(message.format(value))

    @property
    def parent(self):
        """
        L{Clans} instance that parents this L{ClansEntry}

        @rtype: L{Clans}
        """
        return self._parent

    @property
    def groups(self):
        """
        L{ClansSeqgroup}s that contain the entry

        @rtype: list
        """
        return self._groups

    @property
    def hsp(self):
        """
        connections of this entry to other L{ClansEntry}s; maps the other
        entry to the HSP value

        @rtype: dict
        """
        return self._hsp

    def get_id(self):
        """
        Returns the id of the current entry, i.e. the value stored for it
        in the index of its parent.

        Note: the parent's index is rebuilt via L{Clans._update_index}
        whenever it is marked as outdated, which makes such a call appear
        slower than successive calls.

        @return: the entry's id, or -1 if the entry has no parent
        """
        owner = self.parent

        if owner is None:
            return -1

        if not owner._has_good_index:
            owner._update_index()

        return owner._idx[self._get_unique_id()]

    def _get_unique_id(self):
        """
        Returns a >>more or less<< unique ID (however this is not guaranteed to be
        really unique) consisting of the name, sequence, and coordinates of the entry.
        If two entries have the same 'unique' id, L{Clans._update_index} will raise a
        DuplicateEntryError.

        @rtype: str
        @return: a more or less unique id
        """
        template = '{0.name}<###>{0.seq}<###>{0.coords}'
        return template.format(self)

    def initialize_coordinates(self):
        """Sets the coordinates to random numbers in [-1, 1]"""
        # random.random(3) draws three values from [0, 1); rescale them
        unit_draw = random.random(3)
        self.coords = unit_draw * 2 - 1

    def add_hsp(self, other, value):
        """
        Creates an HSP between self and other with the given value; the
        value is stored on both entries.

        @param other: the other entry
        @type other: L{ClansEntry} instance

        @param value: the value of the HSP
        @type value: float
        """
        for source, target in ((self, other), (other, self)):
            source.hsp[target] = value

    def remove_hsp(self, other):
        """
        Removes the HSP between C{self} and C{other} from both entries; if
        none exists, does nothing.

        @param other: the other entry
        @type other: L{ClansEntry} instance
        """
        # pop with a default tolerates a missing key on either side
        self.hsp.pop(other, None)
        other.hsp.pop(self, None)

    def output_string_seq(self):
        """
        Creates the CLANS <seq> block format representation of the entry.

        @rtype: str
        @return: the entry's representation in CLANS <seq> block format
        """
        header = '>{0}\n'.format(self.name)
        return header + '{0}\n'.format(self.seq)

    def output_string_pos(self):
        """
        Create the CLANS <pos> block format representation of the entry.

        @rtype: str
        @return: the entry's representation in CLANS <pos> block format
        """
        fields = [self.get_id()]
        fields.extend(self.coords)

        return '{0} {1:.8f} {2:.8f} {3:.8f}'.format(*fields)

    def output_string_hsp(self):
        """
        Creates the CLANS <hsp> block format representation of the entry,
        one line per HSP of this entry.

        @rtype: str
        @return: the entry's representation in CLANS <hsp> block format
        """
        lines = []
        for other, value in self.hsp.items():
            lines.append('{0} {1}:{2:.8f}'.format(
                self.get_id(), other.get_id(), value))

        return '\n'.join(lines)
1911
class ClansSeqgroup(object):
    """
    Class holding the data of one CLANS group (seqgroup).

    @kwparam name: name of the seqgroup; default: 'NO NAME'
    @type name: string

    @kwparam type: symbol used to represent the seqgroup in the graphical
                   output; default: 0
    @type type: int

    @kwparam size: size of the symbol used to represent the seqgroup in the
                   graphical output; default: 4
    @type size: int

    @kwparam hide: if True, the seqgroup's symbols in the graphical output are
                   not drawn; default: False
    @type hide: bool

    @kwparam color: color of the seqgroup; default: (255, 255, 255)
    @type color: L{Color}, string formatted like 'x;y;z', or 3-item iterable

    @kwparam members: list of members of this seqgroup
    @type members: list
    """

    def __init__(self, **kw):
        # Each attribute is first created as None and then assigned through
        # its property, so every initial value passes the setter's checks.
        # The order matters: it decides which invalid argument is reported
        # first.
        defaults = (('name', 'NO NAME'),
                    ('type', 0),
                    ('size', 4),
                    ('hide', False),
                    ('color', (255, 255, 255)))

        for attribute, default in defaults:
            setattr(self, '_' + attribute, None)
            setattr(self, attribute, kw.pop(attribute, default))

        self._members = []
        if 'members' in kw:
            for member in kw['members']:
                self.add(member)

    def __repr__(self):
        template = ('ClansSeqgroup {0.name}: type: {0.type}; size: {0.size};'
                    ' hide: {0.hide}; color: {1}; #members: {2}')

        return template.format(
            self, self.color.to_clans_color(), len(self.members))

    def __len__(self):
        return len(self.members)

    @property
    def name(self):
        """
        name of the seqgroup

        raises ValueError if set to a value that is no string

        @rtype: string
        """
        return self._name

    @name.setter
    def name(self, value):
        if isinstance(value, csb.core.string):
            self._name = value
            return

        raise ValueError('name must be a string')

    @property
    def type(self):
        """
        symbol used to represent the seqgroup in the graphical output

        The value is converted with C{int()} on assignment; raises
        ValueError if it is not castable to int.

        @rtype: int
        """
        return self._type

    @type.setter
    def type(self, value):
        converted = int(value)
        self._type = converted

    @property
    def size(self):
        """
        size of the symbol used to represent the seqgroup in the graphical
        output

        The value is converted with C{int()} on assignment; raises
        ValueError if it is not castable to int.

        @rtype: int
        """
        return self._size

    @size.setter
    def size(self, value):
        converted = int(value)
        self._size = converted

    @property
    def hide(self):
        """
        if True, the seqgroup's symbols in the graphical output are not drawn

        raises ValueError if set to a value that is no bool

        @rtype: bool
        """
        return self._hide

    @hide.setter
    def hide(self, value):
        if isinstance(value, bool):
            self._hide = value
            return

        message = 'hide cannot be {0} (accepted values: True/False)'
        raise ValueError(message.format(value))

    @property
    def color(self):
        """
        color of the seqgroup

        Accepts a L{Color} instance, a string that L{Color.from_string}
        can parse, or a 3-item iterable whose items are castable to int.
        Raises ValueError for a sized value of any other length.

        @rtype: L{Color}
        """
        return self._color

    @color.setter
    def color(self, value, separator=';'):
        # NOTE: separator is not used; a property assignment only ever
        # passes value.
        if isinstance(value, Color):
            # existing Color instances are stored as they are
            parsed = value
        elif isinstance(value, csb.core.string):
            ## parse color from string in format 'r;g;b'
            parsed = Color.from_string(value)
        elif len(value) == 3:
            # 3-item iterables like (3, 5, 6)
            parsed = Color(*[int(channel) for channel in value])
        else:
            raise ValueError('cannot parse color from \'{0}\''.format(value))

        self._color = parsed

    @property
    def members(self):
        """
        the members of this seqgroup

        @rtype: list
        """
        return self._members

    def is_empty(self):
        """
        Checks if the group contains entries.

        @rtype: bool
        @return: True if the group contains no entries, else False.
        """
        return not len(self) > 0

    def add(self, new_member):
        """
        Adds entry C{new_member} to this L{ClansSeqgroup} and records the
        group in the entry's list of groups.

        @Note: L{ClansEntry}-s added using this method that are not part of the
        main L{Clans} instance need to be added to the L{Clans} instance manually.

        @param new_member: the member that shall be added to this
                           L{ClansSeqgroup}
        @type new_member: L{ClansEntry} instance

        @raise TypeError: if C{new_member} is no L{ClansEntry} instance
        @raise ValueError: if C{new_member} is already contained in this
                           L{ClansSeqgroup}
        """
        if not isinstance(new_member, ClansEntry):
            raise TypeError('only ClansEntry instances can be added as group members')

        if new_member in self.members:
            message = 'entry {0.name} is already contained in this seqgroup'
            raise ValueError(message.format(new_member))

        # keep both directions of the membership in sync
        self.members.append(new_member)
        new_member.groups.append(self)

    def remove(self, member):
        """
        Removes L{ClansEntry} C{member} from this group and the group from
        the entry's list of groups.

        @param member: the member to be removed
        @type member: a L{ClansEntry} instance

        @raise TypeError: if C{member} is no L{ClansEntry} instance
        @raise ValueError: if C{member} is not part of this L{ClansSeqgroup}
        """
        if not isinstance(member, ClansEntry):
            raise TypeError('argument must be a ClansEntry instance')

        if member not in self.members:
            message = '"{0.name}" is not a member of this seqgroup'
            raise ValueError(message.format(member))

        # keep both directions of the membership in sync
        self.members.remove(member)
        member.groups.remove(self)

    def output_string(self):
        """
        Creates the CLANS <seqgroup> block format representation of the
        group. The members are listed by their ids in ascending order.

        @rtype: str
        @return: the group's representation in CLANS <seqgroup> block format
        """
        member_ids = sorted(m.get_id() for m in self.members)
        numbers = ';'.join(str(member_id) for member_id in member_ids) + ';'

        template = ('name={0.name}\ntype={0.type}\nsize={0.size}\nhide={1}'
                    '\ncolor={2}\nnumbers={3}')

        return template.format(
            self, int(self.hide), self.color.to_clans_color(), numbers)
2144