Mercurial > repos > devteam > fasta_concatenate_by_species
annotate utils/maf_utilities.py @ 3:25b8736c627a draft default tip
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit 34a6c9f94a5722bb7d2f887618aafa410a770e91"
| author | devteam | 
|---|---|
| date | Mon, 02 Mar 2020 06:47:07 -0500 | 
| parents | 16df616b39e5 | 
| children | 
| rev | line source | 
|---|---|
| 0 | 1 #!/usr/bin/env python | 
| 2 """ | |
| 3 Provides wrappers and utilities for working with MAF files and alignments. | |
| 4 """ | |
| 2 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 5 # Dan Blankenberg | 
| 0 | 6 import bx.align.maf | 
| 7 import bx.intervals | |
| 8 import bx.interval_index_file | |
| 2 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 9 import sys | 
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 10 import os | 
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 11 import tempfile | 
| 0 | 12 import logging | 
| 13 from copy import deepcopy | |
| 14 | |
| 2 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 15 try: | 
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 16 maketrans = str.maketrans | 
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 17 except AttributeError: | 
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 18 from string import maketrans | 
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 19 | 
log = logging.getLogger(__name__)


# Gap symbol(s); presumably matches the '-' padding used by the alignment
# classes in this module -- TODO confirm against callers.
GAP_CHARS = ['-']
# Separator between the species part and the rest of a component src string.
SRC_SPLIT_CHAR = '.'


def src_split(src):
    """
    Split a src string into a ``(spec, chrom)`` pair.

    The string is split on the first ``SRC_SPLIT_CHAR`` only, so any further
    separators stay in ``chrom``.  When ``src`` contains no separator at all,
    the whole string is returned for both ``spec`` and ``chrom``.
    """
    spec, separator, chrom = src.partition(SRC_SPLIT_CHAR)
    if not separator:
        # No separator present: the src is used as both species and chrom.
        chrom = spec
    return spec, chrom
| 35 | |
| 2 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 36 | 
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
def src_merge(spec, chrom, contig=None):
    """
    Build a src string from its parts via ``bx.align.maf.src_merge``.

    If either ``spec`` or ``chrom`` is ``None``, both are set to whichever of
    the two is truthy before delegating (both stay ``None`` when neither is).
    ``contig`` is passed through unchanged.
    """
    if spec is None or chrom is None:
        spec = chrom = spec or chrom
    return bx.align.maf.src_merge(spec, chrom, contig)
| 0 | 41 | 
| 2 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 42 | 
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
def get_species_in_block(block):
    """
    Return the distinct species found in ``block.components``.

    The species of each component is the first element returned by
    ``src_split(component.src)``.  The result is a list in order of first
    appearance, without duplicates.
    """
    species = []
    for component in block.components:
        spec, _chrom = src_split(component.src)
        if spec not in species:
            species.append(spec)
    return species
| 50 | |
| 2 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 51 | 
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
def tool_fail(msg="Unknown Error"):
    """
    Abort the process with a fatal error message.

    Calls ``sys.exit`` with ``"Fatal Error: <msg>"``, which raises
    ``SystemExit`` carrying that string.
    """
    sys.exit("Fatal Error: %s" % msg)
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 55 | 
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 56 # an object corresponding to a reference layered alignment | 
| 0 | 57 | 
| 2 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 58 | 
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
class RegionAlignment(object):
    """
    Alignment of fixed width ``size`` holding one sequence per species.

    Each species' sequence lives in its own temporary file (``self.sequences``
    maps species name -> open file) and starts out as ``size`` gap characters
    ('-').
    """

    DNA_COMPLEMENT = maketrans("ACGTacgt", "TGCAtgca")

    def __init__(self, size, species=None):
        """
        Create the alignment and register the initial species.

        ``species`` may be a single name or a list of names; ``None`` (the
        default) registers no species.
        """
        self.size = size
        self.sequences = {}
        if species is None:
            species = []
        elif not isinstance(species, list):
            species = [species]
        for spec in species:
            self.add_species(spec)

    # add a species to the alignment
    def add_species(self, species):
        """Register ``species`` with a fresh all-gap sequence of ``self.size``."""
        # Text mode is required: the default binary mode rejects str writes
        # on Python 3.
        handle = tempfile.TemporaryFile(mode="w+")
        handle.write("-" * self.size)
        self.sequences[species] = handle

    # returns the names for species found in alignment, skipping names as requested
    def get_species_names(self, skip=None):
        """
        Return a list of the registered species names, omitting ``skip``.

        ``skip`` may be a single name or a list of names; names that are not
        registered are ignored.
        """
        if skip is None:
            skip = []
        elif not isinstance(skip, list):
            skip = [skip]
        # Build a real list: a dict view cannot have items removed from it.
        return [name for name in self.sequences if name not in skip]

    # returns the sequence for a species
    def get_sequence(self, species):
        """Return the full sequence stored for ``species`` (KeyError if unknown)."""
        handle = self.sequences[species]
        handle.seek(0)
        return handle.read()

    # returns the reverse complement of the sequence for a species
    def get_sequence_reverse_complement(self, species):
        """
        Return the sequence for ``species`` complemented via ``DNA_COMPLEMENT``
        and reversed.  Characters outside ACGTacgt are left unchanged.
        """
        return self.get_sequence(species).translate(self.DNA_COMPLEMENT)[::-1]

    # sets a position for a species
    def set_position(self, index, species, base):
        """
        Set the single character at ``index`` for ``species``.

        Raises ``Exception`` if ``base`` is not exactly one character long;
        otherwise behaves like ``set_range``.
        """
        if len(base) != 1:
            raise Exception("A genomic position can only have a length of 1.")
        return self.set_range(index, species, base)

    # sets a range for a species
    def set_range(self, index, species, bases):
        """
        Overwrite the sequence for ``species`` with ``bases`` starting at ``index``.

        An unknown ``species`` is registered first.  Raises ``Exception`` when
        ``index`` is outside ``0 .. size - 1`` or ``bases`` is empty.  No check
        is made that ``index + len(bases)`` stays within ``size``.
        """
        if index >= self.size or index < 0:
            raise Exception("Your index (%i) is out of range (0 - %i)." % (index, self.size - 1))
        if len(bases) == 0:
            raise Exception("A set of genomic positions can only have a positive length.")
        if species not in self.sequences:
            self.add_species(species)
        handle = self.sequences[species]
        handle.seek(index)
        handle.write(bases)

    # Flush temp file of specified species, or all species
    def flush(self, species=None):
        """
        Flush the temporary file(s) backing ``species``.

        ``species`` may be a single name, a list of names, or ``None`` for all
        registered species.
        """
        if species is None:
            species = list(self.sequences)
        elif not isinstance(species, list):
            species = [species]
        for spec in species:
            self.sequences[spec].flush()
| 125 | |
| 2 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 126 | 
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
class GenomicRegionAlignment(RegionAlignment):
    """
    A ``RegionAlignment`` that also records the ``start`` and ``end`` it covers.

    The alignment width is ``end - start``.
    """

    def __init__(self, start, end, species=None):
        """
        ``species`` may be a single name or a list of names; ``None`` (the
        default) registers no species.
        """
        # Normalise None here so the parent never sees it as a species name.
        RegionAlignment.__init__(self, end - start, [] if species is None else species)
        self.start = start
        self.end = end
| 133 | |
| 134 | |
| 2 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
class SplicedAlignment(object):
    """
    An alignment made of several exons, each a ``GenomicRegionAlignment``.

    Sequences are reported as the concatenation of the per-exon sequences in
    the order the exons were given.
    """

    DNA_COMPLEMENT = maketrans("ACGTacgt", "TGCAtgca")

    def __init__(self, exon_starts, exon_ends, species=None):
        """
        Create one ``GenomicRegionAlignment`` per (start, end) pair.

        ``exon_starts`` and ``exon_ends`` may each be a single value or a list;
        both must have the same length.  ``species`` is passed to every exon;
        ``None`` (the default) registers no species.
        """
        if species is None:
            species = []
        if not isinstance(exon_starts, list):
            exon_starts = [exon_starts]
        if not isinstance(exon_ends, list):
            exon_ends = [exon_ends]
        assert len(exon_starts) == len(exon_ends), "The number of starts does not match the number of ends."
        self.exons = [
            GenomicRegionAlignment(exon_start, exon_end, species)
            for exon_start, exon_end in zip(exon_starts, exon_ends)
        ]

    # returns the names for species found in alignment, skipping names as requested
    def get_species_names(self, skip=None):
        """
        Return the distinct species names found across all exons, in order of
        first appearance, omitting ``skip`` (a single name or a list of names).
        """
        if skip is None:
            skip = []
        elif not isinstance(skip, list):
            skip = [skip]
        names = []
        for exon in self.exons:
            for name in exon.get_species_names(skip=skip):
                if name not in names:
                    names.append(name)
        return names

    # returns the sequence for a species
    def get_sequence(self, species):
        """
        Return the concatenated sequence for ``species`` over all exons.

        Exons that do not contain ``species`` contribute ``exon.size`` gap
        characters ('-').
        """
        parts = []
        for exon in self.exons:
            if species in exon.get_species_names():
                parts.append(exon.get_sequence(species))
            else:
                parts.append("-" * exon.size)
        return "".join(parts)

    # returns the reverse complement of the sequence for a species
    def get_sequence_reverse_complement(self, species):
        """
        Return the concatenated sequence for ``species`` complemented via
        ``DNA_COMPLEMENT`` and reversed.
        """
        return self.get_sequence(species).translate(self.DNA_COMPLEMENT)[::-1]

    # Start and end of coding region
    @property
    def start(self):
        """``start`` of the first exon (IndexError when there are no exons)."""
        return self.exons[0].start

    @property
    def end(self):
        """``end`` of the last exon (IndexError when there are no exons)."""
        return self.exons[-1].end
| 185 | |
| 2 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 186 # Open a MAF index using a UID | 
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 187 | 
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 188 | 
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
def maf_index_by_uid(maf_uid, index_location_file):
    """Open a MAF index using a UID.

    Scans ``index_location_file`` line by line. Lines starting with ``#`` are
    skipped; every other line is split on tabs. When the second field equals
    ``maf_uid``, the fifth field is read as a comma-separated list of MAF
    files and opened as a ``bx.align.maf.MultiIndexed``.

    Returns the opened index, or None when no line produced one. Any
    exception raised while handling a line (too few fields, or a failure to
    open the listed files) is swallowed and scanning moves on to the next
    line.
    """
    # The context manager closes the location file on every exit path,
    # including the early return below.
    with open(index_location_file) as location_file:
        for line in location_file:
            try:
                # read each line, if not enough fields, go to next line
                if line[0:1] == "#":
                    continue
                fields = line.split('\t')
                if maf_uid == fields[1]:
                    try:
                        maf_files = fields[4].replace("\n", "").replace("\r", "").split(",")
                        return bx.align.maf.MultiIndexed(maf_files, keep_open=True, parse_e_rows=False)
                    except Exception as e:
                        raise Exception('MAF UID (%s) found, but configuration appears to be malformed: %s' % (maf_uid, e))
            except Exception:
                # NOTE(review): this handler also catches the "malformed"
                # exception raised just above, so that message never reaches
                # the caller. Kept as-is because callers may rely on getting
                # None here; confirm before narrowing.
                pass
    return None
| 205 | |
| 2 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 206 # Return ( index, temp_index_filename ) for a user MAF: open the existing index if available, otherwise build one and return it. temp_index_filename is None when no tempfile is created. | 
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 207 | 
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 208 | 
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
def open_or_build_maf_index(maf_file, index_filename, species=None):
    """Open ``maf_file`` with an existing index, or fall back to building one.

    Returns ``(index, None)`` when ``index_filename`` could be used, since no
    temporary file was created. If opening raises, returns whatever
    ``build_maf_index(maf_file, species=species)`` returns.
    """
    try:
        existing_index = bx.align.maf.Indexed(
            maf_file,
            index_filename=index_filename,
            keep_open=True,
            parse_e_rows=False,
        )
    except Exception:
        # The supplied index is unusable; build a fresh one instead.
        return build_maf_index(maf_file, species=species)
    return (existing_index, None)
| 0 | 214 | 
| 2 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 215 | 
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
def build_maf_index_species_chromosomes(filename, index_species=None):
    """Scan a MAF file, building interval indexes and a species summary.

    Each component's ``src`` is split on the first ``.`` into a species name
    and an optional chromosome name. Components are added to the interval
    index when ``index_species`` is None or contains their species, and only
    when their forward-strand coordinates are integers spanning a positive
    length.

    Returns ``(indexes, species, species_chromosomes, blocks)``:
    ``indexes`` is a ``bx.interval_index_file.Indexes``, ``species`` lists
    species names in first-seen order, ``species_chromosomes`` maps each
    species to its chromosome names in first-seen order, and ``blocks`` is
    the number of blocks read. If anything raises while reading, the failure
    is logged at debug level and ``(None, [], {}, 0)`` is returned.
    """
    species = []
    species_chromosomes = {}
    indexes = bx.interval_index_file.Indexes()
    blocks = 0
    try:
        # Open inside the try so an unreadable file is reported like any
        # other bad MAF; the with-block closes the handle on every exit path.
        with open(filename) as maf_handle:
            maf_reader = bx.align.maf.Reader(maf_handle)
            while True:
                # Offset of the block about to be read; stored in the index.
                pos = maf_reader.file.tell()
                block = maf_reader.next()
                if block is None:
                    break
                blocks += 1
                for c in block.components:
                    spec = c.src
                    chrom = None
                    if "." in spec:
                        spec, chrom = spec.split(".", 1)
                    # species and species_chromosomes are always updated
                    # together, so the dict gives the same membership answer
                    # as the list without a linear scan.
                    if spec not in species_chromosomes:
                        species.append(spec)
                        species_chromosomes[spec] = []
                    if chrom and chrom not in species_chromosomes[spec]:
                        species_chromosomes[spec].append(chrom)
                    if index_species is None or spec in index_species:
                        forward_strand_start = c.forward_strand_start
                        forward_strand_end = c.forward_strand_end
                        try:
                            forward_strand_start = int(forward_strand_start)
                            forward_strand_end = int(forward_strand_end)
                        except ValueError:
                            # start and end are not integers, can't add component to index, goto next component
                            # this likely only occurs when parse_e_rows is True?
                            continue
                        if forward_strand_end > forward_strand_start:
                            # require positive length; i.e. certain lines have start = end = 0 and cannot be indexed
                            indexes.add(c.src, forward_strand_start, forward_strand_end, pos, max=c.src_size)
    except Exception as e:
        # most likely a bad MAF
        log.debug('Building MAF index on %s failed: %s' % (filename, e))
        return (None, [], {}, 0)
    return (indexes, species, species_chromosomes, blocks)
| 0 | 257 | 
| 2 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 258 # builds and returns ( index, index_filename ) for specified maf_file | 
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 259 | 
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 260 | 
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
def build_maf_index(maf_file, species=None):
    """Build a temporary index for ``maf_file`` and open the MAF with it.

    Returns ``(index, index_filename)``, where ``index`` is a
    ``bx.align.maf.Indexed`` and ``index_filename`` is the path of the
    temporary index file created with ``tempfile.mkstemp``. This function
    does not delete that file. Returns ``(None, None)`` when
    ``build_maf_index_species_chromosomes`` could not build the indexes.
    """
    indexes, _species, _species_chromosomes, _blocks = build_maf_index_species_chromosomes(maf_file, species)
    if indexes is None:
        return (None, None)
    fd, index_filename = tempfile.mkstemp()
    # The index writer emits struct-packed binary data, so the file must be
    # opened in binary mode; text mode rejects bytes on Python 3. The
    # with-block also closes the descriptor if writing fails.
    with os.fdopen(fd, 'wb') as out:
        indexes.write(out)
    return (bx.align.maf.Indexed(maf_file, index_filename=index_filename, keep_open=True, parse_e_rows=False), index_filename)
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 270 | 
| 0 | 271 | 
| 2 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
def component_overlaps_region(c, region):
    """Return True when component ``c`` overlaps ``region``.

    The component's forward-strand start/end are compared with
    ``region.start``/``region.end``. Ranges that merely touch at a boundary
    do not count as overlapping. A ``None`` component never overlaps.
    """
    if c is None:
        return False
    comp_start = c.get_forward_strand_start()
    comp_end = c.get_forward_strand_end()
    lies_outside = region.start >= comp_end or region.end <= comp_start
    return not lies_outside
| 279 | |
| 2 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 280 | 
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
def chop_block_by_region(block, src, region, species=None, mincols=0):
    """Slice ``block`` to the columns where its ``src`` components overlap ``region``.

    Every component yielded by ``iter_components_by_src(block, src)`` that
    overlaps ``region`` and has text contributes a column range. The block is
    sliced from the smallest start column to the largest end column found.

    Returns the sliced block when its ``text_size`` exceeds ``mincols``, with
    the original score restored. When ``species`` is given, the block is also
    limited to those species and stripped of all-gap columns. Returns None
    when no column range was found or the slice is not longer than
    ``mincols``.
    """
    # This chopping method was designed to maintain consistency with how start/end padding gaps have been working in Galaxy thus far:
    #   behavior as seen when forcing blocks to be '+' relative to src sequence (ref) and using block.slice_by_component( ref, slice_start, slice_end )
    #   whether-or-not this is the 'correct' behavior is questionable, but this will at least maintain consistency
    # comments welcome
    slice_start = block.text_size  # max for the min()
    slice_end = 0  # min for the max()
    old_score = block.score  # save old score for later use
    # We no longer assume only one occurrence of src per block, so we need to check them all
    for c in iter_components_by_src(block, src):
        if component_overlaps_region(c, region):
            if c.text is not None:
                rev_strand = False
                if c.strand == "-":
                    # We want our coord_to_col coordinates to be returned from positive stranded component
                    rev_strand = True
                    c = c.reverse_complement()
                # Clamp the region to the component, then map both ends to alignment columns.
                start = max(region.start, c.start)
                end = min(region.end, c.end)
                start = c.coord_to_col(start)
                end = c.coord_to_col(end)
                if rev_strand:
                    # need to orient slice coordinates to the original block direction
                    slice_len = end - start
                    end = len(c.text) - start
                    start = end - slice_len
                # Widen the running slice so it covers every overlapping src component.
                slice_start = min(start, slice_start)
                slice_end = max(end, slice_end)

    # slice_start < slice_end only holds if at least one component contributed a non-empty range.
    if slice_start < slice_end:
        block = block.slice(slice_start, slice_end)
        if block.text_size > mincols:
            # restore old score, may not be accurate, but it is better than 0 for everything?
            block.score = old_score
            if species is not None:
                block = block.limit_to_species(species)
                block.remove_all_gap_columns()
            return block
    return None
| 320 | |
| 2 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 321 | 
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
def orient_block_by_region(block, src, region, force_strand=None):
    """Reverse-complement ``block`` when its ``src`` strand is not the wanted one.

    Collects the strand of every ``src`` component that overlaps ``region``.
    The wanted strand is ``force_strand`` when given, otherwise
    ``region.strand``. The block is reverse-complemented when the wanted
    strand is not among the collected strands; two same-src components on
    different strands therefore cause no reverse-complementing.

    Returns the (possibly reverse-complemented) block.
    """
    overlapping_strands = []
    for component in iter_components_by_src(block, src):
        if component_overlaps_region(component, region):
            overlapping_strands.append(component.strand)

    if force_strand is None:
        # Without overlapping components there is nothing to orient by.
        needs_flip = bool(overlapping_strands) and region.strand not in overlapping_strands
    else:
        # NOTE(review): with force_strand set, the block is flipped even when
        # no src component overlaps the region (empty strand list). This
        # mirrors the precedence of the original and/or expression; confirm
        # whether that asymmetry is intended.
        needs_flip = force_strand not in overlapping_strands

    if needs_flip:
        block = block.reverse_complement()
    return block
| 332 | |
| 2 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 333 | 
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
def get_oriented_chopped_blocks_for_region(index, src, region, species=None, mincols=0, force_strand=None):
    """Yield only the blocks from the index/offset-aware variant of this generator.

    Delegates to ``get_oriented_chopped_blocks_with_index_offset_for_region``
    with the same arguments and drops the index and offset from each item.
    """
    oriented = get_oriented_chopped_blocks_with_index_offset_for_region(index, src, region, species, mincols, force_strand)
    for oriented_block, _idx, _offset in oriented:
        yield oriented_block
| 2 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 337 | 
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 338 | 
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
def get_oriented_chopped_blocks_with_index_offset_for_region(index, src, region, species=None, mincols=0, force_strand=None):
    """Yield ``(block, idx, offset)`` with each chopped block oriented for ``region``.

    Items come from ``get_chopped_blocks_with_index_offset_for_region``; each
    block is passed through ``orient_block_by_region`` before being yielded
    with its unchanged index and offset.
    """
    chopped = get_chopped_blocks_with_index_offset_for_region(index, src, region, species, mincols)
    for chopped_block, idx, offset in chopped:
        oriented_block = orient_block_by_region(chopped_block, src, region, force_strand)
        yield oriented_block, idx, offset
| 0 | 342 | 
| 2 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 343 # split a block with multiple occurrences of src into one block per src | 
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 344 | 
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 345 | 
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
def iter_blocks_split_by_src(block, src):
    """Yield one new alignment per ``src`` component found in ``block``.

    Each yielded alignment copies the score, attributes and ``text_size`` of
    ``block``. It holds deep copies of that one ``src`` component plus every
    component whose ``src`` differs from ``src``.
    """
    for src_component in iter_components_by_src(block, src):
        split_block = bx.align.Alignment(score=block.score, attributes=deepcopy(block.attributes))
        split_block.text_size = block.text_size
        kept_components = [
            component
            for component in block.components
            if component == src_component or component.src != src
        ]
        for component in kept_components:
            # Components hold a reference to their alignment; copy them so the
            # originals keep pointing at the original block.
            split_block.add_component(deepcopy(component))
        yield split_block
| 354 | |
| 2 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 355 # split a block into multiple blocks with all combinations of a species appearing only once per block | 
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 356 | 
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 357 | 
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 358 def iter_blocks_split_by_species(block, species=None): | 
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 359 def __split_components_by_species(components_by_species, new_block): | 
| 0 | 360 if components_by_species: | 
| 2 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 361 # more species with components to add to this block | 
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 362 components_by_species = deepcopy(components_by_species) | 
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 363 spec_comps = components_by_species.pop(0) | 
| 0 | 364 for c in spec_comps: | 
| 2 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 365 newer_block = deepcopy(new_block) | 
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 366 newer_block.add_component(deepcopy(c)) | 
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 367 for value in __split_components_by_species(components_by_species, newer_block): | 
| 0 | 368 yield value | 
| 369 else: | |
| 2 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 370 # no more components to add, yield this block | 
| 0 | 371 yield new_block | 
| 372 | |
| 2 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 373 # divide components by species | 
| 0 | 374 spec_dict = {} | 
| 375 if not species: | |
| 376 species = [] | |
| 377 for c in block.components: | |
| 2 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 378 spec, chrom = src_split(c.src) | 
| 0 | 379 if spec not in spec_dict: | 
| 2 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 380 spec_dict[spec] = [] | 
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 381 species.append(spec) | 
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 382 spec_dict[spec].append(c) | 
| 0 | 383 else: | 
| 384 for spec in species: | |
| 2 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 385 spec_dict[spec] = [] | 
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 386 for c in iter_components_by_src_start(block, spec): | 
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 387 spec_dict[spec].append(c) | 
| 0 | 388 | 
| 2 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 389 empty_block = bx.align.Alignment(score=block.score, attributes=deepcopy(block.attributes)) # should we copy attributes? | 
| 0 | 390 empty_block.text_size = block.text_size | 
| 2 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 391 # call recursive function to split into each combo of spec/blocks | 
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 392 for value in __split_components_by_species(spec_dict.values(), empty_block): | 
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 393 sort_block_components_by_block(value, block) # restore original component order | 
| 0 | 394 yield value | 
| 395 | |
| 396 | |
| 2 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 397 # generator yielding only chopped and valid blocks for a specified region | 
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
def get_chopped_blocks_for_region(index, src, region, species=None, mincols=0):
    """
    Yield only the blocks produced by
    get_chopped_blocks_with_index_offset_for_region(), discarding the index
    and offset that accompany each block.

    All arguments are forwarded unchanged to that generator.
    """
    chopped_with_location = get_chopped_blocks_with_index_offset_for_region(index, src, region, species, mincols)
    for chopped_block, _idx, _offset in chopped_with_location:
        yield chopped_block
| 2 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 401 | 
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 402 | 
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
def get_chopped_blocks_with_index_offset_for_region(index, src, region, species=None, mincols=0):
    """
    Yield (block, idx, offset) for every indexed block overlapping ``region``
    on ``src`` that survives chopping.

    Each block returned by index.get_as_iterator_with_index_and_offset() is
    passed through chop_block_by_region(); blocks for which that call returns
    None are dropped.
    """
    overlapping = index.get_as_iterator_with_index_and_offset(src, region.start, region.end)
    for raw_block, idx, offset in overlapping:
        chopped = chop_block_by_region(raw_block, src, region, species, mincols)
        if chopped is None:
            # chopping rejected this block; nothing to report for it
            continue
        yield chopped, idx, offset
| 408 | |
| 2 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 409 # returns a filled region alignment for specified regions | 
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 410 | 
| 0 | 411 | 
| 2 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
def get_region_alignment(index, primary_species, chrom, start, end, strand='+', species=None, mincols=0, overwrite_with_gaps=True):
    """
    Build a RegionAlignment of length ``end - start`` and return it after it
    has been filled by fill_region_alignment().

    The alignment is created for ``species`` when that is given, otherwise
    for ``primary_species`` alone. The remaining arguments are forwarded to
    fill_region_alignment() unchanged.
    """
    alignment_species = primary_species if species is None else species
    alignment = RegionAlignment(end - start, alignment_species)
    return fill_region_alignment(alignment, index, primary_species, chrom, start, end, strand, species, mincols, overwrite_with_gaps)
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 418 | 
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 419 # reduces a block to only positions existing in the src provided | 
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 420 | 
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 421 | 
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
def reduce_block_by_primary_genome(block, species, chromosome, region_start):
    """
    Reduce the texts of ``block`` to the columns that are not gaps ('-') in
    the primary species.

    block -- alignment block; must provide ``components`` and
             ``get_component_by_src()``
    species -- name of the primary species
    chromosome -- chromosome of the primary species; "<species>.<chromosome>"
                  is the src used to look up the reference component
    region_start -- start coordinate the returned offset is relative to

    Returns a tuple ``(start_offset, species_texts)`` where ``start_offset``
    is the reference component's start minus ``region_start`` and
    ``species_texts`` maps the part of each component's src before the first
    '.' to its reduced text. If several components share that prefix, the
    last one in ``block.components`` wins.
    """
    src = "%s.%s" % (species, chromosome)
    ref = block.get_component_by_src(src)
    start_offset = ref.start - region_start
    species_texts = {}
    for c in block.components:
        species_texts[c.src.split('.')[0]] = c.text
    # Work out once which columns hold a base in the primary species, then
    # rebuild every text from those columns in a single pass; this avoids
    # popping each gap column out of every row one at a time.
    keep_columns = [i for i, char in enumerate(species_texts[species]) if char != '-']
    for spec, text in species_texts.items():
        species_texts[spec] = ''.join([text[i] for i in keep_columns])
    return (start_offset, species_texts)
| 0 | 439 | 
| 2 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 440 # fills a region alignment | 
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 441 | 
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 442 | 
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
def fill_region_alignment(alignment, index, primary_species, chrom, start, end, strand='+', species=None, mincols=0, overwrite_with_gaps=True):
    """
    Fill ``alignment`` from the indexed blocks overlapping
    ``<primary_species>.<chrom>`` between ``start`` and ``end``; return it.

    Blocks are processed in order of increasing score, so text taken from a
    higher-scoring block is written after text from a lower-scoring one.

    alignment -- object written to through set_range() / set_position()
    index -- provides get_as_iterator_with_index_and_offset()
    primary_species, chrom -- identify the reference src
    start, end -- bounds of the region on the reference
    strand -- stored on the region used to orient blocks
    species -- forwarded to chop_block_by_region()
    mincols -- forwarded to chop_block_by_region()
    overwrite_with_gaps -- when true each species' trimmed text is written
        with a single set_range() call, interior gap characters included;
        when false only characters not in GAP_CHARS are written, one
        position at a time
    """
    region = bx.intervals.Interval(start, end)
    region.chrom = chrom
    region.strand = strand
    primary_src = "%s.%s" % (primary_species, chrom)

    # Order blocks overlapping this position by score, lowest first.
    # list.sort() is stable, so blocks with equal scores stay in the order
    # the index returned them.
    blocks = []
    for block, idx, offset in index.get_as_iterator_with_index_and_offset(primary_src, start, end):
        blocks.append((float(block.score), idx, offset))
    blocks.sort(key=lambda entry: entry[0])

    # GAP_CHARS is treated as a set of single characters here
    gap_chars_str = ''.join(GAP_CHARS)
    # Loop through ordered blocks and layer by increasing score
    for _score, idx, offset in blocks:
        # need to handle each occurrence of sequence in block separately
        for block in iter_blocks_split_by_species(idx.get_at_offset(offset)):
            if not component_overlaps_region(block.get_component_by_src(primary_src), region):
                continue
            block = chop_block_by_region(block, primary_src, region, species, mincols)  # chop block
            if block is None:
                # chop_block_by_region() returns None for blocks it rejects;
                # skip them instead of failing on the calls below
                continue
            block = orient_block_by_region(block, primary_src, region)  # orient block
            start_offset, species_texts = reduce_block_by_primary_genome(block, primary_species, chrom, start)
            for spec, text in species_texts.items():
                # we should trim gaps from both sides, since these are not positions in this species genome (sequence)
                text = text.rstrip(gap_chars_str)
                trimmed = text.lstrip(gap_chars_str)
                if not trimmed:
                    continue
                # number of leading gap characters removed
                gap_offset = len(text) - len(trimmed)
                if overwrite_with_gaps:
                    alignment.set_range(start_offset + gap_offset, spec, trimmed)
                else:
                    for i, char in enumerate(trimmed):
                        if char not in GAP_CHARS:
                            alignment.set_position(start_offset + gap_offset + i, spec, char)
    return alignment
| 487 | |
| 2 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 488 # returns a filled spliced region alignment for specified region with start and end lists | 
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 489 | 
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 490 | 
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
def get_spliced_region_alignment(index, primary_species, chrom, starts, ends, strand='+', species=None, mincols=0, overwrite_with_gaps=True):
    """
    Build a SplicedAlignment from the ``starts`` / ``ends`` lists, fill each
    of its exons with fill_region_alignment(), and return it.

    The alignment is created for ``species`` when that is given, otherwise
    for a one-element list holding ``primary_species``.
    """
    alignment_species = [primary_species] if species is None else species
    alignment = SplicedAlignment(starts, ends, alignment_species)
    for exon in alignment.exons:
        # each exon is filled in place over its own start/end
        fill_region_alignment(exon, index, primary_species, chrom, exon.start, exon.end, strand, species, mincols, overwrite_with_gaps)
    return alignment
| 500 | |
| 2 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 501 # loop through string array, only return non-commented lines | 
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 502 | 
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 503 | 
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
def line_enumerator(lines, comment_start='#'):
    """
    Yield ``(number, line)`` for every line that does not start with
    ``comment_start``.

    Numbering starts at 1 and counts only the lines that are yielded.
    """
    uncommented = (entry for entry in lines if not entry.startswith(comment_start))
    for numbered in enumerate(uncommented, 1):
        yield numbered
| 0 | 510 | 
| 2 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 511 # read a GeneBed file, return list of starts, ends, raw fields | 
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 512 | 
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 513 | 
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
def get_starts_ends_fields_from_gene_bed(line):
    """
    Parse one 12-column BED line and return ``(starts, ends, fields)``.

    ``starts`` and ``ends`` are parallel lists with the bounds of each exon
    after clipping to the coding region (columns 7 and 8); exons left empty
    by the clipping are omitted. ``fields`` is the whitespace-split line.

    Raises Exception when the line has fewer than 12 columns.
    """
    # Starts and ends for exons
    starts = []
    ends = []

    fields = line.split()
    # Requires at least 12 BED columns
    if len(fields) < 12:
        raise Exception("Not a proper 12 column BED line (%s)." % line)
    tx_start = int(fields[1])
    cds_start = int(fields[6])
    cds_end = int(fields[7])

    # Calculate and store starts and ends of coding exons.
    # Real lists are built here (not lazy map objects): exon_starts is read
    # twice, once to derive the ends and once when pairing starts with ends.
    exon_starts = [int(rel_start) + tx_start for rel_start in fields[11].rstrip(',\n').split(',')]
    exon_sizes = [int(size) for size in fields[10].rstrip(',').split(',')]
    exon_ends = [exon_start + size for exon_start, size in zip(exon_starts, exon_sizes)]
    for start, end in zip(exon_starts, exon_ends):
        start = max(start, cds_start)
        end = min(end, cds_end)
        if start < end:
            starts.append(start)
            ends.append(end)
    return (starts, ends, fields)
| 0 | 543 | 
| 2 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 544 | 
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
def iter_components_by_src(block, src):
    """Lazily yield each component of block whose src is equal to src.

    block: object exposing an iterable ``components`` attribute.
    src: value compared (with ==) against each component's ``src``.
    """
    selected = (candidate for candidate in block.components if candidate.src == src)
    for component in selected:
        yield component
| 549 | |
| 2 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 550 | 
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
def get_components_by_src(block, src):
    """Return a list of everything iter_components_by_src(block, src) yields."""
    matching = iter_components_by_src(block, src)
    return list(matching)
| 0 | 553 | 
| 2 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 554 | 
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
def iter_components_by_src_start(block, src):
    """Lazily yield each component of block whose src starts with src.

    block: object exposing an iterable ``components`` attribute.
    src: prefix handed to ``component.src.startswith``.
    """
    for component in block.components:
        if not component.src.startswith(src):
            continue
        yield component
| 559 | |
| 2 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 560 | 
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
def get_components_by_src_start(block, src):
    """Return a list of everything iter_components_by_src_start(block, src) yields."""
    matching = iter_components_by_src_start(block, src)
    return list(matching)
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 563 | 
| 0 | 564 | 
| 2 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
def sort_block_components_by_block(block1, block2):
    """Order block1's components by their position in block2.components.

    The sort happens in place on ``block1.components`` and nothing is
    returned (``None``).  Every component of block1 must also be found in
    ``block2.components``; ``list.index`` raises ValueError otherwise.
    """
    # A key function gives the same ordering as the former
    # cmp=lambda x, y: index(x) - index(y), but also works on Python 3,
    # where list.sort() no longer accepts a cmp argument.
    block1.components.sort(key=block2.components.index)
    return None
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 570 | 
| 0 | 571 | 
| 2 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
def get_species_in_maf(maf_filename):
    """Collect the species reported for the blocks of a MAF file.

    maf_filename: path of the MAF file to read.

    Returns a list without duplicates, in order of first appearance, of the
    values produced by get_species_in_block() for every block read from
    the file.
    """
    species = []
    # Use a context manager so the file handle is closed once all blocks
    # have been read (it was previously left open).
    with open(maf_filename) as maf_file:
        for block in bx.align.maf.Reader(maf_file):
            for spec in get_species_in_block(block):
                if spec not in species:
                    species.append(spec)
    return species
| 579 | |
| 2 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 580 | 
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
def parse_species_option(species):
    """Turn a comma-separated species option into a list of names.

    Returns None when species is falsy ('' or None) or when one of the
    comma-separated entries is exactly 'None'; otherwise returns the list
    of entries.
    """
    if not species:
        return None
    names = species.split(',')
    if 'None' in names:
        return None
    return names
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 587 | 
| 0 | 588 | 
| 2 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
def remove_temp_index_file(index_filename):
    """Delete index_filename, ignoring any error raised while doing so.

    Removal is best-effort: a missing file or an unusable path is silently
    tolerated and the function always returns None.
    """
    try:
        # os.remove is the same operation as os.unlink
        os.remove(index_filename)
    except Exception:
        return None
    return None
| 0 | 594 | 
| 2 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 595 # Below are methods to deal with FASTA files | 
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 596 | 
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 597 | 
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
def get_fasta_header(component, attributes=None, suffix=None):
    """Build a FASTA header line for an alignment component.

    The result has the layout
    ``>src(strand):start-end|key=value|...|suffix``.

    component: object providing ``src``, ``strand``,
        ``get_forward_strand_start()`` and ``get_forward_strand_end()``.
    attributes: optional mapping; each item is written as a ``key=value|``
        field.  None (the default) means no extra fields.
    suffix: final field of the header.  When it is falsy, the first
        element of ``src_split(component.src)`` is used instead.

    Returns the header as a string.
    """
    parts = [">%s(%s):%i-%i|" % (component.src, component.strand, component.get_forward_strand_start(), component.get_forward_strand_end())]
    # items() exists on both Python 2 and 3 dicts (iteritems() is 2-only);
    # a None default avoids sharing one mutable dict between calls.
    for key, value in (attributes or {}).items():
        parts.append("%s=%s|" % (key, value))
    if suffix:
        parts.append("%s" % (suffix,))
    else:
        parts.append("%s" % (src_split(component.src)[0],))
    return "".join(parts)
| 607 | |
| 2 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 608 | 
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
def get_attributes_from_fasta_header(header):
    """Parse a '|'-separated FASTA header into a dict of attributes.

    The first field is read as ``species.chrom(strand):start-end`` and
    fills the 'species', 'chrom', 'strand', 'start' and 'end' keys; parsing
    of that field stops at the first piece that does not fit, keeping the
    keys already set.  Fields between the first and the last are stored as
    key/value pairs when they contain '='.  When there is more than one
    field, the last one is stored under '__suffix__'.

    Returns an empty dict for a falsy header.
    """
    parsed = {}
    if not header:
        return parsed
    pieces = header.lstrip('>').strip().split('|')
    try:
        name_and_rest = pieces[0].split('(', 1)
        species, dot, chrom = name_and_rest[0].partition('.')
        parsed['species'] = species
        # without a '.', the single name doubles as the chromosome
        parsed['chrom'] = chrom if dot else species
        strand_and_coords = name_and_rest[1].split(')', 1)
        parsed['strand'] = strand_and_coords[0]
        coords = strand_and_coords[1].lstrip(':').split('-')
        parsed['start'] = int(coords[0])
        parsed['end'] = int(coords[1])
    except Exception:
        # first field is not a region coordinate; keep what was parsed so far
        pass
    # the slice is empty unless there are more than two fields
    for field in pieces[1:-1]:
        key, equals, value = field.partition('=')
        if equals:
            parsed[key] = value
    if len(pieces) > 1:
        parsed['__suffix__'] = pieces[-1]
    return parsed
| 641 | |
| 2 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 642 | 
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 643 def iter_fasta_alignment(filename): | 
| 0 | 644 class fastaComponent: | 
| 2 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 645 def __init__(self, species, text=""): | 
| 0 | 646 self.species = species | 
| 647 self.text = text | |
| 648 | |
| 2 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 649 def extend(self, text): | 
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 650 self.text = self.text + text.replace('\n', '').replace('\r', '').strip() | 
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 651 # yields a list of fastaComponents for a FASTA file | 
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 652 with open(filename, 'r') as f: | 
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 653 components = [] | 
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 654 # cur_component = None | 
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 655 while True: | 
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 656 line = f.readline() | 
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 657 if not line: | 
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 658 if components: | 
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 659 yield components | 
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 660 return | 
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 661 line = line.strip() | 
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 662 if not line: | 
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 663 if components: | 
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 664 yield components | 
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 665 components = [] | 
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 666 elif line.startswith('>'): | 
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 667 attributes = get_attributes_from_fasta_header(line) | 
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 668 components.append(fastaComponent(attributes['species'])) | 
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 669 elif components: | 
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
1diff
changeset | 670 components[-1].extend(line) | 
