Previous changeset 1:717aee069681 (2014-11-17) Next changeset 3:25b8736c627a (2020-03-02) |
Commit message:
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf" |
modified:
fasta_concatenate_by_species.py fasta_concatenate_by_species.xml utils/maf_utilities.py |
removed:
tool_dependencies.xml utils/odict.py |
b |
diff -r 717aee069681 -r 16df616b39e5 fasta_concatenate_by_species.py --- a/fasta_concatenate_by_species.py Mon Nov 17 10:15:05 2014 -0500 +++ b/fasta_concatenate_by_species.py Sun Mar 01 07:24:26 2020 -0500 |
[ |
@@ -1,39 +1,43 @@ #!/usr/bin/env python -#Dan Blankenberg +# Dan Blankenberg """ -Takes a Multiple Alignment FASTA file and concatenates -sequences for each species, resulting in one sequence +Takes a Multiple Alignment FASTA file and concatenates +sequences for each species, resulting in one sequence alignment per species. """ -import sys, tempfile +import sys +import tempfile +from collections import OrderedDict + from utils.maf_utilities import iter_fasta_alignment -from utils.odict import odict + def __main__(): input_filename = sys.argv[1] output_filename = sys.argv[2] - species = odict() + species = OrderedDict() cur_size = 0 - for components in iter_fasta_alignment( input_filename ): - species_not_written = species.keys() + for components in iter_fasta_alignment(input_filename): + species_not_written = list(species.keys()) for component in components: if component.species not in species: - species[component.species] = tempfile.TemporaryFile() - species[component.species].write( "-" * cur_size ) - species[component.species].write( component.text ) + species[component.species] = tempfile.TemporaryFile(mode="r+") + species[component.species].write("-" * cur_size) + species[component.species].write(component.text) try: - species_not_written.remove( component.species ) + species_not_written.remove(component.species) except ValueError: - #this is a new species + # this is a new species pass for spec in species_not_written: - species[spec].write( "-" * len( components[0].text ) ) - cur_size += len( components[0].text ) - out = open( output_filename, 'wb' ) - for spec, f in species.iteritems(): - f.seek( 0 ) - out.write( ">%s\n%s\n" % ( spec, f.read() ) ) - out.close() + species[spec].write("-" * len(components[0].text)) + cur_size += len(components[0].text) + with open(output_filename, 'w') as out: + for spec, f in species.items(): + f.seek(0) + out.write(">%s\n%s\n" % (spec, f.read())) -if __name__ == "__main__" : __main__() + +if __name__ == "__main__": + __main__() |
b |
diff -r 717aee069681 -r 16df616b39e5 fasta_concatenate_by_species.xml --- a/fasta_concatenate_by_species.xml Mon Nov 17 10:15:05 2014 -0500 +++ b/fasta_concatenate_by_species.xml Sun Mar 01 07:24:26 2020 -0500 |
[ |
@@ -1,9 +1,13 @@ -<tool id="fasta_concatenate0" name="Concatenate" version="0.0.0"> +<tool id="fasta_concatenate0" name="Concatenate" version="0.0.1" profile="16.04"> <description>FASTA alignment by species</description> <requirements> - <requirement type="package" version="0.7.1">bx-python</requirement> + <requirement type="package" version="0.8.8">bx-python</requirement> </requirements> - <command interpreter="python">fasta_concatenate_by_species.py $input1 $out_file1</command> + <command> + python '$__tool_directory__/fasta_concatenate_by_species.py' + '$input1' + '$out_file1' + </command> <inputs> <param name="input1" type="data" format="fasta" label="FASTA alignment"/> </inputs> @@ -16,7 +20,7 @@ <output name="out_file1" file="fasta_concatenate_out.fasta" /> </test> </tests> - <help> + <help><![CDATA[ **What it does** @@ -71,5 +75,5 @@ This tool will only work properly on files with Galaxy style FASTA headers. -</help> + ]]></help> </tool> |
b |
diff -r 717aee069681 -r 16df616b39e5 tool_dependencies.xml --- a/tool_dependencies.xml Mon Nov 17 10:15:05 2014 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,6 +0,0 @@ -<?xml version="1.0"?> -<tool_dependency> - <package name="bx-python" version="0.7.1"> - <repository changeset_revision="2d0c08728bca" name="package_bx_python_0_7" owner="devteam" prior_installation_required="False" toolshed="http://toolshed.g2.bx.psu.edu" /> - </package> -</tool_dependency> |
b |
diff -r 717aee069681 -r 16df616b39e5 utils/maf_utilities.py --- a/utils/maf_utilities.py Mon Nov 17 10:15:05 2014 -0500 +++ b/utils/maf_utilities.py Sun Mar 01 07:24:26 2020 -0500 |
[ |
b'@@ -2,197 +2,226 @@\n """\n Provides wrappers and utilities for working with MAF files and alignments.\n """\n-#Dan Blankenberg\n+# Dan Blankenberg\n import bx.align.maf\n import bx.intervals\n import bx.interval_index_file\n-import sys, os, string, tempfile\n+import sys\n+import os\n+import tempfile\n import logging\n from copy import deepcopy\n \n-assert sys.version_info[:2] >= ( 2, 4 )\n+try:\n+ maketrans = str.maketrans\n+except AttributeError:\n+ from string import maketrans\n+\n+assert sys.version_info[:2] >= (2, 4)\n \n log = logging.getLogger(__name__)\n \n \n-GAP_CHARS = [ \'-\' ]\n+GAP_CHARS = [\'-\']\n SRC_SPLIT_CHAR = \'.\'\n \n-def src_split( src ):\n- fields = src.split( SRC_SPLIT_CHAR, 1 )\n- spec = fields.pop( 0 )\n+\n+def src_split(src):\n+ fields = src.split(SRC_SPLIT_CHAR, 1)\n+ spec = fields.pop(0)\n if fields:\n- chrom = fields.pop( 0 )\n+ chrom = fields.pop(0)\n else:\n chrom = spec\n return spec, chrom\n \n-def src_merge( spec, chrom, contig = None ):\n- if None in [ spec, chrom ]:\n+\n+def src_merge(spec, chrom, contig=None):\n+ if None in [spec, chrom]:\n spec = chrom = spec or chrom\n- return bx.align.maf.src_merge( spec, chrom, contig )\n+ return bx.align.maf.src_merge(spec, chrom, contig)\n \n-def get_species_in_block( block ):\n+\n+def get_species_in_block(block):\n species = []\n for c in block.components:\n- spec, chrom = src_split( c.src )\n+ spec, chrom = src_split(c.src)\n if spec not in species:\n- species.append( spec )\n+ species.append(spec)\n return species\n \n-def tool_fail( msg = "Unknown Error" ):\n- print >> sys.stderr, "Fatal Error: %s" % msg\n- sys.exit()\n+\n+def tool_fail(msg="Unknown Error"):\n+ msg = "Fatal Error: %s" % msg\n+ sys.exit(msg)\n+\n+# an object corresponding to a reference layered alignment\n \n-#an object corresponding to a reference layered alignment\n-class RegionAlignment( object ):\n+\n+class RegionAlignment(object):\n \n- DNA_COMPLEMENT = string.maketrans( "ACGTacgt", "TGCAtgca" )\n- MAX_SEQUENCE_SIZE = sys.maxint #Maximum length of sequence allowed\n+ DNA_COMPLEMENT = maketrans("ACGTacgt", "TGCAtgca")\n \n- def __init__( self, size, species = [] ):\n- assert size <= self.MAX_SEQUENCE_SIZE, "Maximum length allowed for an individual sequence has been exceeded (%i > %i)." % ( size, self.MAX_SEQUENCE_SIZE )\n+ def __init__(self, size, species=[]):\n self.size = size\n self.sequences = {}\n- if not isinstance( species, list ):\n+ if not isinstance(species, list):\n species = [species]\n for spec in species:\n- self.add_species( spec )\n+ self.add_species(spec)\n \n- #add a species to the alignment\n- def add_species( self, species ):\n- #make temporary sequence files\n+ # add a species to the alignment\n+ def add_species(self, species):\n+ # make temporary sequence files\n self.sequences[species] = tempfile.TemporaryFile()\n- self.sequences[species].write( "-" * self.size )\n+ self.sequences[species].write("-" * self.size)\n \n- #returns the names for species found in alignment, skipping names as requested\n- def get_species_names( self, skip = [] ):\n- if not isinstance( skip, list ): skip = [skip]\n+ # returns the names for species found in alignment, skipping names as requested\n+ def get_species_names(self, skip=[]):\n+ if not isinstance(skip, list):\n+ skip = [skip]\n names = self.sequences.keys()\n for name in skip:\n- try: names.remove( name )\n- except: pass\n+ try:\n+ names.remove(name)\n+ except Exception:\n+ pass\n return names\n \n- #returns the sequence for a species\n- def get_sequence( self, species ):\n- self.sequences[species].seek( 0 )\n+ # returns the sequence for a species\n+ def get_sequence(self, species):\n+ self.sequences[species].seek('..b'header, suffix )\n+ header = "%s%s" % (header, suffix)\n else:\n- header = "%s%s" % ( header, src_split( component.src )[ 0 ] )\n+ header = "%s%s" % (header, src_split(component.src)[0])\n return header\n \n-def get_attributes_from_fasta_header( header ):\n- if not header: return {}\n+\n+def get_attributes_from_fasta_header(header):\n+ if not header:\n+ return {}\n attributes = {}\n- header = header.lstrip( \'>\' )\n+ header = header.lstrip(\'>\')\n header = header.strip()\n- fields = header.split( \'|\' )\n+ fields = header.split(\'|\')\n try:\n region = fields[0]\n- region = region.split( \'(\', 1 )\n- temp = region[0].split( \'.\', 1 )\n+ region = region.split(\'(\', 1)\n+ temp = region[0].split(\'.\', 1)\n attributes[\'species\'] = temp[0]\n- if len( temp ) == 2:\n+ if len(temp) == 2:\n attributes[\'chrom\'] = temp[1]\n else:\n attributes[\'chrom\'] = temp[0]\n- region = region[1].split( \')\', 1 )\n+ region = region[1].split(\')\', 1)\n attributes[\'strand\'] = region[0]\n- region = region[1].lstrip( \':\' ).split( \'-\' )\n- attributes[\'start\'] = int( region[0] )\n- attributes[\'end\'] = int( region[1] )\n- except:\n- #fields 0 is not a region coordinate\n+ region = region[1].lstrip(\':\').split(\'-\')\n+ attributes[\'start\'] = int(region[0])\n+ attributes[\'end\'] = int(region[1])\n+ except Exception:\n+ # fields 0 is not a region coordinate\n pass\n- if len( fields ) > 2:\n- for i in xrange( 1, len( fields ) - 1 ):\n- prop = fields[i].split( \'=\', 1 )\n- if len( prop ) == 2:\n- attributes[ prop[0] ] = prop[1]\n- if len( fields ) > 1:\n+ if len(fields) > 2:\n+ for i in range(1, len(fields) - 1):\n+ prop = fields[i].split(\'=\', 1)\n+ if len(prop) == 2:\n+ attributes[prop[0]] = prop[1]\n+ if len(fields) > 1:\n attributes[\'__suffix__\'] = fields[-1]\n return attributes\n \n-def iter_fasta_alignment( filename ):\n+\n+def iter_fasta_alignment(filename):\n class fastaComponent:\n- def __init__( self, species, text = "" ):\n+ def __init__(self, species, text=""):\n self.species = species\n self.text = text\n- def extend( self, text ):\n- self.text = self.text + text.replace( \'\\n\', \'\' ).replace( \'\\r\', \'\' ).strip()\n- #yields a list of fastaComponents for a FASTA file\n- f = open( filename, \'rb\' )\n- components = []\n- #cur_component = None\n- while True:\n- line = f.readline()\n- if not line:\n- if components:\n- yield components\n- return\n- line = line.strip()\n- if not line:\n- if components:\n- yield components\n- components = []\n- elif line.startswith( \'>\' ):\n- attributes = get_attributes_from_fasta_header( line )\n- components.append( fastaComponent( attributes[\'species\'] ) )\n- elif components:\n- components[-1].extend( line )\n \n+ def extend(self, text):\n+ self.text = self.text + text.replace(\'\\n\', \'\').replace(\'\\r\', \'\').strip()\n+ # yields a list of fastaComponents for a FASTA file\n+ with open(filename, \'r\') as f:\n+ components = []\n+ # cur_component = None\n+ while True:\n+ line = f.readline()\n+ if not line:\n+ if components:\n+ yield components\n+ return\n+ line = line.strip()\n+ if not line:\n+ if components:\n+ yield components\n+ components = []\n+ elif line.startswith(\'>\'):\n+ attributes = get_attributes_from_fasta_header(line)\n+ components.append(fastaComponent(attributes[\'species\']))\n+ elif components:\n+ components[-1].extend(line)\n' |
b |
diff -r 717aee069681 -r 16df616b39e5 utils/odict.py --- a/utils/odict.py Mon Nov 17 10:15:05 2014 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,85 +0,0 @@ -""" -Ordered dictionary implementation. -""" - -from UserDict import UserDict - -class odict(UserDict): - """ - http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/107747 - - This dictionary class extends UserDict to record the order in which items are - added. Calling keys(), values(), items(), etc. will return results in this - order. - """ - def __init__( self, dict = None ): - self._keys = [] - UserDict.__init__( self, dict ) - - def __delitem__( self, key ): - UserDict.__delitem__( self, key ) - self._keys.remove( key ) - - def __setitem__( self, key, item ): - UserDict.__setitem__( self, key, item ) - if key not in self._keys: - self._keys.append( key ) - - def clear( self ): - UserDict.clear( self ) - self._keys = [] - - def copy(self): - new = odict() - new.update( self ) - return new - - def items( self ): - return zip( self._keys, self.values() ) - - def keys( self ): - return self._keys[:] - - def popitem( self ): - try: - key = self._keys[-1] - except IndexError: - raise KeyError( 'dictionary is empty' ) - val = self[ key ] - del self[ key ] - return ( key, val ) - - def setdefault( self, key, failobj=None ): - if key not in self._keys: - self._keys.append( key ) - return UserDict.setdefault( self, key, failobj ) - - def update( self, dict ): - for ( key, val ) in dict.items(): - self.__setitem__( key, val ) - - def values( self ): - return map( self.get, self._keys ) - - def iterkeys( self ): - return iter( self._keys ) - - def itervalues( self ): - for key in self._keys: - yield self.get( key ) - - def iteritems( self ): - for key in self._keys: - yield key, self.get( key ) - - def __iter__( self ): - for key in self._keys: - yield key - - def reverse( self ): - self._keys.reverse() - - def insert( self, index, key, item ): - if key not in self._keys: - self._keys.insert( index, key ) - UserDict.__setitem__( self, key, item ) |