Repository 'fasta_concatenate_by_species'
hg clone https://toolshed.g2.bx.psu.edu/repos/devteam/fasta_concatenate_by_species

Changeset 2:16df616b39e5 (2020-03-01)
Previous changeset 1:717aee069681 (2014-11-17) Next changeset 3:25b8736c627a (2020-03-02)
Commit message:
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
modified:
fasta_concatenate_by_species.py
fasta_concatenate_by_species.xml
utils/maf_utilities.py
removed:
tool_dependencies.xml
utils/odict.py
b
diff -r 717aee069681 -r 16df616b39e5 fasta_concatenate_by_species.py
--- a/fasta_concatenate_by_species.py Mon Nov 17 10:15:05 2014 -0500
+++ b/fasta_concatenate_by_species.py Sun Mar 01 07:24:26 2020 -0500
[
@@ -1,39 +1,43 @@
 #!/usr/bin/env python
-#Dan Blankenberg
+# Dan Blankenberg
 """
-Takes a Multiple Alignment FASTA file and concatenates 
-sequences for each species, resulting in one sequence 
+Takes a Multiple Alignment FASTA file and concatenates
+sequences for each species, resulting in one sequence
 alignment per species.
 """
 
-import sys, tempfile
+import sys
+import tempfile
+from collections import OrderedDict
+
 from utils.maf_utilities import iter_fasta_alignment
-from utils.odict import odict
+
 
 def __main__():
     input_filename = sys.argv[1]
     output_filename = sys.argv[2]
-    species = odict()
+    species = OrderedDict()
     cur_size = 0
-    for components in iter_fasta_alignment( input_filename ):
-        species_not_written = species.keys()
+    for components in iter_fasta_alignment(input_filename):
+        species_not_written = list(species.keys())
         for component in components:
             if component.species not in species:
-                species[component.species] = tempfile.TemporaryFile()
-                species[component.species].write( "-" * cur_size )
-            species[component.species].write( component.text )
+                species[component.species] = tempfile.TemporaryFile(mode="r+")
+                species[component.species].write("-" * cur_size)
+            species[component.species].write(component.text)
             try:
-                species_not_written.remove( component.species )
+                species_not_written.remove(component.species)
             except ValueError:
-                #this is a new species
+                # this is a new species
                 pass
         for spec in species_not_written:
-            species[spec].write( "-" * len( components[0].text ) )
-        cur_size += len( components[0].text )
-    out = open( output_filename, 'wb' )
-    for spec, f in species.iteritems():
-        f.seek( 0 )
-        out.write( ">%s\n%s\n" % ( spec, f.read() ) )
-    out.close()
+            species[spec].write("-" * len(components[0].text))
+        cur_size += len(components[0].text)
+    with open(output_filename, 'w') as out:
+        for spec, f in species.items():
+            f.seek(0)
+            out.write(">%s\n%s\n" % (spec, f.read()))
 
-if __name__ == "__main__" : __main__()
+
+if __name__ == "__main__":
+    __main__()
b
diff -r 717aee069681 -r 16df616b39e5 fasta_concatenate_by_species.xml
--- a/fasta_concatenate_by_species.xml Mon Nov 17 10:15:05 2014 -0500
+++ b/fasta_concatenate_by_species.xml Sun Mar 01 07:24:26 2020 -0500
[
@@ -1,9 +1,13 @@
-<tool id="fasta_concatenate0" name="Concatenate" version="0.0.0">
+<tool id="fasta_concatenate0" name="Concatenate" version="0.0.1" profile="16.04">
   <description>FASTA alignment by species</description>
   <requirements>
-    <requirement type="package" version="0.7.1">bx-python</requirement>
+    <requirement type="package" version="0.8.8">bx-python</requirement>
   </requirements>
-  <command interpreter="python">fasta_concatenate_by_species.py $input1 $out_file1</command>
+  <command>
+    python '$__tool_directory__/fasta_concatenate_by_species.py'
+      '$input1'
+      '$out_file1'
+  </command>
   <inputs>
     <param name="input1" type="data" format="fasta" label="FASTA alignment"/>
   </inputs>
@@ -16,7 +20,7 @@
       <output name="out_file1" file="fasta_concatenate_out.fasta" />
     </test>
   </tests>
-  <help>
+  <help><![CDATA[
   
 **What it does**
   
@@ -71,5 +75,5 @@
 
  This tool will only work properly on files with Galaxy style FASTA headers.
 
-</help>
+   ]]></help>
 </tool>
b
diff -r 717aee069681 -r 16df616b39e5 tool_dependencies.xml
--- a/tool_dependencies.xml Mon Nov 17 10:15:05 2014 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,6 +0,0 @@
-<?xml version="1.0"?>
-<tool_dependency>
-    <package name="bx-python" version="0.7.1">
-        <repository changeset_revision="2d0c08728bca" name="package_bx_python_0_7" owner="devteam" prior_installation_required="False" toolshed="http://toolshed.g2.bx.psu.edu" />
-    </package>
-</tool_dependency>
b
diff -r 717aee069681 -r 16df616b39e5 utils/maf_utilities.py
--- a/utils/maf_utilities.py Mon Nov 17 10:15:05 2014 -0500
+++ b/utils/maf_utilities.py Sun Mar 01 07:24:26 2020 -0500
[
@@ -2,197 +2,226 @@
 """
 Provides wrappers and utilities for working with MAF files and alignments.
 """
-#Dan Blankenberg
+# Dan Blankenberg
 import bx.align.maf
 import bx.intervals
 import bx.interval_index_file
-import sys, os, string, tempfile
+import sys
+import os
+import tempfile
 import logging
 from copy import deepcopy
 
-assert sys.version_info[:2] >= ( 2, 4 )
+try:
+    maketrans = str.maketrans
+except AttributeError:
+    from string import maketrans
+
+assert sys.version_info[:2] >= (2, 4)
 
 log = logging.getLogger(__name__)
 
 
-GAP_CHARS = [ '-' ]
+GAP_CHARS = ['-']
 SRC_SPLIT_CHAR = '.'
 
-def src_split( src ):
-    fields = src.split( SRC_SPLIT_CHAR, 1 )
-    spec = fields.pop( 0 )
+
+def src_split(src):
+    fields = src.split(SRC_SPLIT_CHAR, 1)
+    spec = fields.pop(0)
     if fields:
-        chrom = fields.pop( 0 )
+        chrom = fields.pop(0)
     else:
         chrom = spec
     return spec, chrom
 
-def src_merge( spec, chrom, contig = None ):
-    if None in [ spec, chrom ]:
+
+def src_merge(spec, chrom, contig=None):
+    if None in [spec, chrom]:
         spec = chrom = spec or chrom
-    return bx.align.maf.src_merge( spec, chrom, contig )
+    return bx.align.maf.src_merge(spec, chrom, contig)
 
-def get_species_in_block( block ):
+
+def get_species_in_block(block):
     species = []
     for c in block.components:
-        spec, chrom = src_split( c.src )
+        spec, chrom = src_split(c.src)
         if spec not in species:
-            species.append( spec )
+            species.append(spec)
     return species
 
-def tool_fail( msg = "Unknown Error" ):
-    print >> sys.stderr, "Fatal Error: %s" % msg
-    sys.exit()
+
+def tool_fail(msg="Unknown Error"):
+    msg = "Fatal Error: %s" % msg
+    sys.exit(msg)
+
+# an object corresponding to a reference layered alignment
 
-#an object corresponding to a reference layered alignment
-class RegionAlignment( object ):
+
+class RegionAlignment(object):
 
-    DNA_COMPLEMENT = string.maketrans( "ACGTacgt", "TGCAtgca" )
-    MAX_SEQUENCE_SIZE = sys.maxint #Maximum length of sequence allowed
+    DNA_COMPLEMENT = maketrans("ACGTacgt", "TGCAtgca")
 
-    def __init__( self, size, species = [] ):
-        assert size <= self.MAX_SEQUENCE_SIZE, "Maximum length allowed for an individual sequence has been exceeded (%i > %i)." % ( size, self.MAX_SEQUENCE_SIZE )
+    def __init__(self, size, species=[]):
         self.size = size
         self.sequences = {}
-        if not isinstance( species, list ):
+        if not isinstance(species, list):
             species = [species]
         for spec in species:
-            self.add_species( spec )
+            self.add_species(spec)
 
-    #add a species to the alignment
-    def add_species( self, species ):
-        #make temporary sequence files
+    # add a species to the alignment
+    def add_species(self, species):
+        # make temporary sequence files
         self.sequences[species] = tempfile.TemporaryFile()
-        self.sequences[species].write( "-" * self.size )
+        self.sequences[species].write("-" * self.size)
 
-    #returns the names for species found in alignment, skipping names as requested
-    def get_species_names( self, skip = [] ):
-        if not isinstance( skip, list ): skip = [skip]
+    # returns the names for species found in alignment, skipping names as requested
+    def get_species_names(self, skip=[]):
+        if not isinstance(skip, list):
+            skip = [skip]
         names = self.sequences.keys()
         for name in skip:
-            try: names.remove( name )
-            except: pass
+            try:
+                names.remove(name)
+            except Exception:
+                pass
         return names
 
-    #returns the sequence for a species
-    def get_sequence( self, species ):
-        self.sequences[species].seek( 0 )
+    # returns the sequence for a species
+    def get_sequence(self, species):
+        self.sequences[species].seek(
[... middle of this hunk is elided in the captured page; the next line is the tail of a cut-off diff line ...]
header, suffix )
+        header = "%s%s" % (header, suffix)
     else:
-        header = "%s%s" % ( header, src_split( component.src )[ 0 ] )
+        header = "%s%s" % (header, src_split(component.src)[0])
     return header
 
-def get_attributes_from_fasta_header( header ):
-    if not header: return {}
+
+def get_attributes_from_fasta_header(header):
+    if not header:
+        return {}
     attributes = {}
-    header = header.lstrip( '>' )
+    header = header.lstrip('>')
     header = header.strip()
-    fields = header.split( '|' )
+    fields = header.split('|')
     try:
         region = fields[0]
-        region = region.split( '(', 1 )
-        temp = region[0].split( '.', 1 )
+        region = region.split('(', 1)
+        temp = region[0].split('.', 1)
         attributes['species'] = temp[0]
-        if len( temp ) == 2:
+        if len(temp) == 2:
             attributes['chrom'] = temp[1]
         else:
             attributes['chrom'] = temp[0]
-        region = region[1].split( ')', 1 )
+        region = region[1].split(')', 1)
         attributes['strand'] = region[0]
-        region = region[1].lstrip( ':' ).split( '-' )
-        attributes['start'] = int( region[0] )
-        attributes['end'] = int( region[1] )
-    except:
-        #fields 0 is not a region coordinate
+        region = region[1].lstrip(':').split('-')
+        attributes['start'] = int(region[0])
+        attributes['end'] = int(region[1])
+    except Exception:
+        # fields 0 is not a region coordinate
         pass
-    if len( fields ) > 2:
-        for i in xrange( 1, len( fields ) - 1 ):
-            prop = fields[i].split( '=', 1 )
-            if len( prop ) == 2:
-                attributes[ prop[0] ] = prop[1]
-    if len( fields ) > 1:
+    if len(fields) > 2:
+        for i in range(1, len(fields) - 1):
+            prop = fields[i].split('=', 1)
+            if len(prop) == 2:
+                attributes[prop[0]] = prop[1]
+    if len(fields) > 1:
         attributes['__suffix__'] = fields[-1]
     return attributes
 
-def iter_fasta_alignment( filename ):
+
+def iter_fasta_alignment(filename):
     class fastaComponent:
-        def __init__( self, species, text = "" ):
+        def __init__(self, species, text=""):
             self.species = species
             self.text = text
-        def extend( self, text ):
-            self.text = self.text + text.replace( '\n', '' ).replace( '\r', '' ).strip()
-    #yields a list of fastaComponents for a FASTA file
-    f = open( filename, 'rb' )
-    components = []
-    #cur_component = None
-    while True:
-        line = f.readline()
-        if not line:
-            if components:
-                yield components
-            return
-        line = line.strip()
-        if not line:
-            if components:
-                yield components
-            components = []
-        elif line.startswith( '>' ):
-            attributes = get_attributes_from_fasta_header( line )
-            components.append( fastaComponent( attributes['species'] ) )
-        elif components:
-            components[-1].extend( line )
 
+        def extend(self, text):
+            self.text = self.text + text.replace('\n', '').replace('\r', '').strip()
+    # yields a list of fastaComponents for a FASTA file
+    with open(filename, 'r') as f:
+        components = []
+        # cur_component = None
+        while True:
+            line = f.readline()
+            if not line:
+                if components:
+                    yield components
+                return
+            line = line.strip()
+            if not line:
+                if components:
+                    yield components
+                components = []
+            elif line.startswith('>'):
+                attributes = get_attributes_from_fasta_header(line)
+                components.append(fastaComponent(attributes['species']))
+            elif components:
+                components[-1].extend(line)
b
diff -r 717aee069681 -r 16df616b39e5 utils/odict.py
--- a/utils/odict.py Mon Nov 17 10:15:05 2014 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,85 +0,0 @@
-"""
-Ordered dictionary implementation.
-"""
-
-from UserDict import UserDict
-
-class odict(UserDict):
-    """
-    http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/107747
-
-    This dictionary class extends UserDict to record the order in which items are
-    added. Calling keys(), values(), items(), etc. will return results in this
-    order.
-    """
-    def __init__( self, dict = None ):
-        self._keys = []
-        UserDict.__init__( self, dict )
-
-    def __delitem__( self, key ):
-        UserDict.__delitem__( self, key )
-        self._keys.remove( key )
-
-    def __setitem__( self, key, item ):
-        UserDict.__setitem__( self, key, item )
-        if key not in self._keys:
-            self._keys.append( key )
-
-    def clear( self ):
-        UserDict.clear( self )
-        self._keys = []
-
-    def copy(self):
-        new = odict()
-        new.update( self )
-        return new
-
-    def items( self ):
-        return zip( self._keys, self.values() )
-
-    def keys( self ):
-        return self._keys[:]
-
-    def popitem( self ):
-        try:
-            key = self._keys[-1]
-        except IndexError:
-            raise KeyError( 'dictionary is empty' )
-        val = self[ key ]
-        del self[ key ]
-        return ( key, val )
-
-    def setdefault( self, key, failobj=None ):
-        if key not in self._keys:
-            self._keys.append( key )
-        return UserDict.setdefault( self, key, failobj )
-
-    def update( self, dict ):
-        for ( key, val ) in dict.items():
-            self.__setitem__( key, val )
-
-    def values( self ):
-        return map( self.get, self._keys )
-
-    def iterkeys( self ):
-        return iter( self._keys )
-
-    def itervalues( self ):
-        for key in self._keys:
-            yield self.get( key )
-
-    def iteritems( self ):
-        for key in self._keys:
-            yield key, self.get( key )
-
-    def __iter__( self ):
-        for key in self._keys:
-            yield key
-
-    def reverse( self ):
-        self._keys.reverse()
-
-    def insert( self, index, key, item ):
-        if key not in self._keys:
-            self._keys.insert( index, key )
-            UserDict.__setitem__( self, key, item )