Galaxy |

Changeset 2:16df616b39e5 (2020-03-01)

Previous changeset: 1:717aee069681 (2014-11-17) | Next changeset: 3:25b8736c627a (2020-03-02)

Commit message:
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"

modified:
fasta_concatenate_by_species.py
fasta_concatenate_by_species.xml
utils/maf_utilities.py

removed:
tool_dependencies.xml
utils/odict.py

diff -r 717aee069681 -r 16df616b39e5 fasta_concatenate_by_species.py
--- a/fasta_concatenate_by_species.py Mon Nov 17 10:15:05 2014 -0500
+++ b/fasta_concatenate_by_species.py Sun Mar 01 07:24:26 2020 -0500

[

@@ -1,39 +1,43 @@
#!/usr/bin/env python
-#Dan Blankenberg
+# Dan Blankenberg
"""
-Takes a Multiple Alignment FASTA file and concatenates
-sequences for each species, resulting in one sequence
+Takes a Multiple Alignment FASTA file and concatenates
+sequences for each species, resulting in one sequence
alignment per species.
"""

-import sys, tempfile
+import sys
+import tempfile
+from collections import OrderedDict
+
from utils.maf_utilities import iter_fasta_alignment
-from utils.odict import odict
+

def __main__():
     input_filename = sys.argv[1]
     output_filename = sys.argv[2]
-    species = odict()
+    species = OrderedDict()
     cur_size = 0
-    for components in iter_fasta_alignment( input_filename ):
-        species_not_written = species.keys()
+    for components in iter_fasta_alignment(input_filename):
+        species_not_written = list(species.keys())
         for component in components:
             if component.species not in species:
-                species[component.species] = tempfile.TemporaryFile()
-                species[component.species].write( "-" * cur_size )
-            species[component.species].write( component.text )
+                species[component.species] = tempfile.TemporaryFile(mode="r+")
+                species[component.species].write("-" * cur_size)
+            species[component.species].write(component.text)
             try:
-                species_not_written.remove( component.species )
+                species_not_written.remove(component.species)
             except ValueError:
-                #this is a new species
+                # this is a new species
                 pass
         for spec in species_not_written:
-            species[spec].write( "-" * len( components[0].text ) )
-        cur_size += len( components[0].text )
-    out = open( output_filename, 'wb' )
-    for spec, f in species.iteritems():
-        f.seek( 0 )
-        out.write( ">%s\n%s\n" % ( spec, f.read() ) )
-    out.close()
+            species[spec].write("-" * len(components[0].text))
+        cur_size += len(components[0].text)
+    with open(output_filename, 'w') as out:
+        for spec, f in species.items():
+            f.seek(0)
+            out.write(">%s\n%s\n" % (spec, f.read()))

-if __name__ == "__main__" : __main__()
+
+if __name__ == "__main__":
+    __main__()

diff -r 717aee069681 -r 16df616b39e5 fasta_concatenate_by_species.xml
--- a/fasta_concatenate_by_species.xml Mon Nov 17 10:15:05 2014 -0500
+++ b/fasta_concatenate_by_species.xml Sun Mar 01 07:24:26 2020 -0500

[

@@ -1,9 +1,13 @@
-<tool id="fasta_concatenate0" name="Concatenate" version="0.0.0">
+<tool id="fasta_concatenate0" name="Concatenate" version="0.0.1" profile="16.04">
   <description>FASTA alignment by species</description>
   <requirements>
-    <requirement type="package" version="0.7.1">bx-python</requirement>
+    <requirement type="package" version="0.8.8">bx-python</requirement>
   </requirements>
-  <command interpreter="python">fasta_concatenate_by_species.py $input1 $out_file1</command>
+  <command>
+    python '$__tool_directory__/fasta_concatenate_by_species.py'
+      '$input1'
+      '$out_file1'
+  </command>
   <inputs>
     <param name="input1" type="data" format="fasta" label="FASTA alignment"/>
   </inputs>
@@ -16,7 +20,7 @@
       <output name="out_file1" file="fasta_concatenate_out.fasta" />
     </test>
   </tests>
-  <help>
+  <help><![CDATA[

**What it does**

@@ -71,5 +75,5 @@

  This tool will only work properly on files with Galaxy style FASTA headers.

-</help>
+   ]]></help>
</tool>

diff -r 717aee069681 -r 16df616b39e5 tool_dependencies.xml
--- a/tool_dependencies.xml Mon Nov 17 10:15:05 2014 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000

@@ -1,6 +0,0 @@
-<?xml version="1.0"?>
-<tool_dependency>
-    <package name="bx-python" version="0.7.1">
-        <repository changeset_revision="2d0c08728bca" name="package_bx_python_0_7" owner="devteam" prior_installation_required="False" toolshed="http://toolshed.g2.bx.psu.edu" />
-    </package>
-</tool_dependency>

diff -r 717aee069681 -r 16df616b39e5 utils/maf_utilities.py
--- a/utils/maf_utilities.py Mon Nov 17 10:15:05 2014 -0500
+++ b/utils/maf_utilities.py Sun Mar 01 07:24:26 2020 -0500

[

b'@@ -2,197 +2,226 @@\n """\n Provides wrappers and utilities for working with MAF files and alignments.\n """\n-#Dan Blankenberg\n+# Dan Blankenberg\n import bx.align.maf\n import bx.intervals\n import bx.interval_index_file\n-import sys, os, string, tempfile\n+import sys\n+import os\n+import tempfile\n import logging\n from copy import deepcopy\n \n-assert sys.version_info[:2] >= ( 2, 4 )\n+try:\n+ maketrans = str.maketrans\n+except AttributeError:\n+ from string import maketrans\n+\n+assert sys.version_info[:2] >= (2, 4)\n \n log = logging.getLogger(__name__)\n \n \n-GAP_CHARS = [ \'-\' ]\n+GAP_CHARS = [\'-\']\n SRC_SPLIT_CHAR = \'.\'\n \n-def src_split( src ):\n- fields = src.split( SRC_SPLIT_CHAR, 1 )\n- spec = fields.pop( 0 )\n+\n+def src_split(src):\n+ fields = src.split(SRC_SPLIT_CHAR, 1)\n+ spec = fields.pop(0)\n if fields:\n- chrom = fields.pop( 0 )\n+ chrom = fields.pop(0)\n else:\n chrom = spec\n return spec, chrom\n \n-def src_merge( spec, chrom, contig = None ):\n- if None in [ spec, chrom ]:\n+\n+def src_merge(spec, chrom, contig=None):\n+ if None in [spec, chrom]:\n spec = chrom = spec or chrom\n- return bx.align.maf.src_merge( spec, chrom, contig )\n+ return bx.align.maf.src_merge(spec, chrom, contig)\n \n-def get_species_in_block( block ):\n+\n+def get_species_in_block(block):\n species = []\n for c in block.components:\n- spec, chrom = src_split( c.src )\n+ spec, chrom = src_split(c.src)\n if spec not in species:\n- species.append( spec )\n+ species.append(spec)\n return species\n \n-def tool_fail( msg = "Unknown Error" ):\n- print >> sys.stderr, "Fatal Error: %s" % msg\n- sys.exit()\n+\n+def tool_fail(msg="Unknown Error"):\n+ msg = "Fatal Error: %s" % msg\n+ sys.exit(msg)\n+\n+# an object corresponding to a reference layered alignment\n \n-#an object corresponding to a reference layered alignment\n-class RegionAlignment( object ):\n+\n+class RegionAlignment(object):\n \n- DNA_COMPLEMENT = string.maketrans( "ACGTacgt", "TGCAtgca" )\n- 
MAX_SEQUENCE_SIZE = sys.maxint #Maximum length of sequence allowed\n+ DNA_COMPLEMENT = maketrans("ACGTacgt", "TGCAtgca")\n \n- def __init__( self, size, species = [] ):\n- assert size <= self.MAX_SEQUENCE_SIZE, "Maximum length allowed for an individual sequence has been exceeded (%i > %i)." % ( size, self.MAX_SEQUENCE_SIZE )\n+ def __init__(self, size, species=[]):\n self.size = size\n self.sequences = {}\n- if not isinstance( species, list ):\n+ if not isinstance(species, list):\n species = [species]\n for spec in species:\n- self.add_species( spec )\n+ self.add_species(spec)\n \n- #add a species to the alignment\n- def add_species( self, species ):\n- #make temporary sequence files\n+ # add a species to the alignment\n+ def add_species(self, species):\n+ # make temporary sequence files\n self.sequences[species] = tempfile.TemporaryFile()\n- self.sequences[species].write( "-" * self.size )\n+ self.sequences[species].write("-" * self.size)\n \n- #returns the names for species found in alignment, skipping names as requested\n- def get_species_names( self, skip = [] ):\n- if not isinstance( skip, list ): skip = [skip]\n+ # returns the names for species found in alignment, skipping names as requested\n+ def get_species_names(self, skip=[]):\n+ if not isinstance(skip, list):\n+ skip = [skip]\n names = self.sequences.keys()\n for name in skip:\n- try: names.remove( name )\n- except: pass\n+ try:\n+ names.remove(name)\n+ except Exception:\n+ pass\n return names\n \n- #returns the sequence for a species\n- def get_sequence( self, species ):\n- self.sequences[species].seek( 0 )\n+ # returns the sequence for a species\n+ def get_sequence(self, species):\n+ self.sequences[species].seek('..b'header, suffix )\n+ header = "%s%s" % (header, suffix)\n else:\n- header = "%s%s" % ( header, src_split( component.src )[ 0 ] )\n+ header = "%s%s" % (header, src_split(component.src)[0])\n return header\n \n-def get_attributes_from_fasta_header( header ):\n- if not header: return 
{}\n+\n+def get_attributes_from_fasta_header(header):\n+ if not header:\n+ return {}\n attributes = {}\n- header = header.lstrip( \'>\' )\n+ header = header.lstrip(\'>\')\n header = header.strip()\n- fields = header.split( \'|\' )\n+ fields = header.split(\'|\')\n try:\n region = fields[0]\n- region = region.split( \'(\', 1 )\n- temp = region[0].split( \'.\', 1 )\n+ region = region.split(\'(\', 1)\n+ temp = region[0].split(\'.\', 1)\n attributes[\'species\'] = temp[0]\n- if len( temp ) == 2:\n+ if len(temp) == 2:\n attributes[\'chrom\'] = temp[1]\n else:\n attributes[\'chrom\'] = temp[0]\n- region = region[1].split( \')\', 1 )\n+ region = region[1].split(\')\', 1)\n attributes[\'strand\'] = region[0]\n- region = region[1].lstrip( \':\' ).split( \'-\' )\n- attributes[\'start\'] = int( region[0] )\n- attributes[\'end\'] = int( region[1] )\n- except:\n- #fields 0 is not a region coordinate\n+ region = region[1].lstrip(\':\').split(\'-\')\n+ attributes[\'start\'] = int(region[0])\n+ attributes[\'end\'] = int(region[1])\n+ except Exception:\n+ # fields 0 is not a region coordinate\n pass\n- if len( fields ) > 2:\n- for i in xrange( 1, len( fields ) - 1 ):\n- prop = fields[i].split( \'=\', 1 )\n- if len( prop ) == 2:\n- attributes[ prop[0] ] = prop[1]\n- if len( fields ) > 1:\n+ if len(fields) > 2:\n+ for i in range(1, len(fields) - 1):\n+ prop = fields[i].split(\'=\', 1)\n+ if len(prop) == 2:\n+ attributes[prop[0]] = prop[1]\n+ if len(fields) > 1:\n attributes[\'__suffix__\'] = fields[-1]\n return attributes\n \n-def iter_fasta_alignment( filename ):\n+\n+def iter_fasta_alignment(filename):\n class fastaComponent:\n- def __init__( self, species, text = "" ):\n+ def __init__(self, species, text=""):\n self.species = species\n self.text = text\n- def extend( self, text ):\n- self.text = self.text + text.replace( \'\\n\', \'\' ).replace( \'\\r\', \'\' ).strip()\n- #yields a list of fastaComponents for a FASTA file\n- f = open( filename, \'rb\' )\n- components = []\n- 
#cur_component = None\n- while True:\n- line = f.readline()\n- if not line:\n- if components:\n- yield components\n- return\n- line = line.strip()\n- if not line:\n- if components:\n- yield components\n- components = []\n- elif line.startswith( \'>\' ):\n- attributes = get_attributes_from_fasta_header( line )\n- components.append( fastaComponent( attributes[\'species\'] ) )\n- elif components:\n- components[-1].extend( line )\n \n+ def extend(self, text):\n+ self.text = self.text + text.replace(\'\\n\', \'\').replace(\'\\r\', \'\').strip()\n+ # yields a list of fastaComponents for a FASTA file\n+ with open(filename, \'r\') as f:\n+ components = []\n+ # cur_component = None\n+ while True:\n+ line = f.readline()\n+ if not line:\n+ if components:\n+ yield components\n+ return\n+ line = line.strip()\n+ if not line:\n+ if components:\n+ yield components\n+ components = []\n+ elif line.startswith(\'>\'):\n+ attributes = get_attributes_from_fasta_header(line)\n+ components.append(fastaComponent(attributes[\'species\']))\n+ elif components:\n+ components[-1].extend(line)\n'

diff -r 717aee069681 -r 16df616b39e5 utils/odict.py
--- a/utils/odict.py Mon Nov 17 10:15:05 2014 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000

[

@@ -1,85 +0,0 @@
-"""
-Ordered dictionary implementation.
-"""
-
-from UserDict import UserDict
-
-class odict(UserDict):
-    """
-    http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/107747
-
-    This dictionary class extends UserDict to record the order in which items are
-    added. Calling keys(), values(), items(), etc. will return results in this
-    order.
-    """
-    def __init__( self, dict = None ):
-        self._keys = []
-        UserDict.__init__( self, dict )
-
-    def __delitem__( self, key ):
-        UserDict.__delitem__( self, key )
-        self._keys.remove( key )
-
-    def __setitem__( self, key, item ):
-        UserDict.__setitem__( self, key, item )
-        if key not in self._keys:
-            self._keys.append( key )
-
-    def clear( self ):
-        UserDict.clear( self )
-        self._keys = []
-
-    def copy(self):
-        new = odict()
-        new.update( self )
-        return new
-
-    def items( self ):
-        return zip( self._keys, self.values() )
-
-    def keys( self ):
-        return self._keys[:]
-
-    def popitem( self ):
-        try:
-            key = self._keys[-1]
-        except IndexError:
-            raise KeyError( 'dictionary is empty' )
-        val = self[ key ]
-        del self[ key ]
-        return ( key, val )
-
-    def setdefault( self, key, failobj=None ):
-        if key not in self._keys:
-            self._keys.append( key )
-        return UserDict.setdefault( self, key, failobj )
-
-    def update( self, dict ):
-        for ( key, val ) in dict.items():
-            self.__setitem__( key, val )
-
-    def values( self ):
-        return map( self.get, self._keys )
-
-    def iterkeys( self ):
-        return iter( self._keys )
-
-    def itervalues( self ):
-        for key in self._keys:
-            yield self.get( key )
-
-    def iteritems( self ):
-        for key in self._keys:
-            yield key, self.get( key )
-
-    def __iter__( self ):
-        for key in self._keys:
-            yield key
-
-    def reverse( self ):
-        self._keys.reverse()
-
-    def insert( self, index, key, item ):
-        if key not in self._keys:
-            self._keys.insert( index, key )
-            UserDict.__setitem__( self, key, item )