Repository 'subtract'
hg clone https://toolshed.g2.bx.psu.edu/repos/devteam/subtract

Changeset 4:7a2a604ae9c8 (2016-02-11)
Previous changeset 3:ecb36112b056 (2015-11-11) Next changeset 5:0145969324c4 (2017-06-22)
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
modified:
tool_dependencies.xml
added:
utils/__init__.py
utils/gff_util.py
utils/odict.py
b
diff -r ecb36112b056 -r 7a2a604ae9c8 tool_dependencies.xml
--- a/tool_dependencies.xml Wed Nov 11 12:49:24 2015 -0500
+++ b/tool_dependencies.xml Thu Feb 11 12:11:59 2016 -0500
b
@@ -4,6 +4,6 @@
       <repository changeset_revision="2d0c08728bca" name="package_bx_python_0_7" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" />
     </package>
     <package name="galaxy-ops" version="1.0.0">
-      <repository changeset_revision="9cbb20b85c01" name="package_galaxy_ops_1_0_0" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" />
+      <repository changeset_revision="eef263ff9b95" name="package_galaxy_ops_1_0_0" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" />
     </package>
 </tool_dependency>
b
diff -r ecb36112b056 -r 7a2a604ae9c8 utils/gff_util.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/utils/gff_util.py Thu Feb 11 12:11:59 2016 -0500
[
b'@@ -0,0 +1,430 @@\n+"""\n+Provides utilities for working with GFF files.\n+"""\n+\n+import copy\n+from bx.intervals.io import GenomicInterval, GenomicIntervalReader, MissingFieldError, NiceReaderWrapper\n+from bx.tabular.io import Header, Comment, ParseError\n+from utils.odict import odict\n+\n+\n+class GFFInterval( GenomicInterval ):\n+    """\n+    A GFF interval, including attributes. If file is strictly a GFF file,\n+    only attribute is \'group.\'\n+    """\n+    def __init__( self, reader, fields, chrom_col=0, feature_col=2, start_col=3, end_col=4,\n+                  strand_col=6, score_col=5, default_strand=\'.\', fix_strand=False ):\n+        # HACK: GFF format allows \'.\' for strand but GenomicInterval does not. To get around this,\n+        # temporarily set strand and then unset after initing GenomicInterval.\n+        unknown_strand = False\n+        if not fix_strand and fields[ strand_col ] == \'.\':\n+            unknown_strand = True\n+            fields[ strand_col ] = \'+\'\n+        GenomicInterval.__init__( self, reader, fields, chrom_col, start_col, end_col, strand_col,\n+                                  default_strand, fix_strand=fix_strand )\n+        if unknown_strand:\n+            self.strand = \'.\'\n+            self.fields[ strand_col ] = \'.\'\n+\n+        # Handle feature, score column.\n+        self.feature_col = feature_col\n+        if self.feature_col >= self.nfields:\n+            raise MissingFieldError( "No field for feature_col (%d)" % feature_col )\n+        self.feature = self.fields[ self.feature_col ]\n+        self.score_col = score_col\n+        if self.score_col >= self.nfields:\n+            raise MissingFieldError( "No field for score_col (%d)" % score_col )\n+        self.score = self.fields[ self.score_col ]\n+\n+        # GFF attributes.\n+        self.attributes = parse_gff_attributes( fields[8] )\n+\n+    def copy( self ):\n+        return GFFInterval(self.reader, list( self.fields ), self.chrom_col, 
self.feature_col, self.start_col,\n+                           self.end_col, self.strand_col, self.score_col, self.strand)\n+\n+\n+class GFFFeature( GFFInterval ):\n+    """\n+    A GFF feature, which can include multiple intervals.\n+    """\n+    def __init__( self, reader, chrom_col=0, feature_col=2, start_col=3, end_col=4,\n+                  strand_col=6, score_col=5, default_strand=\'.\', fix_strand=False, intervals=[],\n+                  raw_size=0 ):\n+        # Use copy so that first interval and feature do not share fields.\n+        GFFInterval.__init__( self, reader, copy.deepcopy( intervals[0].fields ), chrom_col, feature_col,\n+                              start_col, end_col, strand_col, score_col, default_strand,\n+                              fix_strand=fix_strand )\n+        self.intervals = intervals\n+        self.raw_size = raw_size\n+        # Use intervals to set feature attributes.\n+        for interval in self.intervals:\n+            # Error checking. NOTE: intervals need not share the same strand.\n+            if interval.chrom != self.chrom:\n+                raise ValueError( "interval chrom does not match self chrom: %s != %s" %\n+                                  ( interval.chrom, self.chrom ) )\n+            # Set start, end of interval.\n+            if interval.start < self.start:\n+                self.start = interval.start\n+            if interval.end > self.end:\n+                self.end = interval.end\n+\n+    def name( self ):\n+        """ Returns feature\'s name. 
"""\n+        name = None\n+        # Preference for name: GTF, GFF3, GFF.\n+        for attr_name in [\'gene_id\', \'transcript_id\',  # GTF\n+                          \'ID\', \'id\',  # GFF3\n+                          \'group\' ]:  # GFF (TODO)\n+            name = self.attributes.get( attr_name, None )\n+            if name is not None:\n+                break\n+        return name\n+\n+    def copy( self ):\n+        intervals_copy = []\n+        for interval in self.intervals:\n+            intervals_copy.append( interval.copy() )\n+        return GFFFeature(self.reader, se'..b'f pair == \'\':\n+            continue\n+        name = pair[0].strip()\n+        if name == \'\':\n+            continue\n+        # Need to strip double quote from values\n+        value = pair[1].strip(" \\"")\n+        attributes[ name ] = value\n+\n+    if len( attributes ) == 0:\n+        # Could not split attributes string, so entire string must be\n+        # \'group\' attribute. This is the case for strictly GFF files.\n+        attributes[\'group\'] = attr_str\n+    return attributes\n+\n+\n+def gff_attributes_to_str( attrs, gff_format ):\n+    """\n+    Convert GFF attributes to string. 
Supported formats are GFF3, GTF.\n+    """\n+    if gff_format == \'GTF\':\n+        format_string = \'%s "%s"\'\n+        # Convert group (GFF) and ID, parent (GFF3) attributes to transcript_id, gene_id\n+        id_attr = None\n+        if \'group\' in attrs:\n+            id_attr = \'group\'\n+        elif \'ID\' in attrs:\n+            id_attr = \'ID\'\n+        elif \'Parent\' in attrs:\n+            id_attr = \'Parent\'\n+        if id_attr:\n+            attrs[\'transcript_id\'] = attrs[\'gene_id\'] = attrs[id_attr]\n+    elif gff_format == \'GFF3\':\n+        format_string = \'%s=%s\'\n+    attrs_strs = []\n+    for name, value in attrs.items():\n+        attrs_strs.append( format_string % ( name, value ) )\n+    return " ; ".join( attrs_strs )\n+\n+\n+def read_unordered_gtf( iterator, strict=False ):\n+    """\n+    Returns GTF features found in an iterator. GTF lines need not be ordered\n+    or clustered for reader to work. Reader returns GFFFeature objects sorted\n+    by transcript_id, chrom, and start position.\n+    """\n+\n+    # -- Get function that generates line/feature key. --\n+\n+    get_transcript_id = lambda fields: parse_gff_attributes( fields[8] )[ \'transcript_id\' ]\n+    if strict:\n+        # Strict GTF parsing uses transcript_id only to group lines into feature.\n+        key_fn = get_transcript_id\n+    else:\n+        # Use lenient parsing where chromosome + transcript_id is the key. 
This allows\n+        # transcripts with same ID on different chromosomes; this occurs in some popular\n+        # datasources, such as RefGenes in UCSC.\n+        key_fn = lambda fields: fields[0] + \'_\' + get_transcript_id( fields )\n+\n+    # Aggregate intervals by transcript_id and collect comments.\n+    feature_intervals = odict()\n+    comments = []\n+    for count, line in enumerate( iterator ):\n+        if line.startswith( \'#\' ):\n+            comments.append( Comment( line ) )\n+            continue\n+\n+        line_key = key_fn( line.split(\'\\t\') )\n+        if line_key in feature_intervals:\n+            feature = feature_intervals[ line_key ]\n+        else:\n+            feature = []\n+            feature_intervals[ line_key ] = feature\n+        feature.append( GFFInterval( None, line.split( \'\\t\' ) ) )\n+\n+    # Create features.\n+    chroms_features = {}\n+    for count, intervals in enumerate( feature_intervals.values() ):\n+        # Sort intervals by start position.\n+        intervals.sort( lambda a, b: cmp( a.start, b.start ) )\n+        feature = GFFFeature( None, intervals=intervals )\n+        if feature.chrom not in chroms_features:\n+            chroms_features[ feature.chrom ] = []\n+        chroms_features[ feature.chrom ].append( feature )\n+\n+    # Sort features by chrom, start position.\n+    chroms_features_sorted = []\n+    for chrom_features in chroms_features.values():\n+        chroms_features_sorted.append( chrom_features )\n+    chroms_features_sorted.sort( lambda a, b: cmp( a[0].chrom, b[0].chrom ) )\n+    for features in chroms_features_sorted:\n+        features.sort( lambda a, b: cmp( a.start, b.start ) )\n+\n+    # Yield comments first, then features.\n+    # FIXME: comments can appear anywhere in file, not just the beginning.\n+    # Ideally, then comments would be associated with features and output\n+    # just before feature/line.\n+    for comment in comments:\n+        yield comment\n+\n+    for 
chrom_features in chroms_features_sorted:\n+        for feature in chrom_features:\n+            yield feature\n'
b
diff -r ecb36112b056 -r 7a2a604ae9c8 utils/odict.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/utils/odict.py Thu Feb 11 12:11:59 2016 -0500
[
@@ -0,0 +1,86 @@
+"""
+Ordered dictionary implementation.
+"""
+
+from UserDict import UserDict
+
+
class odict(UserDict):
    """
    Dictionary that remembers the order in which keys were first added.

    Based on http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/107747

    Extends UserDict with a parallel list, ``_keys``, that holds every key in
    insertion order. keys(), values(), items(), their iter* variants and plain
    iteration all follow that order. Assigning to an existing key replaces its
    value but does not move it.
    """
    def __init__( self, dict=None ):
        """ Create an odict, optionally filled from the mapping `dict`. """
        # _keys must exist before UserDict.__init__ runs: that call loads the
        # initial mapping through update(), which ends up in __setitem__().
        self._keys = []
        UserDict.__init__( self, dict )

    def __delitem__( self, key ):
        """ Remove `key`; raises KeyError if it is absent. """
        # Delete from the underlying mapping first so that a missing key
        # raises KeyError before the order list is touched.
        UserDict.__delitem__( self, key )
        self._keys.remove( key )

    def __setitem__( self, key, item ):
        """ Store `item` under `key`, appending `key` to the order if new. """
        UserDict.__setitem__( self, key, item )
        if key not in self._keys:
            self._keys.append( key )

    def __iter__( self ):
        """ Iterate over keys in insertion order. """
        return iter( self._keys )

    def clear( self ):
        """ Remove every item. """
        UserDict.clear( self )
        self._keys = []

    def copy( self ):
        """ Return a new odict with the same items in the same order. """
        duplicate = odict()
        duplicate.update( self )
        return duplicate

    def keys( self ):
        """ Return a new list of keys in insertion order. """
        return list( self._keys )

    def values( self ):
        """ Return a list of values, ordered like keys(). """
        return [ self.get( key ) for key in self._keys ]

    def items( self ):
        """ Return a list of ( key, value ) tuples, ordered like keys(). """
        return [ ( key, self.get( key ) ) for key in self._keys ]

    def iterkeys( self ):
        """ Iterate over keys in insertion order. """
        return iter( self._keys )

    def itervalues( self ):
        """ Iterate over values in insertion order. """
        for key in self._keys:
            yield self.get( key )

    def iteritems( self ):
        """ Iterate over ( key, value ) pairs in insertion order. """
        for key in self._keys:
            yield key, self.get( key )

    def pop( self, key, *default ):
        """
        Remove `key` and return its value.

        If `key` is absent, return the single optional `default` argument
        when one was given; otherwise raise KeyError.
        """
        if len( default ) > 1:
            raise TypeError( 'pop expected at most 2 arguments, got %d' % ( len( default ) + 1 ) )
        if key in self.data:
            value = self.data[ key ]
            # Go through __delitem__ so the order list is updated too; the
            # Python 2 UserDict.pop() bypasses it and would leave a stale key.
            del self[ key ]
            return value
        if default:
            return default[0]
        raise KeyError( key )

    def popitem( self ):
        """
        Remove and return the most recently added ( key, value ) pair.

        Raises KeyError if the dictionary is empty.
        """
        if not self._keys:
            raise KeyError( 'dictionary is empty' )
        key = self._keys[-1]
        val = self[ key ]
        del self[ key ]
        return ( key, val )

    def setdefault( self, key, failobj=None ):
        """ Return the value for `key`, first storing `failobj` if absent. """
        if key not in self.data:
            # __setitem__ records the key's position.
            self[ key ] = failobj
        return self.data[ key ]

    def update( self, dict=None, **kwargs ):
        """
        Add items from `dict` (a mapping or an iterable of key/value pairs)
        and then from any keyword arguments. New keys are appended in the
        order they are encountered; existing keys keep their position.
        """
        if dict is not None:
            if hasattr( dict, 'items' ):
                pairs = dict.items()
            else:
                pairs = dict
            for key, val in pairs:
                self[ key ] = val
        for key, val in kwargs.items():
            self[ key ] = val

    def reverse( self ):
        """ Reverse the key order in place. """
        self._keys.reverse()

    def insert( self, index, key, item ):
        """
        Add `key` with value `item` at position `index` in the key order.

        If `key` is already present the call is a no-op: neither its value
        nor its position changes.
        """
        if key not in self._keys:
            self._keys.insert( index, key )
            UserDict.__setitem__( self, key, item )