Repository 'subtract'
hg clone https://toolshed.g2.bx.psu.edu/repos/devteam/subtract

Changeset 5:0145969324c4 (2017-06-22)
Previous changeset 4:7a2a604ae9c8 (2016-02-11) | Next changeset 6:0427ca314f3d (2022-06-13)
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit cae3e05d02e60f595bb8b6d77a84f030e9bd1689
modified:
gops_subtract.py
operation_filter.py
subtract.xml
utils/gff_util.py
added:
macros.xml
utils/__init__.pyc
utils/gff_util.pyc
utils/odict.pyc
removed:
tool_dependencies.xml
b
diff -r 7a2a604ae9c8 -r 0145969324c4 gops_subtract.py
--- a/gops_subtract.py Thu Feb 11 12:11:59 2016 -0500
+++ b/gops_subtract.py Thu Jun 22 18:53:03 2017 -0400
[
@@ -11,14 +11,18 @@
     -G, --gff1: input 1 is GFF format, meaning start and end coordinates are 1-based, closed interval
     -H, --gff2: input 2 is GFF format, meaning start and end coordinates are 1-based, closed interval
 """
+from __future__ import print_function
+
 import fileinput
 import sys
+
+from bx.cookbook import doc_optparse
 from bx.intervals.io import GenomicInterval, NiceReaderWrapper
 from bx.intervals.operations.subtract import subtract
-from bx.cookbook import doc_optparse
 from bx.tabular.io import ParseError
 from galaxy.tools.util.galaxyops import fail, parse_cols_arg, skipped
-from utils.gff_util import GFFFeature, GFFReaderWrapper, convert_bed_coords_to_gff
+
+from utils.gff_util import convert_bed_coords_to_gff, GFFFeature, GFFReaderWrapper
 
 assert sys.version_info[:2] >= ( 2, 4 )
 
@@ -81,16 +85,17 @@
                 out_file.write( "%s\n" % "\t".join( feature.fields ) )
             else:
                 out_file.write( "%s\n" % feature )
-    except ParseError, exc:
+    except ParseError as exc:
         out_file.close()
         fail( "Invalid file format: %s" % str( exc ) )
 
     out_file.close()
 
     if g1.skipped > 0:
-        print skipped( g1, filedesc=" of 2nd dataset" )
+        print(skipped( g1, filedesc=" of 2nd dataset" ))
     if g2.skipped > 0:
-        print skipped( g2, filedesc=" of 1st dataset" )
+        print(skipped( g2, filedesc=" of 1st dataset" ))
+
 
 if __name__ == "__main__":
     main()
b
diff -r 7a2a604ae9c8 -r 0145969324c4 macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Thu Jun 22 18:53:03 2017 -0400
b
@@ -0,0 +1,20 @@
+<?xml version="1.0"?>
+<macros>
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="0.7.1">bx-python</requirement>
+            <requirement type="package" version="1.0.0">galaxy-ops</requirement>
+        </requirements>
+    </xml>
+    <token name="@SCREENCASTS@">
+-----
+
+**Screencasts!**
+
+See Galaxy Interval Operation Screencasts_ (right click to open this link in another window).
+
+.. _Screencasts: https://galaxyproject.org/learn/interval-operations/
+
+-----
+    </token>
+</macros>
b
diff -r 7a2a604ae9c8 -r 0145969324c4 operation_filter.py
--- a/operation_filter.py Thu Feb 11 12:11:59 2016 -0500
+++ b/operation_filter.py Thu Jun 22 18:53:03 2017 -0400
b
@@ -1,8 +1,7 @@
 # runs after the job (and after the default post-filter)
+from galaxy.jobs.handler import JOB_ERROR
 from galaxy.tools.parameters import DataToolParameter
 
-from galaxy.jobs.handler import JOB_ERROR
-
 # Older py compatibility
 try:
     set()
@@ -14,7 +13,7 @@
     dbkeys = set()
     data_param_names = set()
     data_params = 0
-    for name, param in page_param_map.iteritems():
+    for name, param in page_param_map.items():
         if isinstance( param, DataToolParameter ):
             # for each dataset parameter
             if param_values.get(name, None) is not None:
@@ -53,7 +52,6 @@
         try:
             if stderr and len( stderr ) > 0:
                 raise Exception( stderr )
-
         except Exception:
             data.blurb = JOB_ERROR
             data.state = JOB_ERROR
b
diff -r 7a2a604ae9c8 -r 0145969324c4 subtract.xml
--- a/subtract.xml Thu Feb 11 12:11:59 2016 -0500
+++ b/subtract.xml Thu Jun 22 18:53:03 2017 -0400
[
@@ -1,110 +1,95 @@
 <tool id="gops_subtract_1" name="Subtract" version="1.0.0">
-  <description>the intervals of two datasets</description>
-  <requirements>
-    <requirement type="package" version="0.7.1">bx-python</requirement>
-    <requirement type="package" version="1.0.0">galaxy-ops</requirement>
-  </requirements>
-  <command interpreter="python">gops_subtract.py 
-      $input1 $input2 $output
-
-      #if isinstance( $input1.datatype, $__app__.datatypes_registry.get_datatype_by_extension('gff').__class__):
-        -1 1,4,5,7 --gff1
-      #else:
-        -1 ${input1.metadata.chromCol},${input1.metadata.startCol},${input1.metadata.endCol},${input1.metadata.strandCol}
-      #end if
+    <description>the intervals of two datasets</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <code file="operation_filter.py"/>
+    <command><![CDATA[
+python '$__tool_directory__/gops_subtract.py'
+'$input1'
+'$input2'
+'$output'
 
-      #if isinstance( $input2.datatype, $__app__.datatypes_registry.get_datatype_by_extension('gff').__class__):
-        -2 1,4,5,7 --gff2
-      #else:
-          -2 ${input2.metadata.chromCol},${input2.metadata.startCol},${input2.metadata.endCol},${input2.metadata.strandCol} 
-      #end if
+#if $input1.is_of_type('gff')
+    -1 1,4,5,7 --gff1
+#else:
+    -1 ${input1.metadata.chromCol},${input1.metadata.startCol},${input1.metadata.endCol},${input1.metadata.strandCol}
+#end if
 
-      -m $min $returntype
-  </command>
-  <inputs>
-    <param format="interval,gff" name="input2" type="data" help="Second dataset">
-      <label>Subtract</label>
-    </param>
-
-    <param format="interval,gff" name="input1" type="data" help="First dataset">
-      <label>from</label>
-    </param>
+#if $input2.is_of_type('gff')
+    -2 1,4,5,7 --gff2
+#else:
+    -2 ${input2.metadata.chromCol},${input2.metadata.startCol},${input2.metadata.endCol},${input2.metadata.strandCol}
+#end if
 
-    <param name="returntype" type="select" label="Return" help="of the first dataset (see figure below)">
-      <option value="">Intervals with no overlap</option>
-      <option value="-p">Non-overlapping pieces of intervals</option>
-    </param>
-    
-    <param name="min" type="integer" value="1" min="1" help="(bp)">
-      <label>where minimal overlap is</label>
-    </param>
-   </inputs>
-  <outputs>
-    <data format="input" name="output" metadata_source="input1"/>
-  </outputs>
-  <code file="operation_filter.py"/>
-  <trackster_conf/>
-  <tests>
-    <test>
-      <param name="input1" value="1.bed" />
-      <param name="input2" value="2.bed" />
-      <param name="min" value="1" />
-      <param name="returntype" value="" />
-      <output name="output" file="gops-subtract.dat" />
-    </test>
-    <test>
-      <param name="input1" value="1.bed" />
-      <param name="input2" value="2_mod.bed" ftype="interval"/>
-      <param name="min" value="1" />
-      <param name="returntype" value="" />
-      <output name="output" file="gops_subtract_diffCols.dat" />
-    </test>
-    <test>
-      <param name="input1" value="gops_subtract_bigint.bed" />
-      <param name="input2" value="2.bed" />
-      <param name="min" value="1" />
-      <param name="returntype" value="" />
-      <output name="output" file="gops-subtract.dat" />
-    </test>
-    <test>
-      <param name="input1" value="1.bed" />
-      <param name="input2" value="2.bed" />
-      <param name="min" value="10" />
-      <param name="returntype" value="Non-overlapping pieces of intervals" />
-      <output name="output" file="gops-subtract-p.dat" />     
-    </test>
-    <!-- Subtract two GFF files. -->
-    <test>
-        <param name="input1" value="gops_subtract_in1.gff" />
-        <param name="input2" value="gops_subtract_in2.gff" />
-        <param name="min" value="1" />
-        <param name="returntype" value="" />
-        <output name="output" file="gops_subtract_out1.gff" />
-    </test>
-    <!-- Subtract BED file from GFF file. -->
-    <test>
-        <param name="input1" value="gops_subtract_in1.gff" />
-        <param name="input2" value="gops_subtract_in2.bed" />
-        <param name="min" value="1" />
-        <param name="returntype" value="" />
-        <output name="output" file="gops_subtract_out1.gff" />
-    </test>
-  </tests>
-  <help>
-
+-m $min
+$returntype
+    ]]></command>
+    <inputs>
+        <param name="input2" type="data" format="interval,gff" label="Subtract" help="Second dataset" />
+        <param name="input1" type="data" format="interval,gff" label="from" help="First dataset" />
+        <param name="returntype" type="select" label="Return" help="of the first dataset (see figure below)">
+            <option value="">Intervals with no overlap</option>
+            <option value="-p">Non-overlapping pieces of intervals</option>
+        </param>
+        <param name="min" type="integer" value="1" min="1" label="where minimal overlap is" help="(bp)" />
+    </inputs>
+    <outputs>
+        <data name="output" format_source="input1" metadata_source="input1"/>
+    </outputs>
+    <tests>
+        <test>
+            <param name="input1" value="1.bed" />
+            <param name="input2" value="2.bed" />
+            <param name="min" value="1" />
+            <param name="returntype" value="" />
+            <output name="output" file="gops-subtract.dat" />
+        </test>
+        <test>
+            <param name="input1" value="1.bed" />
+            <param name="input2" value="2_mod.bed" ftype="interval"/>
+            <param name="min" value="1" />
+            <param name="returntype" value="" />
+            <output name="output" file="gops_subtract_diffCols.dat" />
+        </test>
+        <test>
+            <param name="input1" value="gops_subtract_bigint.bed" />
+            <param name="input2" value="2.bed" />
+            <param name="min" value="1" />
+            <param name="returntype" value="" />
+            <output name="output" file="gops-subtract.dat" />
+        </test>
+        <test>
+            <param name="input1" value="1.bed" />
+            <param name="input2" value="2.bed" />
+            <param name="min" value="10" />
+            <param name="returntype" value="Non-overlapping pieces of intervals" />
+            <output name="output" file="gops-subtract-p.dat" />
+        </test>
+        <!-- Subtract two GFF files. -->
+        <test>
+            <param name="input1" value="gops_subtract_in1.gff" />
+            <param name="input2" value="gops_subtract_in2.gff" />
+            <param name="min" value="1" />
+            <param name="returntype" value="" />
+            <output name="output" file="gops_subtract_out1.gff" />
+        </test>
+        <!-- Subtract BED file from GFF file. -->
+        <test>
+            <param name="input1" value="gops_subtract_in1.gff" />
+            <param name="input2" value="gops_subtract_in2.bed" />
+            <param name="min" value="1" />
+            <param name="returntype" value="" />
+            <output name="output" file="gops_subtract_out1.gff" />
+        </test>
+    </tests>
+    <help><![CDATA[
 .. class:: infomark
 
 **TIP:** If your dataset does not appear in the pulldown menu, it means that it is not in interval format. Use "edit attributes" to set chromosome, start, end, and strand columns.
 
------
-
-**Screencasts!**
-
-See Galaxy Interval Operation Screencasts_ (right click to open this link in another window).
-
-.. _Screencasts: http://wiki.g2.bx.psu.edu/Learn/Interval%20Operations
-
------
+@SCREENCASTS@
 
 **Syntax**
 
@@ -123,6 +108,5 @@
 Non-overlapping pieces of intervals:
 
 .. image:: gops_subtractOverlappingPieces.gif
-
-</help>
+    ]]></help>
 </tool>
b
diff -r 7a2a604ae9c8 -r 0145969324c4 tool_dependencies.xml
--- a/tool_dependencies.xml Thu Feb 11 12:11:59 2016 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,9 +0,0 @@
-<?xml version="1.0"?>
-<tool_dependency>
-  <package name="bx-python" version="0.7.1">
-      <repository changeset_revision="2d0c08728bca" name="package_bx_python_0_7" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" />
-    </package>
-    <package name="galaxy-ops" version="1.0.0">
-      <repository changeset_revision="eef263ff9b95" name="package_galaxy_ops_1_0_0" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" />
-    </package>
-</tool_dependency>
b
diff -r 7a2a604ae9c8 -r 0145969324c4 utils/__init__.pyc
b
Binary file utils/__init__.pyc has changed
b
diff -r 7a2a604ae9c8 -r 0145969324c4 utils/gff_util.py
--- a/utils/gff_util.py Thu Feb 11 12:11:59 2016 -0500
+++ b/utils/gff_util.py Thu Jun 22 18:53:03 2017 -0400
[
@@ -1,11 +1,12 @@
 """
 Provides utilities for working with GFF files.
 """
+import copy
 
-import copy
 from bx.intervals.io import GenomicInterval, GenomicIntervalReader, MissingFieldError, NiceReaderWrapper
-from bx.tabular.io import Header, Comment, ParseError
-from utils.odict import odict
+from bx.tabular.io import Comment, Header, ParseError
+
+from .odict import odict
 
 
 class GFFInterval( GenomicInterval ):
@@ -144,7 +145,7 @@
                                 self.default_strand, fix_strand=self.fix_strand )
         return interval
 
-    def next( self ):
+    def __next__( self ):
         """ Returns next GFFFeature. """
 
         #
@@ -177,10 +178,10 @@
             while not self.seed_interval:
                 try:
                     self.seed_interval = GenomicIntervalReader.next( self )
-                except ParseError, e:
+                except ParseError as e:
                     handle_parse_error( e )
                 # TODO: When no longer supporting python 2.4 use finally:
-                #finally:
+                # finally:
                 raw_size += len( self.current_line )
 
         # If header or comment, clear seed interval and return it with its size.
@@ -205,19 +206,19 @@
             try:
                 interval = GenomicIntervalReader.next( self )
                 raw_size += len( self.current_line )
-            except StopIteration, e:
+            except StopIteration as e:
                 # No more intervals to read, but last feature needs to be
                 # returned.
                 interval = None
                 raw_size += len( self.current_line )
                 break
-            except ParseError, e:
+            except ParseError as e:
                 handle_parse_error( e )
                 raw_size += len( self.current_line )
                 continue
             # TODO: When no longer supporting python 2.4 use finally:
-            #finally:
-            #raw_size += len( self.current_line )
+            # finally:
+            # raw_size += len( self.current_line )
 
             # Ignore comments.
             if isinstance( interval, Comment ):
@@ -263,6 +264,7 @@
             convert_gff_coords_to_bed( feature )
 
         return feature
+    next = __next__  # This line should be removed once the bx-python port to Python3 is finished
 
 
 def convert_bed_coords_to_gff( interval ):
@@ -374,7 +376,9 @@
 
     # -- Get function that generates line/feature key. --
 
-    get_transcript_id = lambda fields: parse_gff_attributes( fields[8] )[ 'transcript_id' ]
+    def get_transcript_id(fields):
+        return parse_gff_attributes( fields[8] )[ 'transcript_id' ]
+
     if strict:
         # Strict GTF parsing uses transcript_id only to group lines into feature.
         key_fn = get_transcript_id
@@ -382,7 +386,8 @@
         # Use lenient parsing where chromosome + transcript_id is the key. This allows
         # transcripts with same ID on different chromosomes; this occurs in some popular
         # datasources, such as RefGenes in UCSC.
-        key_fn = lambda fields: fields[0] + '_' + get_transcript_id( fields )
+        def key_fn(fields):
+            return fields[0] + '_' + get_transcript_id( fields )
 
     # Aggregate intervals by transcript_id and collect comments.
     feature_intervals = odict()
b
diff -r 7a2a604ae9c8 -r 0145969324c4 utils/gff_util.pyc
b
Binary file utils/gff_util.pyc has changed
b
diff -r 7a2a604ae9c8 -r 0145969324c4 utils/odict.pyc
b
Binary file utils/odict.pyc has changed