changeset 5:33b3f3688db4 draft

planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit cae3e05d02e60f595bb8b6d77a84f030e9bd1689
author devteam
date Thu, 22 Jun 2017 18:52:23 -0400
parents 8ddabc73af92
children 69c10b56f46d
files gops_intersect.py intersect.xml macros.xml operation_filter.py tool_dependencies.xml utils/__init__.pyc utils/gff_util.py utils/gff_util.pyc utils/odict.pyc
diffstat 9 files changed, 181 insertions(+), 177 deletions(-) [+]
line wrap: on
line diff
--- a/gops_intersect.py	Thu Feb 11 12:11:25 2016 -0500
+++ b/gops_intersect.py	Thu Jun 22 18:52:23 2017 -0400
@@ -11,14 +11,18 @@
     -G, --gff1: input 1 is GFF format, meaning start and end coordinates are 1-based, closed interval
     -H, --gff2: input 2 is GFF format, meaning start and end coordinates are 1-based, closed interval
 """
+from __future__ import print_function
+
 import fileinput
 import sys
+
+from bx.cookbook import doc_optparse
 from bx.intervals.io import GenomicInterval, NiceReaderWrapper
 from bx.intervals.operations.intersect import intersect
-from bx.cookbook import doc_optparse
 from bx.tabular.io import ParseError
 from galaxy.tools.util.galaxyops import fail, parse_cols_arg, skipped
-from utils.gff_util import GFFFeature, GFFReaderWrapper, convert_bed_coords_to_gff
+
+from utils.gff_util import convert_bed_coords_to_gff, GFFFeature, GFFReaderWrapper
 
 assert sys.version_info[:2] >= ( 2, 4 )
 
@@ -80,16 +84,17 @@
                 out_file.write( "%s\n" % "\t".join( feature.fields ) )
             else:
                 out_file.write( "%s\n" % feature )
-    except ParseError, e:
+    except ParseError as e:
         out_file.close()
         fail( "Invalid file format: %s" % str( e ) )
 
     out_file.close()
 
     if g1.skipped > 0:
-        print skipped( g1, filedesc=" of 1st dataset" )
+        print(skipped( g1, filedesc=" of 1st dataset" ))
     if g2.skipped > 0:
-        print skipped( g2, filedesc=" of 2nd dataset" )
+        print(skipped( g2, filedesc=" of 2nd dataset" ))
+
 
 if __name__ == "__main__":
     main()
--- a/intersect.xml	Thu Feb 11 12:11:25 2016 -0500
+++ b/intersect.xml	Thu Jun 22 18:52:23 2017 -0400
@@ -1,147 +1,132 @@
-<tool id="gops_intersect_1" name="Intersect" version="1.0.0">
-  <description>the intervals of two datasets</description>
-  <requirements>
-    <requirement type="package" version="0.7.1">bx-python</requirement>
-    <requirement type="package" version="1.0.0">galaxy-ops</requirement>
-  </requirements>
-  <command interpreter="python">gops_intersect.py 
-      $input1 $input2 $output
-
-      #if isinstance( $input1.datatype, $__app__.datatypes_registry.get_datatype_by_extension('gff').__class__):
-        -1 1,4,5,7 --gff1
-      #else:
-        -1 ${input1.metadata.chromCol},${input1.metadata.startCol},${input1.metadata.endCol},${input1.metadata.strandCol}
-      #end if
-
-      #if isinstance( $input2.datatype, $__app__.datatypes_registry.get_datatype_by_extension('gff').__class__):
-        -2 1,4,5,7 --gff2
-      #else:
-          -2 ${input2.metadata.chromCol},${input2.metadata.startCol},${input2.metadata.endCol},${input2.metadata.strandCol} 
-      #end if
-
-      -m $min $returntype
-  </command>
-  <inputs>
-      <param name="returntype" type="select" label="Return" help="(see figure below)">
-          <option value="">Overlapping Intervals</option>
-          <option value="-p">Overlapping pieces of Intervals</option>
-      </param>
-      <param format="interval,gff" name="input1" type="data" help="First dataset">
-          <label>of</label>
-      </param>
-      <param format="interval,gff" name="input2" type="data" help="Second dataset">
-          <label>that intersect</label>
-      </param>
-      <param name="min" type="integer" value="1" min="1" help="(bp)">
-          <label>for at least</label>
-      </param>
-  </inputs>
-  <outputs>
-      <data format="input" name="output" metadata_source="input1"/>
-  </outputs>
-  <code file="operation_filter.py"/>
-  <trackster_conf/>
-  <tests>
-    <test>
-      <param name="input1" value="1.bed" />
-      <param name="input2" value="2.bed" />
-      <param name="min" value="1" />
-      <param name="returntype" value="" />
-      <output name="output" file="gops_intersect_out.bed" />
-    </test>
-    <test>
-      <param name="input1" value="1.bed" />
-      <param name="input2" value="2_mod.bed" ftype="interval"/>
-      <param name="min" value="1" />
-      <param name="returntype" value="" />
-      <output name="output" file="gops_intersect_diffCols.bed" />
-    </test>
-    <test>
-      <param name="input1" value="1.bed" />
-      <param name="input2" value="2_mod.bed" ftype="interval"/>
-      <param name="min" value="1" />
-      <param name="returntype" value="Overlapping pieces of Intervals" />
-      <output name="output" file="gops_intersect_p_diffCols.bed" />
-    </test>
-    <test>
-      <param name="input1" value="1.bed" />
-      <param name="input2" value="2.bed" />
-      <param name="min" value="10" />
-      <param name="returntype" value="Overlapping pieces of Intervals" />
-      <output name="output" file="gops_intersect_p_out.bed" />     
-    </test>
-    <test>
-      <param name="input1" value="gops_bigint.interval" ftype="interval" />
-      <param name="input2" value="gops_bigint2.interval" ftype="interval" />
-      <param name="min" value="1" />
-      <param name="returntype" value="" />
-      <output name="output" file="gops_intersect_bigint_out.interval" />     
-    </test>
-    <test>
-      <param name="input1" value="gops_bigint2.interval" ftype="interval" />
-      <param name="input2" value="gops_bigint.interval" ftype="interval" />
-      <param name="min" value="1" />
-      <param name="returntype" value="" />
-      <output name="output" file="gops_intersect_bigint_out.interval" />     
-    </test>
-    <test>
-      <param name="input1" value="12.bed" ftype="bed" />
-      <param name="input2" value="1.bed" ftype="bed" />
-      <param name="min" value="1" />
-      <param name="returntype" value="" />
-      <output name="output" file="gops_intersect_no_strand_out.bed" />     
-    </test>
-    <!-- Intersect two GFF files. -->
-    <test>
-        <param name="input1" value="gops_subtract_in1.gff" />
-        <param name="input2" value="gops_subtract_in2.gff" />
-        <param name="min" value="1" />
-        <param name="returntype" value="" />
-        <output name="output" file="gops_intersect_out2.gff" />        
-    </test>
-    <!-- Intersect GFF file and bed file. -->
-    <test>
-        <param name="input1" value="gops_subtract_in1.gff" />
-        <param name="input2" value="gops_subtract_in2.bed" />
-        <param name="min" value="1" />
-        <param name="returntype" value="" />
-        <output name="output" file="gops_intersect_out2.gff" />        
-    </test>
-    
-  </tests>
-  <help>
-
-.. class:: infomark
-
-**TIP:** If your dataset does not appear in the pulldown menu, it means that it is not in interval format. Use "edit attributes" to set chromosome, start, end, and strand columns.
-
------
-
-**Screencasts!**
-
-See Galaxy Interval Operation Screencasts_ (right click to open this link in another window).
-
-.. _Screencasts: http://wiki.g2.bx.psu.edu/Learn/Interval%20Operations
-
------
-
-**Syntax**
-
-- **Where overlap is at least** sets the minimum length (in base pairs) of overlap between elements of the two datasets
-- **Overlapping Intervals** returns entire intervals from the first dataset  that overlap the second dataset.  The returned intervals are completely unchanged, and this option only filters out intervals that do not overlap with the second dataset.
-- **Overlapping pieces of Intervals** returns intervals that indicate the exact base pair overlap between the first dataset and the second dataset.  The intervals returned are from the first dataset, and all fields besides start and end are guaranteed to remain unchanged.
-
------
-
-**Examples**
-
-Overlapping Intervals:
-
-.. image:: gops_intersectOverlappingIntervals.gif
-
-Overlapping Pieces of Intervals:
-
-.. image:: gops_intersectOverlappingPieces.gif
-
-</help>
-</tool>
+<tool id="gops_intersect_1" name="Intersect" version="1.0.0">
+    <description>the intervals of two datasets</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <code file="operation_filter.py"/>
+    <command><![CDATA[
+python '$__tool_directory__/gops_intersect.py'
+'$input1'
+'$input2'
+'$output'
+
+#if $input1.is_of_type('gff')
+    -1 1,4,5,7 --gff1
+#else:
+    -1 ${input1.metadata.chromCol},${input1.metadata.startCol},${input1.metadata.endCol},${input1.metadata.strandCol}
+#end if
+
+#if $input2.is_of_type('gff')
+    -2 1,4,5,7 --gff2
+#else:
+    -2 ${input2.metadata.chromCol},${input2.metadata.startCol},${input2.metadata.endCol},${input2.metadata.strandCol}
+#end if
+
+-m $min $returntype
+    ]]></command>
+    <inputs>
+        <param name="returntype" type="select" label="Return" help="(see figure below)">
+            <option value="">Overlapping Intervals</option>
+            <option value="-p">Overlapping pieces of Intervals</option>
+        </param>
+        <param name="input1" type="data" format="interval,gff" label="of" help="First dataset" />
+        <param name="input2" type="data" format="interval,gff" label="that intersect" help="Second dataset" />
+        <param name="min" type="integer" value="1" min="1" label="for at least" help="(bp)" />
+    </inputs>
+    <outputs>
+        <data name="output" format_source="input1" metadata_source="input1"/>
+    </outputs>
+    <tests>
+        <test>
+            <param name="input1" value="1.bed" />
+            <param name="input2" value="2.bed" />
+            <param name="min" value="1" />
+            <param name="returntype" value="" />
+            <output name="output" file="gops_intersect_out.bed" />
+        </test>
+        <test>
+            <param name="input1" value="1.bed" />
+            <param name="input2" value="2_mod.bed" ftype="interval"/>
+            <param name="min" value="1" />
+            <param name="returntype" value="" />
+            <output name="output" file="gops_intersect_diffCols.bed" />
+        </test>
+        <test>
+            <param name="input1" value="1.bed" />
+            <param name="input2" value="2_mod.bed" ftype="interval"/>
+            <param name="min" value="1" />
+            <param name="returntype" value="Overlapping pieces of Intervals" />
+            <output name="output" file="gops_intersect_p_diffCols.bed" />
+        </test>
+        <test>
+            <param name="input1" value="1.bed" />
+            <param name="input2" value="2.bed" />
+            <param name="min" value="10" />
+            <param name="returntype" value="Overlapping pieces of Intervals" />
+            <output name="output" file="gops_intersect_p_out.bed" />
+        </test>
+        <test>
+            <param name="input1" value="gops_bigint.interval" ftype="interval" />
+            <param name="input2" value="gops_bigint2.interval" ftype="interval" />
+            <param name="min" value="1" />
+            <param name="returntype" value="" />
+            <output name="output" file="gops_intersect_bigint_out.interval" />
+        </test>
+        <test>
+            <param name="input1" value="gops_bigint2.interval" ftype="interval" />
+            <param name="input2" value="gops_bigint.interval" ftype="interval" />
+            <param name="min" value="1" />
+            <param name="returntype" value="" />
+            <output name="output" file="gops_intersect_bigint_out.interval" />
+        </test>
+        <test>
+            <param name="input1" value="12.bed" ftype="bed" />
+            <param name="input2" value="1.bed" ftype="bed" />
+            <param name="min" value="1" />
+            <param name="returntype" value="" />
+            <output name="output" file="gops_intersect_no_strand_out.bed" />
+        </test>
+        <!-- Intersect two GFF files. -->
+        <test>
+            <param name="input1" value="gops_subtract_in1.gff" />
+            <param name="input2" value="gops_subtract_in2.gff" />
+            <param name="min" value="1" />
+            <param name="returntype" value="" />
+            <output name="output" file="gops_intersect_out2.gff" />
+        </test>
+        <!-- Intersect GFF file and bed file. -->
+        <test>
+            <param name="input1" value="gops_subtract_in1.gff" />
+            <param name="input2" value="gops_subtract_in2.bed" />
+            <param name="min" value="1" />
+            <param name="returntype" value="" />
+            <output name="output" file="gops_intersect_out2.gff" />
+        </test>
+    </tests>
+    <help><![CDATA[
+.. class:: infomark
+
+**TIP:** If your dataset does not appear in the pulldown menu, it means that it is not in interval or GFF format. For interval data, use "edit attributes" to set chromosome, start, end, and strand columns.
+
+@SCREENCASTS@
+
+**Syntax**
+
+- **for at least** sets the minimum length (in base pairs) of overlap between elements of the two datasets
+- **Overlapping Intervals** returns entire intervals from the first dataset that overlap the second dataset.  The returned intervals are completely unchanged, and this option only filters out intervals that do not overlap with the second dataset.
+- **Overlapping pieces of Intervals** returns intervals that indicate the exact base pair overlap between the first dataset and the second dataset.  The intervals returned are from the first dataset, and all fields besides start and end are guaranteed to remain unchanged.
+
+-----
+
+**Examples**
+
+Overlapping Intervals:
+
+.. image:: gops_intersectOverlappingIntervals.gif
+
+Overlapping Pieces of Intervals:
+
+.. image:: gops_intersectOverlappingPieces.gif
+    ]]></help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml	Thu Jun 22 18:52:23 2017 -0400
@@ -0,0 +1,20 @@
+<?xml version="1.0"?>
+<macros>
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="0.7.1">bx-python</requirement>
+            <requirement type="package" version="1.0.0">galaxy-ops</requirement>
+        </requirements>
+    </xml>
+    <token name="@SCREENCASTS@">
+-----
+
+**Screencasts!**
+
+See Galaxy Interval Operation Screencasts_ (right click to open this link in another window).
+
+.. _Screencasts: https://galaxyproject.org/learn/interval-operations/
+
+-----
+    </token>
+</macros>
--- a/operation_filter.py	Thu Feb 11 12:11:25 2016 -0500
+++ b/operation_filter.py	Thu Jun 22 18:52:23 2017 -0400
@@ -1,8 +1,7 @@
 # runs after the job (and after the default post-filter)
+from galaxy.jobs.handler import JOB_ERROR
 from galaxy.tools.parameters import DataToolParameter
 
-from galaxy.jobs.handler import JOB_ERROR
-
 # Older py compatibility
 try:
     set()
@@ -14,7 +13,7 @@
     dbkeys = set()
     data_param_names = set()
     data_params = 0
-    for name, param in page_param_map.iteritems():
+    for name, param in page_param_map.items():
         if isinstance( param, DataToolParameter ):
             # for each dataset parameter
             if param_values.get(name, None) is not None:
@@ -53,7 +52,6 @@
         try:
             if stderr and len( stderr ) > 0:
                 raise Exception( stderr )
-
         except Exception:
             data.blurb = JOB_ERROR
             data.state = JOB_ERROR
--- a/tool_dependencies.xml	Thu Feb 11 12:11:25 2016 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,9 +0,0 @@
-<?xml version="1.0"?>
-<tool_dependency>
-  <package name="bx-python" version="0.7.1">
-      <repository changeset_revision="2d0c08728bca" name="package_bx_python_0_7" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" />
-    </package>
-    <package name="galaxy-ops" version="1.0.0">
-      <repository changeset_revision="eef263ff9b95" name="package_galaxy_ops_1_0_0" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" />
-    </package>
-</tool_dependency>
Binary file utils/__init__.pyc has changed
--- a/utils/gff_util.py	Thu Feb 11 12:11:25 2016 -0500
+++ b/utils/gff_util.py	Thu Jun 22 18:52:23 2017 -0400
@@ -1,11 +1,12 @@
 """
 Provides utilities for working with GFF files.
 """
+import copy
 
-import copy
 from bx.intervals.io import GenomicInterval, GenomicIntervalReader, MissingFieldError, NiceReaderWrapper
-from bx.tabular.io import Header, Comment, ParseError
-from utils.odict import odict
+from bx.tabular.io import Comment, Header, ParseError
+
+from .odict import odict
 
 
 class GFFInterval( GenomicInterval ):
@@ -144,7 +145,7 @@
                                 self.default_strand, fix_strand=self.fix_strand )
         return interval
 
-    def next( self ):
+    def __next__( self ):
         """ Returns next GFFFeature. """
 
         #
@@ -177,10 +178,10 @@
             while not self.seed_interval:
                 try:
                     self.seed_interval = GenomicIntervalReader.next( self )
-                except ParseError, e:
+                except ParseError as e:
                     handle_parse_error( e )
                 # TODO: When no longer supporting python 2.4 use finally:
-                #finally:
+                # finally:
                 raw_size += len( self.current_line )
 
         # If header or comment, clear seed interval and return it with its size.
@@ -205,19 +206,19 @@
             try:
                 interval = GenomicIntervalReader.next( self )
                 raw_size += len( self.current_line )
-            except StopIteration, e:
+            except StopIteration as e:
                 # No more intervals to read, but last feature needs to be
                 # returned.
                 interval = None
                 raw_size += len( self.current_line )
                 break
-            except ParseError, e:
+            except ParseError as e:
                 handle_parse_error( e )
                 raw_size += len( self.current_line )
                 continue
             # TODO: When no longer supporting python 2.4 use finally:
-            #finally:
-            #raw_size += len( self.current_line )
+            # finally:
+            # raw_size += len( self.current_line )
 
             # Ignore comments.
             if isinstance( interval, Comment ):
@@ -263,6 +264,7 @@
             convert_gff_coords_to_bed( feature )
 
         return feature
+    next = __next__  # This line should be removed once the bx-python port to Python3 is finished
 
 
 def convert_bed_coords_to_gff( interval ):
@@ -374,7 +376,9 @@
 
     # -- Get function that generates line/feature key. --
 
-    get_transcript_id = lambda fields: parse_gff_attributes( fields[8] )[ 'transcript_id' ]
+    def get_transcript_id(fields):
+        return parse_gff_attributes( fields[8] )[ 'transcript_id' ]
+
     if strict:
         # Strict GTF parsing uses transcript_id only to group lines into feature.
         key_fn = get_transcript_id
@@ -382,7 +386,8 @@
         # Use lenient parsing where chromosome + transcript_id is the key. This allows
         # transcripts with same ID on different chromosomes; this occurs in some popular
         # datasources, such as RefGenes in UCSC.
-        key_fn = lambda fields: fields[0] + '_' + get_transcript_id( fields )
+        def key_fn(fields):
+            return fields[0] + '_' + get_transcript_id( fields )
 
     # Aggregate intervals by transcript_id and collect comments.
     feature_intervals = odict()
Binary file utils/gff_util.pyc has changed
Binary file utils/odict.pyc has changed