diff retrieve_ensembl_bed.xml @ 1:9c4a48f5d4e7 draft default tip

"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 6babd357845126292cb202aaea0f70ff68819525"
author galaxyp
date Mon, 07 Oct 2019 16:14:39 -0400
parents da1b538b87e5
children
line wrap: on
line diff
--- a/retrieve_ensembl_bed.xml	Mon Jan 22 13:13:47 2018 -0500
+++ b/retrieve_ensembl_bed.xml	Mon Oct 07 16:14:39 2019 -0400
@@ -20,6 +20,16 @@
             #if $regions:
                 --regions '$regions'
             #end if
+            #if $interval_file:
+                #if $interval_file.ext.find('bed') > -1
+                    --interval_format bed
+                #elif $interval_file.ext in ['gff','gtf','gff3']
+                    --interval_format gff
+                #else
+                    --interval_format interval
+                #end if
+                --interval_file '$interval_file' 
+            #end if
             '$transcript_bed'
     ]]></command>
     <inputs>
@@ -40,6 +50,7 @@
             <help>Each region is specifed as: chr or chr:pos or chr:from-to</help>
             <validator type="regex" message="">^(\w+(:\d+(-\d+)?)?(,\w+(:\d+(-\d+)?)?)*)?$</validator>
         </param>
+        <param name="interval_file" type="data" format="bed,gff,interval" label="Retrieve the intervals from this file" optional="true"/>
     </inputs>
     <outputs>
         <data name="transcript_bed" format="bed" label="Ensembl ${species} transcripts.bed">
@@ -60,6 +71,16 @@
                 </assert_contents>
             </output>
         </test>
+        <test>
+            <param name="species" value="mouse"/>
+            <param name="biotypes" value="protein_coding"/>
+            <param name="interval_file" ftype="bed" value="test.bed"/>
+            <output name="transcript_bed">
+                <assert_contents>
+                    <has_text_matching expression="(chr)?1\t\d+\t\d+\tENSMUST" />
+                </assert_contents>
+            </output>
+        </test>
     </tests>
     <help><![CDATA[
 Retrieve Ensembl cDNAs in BED format
@@ -69,25 +90,34 @@
                                output
 
 positional arguments:
-  output                Output BED filepath, or for stdout: "-"
+  output                   Output BED filepath, or for stdout: "-"
 
 optional arguments:
-  -h, --help            show this help message and exit
+  -h, --help               show this help message and exit
   -s SPECIES, --species SPECIES
-                        Ensembl Species to retrieve
+                           Ensembl Species to retrieve
   -R REGIONS, --regions REGIONS
-                        Restrict Ensembl retrieval to regions e.g.:
-                        X,2:20000-25000,3:100-500+
+                           Restrict Ensembl retrieval to regions e.g.
+                           X,2:20000-25000,3:100-500+
+  -i INTERVAL_FILE, --interval_file INTERVAL_FILE
+                           Regions from a bed, gff, or interval file
+
+  -f {bed,gff,interval}, --interval_format {bed,gff,interval}
+                           Format of the interval file. The "interval" format
+                           has TAB-separated columns: Seq, Start, End, Strand
+
   -B BIOTYPES, --biotypes BIOTYPES
-                        Restrict Ensembl biotypes to retrieve
-  -X, --extended_bed    Include the extended columns returned from Ensembl
+                           Restrict Ensembl biotypes to retrieve
+  -X, --extended_bed       Include the extended columns returned from Ensembl
   -U, --ucsc_chrom_names
-                        Use the UCSC names for Chromosomes
-  -t, --toplevel        Print Ensembl toplevel for species
-  -v, --verbose         Verbose
-  -d, --debug           Debug
+                           Use the UCSC names for Chromosomes
+  -t, --toplevel           Print Ensembl toplevel for species
+  -v, --verbose            Verbose
+  -d, --debug              Debug
 
 
+**Output**
+
 Ensembl REST API returns an extended BED format with these additional columns::
 
   second_name, cds_start_status, cds_end_status, exon_frames, type, gene_name, second_gene_name, gene_type