comparison retrieve_ensembl_bed.xml @ 0:da1b538b87e5 draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 88cf1e923a8c9e5bc6953ad412d15a7c70f054d1
author galaxyp
date Mon, 22 Jan 2018 13:13:47 -0500
parents
children 9c4a48f5d4e7
comparison
equal deleted inserted replaced
-1:000000000000 0:da1b538b87e5
1 <tool id="retrieve_ensembl_bed" name="Retrieve Ensembl features in BED format" version="0.1.0">
2 <description>using Ensembl REST API</description>
3 <macros> <!-- Imports macros.xml; the expand elements in this tool refer to macros presumably defined there (definitions not visible here) -->
4 <import>macros.xml</import>
5 </macros>
6 <requirements> <!-- Requirement entries come from the two macros expanded below -->
7 <expand macro="ensembl_requirements" />
8 <expand macro="bedutil_requirements" />
9 </requirements>
10 <!-- Runs retrieve_ensembl_bed.py from the tool directory. The extended_bed, biotypes and regions options are emitted only when the matching parameter is truthy; $ucsc_chrom_names expands to that boolean's truevalue or falsevalue; the output dataset path is the final positional argument. --> <command detect_errors="exit_code"><![CDATA[
11 python '$__tool_directory__/retrieve_ensembl_bed.py'
12 --species '$species'
13 #if $extended_bed:
14 --extended_bed
15 #end if
16 $ucsc_chrom_names
17 #if $biotypes:
18 --biotypes '$biotypes'
19 #end if
20 #if $regions:
21 --regions '$regions'
22 #end if
23 '$transcript_bed'
24 ]]></command>
25 <inputs> <!-- User-facing parameters; each name matches a $variable referenced in the command template -->
26 <param name="species" type="text" value="" label="Ensembl species" >
27 <help>Ensembl species name or alias, e.g. homo_sapiens or human
28 </help>
29 <expand macro="species_options" />
30 <validator type="regex" message="Enter an Ensembl organism">^\w+.*$</validator>
31 </param>
32 <param name="extended_bed" type="boolean" truevalue=",second_name,cds_start_status,cds_end_status,exon_frames,type,gene_name,second_gene_name,gene_type" falsevalue="" checked="true"
33 label="Keep extra columns from ensembl BED"/> <!-- truevalue is a comma-led list of extra column names; the output's column_names metadata appends it via ${extended_bed} -->
34 <param name="ucsc_chrom_names" type="boolean" truevalue="--ucsc_chrom_names" falsevalue="" checked="false"
35 label="Use the UCSC names for Chromosomes"/> <!-- truevalue is the literal command-line flag inserted into the command -->
36 <param name="biotypes" type="text" value="" optional="true" label="Restrict Feature retrieval to these biotypes" >
37 <expand macro="biotypes_help" />
38 </param>
39 <param name="regions" type="text" value="" optional="true" label="Restrict Feature retrieval to comma-separated list of regions" >
40 <help>Each region is specified as: chr or chr:pos or chr:from-to</help>
41 <validator type="regex" message="Regions must be a comma-separated list of chr, chr:pos or chr:from-to (no spaces)">^(\w+(:\d+(-\d+)?)?(,\w+(:\d+(-\d+)?)?)*)?$</validator> <!-- empty input is allowed; the whole pattern is optional -->
42 </param>
43 </inputs>
44 <outputs> <!-- Single BED dataset; its path is passed to the script as the command's positional argument -->
45 <data name="transcript_bed" format="bed" label="Ensembl ${species} transcripts.bed">
46 <actions> <!-- Sets column_names metadata to the 12 standard BED columns followed by ${extended_bed}; presumably this resolves to the extended_bed parameter's truevalue (extra column names) or its empty falsevalue - confirm -->
47 <action name="column_names" type="metadata"
48 default="chrom,chromStart,chromEnd,name,score,strand,thickStart,thickEnd,itemRgb,blockCount,blockSizes,blockStarts${extended_bed}"/>
49 </actions>
50 </data>
51 </outputs>
52 <tests> <!-- NOTE(review): the tool is described as using the Ensembl REST API, so this test presumably needs network access - confirm for CI -->
53 <test> <!-- species human, biotype protein_coding, one window on chromosome 1 -->
54 <param name="species" value="human"/>
55 <param name="biotypes" value="protein_coding"/>
56 <param name="regions" value="1:51194990-51275150"/>
57 <output name="transcript_bed">
58 <assert_contents> <!-- Passes if the output contains chromosome 1 (optionally chr-prefixed), two tab-separated integer columns, then a name beginning with ENST -->
59 <has_text_matching expression="(chr)?1\t\d+\t\d+\tENST" />
60 </assert_contents>
61 </output>
62 </test>
63 </tests>
64 <help><![CDATA[
65 Retrieve Ensembl cDNAs in BED format
66
67 usage: retrieve_ensembl_bed.py [-h] [-s SPECIES] [-R REGIONS] [-B BIOTYPES]
68 [-X] [-U] [-t] [-v] [-d]
69 output
70
71 positional arguments:
72 output Output BED filepath, or for stdout: "-"
73
74 optional arguments:
75 -h, --help show this help message and exit
76 -s SPECIES, --species SPECIES
77 Ensembl Species to retrieve
78 -R REGIONS, --regions REGIONS
79 Restrict Ensembl retrieval to regions e.g.:
80 X,2:20000-25000,3:100-500+
81 -B BIOTYPES, --biotypes BIOTYPES
82 Restrict Ensembl biotypes to retrieve
83 -X, --extended_bed Include the extended columns returned from Ensembl
84 -U, --ucsc_chrom_names
85 Use the UCSC names for Chromosomes
86 -t, --toplevel Print Ensembl toplevel for species
87 -v, --verbose Verbose
88 -d, --debug Debug
89
90
91 Ensembl REST API returns an extended BED format with these additional columns::
92
93 second_name, cds_start_status, cds_end_status, exon_frames, type, gene_name, second_gene_name, gene_type
94
95 ]]></help>
96 <citations> <!-- NOTE(review): these DOIs presumably cite the Ensembl REST API and Ensembl database papers - verify -->
97 <citation type="doi">10.1093/bioinformatics/btu613</citation>
98 <citation type="doi">10.1093/nar/gku1010</citation>
99 </citations>
100 </tool>