Mercurial > repos > galaxyp > retrieve_ensembl_bed
view retrieve_ensembl_bed.xml @ 1:9c4a48f5d4e7 draft default tip
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 6babd357845126292cb202aaea0f70ff68819525"
author | galaxyp |
---|---|
date | Mon, 07 Oct 2019 16:14:39 -0400 |
parents | da1b538b87e5 |
children |
line wrap: on
line source
<tool id="retrieve_ensembl_bed" name="Retrieve Ensembl features in BED format" version="0.1.0">
    <description>using Ensembl REST API</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <requirements>
        <expand macro="ensembl_requirements" />
        <expand macro="bedutil_requirements" />
    </requirements>
    <!--
        Command template (Cheetah). Optional arguments are only emitted when the
        corresponding input is set. The interval format passed to the script is
        derived from the datatype extension of the selected interval dataset:
        any extension containing "bed" maps to bed; gff, gtf and gff3 map to gff;
        everything else is treated as a generic interval file.
        The output BED path is the single positional argument and comes last.
    -->
    <command detect_errors="exit_code"><![CDATA[
        python '$__tool_directory__/retrieve_ensembl_bed.py'
        --species '$species'
        #if $extended_bed:
            --extended_bed
        #end if
        $ucsc_chrom_names
        #if $biotypes:
            --biotypes '$biotypes'
        #end if
        #if $regions:
            --regions '$regions'
        #end if
        #if $interval_file:
            #if $interval_file.ext.find('bed') > -1
                --interval_format bed
            #elif $interval_file.ext in ['gff','gtf','gff3']
                --interval_format gff
            #else
                --interval_format interval
            #end if
            --interval_file '$interval_file'
        #end if
        '$transcript_bed'
    ]]></command>
    <inputs>
        <!-- Free-text species; suggested values come from the species_options macro. -->
        <param name="species" type="text" value="" label="Ensembl species" >
            <help>Ensembl species name or alias, e.g. human or mouse</help>
            <expand macro="species_options" />
            <validator type="regex" message="Enter an Ensembl organism">^\w+.*$</validator>
        </param>
        <!--
            The truevalue of extended_bed is not a command-line flag: it is the list of
            extra column names that the output's column_names metadata action appends
            to the twelve standard BED columns. The command template only tests this
            parameter for truthiness.
        -->
        <param name="extended_bed" type="boolean"
               truevalue=",second_name,cds_start_status,cds_end_status,exon_frames,type,gene_name,second_gene_name,gene_type"
               falsevalue=""
               checked="true"
               label="Keep extra columns from ensembl BED"/>
        <param name="ucsc_chrom_names" type="boolean"
               truevalue="--ucsc_chrom_names"
               falsevalue=""
               checked="false"
               label="Use the UCSC names for Chromosomes"/>
        <param name="biotypes" type="text" value="" optional="true" label="Restrict Feature retrieval to these biotypes" >
            <expand macro="biotypes_help" />
        </param>
        <!-- The regex accepts an empty value or a comma-separated list of chr, chr:pos or chr:from-to. -->
        <param name="regions" type="text" value="" optional="true" label="Restrict Feature retrieval to comma-separated list of regions" >
            <help>Each region is specified as: chr or chr:pos or chr:from-to</help>
            <validator type="regex" message="Enter a comma-separated list of regions, each given as chr, chr:pos or chr:from-to (e.g. X,2:20000-25000)">^(\w+(:\d+(-\d+)?)?(,\w+(:\d+(-\d+)?)?)*)?$</validator>
        </param>
        <param name="interval_file" type="data" format="bed,gff,interval" label="Retrieve the intervals from this file" optional="true"/>
    </inputs>
    <outputs>
        <data name="transcript_bed" format="bed" label="Ensembl ${species} transcripts.bed">
            <actions>
                <action name="column_names" type="metadata" default="chrom,chromStart,chromEnd,name,score,strand,thickStart,thickEnd,itemRgb,blockCount,blockSizes,blockStarts${extended_bed}"/>
            </actions>
        </data>
    </outputs>
    <tests>
        <test>
            <param name="species" value="human"/>
            <param name="biotypes" value="protein_coding"/>
            <param name="regions" value="1:51194990-51275150"/>
            <output name="transcript_bed">
                <assert_contents>
                    <has_text_matching expression="(chr)?1\t\d+\t\d+\tENST" />
                </assert_contents>
            </output>
        </test>
        <test>
            <param name="species" value="mouse"/>
            <param name="biotypes" value="protein_coding"/>
            <param name="interval_file" ftype="bed" value="test.bed"/>
            <output name="transcript_bed">
                <assert_contents>
                    <has_text_matching expression="(chr)?1\t\d+\t\d+\tENSMUST" />
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
Retrieve Ensembl cDNAs in BED format

::

    usage: retrieve_ensembl_bed.py [-h] [-s SPECIES] [-R REGIONS]
                                   [-i INTERVAL_FILE] [-f {bed,gff,interval}]
                                   [-B BIOTYPES] [-X] [-U] [-t] [-v] [-d]
                                   output

    positional arguments:
      output                Output BED filepath, or for stdout: "-"

    optional arguments:
      -h, --help            show this help message and exit
      -s SPECIES, --species SPECIES
                            Ensembl Species to retrieve
      -R REGIONS, --regions REGIONS
                            Restrict Ensembl retrieval to regions e.g.
                            X,2:20000-25000,3:100-500+
      -i INTERVAL_FILE, --interval_file INTERVAL_FILE
                            Regions from a bed, gff, or interval file
      -f {bed,gff,interval}, --interval_format {bed,gff,interval}
                            Interval format has TAB-separated columns: Seq, Start,
                            End, Strand
      -B BIOTYPES, --biotypes BIOTYPES
                            Restrict Ensembl biotypes to retrieve
      -X, --extended_bed    Include the extended columns returned from Ensembl
      -U, --ucsc_chrom_names
                            Use the UCSC names for Chromosomes
      -t, --toplevel        Print Ensembl toplevel for species
      -v, --verbose         Verbose
      -d, --debug           Debug

**Output**

Ensembl REST API returns an extended BED format with these additional columns::

    second_name, cds_start_status, cds_end_status, exon_frames, type, gene_name, second_gene_name, gene_type

    ]]></help>
    <citations>
        <citation type="doi">10.1093/bioinformatics/btu613</citation>
        <citation type="doi">10.1093/nar/gku1010</citation>
    </citations>
</tool>