comparison retrieve_ensembl_bed.xml @ 1:9c4a48f5d4e7 draft default tip

"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 6babd357845126292cb202aaea0f70ff68819525"
author galaxyp
date Mon, 07 Oct 2019 16:14:39 -0400
parents da1b538b87e5
children
comparison
equal deleted inserted replaced
0:da1b538b87e5 1:9c4a48f5d4e7
18 --biotypes '$biotypes' 18 --biotypes '$biotypes'
19 #end if 19 #end if
20 #if $regions: 20 #if $regions:
21 --regions '$regions' 21 --regions '$regions'
22 #end if 22 #end if
23 #if $interval_file:
24 #if $interval_file.ext.find('bed') > -1
25 --interval_format bed
26 #elif $interval_file.ext in ['gff','gtf','gff3']
27 --interval_format gff
28 #else
29 --interval_format interval
30 #end if
31 --interval_file '$interval_file'
32 #end if
23 '$transcript_bed' 33 '$transcript_bed'
24 ]]></command> 34 ]]></command>
25 <inputs> 35 <inputs>
26 <param name="species" type="text" value="" label="Ensembl species" > 36 <param name="species" type="text" value="" label="Ensembl species" >
27 <help> 37 <help>
38 </param> 48 </param>
39 <param name="regions" type="text" value="" optional="true" label="Restrict Feature retrieval to comma-separated list of regions" > 49 <param name="regions" type="text" value="" optional="true" label="Restrict Feature retrieval to comma-separated list of regions" >
40 <help>Each region is specified as: chr or chr:pos or chr:from-to</help> 50 <help>Each region is specified as: chr or chr:pos or chr:from-to</help>
41 <validator type="regex" message="">^(\w+(:\d+(-\d+)?)?(,\w+(:\d+(-\d+)?)?)*)?$</validator> 51 <validator type="regex" message="">^(\w+(:\d+(-\d+)?)?(,\w+(:\d+(-\d+)?)?)*)?$</validator>
42 </param> 52 </param>
53 <param name="interval_file" type="data" format="bed,gff,interval" label="Retrieve the intervals from this file" optional="true"/>
43 </inputs> 54 </inputs>
44 <outputs> 55 <outputs>
45 <data name="transcript_bed" format="bed" label="Ensembl ${species} transcripts.bed"> 56 <data name="transcript_bed" format="bed" label="Ensembl ${species} transcripts.bed">
46 <actions> 57 <actions>
47 <action name="column_names" type="metadata" 58 <action name="column_names" type="metadata"
58 <assert_contents> 69 <assert_contents>
59 <has_text_matching expression="(chr)?1\t\d+\t\d+\tENST" /> 70 <has_text_matching expression="(chr)?1\t\d+\t\d+\tENST" />
60 </assert_contents> 71 </assert_contents>
61 </output> 72 </output>
62 </test> 73 </test>
74 <test>
75 <param name="species" value="mouse"/>
76 <param name="biotypes" value="protein_coding"/>
77 <param name="interval_file" ftype="bed" value="test.bed"/>
78 <output name="transcript_bed">
79 <assert_contents>
80 <has_text_matching expression="(chr)?1\t\d+\t\d+\tENSMUST" />
81 </assert_contents>
82 </output>
83 </test>
63 </tests> 84 </tests>
64 <help><![CDATA[ 85 <help><![CDATA[
65 Retrieve Ensembl cDNAs in BED format 86 Retrieve Ensembl cDNAs in BED format
66 87
67 usage: retrieve_ensembl_bed.py [-h] [-s SPECIES] [-R REGIONS] [-B BIOTYPES] 88 usage: retrieve_ensembl_bed.py [-h] [-s SPECIES] [-R REGIONS] [-B BIOTYPES]
68 [-X] [-U] [-t] [-v] [-d] 89 [-X] [-U] [-t] [-v] [-d]
69 output 90 output
70 91
71 positional arguments: 92 positional arguments:
72 output Output BED filepath, or for stdout: "-" 93 output Output BED filepath, or for stdout: "-"
73 94
74 optional arguments: 95 optional arguments:
75 -h, --help show this help message and exit 96 -h, --help show this help message and exit
76 -s SPECIES, --species SPECIES 97 -s SPECIES, --species SPECIES
77 Ensembl Species to retrieve 98 Ensembl Species to retrieve
78 -R REGIONS, --regions REGIONS 99 -R REGIONS, --regions REGIONS
79 Restrict Ensembl retrieval to regions e.g.: 100 Restrict Ensembl retrieval to regions e.g.
80 X,2:20000-25000,3:100-500+ 101 X,2:20000-25000,3:100-500+
102 -i INTERVAL_FILE, --interval_file INTERVAL_FILE
103 Regions from a bed, gff, or interval file
104
105 -f {bed,gff,interval}, --interval_format {bed,gff,interval}
106 Interval format has TAB-separated
107 columns: Seq, Start, End, Strand
108
81 -B BIOTYPES, --biotypes BIOTYPES 109 -B BIOTYPES, --biotypes BIOTYPES
82 Restrict Ensembl biotypes to retrieve 110 Restrict Ensembl biotypes to retrieve
83 -X, --extended_bed Include the extended columns returned from Ensembl 111 -X, --extended_bed Include the extended columns returned from Ensembl
84 -U, --ucsc_chrom_names 112 -U, --ucsc_chrom_names
85 Use the UCSC names for Chromosomes 113 Use the UCSC names for Chromosomes
86 -t, --toplevel Print Ensembl toplevel for species 114 -t, --toplevel Print Ensembl toplevel for species
87 -v, --verbose Verbose 115 -v, --verbose Verbose
88 -d, --debug Debug 116 -d, --debug Debug
89 117
118
119 **Output**
90 120
91 Ensembl REST API returns an extended BED format with these additional columns:: 121 Ensembl REST API returns an extended BED format with these additional columns::
92 122
93 second_name, cds_start_status, cds_end_status, exon_frames, type, gene_name, second_gene_name, gene_type 123 second_name, cds_start_status, cds_end_status, exon_frames, type, gene_name, second_gene_name, gene_type
94 124