comparison augustus.xml @ 5:a4fab0c1ae1a

Uploaded
author bjoern-gruening
date Sun, 09 Jun 2013 07:54:25 -0400
parents 796814f16b12
children
comparison
equal deleted inserted replaced
4:796814f16b12 5:a4fab0c1ae1a
1 <tool id="augustus" name="Augustus" version="0.2"> 1 <tool id="augustus" name="Augustus" version="0.3">
2 <description>gene prediction for eukaryotic genomes</description> 2 <description>gene prediction for eukaryotic genomes</description>
3 <requirements> 3 <requirements>
4 <requirement type="package" version="2.6.1">augustus</requirement> 4 <requirement type="package" version="2.7">augustus</requirement>
5 <requirement type="set_environment">AUGUSTUS_SCRIPT_PATH</requirement>
5 </requirements> 6 </requirements>
6 <command>augustus 7 <command>
7 --strand=$strand 8 ## please set export AUGUSTUS_CONFIG_PATH=/path_to_augustus/augustus/config
8 $noInFrameStop 9 ## or use the --AUGUSTUS_CONFIG_PATH=path if you are not installing through the toolshed
9 $gff 10 ## Augustus writes the protein and coding sequences as comments into the gff/gtf file; an external script is used to extract the sequences into additional files
10 $protein 11
11 $introns 12 augustus
12 $start 13 --strand=$strand
13 $stop 14 $noInFrameStop
14 $cds 15 $gff
15 $codingseq 16 $protein
16 $singlestrand 17 $introns
17 $input_genome 18 $start
18 --genemodel=$genemodel 19 $stop
19 --species=$organism 20 $cds
20 --outfile=$output 21 $codingseq
21 22 $singlestrand
22 #please set export AUGUSTUS_CONFIG_PATH=/path_to_augustus/augustus/config 23 $input_genome
23 #or use the --AUGUSTUS_CONFIG_PATH=path switch 24 $mea
24 25 $utr
26 --genemodel=$genemodel
27 --species=$organism
28 ##--outfile=$output
29 | tee $output
30 #if $protein or $codingseq:
31 | python \$AUGUSTUS_SCRIPT_PATH/extract_features.py
32 #if $protein:
33 --protein $protein_output
34 #end if
35 #if $codingseq:
36 --codingseq $codingseq_output
37 #end if
38 #end if
25 </command> 39 </command>
26 <inputs> 40 <inputs>
27 <param name="input_genome" type="data" format="fasta" label="Genome Sequence"/> 41 <param name="input_genome" type="data" format="fasta" label="Genome Sequence"/>
28 <param name="noInFrameStop" type="boolean" label="Don't report transcripts with in-frame stop codons. Otherwise, intron-spanning stop codons could occur" truevalue="--noInFrameStop=true" falsevalue="--noInFrameStop=false" checked="false" /> 42 <param name="noInFrameStop" type="boolean" label="Don't report transcripts with in-frame stop codons (--noInFrameStop)" truevalue="--noInFrameStop=true" falsevalue="--noInFrameStop=false" checked="false" help="Otherwise, intron-spanning stop codons could occur" />
29 <param name="gff" type="boolean" label="GFF formated output, standard is GTF" truevalue="--gff3=on" falsevalue="--gff3=off" checked="false" /> 43 <param name="singlestrand" type="boolean" label="Predict genes independently on each strand, allow overlapping genes on opposite strands (--singlestrand)" truevalue="--singlestrand=true" falsevalue="--singlestrand=false" checked="false" />
30 <param name="protein" type="boolean" label="Output predicted protein sequences" truevalue="--protein=on" falsevalue="--protein=off" checked="false" /> 44 <param name="mea" type="boolean" label="Using the maximum expected accuracy approach (--mea)" truevalue="--mea=1" falsevalue="" checked="false" help="MEA is an alternative decoding approach." />
31 <param name="introns" type="boolean" label="Output predicted intron sequences" truevalue="--introns=on" falsevalue="--introns=off" checked="false" /> 45 <param name="utr" type="boolean" label="Predict the untranslated regions in addition to the coding sequence (--UTR)" truevalue="--UTR=on" falsevalue="--UTR=off" checked="false" help="This currently works only for human, galdieria, toxoplasma and caenorhabditis." />
32 <param name="start" type="boolean" label="Output predicted start codons" truevalue="--start=on" falsevalue="--start=off" checked="false" />
33 <param name="stop" type="boolean" label="Output predicted stop codons" truevalue="--stop=on" falsevalue="--stop=off" checked="false" />
34 <param name="cds" type="boolean" label="Output CDS region" truevalue="--cds=on" falsevalue="--cds=off" checked="true" />
35 <param name="codingseq" type="boolean" label="Output coding sequence as comment in the output file" truevalue="--codingseq=on" falsevalue="--codingseq=off" checked="false" />
36
37 <param name="singlestrand" type="boolean" label="Predict genes independently on each strand, allow overlapping genes on opposite strands" truevalue="--singlestrand=true" falsevalue="--singlestrand=false" checked="false" />
38 46
39 <param name="organism" label="Model Organism" type="select" multiple="false" format="text" help="Choose a specialised trainingset."> 47 <param name="organism" label="Model Organism" type="select" multiple="false" format="text" help="Choose a specialised trainingset.">
40 <option value="human">Homo sapiens</option> 48 <option value="human">Homo sapiens</option>
41 <option value="fly">Drosophila melanogaster</option> 49 <option value="fly">Drosophila melanogaster</option>
42 <option value="arabidopsis">Arabidopsis thaliana</option> 50 <option value="arabidopsis">Arabidopsis thaliana</option>
43 <option value="brugia ">Brugia malayi</option> 51 <option value="brugia ">Brugia malayi</option>
44 <option value="aedes">Aedes aegypti</option> 52 <option value="aedes">Aedes aegypti</option>
45 <option value="tribolium">Tribolium castaneum</option> 53 <option value="tribolium2012">Tribolium castaneum</option>
46 <option value="schistosoma">Schistosoma mansoni</option> 54 <option value="schistosoma">Schistosoma mansoni</option>
47 <option value="tetrahymena">Tetrahymena thermophila</option> 55 <option value="tetrahymena">Tetrahymena thermophila</option>
48 <option value="galdieria">Galdieria sulphuraria</option> 56 <option value="galdieria">Galdieria sulphuraria</option>
49 <option value="maize">Zea mays</option> 57 <option value="maize">Zea mays</option>
50 <option value="toxoplasma ">Toxoplasma gondii</option> 58 <option value="toxoplasma ">Toxoplasma gondii</option>
86 <option value="schizosaccharomyces_pombe">Schizosaccharomyces pombe</option> 94 <option value="schizosaccharomyces_pombe">Schizosaccharomyces pombe</option>
87 <option value="trichinella">Trichinella spiralis</option> 95 <option value="trichinella">Trichinella spiralis</option>
88 <option value="ustilago_maydis">Ustilago maydis</option> 96 <option value="ustilago_maydis">Ustilago maydis</option>
89 <option value="yarrowia_lipolytica">Yarrowia lipolytica</option> 97 <option value="yarrowia_lipolytica">Yarrowia lipolytica</option>
90 <option value="nasonia">Nasonia vitripennis</option> 98 <option value="nasonia">Nasonia vitripennis</option>
91 <option value="tomato ">Solanum lycopersicum</option> 99 <option value="tomato">Solanum lycopersicum</option>
92 <option value="chlamydomonas">Chlamydomonas reinhardtii</option> 100 <option value="chlamydomonas">Chlamydomonas reinhardtii</option>
93 <option value="amphimedon ">Amphimedon queenslandica</option> 101 <option value="amphimedon">Amphimedon queenslandica</option>
94 <option value="pneumocystis ">Pneumocystis jirovecii</option> 102 <option value="pneumocystis">Pneumocystis jirovecii</option>
103 <option value="chicken">Gallus gallus domesticus (chicken)</option>
104 <option value="cacao">Theobroma cacao (cacao)</option>
105 <option value="heliconius_melpomene1">Heliconius melpomene</option>
106 <option value="xenoturbella">Xenoturbella</option>
95 </param> 107 </param>
96 108
97 <param name="strand" type="select" multiple="false" format="text" help="Report predicted genes on both strands, just the forward or just the backward strand."> 109 <param name="strand" type="select" multiple="false" format="text" help="Report predicted genes on both strands, just the forward or just the backward strand.">
98 <option value="both">both</option> 110 <option value="both">both</option>
99 <option value="forward">forward</option> 111 <option value="forward">forward</option>
104 <option value="complete">complete</option> 116 <option value="complete">complete</option>
105 <option value="partial">partial</option> 117 <option value="partial">partial</option>
106 <option value="intronless">intronless</option> 118 <option value="intronless">intronless</option>
107 <option value="atleastone">atleastone</option> 119 <option value="atleastone">atleastone</option>
108 <option value="exactlyone">exactlyone</option> 120 <option value="exactlyone">exactlyone</option>
121 <option value="bacterium">bacterium (beta version)</option>
109 </param> 122 </param>
110 123
111 </inputs> 124 <param name="protein" type="boolean" label="Output predicted protein sequences (--protein)" truevalue="--protein=on" falsevalue="--protein=off" checked="true" />
112 <outputs> 125 <param name="codingseq" type="boolean" label="Output coding sequence as comment in the output file (--codingseq)" truevalue="--codingseq=on" falsevalue="--codingseq=off" checked="true" />
126 <param name="introns" type="boolean" label="Output predicted intron sequences (--introns)" truevalue="--introns=on" falsevalue="--introns=off" checked="false" />
127 <param name="start" type="boolean" label="Output predicted start codons (--start)" truevalue="--start=on" falsevalue="--start=off" checked="false" />
128 <param name="stop" type="boolean" label="Output predicted stop codons (--stop)" truevalue="--stop=on" falsevalue="--stop=off" checked="false" />
129 <param name="cds" type="boolean" label="Output CDS region (--cds)" truevalue="--cds=on" falsevalue="--cds=off" checked="true" />
130 <param name="gff" type="boolean" label="GFF formatted output, standard is GTF (--gff3)" truevalue="--gff3=on" falsevalue="--gff3=off" checked="false" />
131
132 </inputs>
133 <outputs>
113 <data format="gtf" name="output"> 134 <data format="gtf" name="output">
114 <change_format> 135 <change_format>
115 <when input="gff" value="--gff3=on" format="gff" /> 136 <when input="gff" value="--gff3=on" format="gff" />
116 </change_format> 137 </change_format>
117 </data> 138 </data>
139 <data format="fasta" name="protein_output">
140 <filter>protein == True</filter>
141 </data>
142 <data format="fasta" name="codingseq_output">
143 <filter>codingseq == True</filter>
144 </data>
118 </outputs> 145 </outputs>
146 <tests>
147 <test>
148 <param name="input_genome" value="human_augustus.fa" ftype="fasta" />
149 <param name="organism" value="human" />
150 <param name="utr" value="--UTR=on" />
151 <output name="output" file="human_augustus_utr-on.gtf" ftype="gtf" lines_diff="2"/>
152 </test>
153 <test>
154 <param name="input_genome" value="human_augustus.fa" ftype="fasta" />
155 <param name="organism" value="human" />
156 <param name="utr" value="--UTR=on" />
157 <param name="gff" value="--gff3=on" />
158 <output name="output" file="human_augustus_utr-on.gff" ftype="gff3" lines_diff="2"/>
159 </test>
160 <test>
161 <param name="input_genome" value="arabidopsis_augustus.fa" ftype="fasta" />
162 <param name="organism" value="arabidopsis" />
163 <param name="singlestrand" value="--singlestrand=true" />
164 <param name="mea" value="--mea=1" />
165 <output name="output" file="arabidopsis_augustus_utr-off_singlestrand-on_mea-on.gtf" ftype="gtf" lines_diff="2"/>
166 </test>
167 <test>
168 <param name="input_genome" value="human_augustus.fa" ftype="fasta" />
169 <param name="organism" value="human" />
170 <param name="protein" value="--protein=on" />
171 <param name="codingseq" value="--codingseq=on" />
172 <param name="introns" value="--introns=on" />
173 <param name="cds" value="--cds=on" />
174 <output name="output" file="human_augustus_protein_codingseq_introns_cds_main.gtf" ftype="gff" lines_diff="2"/>
175 <output name="codingseq_output" file="human_augustus_protein_codingseq_introns_cds_codingseq.fasta" ftype="fasta" />
176 <output name="protein_output" file="human_augustus_protein_codingseq_introns_cds_protein.fasta" ftype="fasta" />
177 </test>
178 </tests>
119 <help> 179 <help>
120 180
121 **What it does** 181 **What it does**
122 182
123 AUGUSTUS is a gene prediction program for eukaryotes written by Mario Stanke and Oliver Keller. 183 AUGUSTUS is a gene prediction program for eukaryotes written by Mario Stanke and Oliver Keller.
124 It can be used as an ab initio program, which means it bases its prediction purely on the 184 It can be used as an ab initio program, which means it bases its prediction purely on the
125 sequence. AUGUSTUS may also incorporate hints on the gene structure coming from extrinsic sources 185 sequence. AUGUSTUS may also incorporate hints on the gene structure coming from extrinsic sources
126 such as EST, MS/MS, protein alignments and syntenic genomic alignments. 186 such as EST, MS/MS, protein alignments and syntenic genomic alignments.
127 187
128 ----- 188 -----
129 189
130 **Parameters** 190 **Parameters**
131 191
132 Gene Model:: 192 Gene Model::
133 193
180 Mario Stanke and Stephan Waack (2003) 240 Mario Stanke and Stephan Waack (2003)
181 Gene Prediction with a Hidden-Markov Model and a new Intron Submodel. 241 Gene Prediction with a Hidden-Markov Model and a new Intron Submodel.
182 Bioinformatics, Vol. 19, Suppl. 2, pages ii215-ii225 242 Bioinformatics, Vol. 19, Suppl. 2, pages ii215-ii225
183 243
184 244
185 </help> 245 </help>
186 </tool> 246 </tool>