comparison augustus.xml @ 0:af307d3285c5 draft

Uploaded
author bgruening
date Sat, 06 Jul 2013 10:07:41 -0400
parents
children e2b822d7408f
comparison
equal deleted inserted replaced
-1:000000000000 0:af307d3285c5
1 <tool id="augustus" name="Augustus" version="0.3">
2 <description>gene prediction for eukaryotic genomes</description>
3 <requirements>
4 <requirement type="package" version="2.7">augustus</requirement>
5 <requirement type="set_environment">AUGUSTUS_SCRIPT_PATH</requirement>
6 </requirements>
7 <command>
8 ## If Augustus was not installed through the Tool Shed, export AUGUSTUS_CONFIG_PATH=/path_to_augustus/augustus/config
9 ## or add --AUGUSTUS_CONFIG_PATH=path to the augustus call below.
10 ## Augustus writes the protein and coding sequences as comments into the GFF/GTF file; an external script (extract_features.py) extracts them into additional files.
11
12 augustus
13 --strand=$strand
14 $noInFrameStop
15 $gff
16 $protein
17 $introns
18 $start
19 $stop
20 $cds
21 $codingseq
22 $singlestrand
23 $input_genome
24 $mea
25 $utr
26 --genemodel=$genemodel
27 --species=$organism
28 ##--outfile=$output
29 | tee $output
30 #if $protein or $codingseq:
31 | python \$AUGUSTUS_SCRIPT_PATH/extract_features.py
32 #if $protein:
33 --protein $protein_output
34 #end if
35 #if $codingseq:
36 --codingseq $codingseq_output
37 #end if
38 #end if
39 </command>
40 <inputs>
41 <param name="input_genome" type="data" format="fasta" label="Genome Sequence"/>
42 <param name="noInFrameStop" type="boolean" label="Don't report transcripts with in-frame stop codons (--noInFrameStop)" truevalue="--noInFrameStop=true" falsevalue="--noInFrameStop=false" checked="false" help="Otherwise, intron-spanning stop codons could occur" />
43 <param name="singlestrand" type="boolean" label="Predict genes independently on each strand, allow overlapping genes on opposite strands (--singlestrand)" truevalue="--singlestrand=true" falsevalue="--singlestrand=false" checked="false" />
44 <param name="mea" type="boolean" label="Using the maximum expected accuracy approach (--mea)" truevalue="--mea=1" falsevalue="" checked="false" help="MEA is an alternative decoding approach." />
45 <param name="utr" type="boolean" label="Predict the untranslated regions in addition to the coding sequence (--UTR)" truevalue="--UTR=on" falsevalue="--UTR=off" checked="false" help="This currently works only for human, galdieria, toxoplasma and caenorhabditis." />
46 <!-- Species parameter set handed to augustus via the species option; each value must equal an AUGUSTUS species identifier exactly (no padding spaces and no parentheses, which the shell would reject). Labels that would otherwise be identical carry the identifier or strain in parentheses. -->
47 <param name="organism" label="Model Organism" type="select" multiple="false" format="text" help="Choose a specialised training set.">
48 <option value="human">Homo sapiens</option>
49 <option value="fly">Drosophila melanogaster</option>
50 <option value="arabidopsis">Arabidopsis thaliana</option>
51 <option value="brugia">Brugia malayi</option>
52 <option value="aedes">Aedes aegypti</option>
53 <option value="tribolium2012">Tribolium castaneum</option>
54 <option value="schistosoma">Schistosoma mansoni</option>
55 <option value="tetrahymena">Tetrahymena thermophila</option>
56 <option value="galdieria">Galdieria sulphuraria</option>
57 <option value="maize">Zea mays</option>
58 <option value="toxoplasma">Toxoplasma gondii</option>
59 <option value="caenorhabditis">Caenorhabditis elegans</option>
60 <option value="aspergillus_fumigatus">Aspergillus fumigatus</option>
61 <option value="aspergillus_nidulans">Aspergillus nidulans</option>
62 <option value="aspergillus_oryzae">Aspergillus oryzae</option>
63 <option value="aspergillus_terreus">Aspergillus terreus</option>
64 <option value="botrytis_cinerea">Botrytis cinerea</option>
65 <option value="candida_albicans">Candida albicans</option>
66 <option value="candida_guilliermondii">Candida guilliermondii</option>
67 <option value="candida_tropicalis">Candida tropicalis</option>
68 <option value="chaetomium_globosum">Chaetomium globosum</option>
69 <option value="coccidioides_immitis">Coccidioides immitis</option>
70 <option value="coprinus">Coprinus cinereus (coprinus)</option>
71 <option value="coprinus_cinereus">Coprinus cinereus (coprinus_cinereus)</option>
72 <option value="cryptococcus_neoformans_gattii">Cryptococcus neoformans gattii</option>
73 <option value="cryptococcus_neoformans_neoformans_B">Cryptococcus neoformans neoformans (B)</option>
74 <option value="cryptococcus_neoformans_neoformans_JEC21">Cryptococcus neoformans neoformans (JEC21)</option>
75 <option value="debaryomyces_hansenii">Debaryomyces hansenii</option>
76 <option value="encephalitozoon_cuniculi_GB">Encephalitozoon cuniculi</option>
77 <option value="eremothecium_gossypii">Eremothecium gossypii</option>
78 <option value="fusarium_graminearum">Fusarium graminearum</option>
79 <option value="histoplasma_capsulatum">Histoplasma capsulatum (histoplasma_capsulatum)</option>
80 <option value="histoplasma">Histoplasma capsulatum (histoplasma)</option>
81 <option value="kluyveromyces_lactis">Kluyveromyces lactis</option>
82 <option value="laccaria_bicolor">Laccaria bicolor</option>
83 <option value="lamprey">Petromyzon marinus</option>
84 <option value="leishmania_tarentolae">Leishmania tarentolae</option>
85 <option value="lodderomyces_elongisporus">Lodderomyces elongisporus</option>
86 <option value="magnaporthe_grisea">Magnaporthe grisea</option>
87 <option value="neurospora_crassa">Neurospora crassa</option>
88 <option value="phanerochaete_chrysosporium">Phanerochaete chrysosporium</option>
89 <option value="pichia_stipitis">Pichia stipitis</option>
90 <option value="rhizopus_oryzae">Rhizopus oryzae</option>
91 <option value="saccharomyces_cerevisiae_S288C">Saccharomyces cerevisiae (S288C)</option>
92 <option value="saccharomyces_cerevisiae_rm11-1a_1">Saccharomyces cerevisiae (rm11-1a_1)</option>
93 <option value="saccharomyces">Saccharomyces cerevisiae (saccharomyces)</option>
94 <option value="schizosaccharomyces_pombe">Schizosaccharomyces pombe</option>
95 <option value="trichinella">Trichinella spiralis</option>
96 <option value="ustilago_maydis">Ustilago maydis</option>
97 <option value="yarrowia_lipolytica">Yarrowia lipolytica</option>
98 <option value="nasonia">Nasonia vitripennis</option>
99 <option value="tomato">Solanum lycopersicum</option>
100 <option value="chlamydomonas">Chlamydomonas reinhardtii</option>
101 <option value="amphimedon">Amphimedon queenslandica</option>
102 <option value="pneumocystis">Pneumocystis jirovecii</option>
103 <option value="chicken">Gallus gallus domesticus (chicken)</option>
104 <option value="cacao">Theobroma cacao (cacao)</option>
105 <option value="heliconius_melpomene1">Heliconius melpomene</option>
106 <option value="xenoturbella">Xenoturbella</option>
107 </param>
108 <!-- Strand selection, inserted into the command line as the strand option. -->
109 <param name="strand" label="Strand" type="select" multiple="false" format="text" help="Report predicted genes on both strands, just the forward or just the backward strand.">
110 <option value="both">both</option>
111 <option value="forward">forward</option>
112 <option value="backward">backward</option>
113 </param>
114 <!-- Gene model, inserted into the command line as the genemodel option; no option is marked selected, so the first one (complete) is the preselected choice. -->
115 <param name="genemodel" label="Gene Model" type="select" multiple="false" format="text" help="Gene Model to predict, for more information please refer to the help.">
116 <option value="complete">complete</option>
117 <option value="partial">partial</option>
118 <option value="intronless">intronless</option>
119 <option value="atleastone">atleastone</option>
120 <option value="exactlyone">exactlyone</option>
121 <option value="bacterium">bacterium (beta version)</option>
122 </param>
123 <!-- Output switches. The protein and codingseq switches additionally decide whether the corresponding FASTA outputs are created. -->
124 <param name="protein" type="boolean" label="Output predicted protein sequences (--protein)" truevalue="--protein=on" falsevalue="--protein=off" checked="true" />
125 <param name="codingseq" type="boolean" label="Output coding sequence as comment in the output file (--codingseq)" truevalue="--codingseq=on" falsevalue="--codingseq=off" checked="true" />
126 <param name="introns" type="boolean" label="Output predicted intron sequences (--introns)" truevalue="--introns=on" falsevalue="--introns=off" checked="false" />
127 <param name="start" type="boolean" label="Output predicted start codons (--start)" truevalue="--start=on" falsevalue="--start=off" checked="false" />
128 <param name="stop" type="boolean" label="Output predicted stop codons (--stop)" truevalue="--stop=on" falsevalue="--stop=off" checked="false" />
129 <param name="cds" type="boolean" label="Output CDS region (--cds)" truevalue="--cds=on" falsevalue="--cds=off" checked="true" />
130 <param name="gff" type="boolean" label="GFF formatted output, standard is GTF (--gff3)" truevalue="--gff3=on" falsevalue="--gff3=off" checked="false" />
131
132 </inputs>
133 <outputs> <!-- Main annotation file plus two FASTA files that are only created while the matching input switch is enabled. -->
134 <data format="gtf" name="output" label="${tool.name} on ${on_string}: GTF/GFF">
135 <change_format>
136 <when input="gff" value="--gff3=on" format="gff3" /> <!-- the gff3 switch makes augustus emit GFF3, so declare that datatype -->
137 </change_format>
138 </data>
139 <data format="fasta" name="protein_output" label="${tool.name} on ${on_string}: Protein sequence">
140 <filter>protein == True</filter>
141 </data>
142 <data format="fasta" name="codingseq_output" label="${tool.name} on ${on_string}: Coding sequence">
143 <filter>codingseq == True</filter>
144 </data>
145 </outputs>
146 <tests> <!-- Each expected ftype matches the format the outputs section declares for that parameter combination. -->
147 <test>
148 <param name="input_genome" value="human_augustus.fa" ftype="fasta" />
149 <param name="organism" value="human" />
150 <param name="utr" value="--UTR=on" />
151 <output name="output" file="human_augustus_utr-on.gtf" ftype="gtf" lines_diff="2"/>
152 </test>
153 <test>
154 <param name="input_genome" value="human_augustus.fa" ftype="fasta" />
155 <param name="organism" value="human" />
156 <param name="utr" value="--UTR=on" />
157 <param name="gff" value="--gff3=on" />
158 <output name="output" file="human_augustus_utr-on.gff" ftype="gff3" lines_diff="2"/>
159 </test>
160 <test>
161 <param name="input_genome" value="arabidopsis_augustus.fa" ftype="fasta" />
162 <param name="organism" value="arabidopsis" />
163 <param name="singlestrand" value="--singlestrand=true" />
164 <param name="mea" value="--mea=1" />
165 <output name="output" file="arabidopsis_augustus_utr-off_singlestrand-on_mea-on.gtf" ftype="gtf" lines_diff="2"/>
166 </test>
167 <test>
168 <param name="input_genome" value="human_augustus.fa" ftype="fasta" />
169 <param name="organism" value="human" />
170 <param name="protein" value="--protein=on" />
171 <param name="codingseq" value="--codingseq=on" />
172 <param name="introns" value="--introns=on" />
173 <param name="cds" value="--cds=on" />
174 <output name="output" file="human_augustus_protein_codingseq_introns_cds_main.gtf" ftype="gtf" lines_diff="2"/> <!-- gff switch not set here, so the output stays GTF -->
175 <output name="codingseq_output" file="human_augustus_protein_codingseq_introns_cds_codingseq.fasta" ftype="fasta" />
176 <output name="protein_output" file="human_augustus_protein_codingseq_introns_cds_protein.fasta" ftype="fasta" />
177 </test>
178 </tests>
179 <help>
180
181 **What it does**
182
183 AUGUSTUS is a gene prediction program for eukaryotes written by Mario Stanke and Oliver Keller.
184 It can be used as an ab initio program, which means it bases its prediction purely on the
185 sequence. AUGUSTUS may also incorporate hints on the gene structure coming from extrinsic sources
186 such as EST, MS/MS, protein alignments and syntenic genomic alignments.
187
188 -----
189
190 **Parameters**
191
192 Gene Model::
193
194 partial : allow prediction of incomplete genes at the sequence boundaries (AUGUSTUS default; this tool preselects complete)
195 intronless : only predict single-exon genes like in prokaryotes and some eukaryotes
196 complete : only predict complete genes
197 atleastone : predict at least one complete gene
198 exactlyone : predict exactly one complete gene
199
200
201
202 **Example**
203
204 Suppose you have the following DNA sequence in FASTA format::
205
206 >Seq1
207 cccgcggagcgggtaccacatcgctgcgcgatgtgcgagcgaacacccgggctgcgcccg
208 ggtgttgcgctcccgctccgcgggagcgctggcgggacgctgcgcgtcccgctcaccaag
209 cccgcttcgcgggcttggtgacgctccgtccgctgcgcttccggagttgcggggcttcgc
210 cccgctaaccctgggcctcgcttcgctccgccttgggcctgcggcgggtccgctgcgctc
211 ccccgcctcaagggcccttccggctgcgcctccaggacccaaccgcttgcgcgggcctgg
212
213 Running this tool will produce output similar to this::
214
215 # ----- prediction on sequence number 1 (length = 1992969, name = scaffold1|size1992969) -----
216 #
217 # Constraints/Hints:
218 # (none)
219 # Predicted genes for sequence number 1 on both strands
220 # start gene g6
221 scaffold1|size1992969 AUGUSTUS gene 17453 19382 0.11 + . g6
222 scaffold1|size1992969 AUGUSTUS transcript 17453 19382 0.11 + . g6.t1
223 scaffold1|size1992969 AUGUSTUS start_codon 17453 17455 . + 0 transcript_id "g6.t1"; gene_id "g6";
224 scaffold1|size1992969 AUGUSTUS intron 17615 17660 0.38 + . transcript_id "g6.t1"; gene_id "g6";
225 scaffold1|size1992969 AUGUSTUS intron 17708 17772 0.54 + . transcript_id "g6.t1"; gene_id "g6";
226 scaffold1|size1992969 AUGUSTUS intron 17902 18035 0.58 + . transcript_id "g6.t1"; gene_id "g6";
227 scaffold1|size1992969 AUGUSTUS intron 18313 18367 0.99 + . transcript_id "g6.t1"; gene_id "g6";
228 scaffold1|size1992969 AUGUSTUS intron 19014 19080 0.44 + . transcript_id "g6.t1"; gene_id "g6";
229 scaffold1|size1992969 AUGUSTUS CDS 17453 17614 0.55 + 0 transcript_id "g6.t1"; gene_id "g6";
230 scaffold1|size1992969 AUGUSTUS CDS 17661 17707 0.38 + 0 transcript_id "g6.t1"; gene_id "g6";
231 scaffold1|size1992969 AUGUSTUS CDS 17773 17901 0.54 + 1 transcript_id "g6.t1"; gene_id "g6";
232 scaffold1|size1992969 AUGUSTUS CDS 18036 18312 0.52 + 1 transcript_id "g6.t1"; gene_id "g6";
233 scaffold1|size1992969 AUGUSTUS CDS 18368 19013 0.99 + 0 transcript_id "g6.t1"; gene_id "g6";
234 scaffold1|size1992969 AUGUSTUS CDS 19081 19379 0.31 + 2 transcript_id "g6.t1"; gene_id "g6";
235 scaffold1|size1992969 AUGUSTUS stop_codon 19380 19382 . + 0 transcript_id "g6.t1"; gene_id "g6";
236
237
238 **References**
239
240 Mario Stanke and Stephan Waack (2003)
241 Gene Prediction with a Hidden-Markov Model and a new Intron Submodel.
242 Bioinformatics, Vol. 19, Suppl. 2, pages ii215-ii225
243
244
245 </help>
246 </tool>