comparison glimmer_build-icm.xml @ 0:841357e0acbf draft

Uploaded
author bgruening
date Sat, 06 Jul 2013 10:09:30 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:841357e0acbf
1 <tool id="glimmer_build-icm" name="Glimmer ICM builder" version="0.2">
2 <description></description>
3 <requirements>
4 <requirement type="package" version="3.02b">glimmer</requirement>
5 </requirements>
6 <command>
7 build-icm
8 --depth ${depth}
9 #if $no_stops
10 --no_stops
11 #end if
12 --period ${period}
13 --width ${width}
14 ## Stop codons: either a GenBank translation table number or an explicit comma-separated list.
15 #if $stop_codon_opts.stop_codon_opts_selector == 'gb'
16 --trans_table "${stop_codon_opts.genbank_gencode}"
17 #else
18 --stop_codons "${stop_codon_opts.stop_codons}"
19 #end if
20 ## Output path is the positional argument; the input dataset is fed on stdin and stderr is merged into stdout.
21 ${outfile} &lt; ${infile} 2&gt;&amp;1;
22 </command>
23 <inputs>
24 <param name="infile" type="data" format="fasta" label="Training Dataset" help="A set of known genes in FASTA format." />
25 <param name="depth" type="integer" value="7" label="Set the depth of the ICM" help="The depth is the maximum number of positions in the context window that will be used to determine the probability of the predicted position." />
26 <param name="period" type="integer" value="3" label="Set the period of the ICM" help="The period is the number of different submodels for different positions in the text in a cyclic pattern. E.g., if the period is 3, the first submodel will determine positions 1, 4, 7, ..." />
27 <param name="width" type="integer" value="12" label="Set the width of the ICM" help="The width includes the predicted position." />
28 <param name="no_stops" type="boolean" truevalue="--no_stops" falsevalue="" checked="false" label="Do not use any input strings with in-frame stop codons" />
29 <!-- Stop codons are chosen either through a GenBank translation table or as an explicit comma-separated list. -->
30 <conditional name="stop_codon_opts">
31 <param name="stop_codon_opts_selector" type="select" label="Specify stop codons as">
32 <option value="gb" selected="True">Genbank translation table entry</option>
33 <option value="free_form">Comma-separated list</option>
34 </param>
35 <when value="gb">
36 <param name="genbank_gencode" type="select" label="Use Genbank translation table to specify stop codons">
37 <option value="1" selected="True">1. Standard</option>
38 <option value="2">2. Vertebrate Mitochondrial</option>
39 <option value="3">3. Yeast Mitochondrial</option>
40 <option value="4">4. Mold, Protozoan, and Coelenterate Mitochondrial Code and the Mycoplasma/Spiroplasma Code</option>
41 <option value="5">5. Invertebrate Mitochondrial</option>
42 <option value="6">6. Ciliate, Dasycladacean and Hexamita Nuclear Code</option>
43 <option value="9">9. Echinoderm Mitochondrial</option>
44 <option value="10">10. Euplotid Nuclear</option>
45 <option value="11">11. Bacteria and Archaea</option>
46 <option value="12">12. Alternative Yeast Nuclear</option>
47 <option value="13">13. Ascidian Mitochondrial</option>
48 <option value="14">14. Flatworm Mitochondrial</option>
49 <option value="15">15. Blepharisma Macronuclear</option>
50 <option value="16">16. Chlorophycean Mitochondrial</option>
51 <option value="21">21. Trematode Mitochondrial</option>
52 <option value="22">22. Scenedesmus obliquus mitochondrial</option>
53 <option value="23">23. Thraustochytrium Mitochondrial</option>
54 <option value="24">24. Pterobranchia mitochondrial</option>
55 </param>
56 </when>
57 <when value="free_form">
58 <param name="stop_codons" type="text" value="tag,tga,taa" label="Specify stop codons as a comma-separated list" />
59 </when>
60 </conditional>
61 </inputs>
62 <outputs>
63 <data name="outfile" format="data"/> <!-- The ICM file passed to build-icm as its output path; stored with the generic "data" datatype. -->
64 </outputs>
65 <tests>
66 <test>
67 <param name="infile" value="streptomyces_Tu6071_plasmid_genes.fasta" />
68 <param name="depth" value="7" />
69 <param name="period" value="3" />
70 <param name="width" value="12" />
71 <param name="no_stops" value="" />
72 <param name="genbank_gencode" value="11" />
73 <!-- The output is a binary file, so only file sizes are compared (within the given delta). -->
74 <output name="outfile" file="streptomyces_Tu6071_plasmid_genes.icm" ftype="data" compare="sim_size" delta="1000" />
75 </test>
76 </tests>
77
78 <help>
79
80 **What it does**
81
82 This program constructs an interpolated context model (ICM) from an input set of sequences.
83
84 This model can be used by Glimmer3 to predict genes.
85
86 **TIP** To extract CDS from a GenBank file use the tool *Extract ORF from a GenBank file*.
87
88 -----
89
90 **Example**
91
92 *Input*::
93
94 - Genome Sequence
95
96 >CELF22B7 C.aenorhabditis elegans (Bristol N2) cosmid F22B7
97 GATCCTTGTAGATTTTGAATTTGAAGTTTTTTCTCATTCCAAAACTCTGT
98 GATCTGAAATAAAATGTCTCAAAAAAATAGAAGAAAACATTGCTTTATAT
99 TTATCAGTTATGGTTTTCAAAATTTTCTGACATACCGTTTTGCTTCTTTT
100 TTTCTCATCTTCTTCAAATATCAATTGTGATAATCTGACTCCTAACAATC
101 GAATTTCTTTTCCTTTTTCTTTTTCCAACAACTCCAGTGAGAACTTTTGA
102 ATATCTTCAAGTGACTTCACCACATCAGAAGGTGTCAACGATCTTGTGAG
103 AACATCGAATGAAGATAATTTTAATTTTAGAGTTACAGTTTTTCCTCCGA
104 CAATTCCTGATTTACGAACATCTTCTTCAAGCATTCTACAGATTTCTTGA
105 TGCTCTTCTAGGAGGATGTTGAAATCCGAAGTTGGAGAAAAAGTTCTCTC
106 AACTGAAATGCTTTTTCTTCGTGGATCCGATTCAGATGGACGACCTGGCA
107 GTCCGAGAGCCGTTCGAAGGAAAGATTCTTGTGAGAGAGGCGTGAAACAC
108 AAAGGGTATAGGTTCTTCTTCAGATTCATATCACCAACAGTTTGAATATC
109 CATTGCTTTCAGTTGAGCTTCGCATACACGACCAATTCCTCCAACCTAAA
110 AAATTATCTAGGTAAAACTAGAAGGTTATGCTTTAATAGTCTCACCTTAC
111 GAATCGGTAAATCCTTCAAAAACTCCATAATCGCGTTTTTATCATTTTCT
112 .....
113
114 *Output*::
115 interpolated context model (ICM)
116
117 -------
118
119 **References**
120
121 A.L. Delcher, K.A. Bratke, E.C. Powers, and S.L. Salzberg. Identifying bacterial genes and endosymbiont DNA with Glimmer. Bioinformatics 23(6):673-679 (2007).
122
123
124 </help>
125 </tool>