0
|
1 <tool id="glimmer_build-icm" name="Glimmer ICM builder" version="0.2">
|
|
2 <description></description>
|
|
3 <requirements>
|
|
4 <requirement type="package" version="3.02b">glimmer</requirement>
|
|
5 </requirements>
|
|
6 <!-- Runs build-icm: the training sequences are read from stdin, the model is written to the output path given as the last argument, and stderr is merged into stdout. Shell redirection operators are written as XML entities so the document stays well-formed; dataset paths are single-quoted for the shell. --><command>
|
|
7 build-icm
|
|
8 --depth $depth
|
|
9 #if $no_stops:
|
|
10 --no_stops
|
|
11 #end if
|
|
12 --period $period
|
|
13 --width $width
|
|
14
|
|
15 #if $stop_codon_opts.stop_codon_opts_selector == "gb":
|
|
16 --trans_table "${stop_codon_opts.genbank_gencode}"
|
|
17 #else:
|
|
18 --stop_codons "${stop_codon_opts.stop_codons}"
|
|
19 #end if
|
|
20
|
|
21 '$outfile' &lt; '$infile' 2&gt;&amp;1;
|
|
22 </command>
|
|
23 <inputs>
|
|
24 <!-- Training sequences; the command template feeds this dataset to build-icm on stdin. --><param name="infile" type="data" format="fasta" label="Training Dataset" help="A set of known genes in FASTA format." />
|
|
25 <!-- Value of the depth option in the command template (default 7). --><param name="depth" type="integer" value="7" label="Set the depth of the ICM" help="The depth is the maximum number of positions in the context window that will be used to determine the probability of the predicted position." />
|
|
26 <!-- Value of the period option in the command template (default 3). --><param name="period" type="integer" value="3" label="Set the period of the ICM" help="The period is the number of different submodels for different positions in the text in a cyclic pattern. E.g., if the period is 3, the first submodel will determine positions 1, 4, 7, ..." />
|
|
27 <!-- Value of the width option in the command template (default 12). --><param name="width" type="integer" value="12" label="Set the width of the ICM" help="The width includes the predicted position." />
|
|
28 <!-- Unchecked by default. The command template tests this parameter in an #if block and writes the no_stops flag itself. --><param name="no_stops" type="boolean" truevalue="--no_stops" falsevalue="" checked="false" label="Do not use any input strings with in-frame stop codons" />
|
|
29
|
|
30 <!-- Chooses how stop codons are supplied: "gb" passes a GenBank translation table number (trans_table option), "free_form" passes a comma-separated codon list (stop_codons option). --><conditional name="stop_codon_opts">
|
|
31 <param name="stop_codon_opts_selector" type="select" label="Specify stop codons as">
|
|
32 <option value="gb" selected="True">Genbank translation table entry</option>
|
|
33 <option value="free_form">Comma-separated list</option>
|
|
34 </param>
|
|
35 <when value="gb">
|
|
36 <param name="genbank_gencode" type="select" label="Use Genbank translation table to specify stop codons">
|
|
37 <option value="1" selected="True">1. Standard</option>
|
|
38 <option value="2">2. Vertebrate Mitochondrial</option>
|
|
39 <option value="3">3. Yeast Mitochondrial</option>
|
|
40 <option value="4">4. Mold, Protozoan, and Coelenterate Mitochondrial Code and the Mycoplasma/Spiroplasma Code</option>
|
|
41 <option value="5">5. Invertebrate Mitochondrial</option>
|
|
42 <option value="6">6. Ciliate, Dasycladacean and Hexamita Nuclear Code</option>
|
|
43 <option value="9">9. Echinoderm Mitochondrial</option>
|
|
44 <option value="10">10. Euplotid Nuclear</option>
|
|
45 <option value="11">11. Bacteria and Archaea</option>
|
|
46 <option value="12">12. Alternative Yeast Nuclear</option>
|
|
47 <option value="13">13. Ascidian Mitochondrial</option>
|
|
48 <option value="14">14. Flatworm Mitochondrial</option>
|
|
49 <option value="15">15. Blepharisma Macronuclear</option>
|
|
50 <option value="16">16. Chlorophycean Mitochondrial</option>
|
|
51 <option value="21">21. Trematode Mitochondrial</option>
|
|
52 <option value="22">22. Scenedesmus obliquus mitochondrial</option>
|
|
53 <option value="23">23. Thraustochytrium Mitochondrial</option>
|
|
54 <option value="24">24. Pterobranchia mitochondrial</option>
|
|
55 </param>
|
|
56 </when>
|
|
57 <when value="free_form">
|
|
58 <param name="stop_codons" type="text" value="tag,tga,taa" label="Specify stop codons as a comma-separated list" />
|
|
59 </when>
|
|
60 </conditional>
|
|
61 </inputs>
|
|
62 <!-- Single output: the model that build-icm writes to the path given as its last command-line argument. It is declared with the generic "data" format. --><outputs>
|
|
63 <data format="data" name="outfile" />
|
|
64 </outputs>
|
|
65 <!-- One functional test: depth 7, period 3, width 12, no_stops left empty, translation table 11. --><tests>
|
|
66 <test>
|
|
67 <param name="infile" value='streptomyces_Tu6071_plasmid_genes.fasta' />
|
|
68 <param name="depth" value="7" />
|
|
69 <param name="period" value="3" />
|
|
70 <param name="width" value="12" />
|
|
71 <param name="no_stops" value="" />
|
|
72 <param name="genbank_gencode" value="11" />
|
|
73 <!-- Compare file sizes only (within the given delta), because the output is binary. -->
|
|
74 <output name="outfile" file='streptomyces_Tu6071_plasmid_genes.icm' compare="sim_size" delta="1000" ftype="data" />
|
|
75 </test>
|
|
76 </tests>
|
|
77
|
|
78 <help>
|
|
79
|
|
80 **What it does**
|
|
81
|
|
82 This program constructs an interpolated context model (ICM) from an input set of sequences.
|
|
83
|
|
84 This model can be used by Glimmer3 to predict genes.
|
|
85
|
|
86 **TIP:** To extract CDS from a GenBank file, use the tool *Extract ORF from a GenBank file*.
|
|
87
|
|
88 -----
|
|
89
|
|
90 **Example**
|
|
91
|
|
92 *Input*::
|
|
93
|
|
94 - Genome Sequence
|
|
95
|
|
96 >CELF22B7 C.aenorhabditis elegans (Bristol N2) cosmid F22B7
|
|
97 GATCCTTGTAGATTTTGAATTTGAAGTTTTTTCTCATTCCAAAACTCTGT
|
|
98 GATCTGAAATAAAATGTCTCAAAAAAATAGAAGAAAACATTGCTTTATAT
|
|
99 TTATCAGTTATGGTTTTCAAAATTTTCTGACATACCGTTTTGCTTCTTTT
|
|
100 TTTCTCATCTTCTTCAAATATCAATTGTGATAATCTGACTCCTAACAATC
|
|
101 GAATTTCTTTTCCTTTTTCTTTTTCCAACAACTCCAGTGAGAACTTTTGA
|
|
102 ATATCTTCAAGTGACTTCACCACATCAGAAGGTGTCAACGATCTTGTGAG
|
|
103 AACATCGAATGAAGATAATTTTAATTTTAGAGTTACAGTTTTTCCTCCGA
|
|
104 CAATTCCTGATTTACGAACATCTTCTTCAAGCATTCTACAGATTTCTTGA
|
|
105 TGCTCTTCTAGGAGGATGTTGAAATCCGAAGTTGGAGAAAAAGTTCTCTC
|
|
106 AACTGAAATGCTTTTTCTTCGTGGATCCGATTCAGATGGACGACCTGGCA
|
|
107 GTCCGAGAGCCGTTCGAAGGAAAGATTCTTGTGAGAGAGGCGTGAAACAC
|
|
108 AAAGGGTATAGGTTCTTCTTCAGATTCATATCACCAACAGTTTGAATATC
|
|
109 CATTGCTTTCAGTTGAGCTTCGCATACACGACCAATTCCTCCAACCTAAA
|
|
110 AAATTATCTAGGTAAAACTAGAAGGTTATGCTTTAATAGTCTCACCTTAC
|
|
111 GAATCGGTAAATCCTTCAAAAACTCCATAATCGCGTTTTTATCATTTTCT
|
|
112 .....
|
|
113
|
|
114 *Output*::
|
|
115 interpolated context model (ICM)
|
|
116
|
|
117 -------
|
|
118
|
|
119 **References**
|
|
120
|
|
121 A.L. Delcher, K.A. Bratke, E.C. Powers, and S.L. Salzberg. Identifying bacterial genes and endosymbiont DNA with Glimmer. Bioinformatics (Advance online version) (2007).
|
|
122
|
|
123
|
|
124 </help>
|
|
125 </tool>
|