Mercurial > repos > iuc > mothur_shhh_flows
comparison shhh.flows.xml @ 2:cb0fe730391d draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/mothur commit 3418f23b9768f5aafb86488f5ec1cb97530d4fb3
author | iuc |
---|---|
date | Tue, 20 Mar 2018 22:21:51 -0400 |
parents | 4f5213348132 |
children | 7cca6773bc2a |
comparison
equal
deleted
inserted
replaced
1:aebe4d85e503 | 2:cb0fe730391d |
---|---|
5 </macros> | 5 </macros> |
6 <expand macro="requirements"/> | 6 <expand macro="requirements"/> |
7 <expand macro="stdio"/> | 7 <expand macro="stdio"/> |
8 <expand macro="version_command"/> | 8 <expand macro="version_command"/> |
9 <command><![CDATA[ | 9 <command><![CDATA[ |
10 @SHELL_OPTIONS@ | 10 @SHELL_OPTIONS@ |
11 | 11 |
12 ## create symlinks to input datasets | 12 ## create symlinks to input datasets |
13 ln -s '$flow' flow.dat && | 13 ln -s '$flow' flow.dat && |
14 ln -s '$prob.lookup' lookup.dat && | 14 ln -s '$prob.lookup' lookup.dat && |
15 | 15 |
16 ## Mothur can't handle scientific notation (i.e. 1e-6) | 16 ## Mothur can't handle scientific notation (i.e. 1e-6) |
17 #set mindelta_decimal = "{:.12f}".format(float($mindelta)) | 17 #set mindelta_decimal = "{:.12f}".format(float($mindelta)) |
18 echo 'shhh.flows( | 18 echo 'shhh.flows( |
19 flow=flow.dat, | 19 flow=flow.dat, |
20 lookup=lookup.dat, | 20 lookup=lookup.dat, |
21 maxiter=$maxiter, | 21 maxiter=$maxiter, |
22 mindelta=$mindelta_decimal, | 22 mindelta=$mindelta_decimal, |
23 cutoff=$cutoff, | 23 cutoff=$cutoff, |
24 sigma=$sigma, | 24 sigma=$sigma, |
25 order=$order, | 25 order=$order, |
26 large=$large, | 26 large=$large, |
27 processors='\${GALAXY_SLOTS:-8}' | 27 processors='\${GALAXY_SLOTS:-8}' |
28 )' | 28 )' |
29 | sed 's/ //g' ## mothur trips over whitespace | 29 | sed 's/ //g' ## mothur trips over whitespace |
30 | mothur | 30 | mothur |
31 | tee mothur.out.log | 31 | tee mothur.out.log |
32 ]]></command> | 32 ]]></command> |
33 <inputs> | 33 <inputs> |
34 <param name="flow" type="data" format="mothur.sff.flow" label="flow - flowgram data" help="Use sffinfo to generate flow data from an sff file and usually trimmed by trim.flows"/> | 34 <param name="flow" type="data" format="mothur.sff.flow" label="flow - flowgram data" help="Use sffinfo to generate flow data from an sff file and usually trimmed by trim.flows"/> |
35 <conditional name="prob"> | 35 <conditional name="prob"> |
36 <param name="source" type="select" label="Select Taxonomy from" help=""> | 36 <param name="source" type="select" label="Select Taxonomy from" help=""> |
50 <param name="mindelta" type="float" value="0.000001" min="0.0" max="0.1" label="mindelta - threshold for determining how much change in the flowgram correction is allowed" help="default .000001 (10^-6)"/> | 50 <param name="mindelta" type="float" value="0.000001" min="0.0" max="0.1" label="mindelta - threshold for determining how much change in the flowgram correction is allowed" help="default .000001 (10^-6)"/> |
51 <param name="cutoff" type="float" value="0.01" min="0.0" max="1.0" label="cutoff - seed the expectation-maximizaton step" help="default .01 (usually doesn't need to be changed)"/> | 51 <param name="cutoff" type="float" value="0.01" min="0.0" max="1.0" label="cutoff - seed the expectation-maximizaton step" help="default .01 (usually doesn't need to be changed)"/> |
52 <param name="sigma" type="float" value="0.06" min="0.0" max="1.0" label="sigma - the dispersion of the data in the expectation-maximization step of the algorithm" help="default .06 (usually doesn't need to be changed)"/> | 52 <param name="sigma" type="float" value="0.06" min="0.0" max="1.0" label="sigma - the dispersion of the data in the expectation-maximization step of the algorithm" help="default .06 (usually doesn't need to be changed)"/> |
53 <param name="large" type="integer" value="10000" min="1" label="large - split your flow file and process the pieces separately (default 10000)" help=""/> | 53 <param name="large" type="integer" value="10000" min="1" label="large - split your flow file and process the pieces separately (default 10000)" help=""/> |
54 <param name="order" type="text" value="A" label="order - flow order for nucleotides in the sequencer" help="default is A, was TACG. Also accepts B or I"/> | 54 <param name="order" type="text" value="A" label="order - flow order for nucleotides in the sequencer" help="default is A, was TACG. Also accepts B or I"/> |
55 <expand macro="param-savelog"/> | |
55 </inputs> | 56 </inputs> |
56 <outputs> | 57 <outputs> |
57 <expand macro="logfile-output"/> | 58 <expand macro="logfile-output"/> |
58 <data name="shhh_fasta" format="fasta" from_work_dir="flow*.fasta" label="${tool.name} on ${on_string}: shhh.fasta"/> | 59 <data name="shhh_fasta" format="fasta" from_work_dir="flow*.fasta" label="${tool.name} on ${on_string}: shhh.fasta"/> |
59 <data name="shhh_qual" format="qual454" from_work_dir="flow*.qual" label="${tool.name} on ${on_string}: shhh.qual"/> | 60 <data name="shhh_qual" format="qual454" from_work_dir="flow*.qual" label="${tool.name} on ${on_string}: shhh.qual"/> |
69 <output name="shhh_fasta" md5="9e7ba8213d039ba2360648c79f6380da" ftype="fasta"/> | 70 <output name="shhh_fasta" md5="9e7ba8213d039ba2360648c79f6380da" ftype="fasta"/> |
70 <output name="shhh_qual" md5="3ff0e8f575adc95ced285cc1704097ef" ftype="qual454"/> | 71 <output name="shhh_qual" md5="3ff0e8f575adc95ced285cc1704097ef" ftype="qual454"/> |
71 <output name="shhh_names" md5="71c8fd857b547f237e4215306762ab3d" ftype="mothur.names"/> | 72 <output name="shhh_names" md5="71c8fd857b547f237e4215306762ab3d" ftype="mothur.names"/> |
72 <output name="shhh_groups" md5="75a5293a71beeafe9f330e6f497ae350" ftype="mothur.groups"/> | 73 <output name="shhh_groups" md5="75a5293a71beeafe9f330e6f497ae350" ftype="mothur.groups"/> |
73 <output name="shhh_counts" md5="2257f2079668a0f992a14e4576063cf0" ftype="tabular"/> | 74 <output name="shhh_counts" md5="2257f2079668a0f992a14e4576063cf0" ftype="tabular"/> |
75 <param name="savelog" value="true"/> | |
74 <expand macro="logfile-test"/> | 76 <expand macro="logfile-test"/> |
75 </test> | 77 </test> |
76 </tests> | 78 </tests> |
77 <help> | 79 <help><![CDATA[ |
78 <![CDATA[ | |
79 | 80 |
80 @MOTHUR_OVERVIEW@ | 81 @MOTHUR_OVERVIEW@ |
81 | 82 |
82 **Command Documentation** | 83 **Command Documentation** |
83 | 84 |
84 The shhh.flows_ command is Pat Schloss's translation of Chris Quince's PyroNoise algorithm [1] from C to C++ with the incorporation of mothur's bells and whistles. Based on processing of test datasets provided by Quince, shhh.flows gives the same/similar output to AmpliconNoise. shhh.flows uses an expectation-maximization algorithm to correct flowgrams to identify the idealized form of each flowgram and translate that flowgram to a DNA sequence. Our testing has shown that when Titanium data are trimmed to 450 flows using trim.flows, shhh.flows provides the highest quality data for any other method available. In contrast, when we use the min/max number of flows suggested by Quince of 360/720, the error rate is not that great. This much improved error rate does come at a computational cost. Whereas the features in trim.seqs take on the order of minutes, shhh.flows can take on the order of hours. You will also need a lookup file that tells shhh.flows the probability of observing an intensity value for a given homopolymer length. You can get mothur-compatible files at: https://www.mothur.org/wiki/Lookup_files | 85 The shhh.flows_ command is Pat Schloss's translation of Chris Quince's PyroNoise algorithm [1] from C to C++ with the incorporation of mothur's bells and whistles. Based on processing of test datasets provided by Quince, shhh.flows gives the same/similar output to AmpliconNoise. shhh.flows uses an expectation-maximization algorithm to correct flowgrams to identify the idealized form of each flowgram and translate that flowgram to a DNA sequence. Our testing has shown that when Titanium data are trimmed to 450 flows using trim.flows, shhh.flows provides the highest quality data for any other method available. In contrast, when we use the min/max number of flows suggested by Quince of 360/720, the error rate is not that great. This much improved error rate does come at a computational cost. Whereas the features in trim.seqs take on the order of minutes, shhh.flows can take on the order of hours. You will also need a lookup file that tells shhh.flows the probability of observing an intensity value for a given homopolymer length. You can get mothur-compatible files at: https://www.mothur.org/wiki/Lookup_files |
85 | 86 |
86 .. _shhh.flows: https://www.mothur.org/wiki/Shhh.flows | 87 .. _shhh.flows: https://www.mothur.org/wiki/Shhh.flows |
87 | 88 |
88 ]]> | 89 ]]></help> |
89 </help> | |
90 <expand macro="citations"/> | 90 <expand macro="citations"/> |
91 </tool> | 91 </tool> |