Mercurial > repos > iuc > instrain_compare
comparison instrain_compare.xml @ 0:dff92aac9f75 draft
"planemo upload for repository https://github.com/MrOlm/inStrain commit e6eae71231e551c08aa96afc9f15b8ba87676101"
author | iuc |
---|---|
date | Wed, 11 Aug 2021 21:11:53 +0000 |
parents | |
children | 92a7945118a9 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:dff92aac9f75 |
---|---|
1 <tool id="instrain_compare" name="InStrain Compare" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> | |
2 <description>Compares multiple inStrain profiles (popANI, coverage_overlap, etc.) </description> | |
3 <macros> | |
4 <import>macros.xml</import> | |
5 </macros> | |
6 <expand macro="edam_ontology"/> | |
7 <expand macro="requirements"/> | |
8 <version_command>inStrain compare --version</version_command> | |
9 <command detect_errors="exit_code"><![CDATA[ | |
10 #if $stb | |
11 ln -s '$stb' 'stb_file.stb' && | |
12 #end if | |
13 #if $other.genome | |
14 ln -s '$other.genome' 'genome_file.stb' && | |
15 #end if | |
16 #for $i, $s in enumerate($input_is) | |
17 #if $s | |
19 mkdir -p '$i-input.IS' && | |
20 unzip '$s' -d '$i-input.IS/' && | |
21 #end if | |
22 #end for | |
23 inStrain compare | |
24 --input | |
25 #for $i, $s in enumerate($input_is) | |
26 #if $s | |
27 '$i-input.IS' | |
28 #end if | |
29 #end for | |
30 --output 'output.IS.COMPARE' | |
31 --processes "\${GALAXY_SLOTS:-6}" | |
32 #if $stb | |
33 --stb 'stb_file.stb' | |
34 #end if | |
35 --min_cov $variant_calling.min_cov | |
36 --min_freq $variant_calling.min_freq | |
37 --fdr $variant_calling.fdr | |
38 $database.database_mode | |
39 --breadth $database.breadth | |
40 #if $other.scaffolds | |
41 --scaffolds '$other.scaffolds' | |
42 #end if | |
43 #if $other.genome | |
44 --genome 'genome_file.stb' | |
45 #end if | |
46 $other.store_coverage_overlap | |
47 $other.store_mismatch_locations | |
48 $other.include_self_comparisons | |
49 $other.skip_plot_generation | |
50 --group_length $other.group_length | |
51 --ani_threshold $genome_clustering.ani_threshold | |
52 --coverage_treshold $genome_clustering.coverage_treshold | |
53 --clusterAlg '$genome_clustering.clusterAlg' | |
54 ]]></command> | |
55 <inputs> | |
56 <param name="input_is" type="data" format="zip" multiple="true" label="inStrain Profile IS folder" help=" The Zip files for the IS profiles outputs you want to compare"/> | |
57 <param argument="--stb" type="data" format="tabular" optional="true" label="Scaffold to bin" help="This can be a file with each line listing a scaffold and a bin name, tab-seperated. This can also be a space-seperated list of .fasta files, with one genome per .fasta file. If nothing is provided, all scaffolds will be treated as belonging to the same genome"/> | |
58 <section name="variant_calling" title="Variant Calling Options" expanded="true"> | |
59 <param argument="--min_cov" type="integer" value="5" label=" Minimum coverage to call an variant"/> | |
60 <param argument="--min_freq" type="float" value="0.05" label="Minimum SNP frequency to confirm a SNV" help="Both this AND the FDR snp count cutoff must be true to call a SNP."/> | |
61 <param argument="--fdr" type="float" value="1e-06" min="0" max="1" help="SNP false discovery rate- based on simulation data with a 0.1 percent error rate (Q30)"/> | |
62 </section> | |
63 <section name="database" title="Database Mode Parameters" expanded="true"> | |
64 <param argument="--database_mode" type="boolean" truevalue="--database_mode" falsevalue="" checked="false" label="Automatically determine which genomes are present in each Profile and only compare scaffolds from those genomes." help="All profiles must have run Profile with the same .stb"/> | |
65 <param argument="--breadth" type="float" value="0.5" label="Minimum breadth_minCov required to count a genome present"/> | |
66 </section> | |
67 <section name="other" title="Other Options" expanded="true"> | |
68 <param argument="--scaffolds" type="data" format="fasta" optional="true" label="Location to a list of scaffolds to compare. You can also make this a .fasta file and it will load the scaffold names"/> | |
69 <param argument="--genome" type="data" format="tabular" optional="true" label="Run scaffolds belonging to this single genome only. Must provide an .stb file"/> | |
70 <param argument="--store_coverage_overlap" type="boolean" truevalue="--store_coverage_overlap" falsevalue="" checked="false" label="Store coverage overlap on an mm level"/> | |
71 <param argument="--store_mismatch_locations" type="boolean" truevalue="--store_mismatch_locations" falsevalue="" checked="false" label="Store the locations of SNPs"/> | |
72 <param argument="--include_self_comparisons" type="boolean" truevalue="--include_self_comparisons" falsevalue="" checked="false" label="Compare IS profiles against themself"/> | |
73 <param argument="--skip_plot_generation" type="boolean" truevalue="--skip_plot_generation" falsevalue="" checked="false" label="Dont create plots at the end of the run"/> | |
74 <param argument="--group_length" type="integer" value="10000000" label="How many bp to compare simultaneously" help="higher will use more RAM and run more quickly"/> | |
75 </section> | |
76 <section name="genome_clustering" title="Genome Clustering Options" expanded="true"> | |
77 <param argument="--ani_threshold" type="float" value="0.99999" label="popANI threshold to cluster genomes at" help="Must provide .stb file to do so"/> | |
78 <param argument="--coverage_treshold" type="float" value="0.1" label="Minimum percent_genome_compared for a genome comparison to count" help="if below the popANI will be set to 0"/> | |
79 <param argument="--clusterAlg" type="select" label="Algorithm used to cluster genomes"> | |
80 <option value="average" selected="true">Average</option> | |
81 <option value="single">Single</option> | |
82 <option value="ward">Ward</option> | |
83 <option value="complete">complete</option> | |
84 <option value="centroid">centroid</option> | |
85 <option value="weighted">weighted</option> | |
86 <option value="median">median</option> | |
87 </param> | |
88 </section> | |
89 </inputs> | |
90 <outputs> | |
91 <data name="comparisonsTable" format="tabular" from_work_dir="output.IS.COMPARE/output/output.IS.COMPARE_comparisonsTable.tsv" label="Comparisons Table: Summarizes the differences between two inStrain profiles on a scaffold by scaffold level" /> | |
92 <data name="pairwise_SNP_locations" format="tabular" from_work_dir="output.IS.COMPARE/output/output.IS.COMPARE_pairwise_SNP_locations.tsv" label="Pairwise SNP locations: Lists the locations of all differences between profiles." /> | |
93 <data name="genomeWide_compare" format="tabular" from_work_dir="output.IS.COMPARE/output/output.IS.COMPARE_genomeWide_compare.tsv" label="Genome Wide compare: A genome-level summary of the differences detected by inStrain compare." /> | |
94 <data format="tabular" name="strain_clusters" from_work_dir="output.IS.COMPARE/output/output.IS.COMPARE_strain_clusters.tsv" label="Strain clusters: Generate strain-level clusters" /> | |
95 <data format="pdf" name="inStrainCompare_dendrograms" from_work_dir="output.IS.COMPARE/figures/output.IS.COMPARE_inStrainCompare_dendrograms.pdf" label="inStrain Compare dendrograms: genomeWide microdiveristy metrics" /> | |
96 </outputs> | |
97 <tests> | |
98 <test expect_num_outputs="5"> | |
99 <param name="stb" value="N5_271_010G1.maxbin2.stb"/> | |
100 <param name="input_is" value="N5_271_010G1_scaffold_min1000.fa-vs-N5_271_010G1.IS.zip,N5_271_010G1_scaffold_min1000.fa-vs-N5_271_010G2.IS.zip"/> | |
101 <section name="variant_calling"> | |
102 <param name="min_cov" value="5"/> | |
103 <param name="min_freq" value="0.05"/> | |
104 <param name="fdr" value="1e-06"/> | |
105 </section> | |
106 <section name="database"> | |
107 <param name="database_mode" value="false"/> | |
108 <param name="breadth" value="0.5"/> | |
109 </section> | |
110 <section name="other"> | |
111 <param name="store_coverage_overlap" value="false"/> | |
112 <param name="store_mismatch_locations" value="false"/> | |
113 <param name="include_self_comparisons" value="false"/> | |
114 <param name="skip_plot_generation" value="false"/> | |
115 <param name="group_length" value="10000000"/> | |
116 </section> | |
117 <section name="genome_clustering"> | |
118 <param name="ani_threshold" value="0.99999"/> | |
119 <param name="coverage_treshold" value="0.1"/> | |
120 <param name="clusterAlg" value="average"/> | |
121 </section> | |
122 <output name="comparisonsTable"> | |
123 <assert_contents> | |
124 <has_text text="N5_271_010G1_scaffold_73"/> | |
125 <has_n_lines n="168"/> | |
126 <has_n_columns n="11"/> | |
127 </assert_contents> | |
128 </output> | |
129 <output name="pairwise_SNP_locations"> | |
130 <assert_contents> | |
131 <has_n_lines n="0"/> | |
132 </assert_contents> | |
133 </output> | |
134 <output name="genomeWide_compare"> | |
135 <assert_contents> | |
136 <has_text text="name1"/> | |
137 <has_n_lines n="3"/> | |
138 <has_n_columns n="10"/> | |
139 </assert_contents> | |
140 </output> | |
141 <output name="strain_clusters"> | |
142 <assert_contents> | |
143 <has_text text="1_1"/> | |
144 <has_n_lines n="5"/> | |
145 <has_n_columns n="3"/> | |
146 </assert_contents> | |
147 </output> | |
148 <output name="inStrainCompare_dendrograms"> | |
149 <assert_contents> | |
150 <has_size value="384512" delta="10000" /> | |
151 </assert_contents> | |
152 </output> | |
153 </test> | |
154 </tests> | |
155 <help><![CDATA[ | |
156 @HELP_HEADER@ | |
157 | |
158 Compare | |
159 ======= | |
160 | |
161 inStrain compare is part of the inStrain module and provides the ability to compare multiple inStrain profiles (created by running inStrain profile). | |
162 | |
163 Note | |
164 ==== | |
165 | |
166 inStrain can only compare inStrain profiles that have been mapped to the same .fasta file | |
167 | |
168 inStrain compare does pairwise comparisons between each input inStrain profile. For each pair, a series of steps are undertaken: | |
169 | |
170 1. All positions in which both IS_profile objects have at least min_cov coverage (5x by default) are identified. This information can be stored in the output by using the flag --store_coverage_overlap, but due to its size, it is not stored by default. | |
171 | |
172 | |
173 2. Each position identified in step 1 is compared to calculate both conANI and popANI. Positions are compared by testing whether the consensus base in sample 1 is detected at all in sample 2 and vice versa. Detection of an allele in a sample is based on that allele being above the set --min_freq and --fdr. All detected differences between each pair of samples can be reported if the flag --store_mismatch_locations is set. | |
174 | |
175 | |
176 3. The coverage overlap and the average nucleotide identity for each scaffold are reported. For details on how this is done, see the inStrain documentation. | |
177 | |
178 | |
179 Inputs | |
180 ====== | |
181 | |
182 Multiple inStrain profiles IS outputs (zip files), all mapped to the same .fasta file | |
183 | |
184 | |
185 Outputs | |
186 ======= | |
187 | |
188 1. comparisonsTable.tsv | |
189 | |
190 Summarizes the differences between two inStrain profiles on a scaffold by scaffold level | |
191 | |
192 2. pairwise_SNP_locations.tsv | |
193 | |
194 Lists the locations of all differences between profiles. Because it’s a big file, this will only be created if you include the flag --store_mismatch_locations in your inStrain compare command. | |
195 | |
196 3. genomeWide_compare.tsv | |
197 | |
198 A genome-level summary of the differences detected by inStrain compare. Generated by running inStrain genome_wide on the results of inStrain compare | |
199 | |
200 4. strain_clusters.tsv | |
201 | |
202 The result of clustering the pairwise comparison data provided in genomeWide_compare.tsv to generate strain-level clusters. Performed using hierarchical clustering in the same manner as the program dRep | |
203 | |
204 5. Compare dendrograms (PDF) figure/plot | |
205 | |
206 A dendrogram comparing all samples based on popANI and based on shared_bases. | |
207 | |
208 ]]></help> | |
209 <citations> | |
210 <citation type="doi">10.1101/2020.01.22.915579</citation> | |
211 </citations> | |
212 </tool> |