Mercurial > repos > galaxy-australia > kmc
diff kmc_tools_transform.xml @ 0:ca2743037241 draft
"planemo upload for repository https://github.com/usegalaxy-au/tools-au/tree/master/tools/kmc commit 29b98036c21809c923a92feb38b736c007d2e303"
author | galaxy-australia |
---|---|
date | Tue, 27 Sep 2022 05:20:06 +0000 |
parents | |
children | c7fda6e88567 |
line wrap: on
line diff
<!--
    kmc_tools_transform.xml, as added in changeset 0:ca2743037241
    (Tue Sep 27 05:20:06 2022 +0000).

    Galaxy tool wrapper around `kmc_tools transform`. It takes one KMC
    database (a list collection holding the .kmc_pre / .kmc_suf pair), applies
    between one and five operations to it in a single kmc_tools invocation and
    gathers every produced file from outdir/ into one output list collection.
    Tokens, requirements, stdio handling and the per-operation parameter
    macros are imported from macros.xml.
-->
<tool id="kmc_transform" name="KMC transform" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" >
    <description>single KMC's database</description>
    <xrefs>
        <xref type='bio.tools'>kmc</xref>
    </xrefs>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <expand macro="stdio" />
    <expand macro="version_command" />
    <command><![CDATA[
        mkdir outdir &&
        ## Link the two database files under the common base name "db" that is
        ## passed to kmc_tools below. Any element whose identifier does not
        ## contain '.kmc_suf' is linked as the .kmc_pre file.
        #for $f in $input_collection:
            #if '.kmc_suf' in str($f.element_identifier)
                ln -s '${f}' db.kmc_suf &&
            #else
                ln -s '${f}' db.kmc_pre &&
            #end if
        #end for
        kmc_tools
        -t\${GALAXY_SLOTS:-2}
        transform
        db
        ## Remember the selected operations (in order) so that their outputs
        ## can be copied into outdir/ once kmc_tools has finished.
        #set $operation_arr=[]
        ## NOTE(review): each operation's input_opt -ci/-cx flags are emitted
        ## right before its operation name. For the second and later
        ## operations this places them after the previous operation's output
        ## name, where kmc_tools may read them as that operation's output
        ## parameters - confirm against the kmc_tools documentation.
        #for $i,$opt in enumerate($operations)
            #silent $operation_arr.append(str($opt.ops.transform_operation))
            ## 1-based index, used to keep database output names unique.
            #set $i = $i + 1
            #if str($opt.ops.transform_operation) == "sort" or str($opt.ops.transform_operation) == "reduce" or str($opt.ops.transform_operation) == "compact":
                #if str($opt.ops.input_opt.min_kmer_occurrence) != '':
                    -ci'$opt.ops.input_opt.min_kmer_occurrence'
                #end if
                #if str($opt.ops.input_opt.max_kmer_occurrence) != '':
                    -cx'$opt.ops.input_opt.max_kmer_occurrence'
                #end if
                $opt.ops.transform_operation
                '${opt.ops.transform_operation}_${i}_db'
                #if str($opt.ops.output_opt.min_kmer_occurrence) != '':
                    -ci'$opt.ops.output_opt.min_kmer_occurrence'
                #end if
                #if str($opt.ops.output_opt.max_kmer_occurrence) != '':
                    -cx'$opt.ops.output_opt.max_kmer_occurrence'
                #end if
                #if str($opt.ops.output_opt.max_counter_value) != '':
                    -cs'$opt.ops.output_opt.max_counter_value'
                #end if
            #elif str($opt.ops.transform_operation) == "histogram":
                #if str($opt.ops.input_opt.min_kmer_occurrence) != '':
                    -ci'$opt.ops.input_opt.min_kmer_occurrence'
                #end if
                #if str($opt.ops.input_opt.max_kmer_occurrence) != '':
                    -cx'$opt.ops.input_opt.max_kmer_occurrence'
                #end if
                $opt.ops.transform_operation
                '${opt.ops.transform_operation}'.txt
                #if str($opt.ops.output_opt.min_kmer_value) != '':
                    -ci'$opt.ops.output_opt.min_kmer_value'
                #end if
                #if str($opt.ops.output_opt.max_kmer_value) != '':
                    -cx'$opt.ops.output_opt.max_kmer_value'
                #end if
            #elif str($opt.ops.transform_operation) == "dump":
                #if str($opt.ops.input_opt.min_kmer_occurrence) != '':
                    -ci'$opt.ops.input_opt.min_kmer_occurrence'
                #end if
                #if str($opt.ops.input_opt.max_kmer_occurrence) != '':
                    -cx'$opt.ops.input_opt.max_kmer_occurrence'
                #end if
                $opt.ops.transform_operation
                #if str($opt.ops.output_opt.sort_output) == "true":
                    -s
                #end if
                '${opt.ops.transform_operation}'.txt
            #elif str($opt.ops.transform_operation) == "set_counts":
                #if str($opt.ops.input_opt.min_kmer_occurrence) != '':
                    -ci'$opt.ops.input_opt.min_kmer_occurrence'
                #end if
                #if str($opt.ops.input_opt.max_kmer_occurrence) != '':
                    -cx'$opt.ops.input_opt.max_kmer_occurrence'
                #end if
                $opt.ops.transform_operation
                ## Quoted like every other user-supplied value in this command.
                '$opt.ops.input_opt.kmer_counts_value'
                '${opt.ops.transform_operation}_${i}_db'
            #end if
        #end for

        ## Copy every produced file into outdir/, which is the directory the
        ## output collection is discovered from. Database outputs carry the
        ## 1-based operation index; histogram and dump outputs do not.
        #for $i,$op in enumerate($operation_arr):
            #set $i = $i + 1
            #if str($op) == "sort" or str($op) == "reduce" or str($op) == "compact" or str($op) == "set_counts":
                && cp '${op}_${i}'_db.kmc_suf outdir/
                && cp '${op}_${i}'_db.kmc_pre outdir/
            #else if str($op) == "histogram":
                && cp histogram.txt outdir/
            #else if str($op) == "dump":
                && cp '${op}'.txt outdir/
            #end if
        #end for


    ]]></command>
    <inputs>
        <param name="input_collection" type="data_collection" collection_type="list" label="KMC db"/>
        <repeat name="operations" title="Operations" min="1" max="5">
            <conditional name="ops">
                <expand macro="transform_operation"/>
                <when value="sort">
                    <expand macro="transform_option"/>
                </when>
                <when value="reduce">
                    <expand macro="transform_option"/>
                </when>
                <when value="compact">
                    <expand macro="transform_option"/>
                </when>
                <when value="histogram">
                    <expand macro="histogram_option"/>
                </when>
                <when value="dump">
                    <expand macro="dump_option"/>
                </when>
                <when value="set_counts">
                    <expand macro="set_count_option"/>
                </when>
            </conditional>
        </repeat>
    </inputs>
    <outputs>
        <collection name="kmc_db" type="list" label="${tool.name} on ${on_string}: kmc outputs">
            <!-- '<' and '>' of the named group must be escaped inside an XML attribute value. -->
            <discover_datasets pattern="(?P&lt;designation&gt;.+)" format="binary" directory="outdir/" />
            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.txt" format="tabular" directory="outdir/" />
        </collection>
    </outputs>

    <tests>
        <test>
            <!-- #1 test perform sort operation on KMC DB -->
            <param name="input_collection">
                <collection type="list">
                    <element name="db.kmc_suf" value="db.kmc_suf"/>
                    <element name="db.kmc_pre" value="db.kmc_pre"/>
                </collection>
            </param>
            <conditional name="ops">
                <param name="transform_operation" value="sort"/>
            </conditional>
            <section name="input_opt">
                <param name="min_kmer_occurrence" value="2"/>
                <param name="max_kmer_occurrence" value="20"/>
            </section>
            <section name="output_opt">
                <param name="min_kmer_occurrence" value="3"/>
                <param name="max_kmer_occurrence" value="30"/>
            </section>
            <output_collection name="kmc_db" type="list">
                <!-- Both halves of the sorted database are expected. -->
                <element name="sort_1_db.kmc_suf" file="sort_1_db.kmc_suf"/>
                <element name="sort_1_db.kmc_pre" file="sort_1_db.kmc_pre"/>
            </output_collection>
        </test>
        <test>
            <!-- #2 test Generate Histogram Table -->
            <!-- NOTE(review): for histogram the command reads output_opt.min_kmer_value and
                 output_opt.max_kmer_value; confirm the output_opt parameter names used
                 below match the histogram_option macro. -->
            <param name="input_collection">
                <collection type="list">
                    <element name="db.kmc_suf" value="db.kmc_suf"/>
                    <element name="db.kmc_pre" value="db.kmc_pre"/>
                </collection>
            </param>
            <conditional name="ops">
                <param name="transform_operation" value="histogram"/>
            </conditional>
            <section name="input_opt">
                <param name="min_kmer_occurrence" value="3"/>
                <param name="max_kmer_occurrence" value="30"/>
            </section>
            <section name="output_opt">
                <param name="min_kmer_occurrence" value="2"/>
                <param name="max_kmer_occurrence" value="255"/>
            </section>
            <output_collection name="kmc_db" type="list">
                <element name="histogram" file="histogram.txt" ftype="tabular"/>
            </output_collection>
        </test>
        <test>
            <!-- #3 test Generate dump Table -->
            <param name="input_collection">
                <collection type="list">
                    <element name="db.kmc_suf" value="db.kmc_suf"/>
                    <element name="db.kmc_pre" value="db.kmc_pre"/>
                </collection>
            </param>
            <conditional name="ops">
                <param name="transform_operation" value="dump"/>
            </conditional>
            <section name="input_opt">
                <param name="min_kmer_occurrence" value="3"/>
                <param name="max_kmer_occurrence" value="30"/>
            </section>
            <section name="output_opt">
                <param name="min_kmer_occurrence" value="2"/>
                <param name="max_kmer_occurrence" value="255"/>
            </section>
            <output_collection name="kmc_db" type="list">
                <element name="dump" file="dump.txt" ftype="tabular"/>
            </output_collection>
        </test>
        <test>
            <!-- #4 test perform reduce operation -->
            <param name="input_collection">
                <collection type="list">
                    <element name="db.kmc_suf" value="db.kmc_suf"/>
                    <element name="db.kmc_pre" value="db.kmc_pre"/>
                </collection>
            </param>
            <repeat name="operations">
                <conditional name="ops">
                    <param name="transform_operation" value="reduce"/>
                </conditional>
                <section name="input_opt">
                    <param name="min_kmer_occurrence" value="2"/>
                    <param name="max_kmer_occurrence" value="20"/>
                </section>
            </repeat>
            <repeat name="operations">
                <conditional name="ops">
                    <param name="transform_operation" value="sort"/>
                </conditional>
                <section name="input_opt">
                    <param name="min_kmer_occurrence" value="3"/>
                    <param name="max_kmer_occurrence" value="30"/>
                </section>
            </repeat>
            <repeat name="operations">
                <conditional name="ops">
                    <param name="transform_operation" value="compact"/>
                </conditional>
                <section name="input_opt">
                    <param name="min_kmer_occurrence" value="4"/>
                    <param name="max_kmer_occurrence" value="40"/>
                </section>
            </repeat>
            <output_collection name="kmc_db" type="list">
                <element name="reduce_1_db.kmc_suf" file="reduce_1_db.kmc_suf"/>
                <element name="reduce_1_db.kmc_pre" file="reduce_1_db.kmc_pre"/>
                <element name="sort_2_db.kmc_suf" file="sort_2_db.kmc_suf"/>
                <element name="sort_2_db.kmc_pre" file="sort_2_db.kmc_pre"/>
                <element name="compact_3_db.kmc_suf" file="compact_3_db.kmc_suf"/>
                <element name="compact_3_db.kmc_pre" file="compact_3_db.kmc_pre"/>
            </output_collection>
        </test>
    </tests>
    <help><![CDATA[

.. class:: infomark

**What it does**

*This operation transforms a single KMC database into one or more KMC database(s) or text file(s).*

**Input**

- input file - path to databases generated by KMC (KMC generates 2 files with the same name but different extensions, i.e. kmc_suf and kmc_pre).

**Available Operations:**

- sort - converts database produced by KMC2.x to KMC1.x database format (which contains k-mers in sorted order)
- reduce - exclude too rare and too frequent k-mers
- compact - remove counters of k-mers
- histogram - produce histogram of k-mers occurrences
- dump - produce text dump of kmc database
- set_counts <value> - set all k-mer counts to specific value

**Generate KMC DB**

- kmc -k27 file.fastq kmers_db kmc_tmp_dir

**Example 1: split k-mers on a valid and invalid database**

*Let's suppose k-mers with occurrences below 11 are erroneous due to sequencing errors. With reduce we can split the k-mer set into one set with valid k-mers and one with invalid:*

- kmc_tools transform kmers_db reduce reduce_1_db -cx10 reduce reduce_2_db -ci11 histogram histogram.txt dump dump.txt

**Example 2: perform all operations**

- kmc_tools transform kmers_db reduce -ci10 reduce_1_db sort sort_2_db compact compact_3_db histogram histogram.txt dump dump.txt

**Output**

*Example 1:*

- reduce_1_db.kmc_suf
- reduce_1_db.kmc_pre
- reduce_2_db.kmc_suf
- reduce_2_db.kmc_pre
- histogram
- dump

*Example 2:*

- reduce_1_db.kmc_suf
- reduce_1_db.kmc_pre
- sort_2_db.kmc_suf
- sort_2_db.kmc_pre
- compact_3_db.kmc_suf
- compact_3_db.kmc_pre
- histogram
- dump



.. class:: infomark

**References**

More information is available on the `website <https://github.com/refresh-bio/KMC/blob/master/kmc_tools.pdf>`_.
    ]]></help>
    <citations>
        <!-- DOI citations take the bare DOI string, without a "DOI:" prefix. -->
        <citation type="doi">10.1093/bioinformatics/btx304</citation>
        <citation type="doi">10.1093/bioinformatics/btv022</citation>
        <citation type="doi">10.1186/1471-2105-14-160</citation>
    </citations>
</tool>