changeset 1:8f32dcfde5d9 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/mash commit 344ee746d6e93c583331c0f74d234ab200a8ce43
author iuc
date Fri, 26 Sep 2025 14:27:48 +0000
parents 3b275e1a1581
children e0c0fda064eb
files mash_paste.xml mash_sketch.xml
diffstat 2 files changed, 36 insertions(+), 195 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mash_paste.xml	Fri Sep 26 14:27:48 2025 +0000
@@ -0,0 +1,36 @@
+<tool id="mash_paste" name="mash paste" version="@TOOL_VERSION@+galaxy0" profile="@PROFILE@">
+    <description>Create a single sketch file from multiple sketch files.</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="xref"/>
+    <expand macro="requirements"/>
+    <expand macro="version_command"/>
+    <command detect_errors="exit_code"><![CDATA[
+        ## Symlink each sketch as <index>_<sanitized identifier>.<ext>: unique and shell-safe even for odd or duplicate dataset names.
+        #import re
+        #set $link_names = []
+        #for $idx, $msh in enumerate($msh_files)
+            #silent $link_names.append("%d_%s.%s" % ($idx, re.sub('[^A-Za-z0-9_.-]', '_', str($msh.element_identifier)), $msh.ext))
+            ln -s '$msh' '${link_names[-1]}' &&
+        #end for
+        mash paste result.msh #echo ' '.join(["'%s'" % $name for $name in $link_names])# &&
+        mv result.msh '$output'
+    ]]></command>
+    <inputs>
+        <param name="msh_files" type="data" format="msh" multiple="true" label="Sketch files to merge"/>
+    </inputs>
+    <outputs>
+        <data name="output" format="msh" label="${tool.name} on ${on_string}: Output Sketch File"/>
+    </outputs>
+    <tests>
+        <test expect_num_outputs="1">
+            <param name="msh_files" value="test_01_mash_sketch.msh,test_02_mash_sketch.msh"/>
+            <output name="output" file="res_paste.msh" compare="sim_size"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+	Create a single sketch file from multiple sketch files.
+  ]]></help>
+    <expand macro="citations"/>
+</tool>
\ No newline at end of file
--- a/mash_sketch.xml	Wed Sep 24 13:47:21 2025 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,195 +0,0 @@
-<tool id="mash_sketch" name="mash sketch" version="@TOOL_VERSION@+galaxy2" profile="@PROFILE@">
-    <description>Create a reduced sequence representation based on min-hashes</description>
-    <macros>
-        <import>macros.xml</import>
-    </macros>
-    <expand macro="xref"/>
-    <expand macro="requirements" />
-    <expand macro="version_command" />
-    <command detect_errors="exit_code"><![CDATA[
-        ## Symlink every selected input into the working directory under its own element identifier.
-        #if str ( $reads_assembly.reads_assembly_selector ) == "reads":
-                #if str( $reads_assembly.reads_input.reads_input_selector ) == "paired":
-                    ln -s '$reads_assembly.reads_input.reads_1' "${reads_assembly.reads_input.reads_1.element_identifier}" &&
-                    ln -s '$reads_assembly.reads_input.reads_2' "${reads_assembly.reads_input.reads_2.element_identifier}" &&
-                #end if
-                #if str( $reads_assembly.reads_input.reads_input_selector ) == "paired_collection":
-                    ln -s '$reads_assembly.reads_input.reads.forward' "${reads_assembly.reads_input.reads.forward.element_identifier}" &&
-                    ln -s '$reads_assembly.reads_input.reads.reverse' "${reads_assembly.reads_input.reads.reverse.element_identifier}" &&
-                #end if
-                #if str( $reads_assembly.reads_input.reads_input_selector ) == "single":
-                    ln -s '$reads_assembly.reads_input.reads' "${reads_assembly.reads_input.reads.element_identifier}" &&
-                #end if
-        #elif str ( $reads_assembly.reads_assembly_selector ) == "assembly":
-            ln -s '${reads_assembly.assembly}' "${reads_assembly.assembly.element_identifier}" &&
-        #end if
-        ## Sketch the linked inputs; the "sketch" output declared below is collected from sketch.msh in the working directory.
-        mash sketch
-            -s '${sketch_size}'
-            -k '${kmer_size}'
-            -w '${prob_threshold}'
-            #if str( $reads_assembly.reads_assembly_selector ) == "reads":
-                -m '${reads_assembly.minimum_kmer_copies}'
-                -r
-                #if $reads_assembly.target_coverage :
-                    -c '${reads_assembly.target_coverage}'
-                #end if
-                #if $reads_assembly.genome_size :
-                    -g '${reads_assembly.genome_size}'
-                #end if
-                #if str( $reads_assembly.reads_input.reads_input_selector ) == "paired" :
-                    "${reads_assembly.reads_input.reads_1.element_identifier}" "${reads_assembly.reads_input.reads_2.element_identifier}"
-                #end if
-                #if str( $reads_assembly.reads_input.reads_input_selector ) == "paired_collection":
-                    "${reads_assembly.reads_input.reads.forward.element_identifier}" "${reads_assembly.reads_input.reads.reverse.element_identifier}"
-                #end if
-                #if str( $reads_assembly.reads_input.reads_input_selector ) == "single":
-                    "${reads_assembly.reads_input.reads.element_identifier}"
-                #end if
-            #elif str( $reads_assembly.reads_assembly_selector ) == "assembly":
-                -p \${GALAXY_SLOTS:-1}
-                ${reads_assembly.individual_sequences}
-                "${reads_assembly.assembly.element_identifier}"
-            #end if
-            -o 'sketch'
-    ]]></command>
-    <inputs>
-        <conditional name="reads_assembly">
-            <param name="reads_assembly_selector" type="select" label="Input: Reads or Assemblies">
-                <option selected="True" value="reads">Reads</option>
-                <option value="assembly">Assembly</option>
-            </param>
-            <when value="reads">
-                <conditional name="reads_input">
-                    <param name="reads_input_selector" type="select" label="Single or Paired-end reads" help="Select between paired and single end data">
-                        <option value="paired">Paired</option>
-                        <option value="single">Single</option>
-                        <option value="paired_collection">Paired Collection</option>
-                    </param>
-                    <when value="paired">
-                        <param name="reads_1" type="data" format="@INTYPES@" label="Select first set of reads" help="Specify dataset with forward reads"/>
-                        <param name="reads_2" type="data" format="@INTYPES@" label="Select second set of reads" help="Specify dataset with reverse reads"/>
-                    </when>
-                    <when value="single">
-                        <param name="reads" type="data" format="@INTYPES@" label="Select fastq dataset" help="Specify dataset with single reads"/>
-                    </when>
-                    <when value="paired_collection">
-                        <param name="reads" format="@INTYPES@" type="data_collection" collection_type="paired" label="Select a paired collection" help="See help section for an explanation of dataset collections"/>
-                    </when>
-                </conditional>
-                <param type="integer" name="minimum_kmer_copies" argument="-m" value="1" min="1" max="1000" label="Minimum copies of each k-mer required to pass noise filter"/>
-                <param type="integer" name="target_coverage" argument="-c" value="" min="0" max="500" optional="true" label="Target coverage" help="If specified, sketching will conclude if this coverage is reached before the end of the input file (estimated by average k-mer multiplicity)"/>
-                <param type="integer" name="genome_size" argument="-g" value="" min="1000" max="100000000000" optional="true" label="Genome size" help="If specified, will be used for p-value calculation instead of an estimated size from k-mer content"/>
-            </when>
-            <when value="assembly">
-                <param name="assembly" type="data" format="fasta,fasta.gz" label="Assembly"/>
-                <param type="boolean" name="individual_sequences" truevalue="-i" falsevalue="" label="Sketch individual Sequences" help="Sketch individual sequences, rather than whole files, e.g. for multi-fastas of single-chromosome genomes or pair-wise gene comparisons"/>
-            </when>
-        </conditional>
-        <param type="integer" name="sketch_size" argument="-s" value="1000" min="10" max="1000000" label="Sketch size" help="Each sketch will have at most this many non-redundant min-hashes"/>
-        <param type="integer" name="kmer_size" argument="-k" value="21" min="1" max="32" label="kmer size" />
-        <param type="float" name="prob_threshold" argument="-w" value="0.01" min="0" max="1" label="Probability threshold for warning about low k-mer size" />
-    </inputs>
-    <outputs>
-        <data name="sketch" format="msh" from_work_dir="sketch.msh"/>
-    </outputs>
-    <tests>
-        <test expect_num_outputs="1">
-            <conditional name="reads_assembly">
-                <param name="reads_assembly_selector" value="reads"/>
-                <conditional name="reads_input">
-                    <param name="reads_input_selector" value="single"/>
-                    <param name="reads" value="ERR024951_seqtk_sample_1000_1.fastq"/>
-                </conditional>
-            </conditional>
-            <output name="sketch" file="test_01_mash_sketch.msh" compare="sim_size" />
-        </test>
-        <test expect_num_outputs="1">
-            <conditional name="reads_assembly">
-                <param name="reads_assembly_selector" value="reads"/>
-                <conditional name="reads_input">
-                    <param name="reads_input_selector" value="single"/>
-                    <param name="reads" value="ERR024951_seqtk_sample_1000_1.fastq"/>
-                </conditional>
-                <param name="minimum_kmer_copies" value="10"/>
-            </conditional>
-            <output name="sketch" file="test_02_mash_sketch.msh" compare="sim_size" />
-        </test>
-        <test expect_num_outputs="1">
-            <conditional name="reads_assembly">
-                <param name="reads_assembly_selector" value="reads"/>
-                <conditional name="reads_input">
-                    <param name="reads_input_selector" value="single"/>
-                    <param name="reads" value="ERR024951_seqtk_sample_1000_1.fastq"/>
-                </conditional>
-                <param name="target_coverage" value="1"/>
-            </conditional>
-            <output name="sketch" file="test_03_mash_sketch.msh" compare="sim_size" />
-        </test>
-        <test expect_num_outputs="1">
-            <conditional name="reads_assembly">
-                <param name="reads_assembly_selector" value="reads"/>
-                <conditional name="reads_input">
-                    <param name="reads_input_selector" value="single"/>
-                    <param name="reads" value="ERR024951_seqtk_sample_1000_1.fastq"/>
-                </conditional>
-                <param name="genome_size" value="1000"/>
-            </conditional>
-            <output name="sketch" file="test_04_mash_sketch.msh" compare="sim_size" />
-        </test>
-        <test expect_num_outputs="1">
-            <conditional name="reads_assembly">
-                <param name="reads_assembly_selector" value="reads"/>
-                <conditional name="reads_input">
-                    <param name="reads_input_selector" value="single"/>
-                    <param name="reads" value="ERR024951_seqtk_sample_1000_1.fastq"/>
-                </conditional>
-            </conditional>
-            <param name="sketch_size" value="500"/>
-            <output name="sketch" file="test_05_mash_sketch.msh" compare="sim_size" />
-        </test>
-        <test expect_num_outputs="1">
-            <conditional name="reads_assembly">
-                <param name="reads_assembly_selector" value="reads"/>
-                <conditional name="reads_input">
-                    <param name="reads_input_selector" value="single"/>
-                    <param name="reads" value="ERR024951_seqtk_sample_1000_1.fastq"/>
-                </conditional>
-            </conditional>
-            <param name="kmer_size" value="17"/>
-            <output name="sketch" file="test_06_mash_sketch.msh" compare="sim_size" />
-        </test>
-        <test expect_num_outputs="1">
-            <conditional name="reads_assembly">
-                <param name="reads_assembly_selector" value="reads"/>
-                <conditional name="reads_input">
-                    <param name="reads_input_selector" value="single"/>
-                    <param name="reads" value="ERR024951_seqtk_sample_1000_1.fastq"/>
-                </conditional>
-            </conditional>
-            <param name="prob_threshold" value="0.1"/>
-            <output name="sketch" file="test_06_mash_sketch.msh" compare="sim_size" />
-        </test>
-        <test expect_num_outputs="1">
-            <conditional name="reads_assembly">
-                <param name="reads_assembly_selector" value="assembly"/>
-                <param name="assembly" value="test_assembly.fasta"/>
-            </conditional>
-            <output name="sketch" file="test_07_mash_sketch.msh" compare="sim_size" />
-        </test>
-    </tests>
-    <help><![CDATA[
-
-**What it does**
-
-  Create a sketch file, which is a reduced representation of a sequence or set
-  of sequences (based on min-hashes) that can be used for fast distance
-  estimations. Inputs can be fasta or fastq files (gzipped or not), and "-" can
-  be given to read from standard input. Input files can also be files of file
-  names (see -l). For output, one sketch file will be generated, but it can have
-  multiple sketches within it, divided by sequences or files (see -i). By
-  default, the output file name will be the first input file with a '.msh'
-  extension, or 'stdin.msh' if standard input is used (see -o).
-  ]]></help>
-    <expand macro="citations"/>
-</tool>