Mercurial > repos > iuc > mash_paste
changeset 1:8f32dcfde5d9 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/mash commit 344ee746d6e93c583331c0f74d234ab200a8ce43
| author | iuc |
|---|---|
| date | Fri, 26 Sep 2025 14:27:48 +0000 |
| parents | 3b275e1a1581 |
| children | e0c0fda064eb |
| files | mash_paste.xml mash_sketch.xml |
| diffstat | 2 files changed, 36 insertions(+), 195 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mash_paste.xml Fri Sep 26 14:27:48 2025 +0000
@@ -0,0 +1,36 @@
+<tool id="mash_paste" name="mash paste" version="@TOOL_VERSION@+galaxy0" profile="@PROFILE@">
+    <description>Create a single sketch file from multiple sketch files.</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="xref"/>
+    <expand macro="requirements"/>
+    <expand macro="version_command"/>
+    <command detect_errors="exit_code"><![CDATA[
+        ## Link every input under a generated 'input_N.msh' name: no user-controlled text reaches the shell and link names cannot collide.
+        #set $links = []
+        #for $idx, $sketch in enumerate($msh_files)
+            ln -s '$sketch' 'input_${idx}.msh' &&
+            #silent $links.append("'input_%d.msh'" % $idx)
+        #end for
+        #set $link_args = ' '.join($links)
+        mash paste result.msh $link_args &&
+        mv result.msh '$output'
+    ]]></command>
+    <inputs>
+        <param name="msh_files" type="data" format="msh" multiple="true" label="Sketch files to merge" help="Mash sketch (.msh) datasets to combine into one sketch file"/>
+    </inputs>
+    <outputs>
+        <data name="output" format="msh" label="${tool.name} on ${on_string}: Output Sketch File"/>
+    </outputs>
+    <tests>
+        <test expect_num_outputs="1">
+            <param name="msh_files" value="test_01_mash_sketch.msh,test_02_mash_sketch.msh"/>
+            <output name="output" file="res_paste.msh" compare="sim_size" />
+        </test>
+    </tests>
+    <help><![CDATA[
+        Create a single sketch file from multiple sketch files (runs ``mash paste`` on the selected sketch datasets).
+    ]]></help>
+    <expand macro="citations"/>
+</tool>
\ No newline at end of file
--- a/mash_sketch.xml Wed Sep 24 13:47:21 2025 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,195 +0,0 @@
-<tool id="mash_sketch" name="mash sketch" version="@TOOL_VERSION@+galaxy2" profile="@PROFILE@">
-    <description>Create a reduced sequence representation based on min-hashes</description>
-    <macros>
-        <import>macros.xml</import>
-    </macros>
-    <expand macro="xref"/>
-    <expand macro="requirements" />
-    <expand macro="version_command" />
-    <command detect_errors="exit_code"><![CDATA[
-        ## Link each selected input under its own element identifier; mash is given these link names, not the Galaxy dataset paths.
-        #if str ( $reads_assembly.reads_assembly_selector ) == "reads":
-            #if str( $reads_assembly.reads_input.reads_input_selector ) == "paired":
-                ln -s '$reads_assembly.reads_input.reads_1' "${reads_assembly.reads_input.reads_1.element_identifier}" &&
-                ln -s '$reads_assembly.reads_input.reads_2' "${reads_assembly.reads_input.reads_2.element_identifier}" &&
-            #end if
-            #if str( $reads_assembly.reads_input.reads_input_selector ) == "paired_collection":
-                ln -s '$reads_assembly.reads_input.reads.forward' "${reads_assembly.reads_input.reads.forward.element_identifier}" &&
-                ln -s '$reads_assembly.reads_input.reads.reverse' "${reads_assembly.reads_input.reads.reverse.element_identifier}" &&
-            #end if
-            #if str( $reads_assembly.reads_input.reads_input_selector ) == "single":
-                ln -s '$reads_assembly.reads_input.reads' "${reads_assembly.reads_input.reads.element_identifier}" &&
-            #end if
-        #elif str ( $reads_assembly.reads_assembly_selector ) == "assembly":
-            ln -s '${reads_assembly.assembly}' "${reads_assembly.assembly.element_identifier}" &&
-        #end if
-        ## Build the mash sketch call: read inputs add -m/-r plus optional -c/-g, assemblies add -p plus the optional -i switch.
-        mash sketch
-        -s '${sketch_size}'
-        -k '${kmer_size}'
-        -w '${prob_threshold}'
-        #if str( $reads_assembly.reads_assembly_selector ) == "reads":
-            -m '${reads_assembly.minimum_kmer_copies}'
-            -r
-            #if $reads_assembly.target_coverage :
-                -c '${reads_assembly.target_coverage}'
-            #end if
-            #if $reads_assembly.genome_size :
-                -g '${reads_assembly.genome_size}'
-            #end if
-            #if str( $reads_assembly.reads_input.reads_input_selector ) == "paired" :
-                "${reads_assembly.reads_input.reads_1.element_identifier}" "${reads_assembly.reads_input.reads_2.element_identifier}"
-            #end if
-            #if str( $reads_assembly.reads_input.reads_input_selector ) == "paired_collection":
-                "${reads_assembly.reads_input.reads.forward.element_identifier}" "${reads_assembly.reads_input.reads.reverse.element_identifier}"
-            #end if
-            #if str( $reads_assembly.reads_input.reads_input_selector ) == "single":
-                "${reads_assembly.reads_input.reads.element_identifier}"
-            #end if
-        #elif str( $reads_assembly.reads_assembly_selector ) == "assembly":
-            -p \${GALAXY_SLOTS:-1}
-            ${reads_assembly.individual_sequences}
-            "${reads_assembly.assembly.element_identifier}"
-        #end if
-        -o 'sketch'
-    ]]></command>
-    <inputs>
-        <conditional name="reads_assembly">
-            <param name="reads_assembly_selector" type="select" label="Input: Reads or Assemblies">
-                <option selected="True" value="reads">Reads</option>
-                <option value="assembly">Assembly</option>
-            </param>
-            <when value="reads">
-                <conditional name="reads_input">
-                    <param name="reads_input_selector" type="select" label="Single or Paired-end reads" help="Select between paired and single end data">
-                        <option value="paired">Paired</option>
-                        <option value="single">Single</option>
-                        <option value="paired_collection">Paired Collection</option>
-                    </param>
-                    <when value="paired">
-                        <param name="reads_1" type="data" format="@INTYPES@" label="Select first set of reads" help="Specify dataset with forward reads"/>
-                        <param name="reads_2" type="data" format="@INTYPES@" label="Select second set of reads" help="Specify dataset with reverse reads"/>
-                    </when>
-                    <when value="single">
-                        <param name="reads" type="data" format="@INTYPES@" label="Select fastq dataset" help="Specify dataset with single reads"/>
-                    </when>
-                    <when value="paired_collection">
-                        <param name="reads" format="@INTYPES@" type="data_collection" collection_type="paired" label="Select a paired collection" help="See help section for an explanation of dataset collections"/>
-                    </when>
-                </conditional>
-                <param type="integer" name="minimum_kmer_copies" argument="-m" value="1" min="1" max="1000" label="Minimum copies of each k-mer required to pass noise filter"/>
-                <param type="integer" name="target_coverage" argument="-c" value="" min="0" max="500" optional="true" label="Target coverage" help="If specified, sketching will conclude if this coverage is reached before the end of the input file (estimated by average k-mer multiplicity)"/>
-                <param type="integer" name="genome_size" argument="-g" value="" min="1000" max="100000000000" optional="true" label="Genome size" help="If specified, will be used for p-value calculation instead of an estimated size from k-mer content"/>
-            </when>
-            <when value="assembly">
-                <param name="assembly" type="data" format="fasta,fasta.gz" label="Assembly"/>
-                <param type="boolean" name="individual_sequences" truevalue="-i" falsevalue="" label="Sketch individual Sequences" help="Sketch individual sequences, rather than whole files, e.g. for multi-fastas of single-chromosome genomes or pair-wise gene comparisons"/>
-            </when>
-        </conditional>
-        <param type="integer" name="sketch_size" argument="-s" value="1000" min="10" max="1000000" label="Sketch size" help="Each sketch will have at most this many non-redundant min-hashes"/>
-        <param type="integer" name="kmer_size" argument="-k" value="21" min="1" max="32" label="kmer size" />
-        <param type="float" name="prob_threshold" argument="-w" value="0.01" min="0" max="1" label="Probability threshold for warning about low k-mer size" />
-    </inputs>
-    <outputs>
-        <data name="sketch" format="msh" from_work_dir="sketch.msh"/>
-    </outputs>
-    <tests>
-        <test expect_num_outputs="1">
-            <conditional name="reads_assembly">
-                <param name="reads_assembly_selector" value="reads"/>
-                <conditional name="reads_input">
-                    <param name="reads_input_selector" value="single"/>
-                    <param name="reads" value="ERR024951_seqtk_sample_1000_1.fastq"/>
-                </conditional>
-            </conditional>
-            <output name="sketch" file="test_01_mash_sketch.msh" compare="sim_size" />
-        </test>
-        <test expect_num_outputs="1">
-            <conditional name="reads_assembly">
-                <param name="reads_assembly_selector" value="reads"/>
-                <conditional name="reads_input">
-                    <param name="reads_input_selector" value="single"/>
-                    <param name="reads" value="ERR024951_seqtk_sample_1000_1.fastq"/>
-                </conditional>
-                <param name="minimum_kmer_copies" value="10"/>
-            </conditional>
-            <output name="sketch" file="test_02_mash_sketch.msh" compare="sim_size" />
-        </test>
-        <test expect_num_outputs="1">
-            <conditional name="reads_assembly">
-                <param name="reads_assembly_selector" value="reads"/>
-                <conditional name="reads_input">
-                    <param name="reads_input_selector" value="single"/>
-                    <param name="reads" value="ERR024951_seqtk_sample_1000_1.fastq"/>
-                </conditional>
-                <param name="target_coverage" value="1"/>
-            </conditional>
-            <output name="sketch" file="test_03_mash_sketch.msh" compare="sim_size" />
-        </test>
-        <test expect_num_outputs="1">
-            <conditional name="reads_assembly">
-                <param name="reads_assembly_selector" value="reads"/>
-                <conditional name="reads_input">
-                    <param name="reads_input_selector" value="single"/>
-                    <param name="reads" value="ERR024951_seqtk_sample_1000_1.fastq"/>
-                </conditional>
-                <param name="genome_size" value="1000"/>
-            </conditional>
-            <output name="sketch" file="test_04_mash_sketch.msh" compare="sim_size" />
-        </test>
-        <test expect_num_outputs="1">
-            <conditional name="reads_assembly">
-                <param name="reads_assembly_selector" value="reads"/>
-                <conditional name="reads_input">
-                    <param name="reads_input_selector" value="single"/>
-                    <param name="reads" value="ERR024951_seqtk_sample_1000_1.fastq"/>
-                </conditional>
-            </conditional>
-            <param name="sketch_size" value="500"/>
-            <output name="sketch" file="test_05_mash_sketch.msh" compare="sim_size" />
-        </test>
-        <test expect_num_outputs="1">
-            <conditional name="reads_assembly">
-                <param name="reads_assembly_selector" value="reads"/>
-                <conditional name="reads_input">
-                    <param name="reads_input_selector" value="single"/>
-                    <param name="reads" value="ERR024951_seqtk_sample_1000_1.fastq"/>
-                </conditional>
-            </conditional>
-            <param name="kmer_size" value="17"/>
-            <output name="sketch" file="test_06_mash_sketch.msh" compare="sim_size" />
-        </test>
-        <test expect_num_outputs="1">
-            <conditional name="reads_assembly">
-                <param name="reads_assembly_selector" value="reads"/>
-                <conditional name="reads_input">
-                    <param name="reads_input_selector" value="single"/>
-                    <param name="reads" value="ERR024951_seqtk_sample_1000_1.fastq"/>
-                </conditional>
-            </conditional>
-            <param name="prob_threshold" value="0.1"/>
-            <output name="sketch" file="test_06_mash_sketch.msh" compare="sim_size" />
-        </test>
-        <test expect_num_outputs="1">
-            <conditional name="reads_assembly">
-                <param name="reads_assembly_selector" value="assembly"/>
-                <param name="assembly" value="test_assembly.fasta"/>
-            </conditional>
-            <output name="sketch" file="test_07_mash_sketch.msh" compare="sim_size" />
-        </test>
-    </tests>
-    <help><![CDATA[
-
-**What it does**
-
-  Create a sketch file, which is a reduced representation of a sequence or set
-  of sequences (based on min-hashes) that can be used for fast distance
-  estimations. Inputs can be fasta or fastq files (gzipped or not), and "-" can
-  be given to read from standard input. Input files can also be files of file
-  names (see -l). For output, one sketch file will be generated, but it can have
-  multiple sketches within it, divided by sequences or files (see -i). By
-  default, the output file name will be the first input file with a '.msh'
-  extension, or 'stdin.msh' if standard input is used (see -o).
-    ]]></help>
-    <expand macro="citations"/>
-</tool>
