diff lexicmap-index.xml @ 0:d4134260b442 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/lexicmap commit d966ed462a49d1949345717ae4fdb685c6a1450a
author iuc
date Tue, 16 Sep 2025 13:52:03 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lexicmap-index.xml	Tue Sep 16 13:52:03 2025 +0000
@@ -0,0 +1,81 @@
+<tool id="lexicmap_index" name="LexicMap Index" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE_VERSION@">
+    <description>Builds LexicMap index</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="bio_tools"/>
+    <expand macro="requirements"/>
+
+    <command detect_errors="exit_code"><![CDATA[
+## Write one FASTA path per line: sed splits the comma-separated value of the multi-dataset input.
+echo '$input_genomes' | sed 's/,/\n/g' > fasta_list.tsv &&
+mkdir '$lexicmap_index.extra_files_path' &&
+lexicmap index
+## GALAXY_SLOTS is expanded by the shell at run time, hence the escaped dollar sign below.
+    --threads "\${GALAXY_SLOTS:-1}"
+## Everything declared inside the advanced_settings section is referenced with the section prefix.
+    -X ./fasta_list.tsv -O '$lexicmap_index.extra_files_path'
+    --big-genomes '$lexicmap_big_genomes'
+    --batch-size '$advanced_settings.batch_size'
+    --contig-interval '$advanced_settings.contig_interval'
+    --kmer '$advanced_settings.kmer'
+    --masks '$advanced_settings.masks'
+    --max-genome '$advanced_settings.max_genome'
+    --min-seq-len '$advanced_settings.min_seq_len'
+    --rand-seed '$advanced_settings.rand_seed'
+    --seed-in-desert-dist '$advanced_settings.seed_in_desert_dist'
+    --seed-max-desert '$advanced_settings.seed_max_desert'
+    #if $advanced_settings.mask_file
+        --mask-file '$advanced_settings.mask_file'
+    #end if
+
+    ]]></command>
+    <inputs> <!-- genome FASTA inputs plus the optional tuning parameters passed to "lexicmap index" -->
+        <param argument="--input-genomes" format="@FASTA_TYPES@" type="data" optional="false" multiple="true" label="FASTA files" help="Should be of datatype &quot;fasta.gz&quot; or &quot;fasta&quot;"  />
+        <section name="advanced_settings" title="Advanced settings" expanded="false">
+            <param argument="--batch-size" value="5000" max="131072" type="integer" label="Batch Size" help="Maximum number of genomes in each batch (maximum value: 131072)" />
+            <param argument="--contig-interval" min="1000" value="1000" type="integer" label="Contig interval" help="Length of interval (N's) between contigs in a genome. It can't be too small (&lt;1000) or some alignments might be fragmented" />
+            <param argument="--kmer" value="31" type="integer" min="2" max="32" label="Max k-mer size" help="Maximum k-mer size. K needs to be &lt;= 32." />
+            <param argument="--mask-file" format="txt" type="data" optional="true" label="Mask file" help="File of custom masks. This flag overrides -k/--kmer, -m/--masks, -s/--rand-seed etc." />
+            <param argument="--masks" min="1" value="20000" type="integer" label="LexicHash masks" help="Number of LexicHash masks." />
+            <param argument="--max-genome" value="15000000" max="268435456" type="integer" label="Max genome size" help="Maximum genome size. Genomes with any single contig larger than the threshold will be skipped, while fragmented (with many contigs) genomes larger than the threshold will be split into chunks and alignments from these chunks will be merged in &quot;lexicmap search&quot;. The value needs to be smaller than the maximum supported genome size: 268435456." />
+            <param argument="--min-seq-len" value="-1" type="integer" label="Min sequence length" help="Minimum sequence length to index. The value would be equal to the kmer parameter value for values &lt;= 0." />
+            <param argument="--rand-seed" value="1" type="integer" label="Rand seed" help="Rand seed for generating random masks." />
+            <param argument="--seed-in-desert-dist" value="50" type="integer" label="Seed in desert dist" help="Distance of k-mers to fill deserts." />
+            <param argument="--seed-max-desert" value="100" type="integer" label="Seed max desert" help="Maximum length of sketching deserts, or maximum seed distance. Deserts with seed distance larger than this value will be filled by choosing k-mers roughly every --seed-in-desert-dist bases." />
+        </section>
+    </inputs>
+    <outputs> <!-- the index directory (written to the dataset's extra files path) and the --big-genomes report -->
+        <data format="lexicmap_index" name="lexicmap_index" label="index data"/>
+        <data format="tsv" name="lexicmap_big_genomes" label="out file with metrics about skipped genomes"/>
+    </outputs>
+    <tests>
+        <test expect_num_outputs="2"> <!-- indexes two test genomes with a lowered max genome size -->
+            <param name="input_genomes" value="genomes/GCF_001502155.1_ViralProj307776_genomic.fna.gz,genomes/GCF_001502175.1_ViralProj307780_genomic.fna.gz"/>
+            <section name="advanced_settings">
+                <param name="max_genome" value="10000"/>
+            </section>
+            <output ftype="lexicmap_index" name="lexicmap_index">
+                <extra_files value="db.lmi/genomes.chunks.bin" name="genomes.chunks.bin"/>
+                <extra_files value="db.lmi/info.toml" name="info.toml" lines_diff="2"/>
+                <extra_files value="db.lmi/masks.bin" name="masks.bin"/>
+                <extra_files name="genomes.map.bin">
+                    <assert_contents>
+                        <has_size value="54"/>
+                    </assert_contents>
+                </extra_files>
+                <expand macro="genomes_batch"/>
+                <expand macro="seeds"/>
+            </output>
+            <output name="lexicmap_big_genomes" file="lexicmap_big_genomes.tsv" ftype="tsv" compare="re_match"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+    
+    Builds a LexicMap index from the supplied genome FASTA files. For more information about settings
+    please visit: https://bioinf.shenwei.me/LexicMap/usage/index/
+
+    @info@
+        ]]></help>
+    <expand macro="citations" />
+</tool>