Mercurial > repos > iuc > seqkit_split2
comparison seqkit_split2.xml @ 0:c19015f577a5 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/seqkit commit 76c1a289f15cc9a9a7d9a49dc132af62cc1d5af2
| author | iuc |
|---|---|
| date | Fri, 26 Sep 2025 16:48:57 +0000 |
| parents | |
| children | 911de3a36b31 |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:c19015f577a5 |
|---|---|
| 1 <tool id="seqkit_split2" name="Seqkit Split2" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> | |
| 2 <description>Split sequences into files by part size, number of parts, or length</description> | |
| 3 <macros> | |
| 4 <import>macros.xml</import> | |
| 5 </macros> | |
| 6 <expand macro="bio_tools"/> | |
| 7 <expand macro="requirements"/> | |
| 8 <command detect_errors="exit_code"><![CDATA[ | |
| 9 #import re | |
| 10 mkdir -p out && | |
| 11 | |
| 12 ## The preprocessing steps below are adapted from the cutadapt.xml tool wrapper. | |
| 13 ## Set things up for handling inputs and outputs in single- vs paired-end modes | |
| 14 #set input_type = str($input_file_type.type) | |
| 15 #if $input_type == 'single': | |
| 16 #set paired = False | |
| 17 #else: | |
| 18 #set paired = True | |
| 19 #end if | |
| 20 | |
| 21 #if $input_type == 'paired_collection' | |
| 22 #set input_1 = $input_file_type.input_1.forward | |
| 23 #set input_2 = $input_file_type.input_1.reverse | |
| 24 #set read1 = re.sub('[^\w\-\s]', '_', str($input_file_type.input_1.name)) + "_1" | |
| 25 #set read2 = re.sub('[^\w\-\s]', '_', str($input_file_type.input_1.name)) + "_2" | |
| 26 #else | |
| 27 #set input_1 = $input_file_type.input_1 | |
| 28 #set read1 = re.sub('[^\w\-\s]', '_', str($input_file_type.input_1.element_identifier)) | |
| 29 #end if | |
| 30 | |
| 31 #if $input_1.is_of_type("fastq", "fastq.gz"): | |
| 32 #set ext = ".fastqsanger" | |
| 33 #else | |
| 34 #set ext = ".fasta" | |
| 35 #end if | |
| 36 #if $input_1.ext.endswith(".gz"): | |
| 37 #set ext=ext+".gz" | |
| 38 #end if | |
| 39 | |
| 40 #set read1 = $read1 + $ext | |
| 41 | |
| 42 #if $paired: | |
| 43 #if $input_2.is_of_type("fastq", "fastq.gz"): | |
| 44 #set ext2 = ".fastqsanger" | |
| 45 #else | |
| 46 #set ext2 = ".fasta" | |
| 47 #end if | |
| 48 #if $input_2.ext.endswith(".gz"): | |
| 49 #set ext2=ext2+".gz" | |
| 50 #end if | |
| 51 #set read2 = $read2 + $ext2 | |
| 52 #end if | |
| 53 | |
| 54 ## Link in the input files | |
| 55 ln -fs '$input_1' '$read1' && | |
| 56 #if $paired: | |
| 57 ln -fs '$input_2' '$read2' && | |
| 58 #end if | |
| 59 | |
| 60 seqkit split2 | |
| 61 #if $paired: | |
| 62 -1 '$read1' | |
| 63 -2 '$read2' | |
| 64 #else: | |
| 65 '$read1' | |
| 66 #end if | |
| 67 #if str($split_type.split_selector) == 'by_part': | |
| 68 -p $split_type.by_part | |
| 69 #else if str($split_type.split_selector) == 'by_size': | |
| 70 -s $split_type.by_size | |
| 71 #else if str($split_type.split_selector) == 'by_length': | |
| 72 -l $split_type.by_length | |
| 73 #end if | |
| 74 -o seqkit_split2 | |
| 75 -O out | |
| 76 -j "\${GALAXY_SLOTS:-4}" | |
| 77 ]]></command> | |
| 78 <inputs> | |
| 79 <conditional name="input_file_type"> | |
| 80 <param name="type" type="select" label="Single-end or Paired-end reads?"> | |
| 81 <option value="single">Single-end</option> | |
| 82 <option value="paired_collection">Paired-end Collection</option> | |
| 83 </param> | |
| 84 <when value="single"> | |
| 85 <param name="input_1" type="data" format="@FASTQ_TYPES@" label="Input FASTQ/A file" help="Select a single FASTA or FASTQ file (gzipped or uncompressed)"/> | |
| 86 </when> | |
| 87 <when value="paired_collection"> <!-- paired-end input: one paired collection supplies forward and reverse reads; literal quotes inside help are escaped as &quot; --> | |
| 88 <param name="input_1" format="@FASTQ_TYPES@" type="data_collection" collection_type="paired" label="Paired Collection" help="Should be of datatype &quot;fastq.gz&quot; or &quot;fasta&quot;" /> | |
| 89 </when> | |
| 90 </conditional> | |
| 91 <conditional name="split_type"> | |
| 92 <param name="split_selector" type="select" label="Split sequences by"> | |
| 93 <option value="by_part" selected="true">Number of parts</option> | |
| 94 <option value="by_size">Number of sequences per part</option> | |
| 95 <option value="by_length">Length of sequences</option> | |
| 96 </param> | |
| 97 <when value="by_part"> | |
| 98 <param name="by_part" type="integer" value="" min="1" label="Number of parts" help="Split sequences into N parts using round-robin distribution." /> | |
| 99 </when> | |
| 100 <when value="by_size"> | |
| 101 <param name="by_size" type="integer" value="" min="1" label="Number of sequences per part" help="Split sequences into parts with N sequences each." /> | |
| 102 </when> | |
| 103 <when value="by_length"> | |
| 104 <param name="by_length" type="text" value="" label="Chunk size" help="Split sequences into chunks of >=N bases. Supports K/M/G suffix (e.g., 10K, 1M)"> | |
| 105 <!-- one or more digits followed by at most one K, M or G suffix; rejects values such as "KKK" or "1K2M" --> <validator type="regex" message="Enter a whole number with an optional K, M or G suffix (e.g. 10K, 1M)">^[0-9]+[KMG]?$</validator> | |
| 106 </param> | |
| 107 </when> | |
| 108 </conditional> | |
| 109 </inputs> | |
| 110 <outputs> | |
| 111 <collection name="outputs_files" type="list" label="${tool.name} on ${on_string}: Split files"> <!-- list collection filled from files found in the "out" directory --> | |
| 112 <discover_datasets pattern="(?P&lt;designation&gt;seqkit_split2\.part_\d+)\.(?P&lt;ext&gt;.+)" directory="out"/> | |
| 113 </collection> | |
| 114 </outputs> | |
| 115 <tests> | |
| 116 <!-- Test 01: for Seqkit Split with Single End FASTQ file; splitting by parts --> | |
| 117 <test expect_num_outputs="1"> | |
| 118 <conditional name="input_file_type"> | |
| 119 <param name="type" value="single"/> | |
| 120 <param name="input_1" value="reads_1.fq.gz"/> | |
| 121 </conditional> | |
| 122 <conditional name="split_type"> | |
| 123 <param name="split_selector" value="by_part"/> | |
| 124 <param name="by_part" value="2"/> | |
| 125 </conditional> | |
| 126 <output_collection name="outputs_files" type="list" count="2"> | |
| 127 <element name="seqkit_split2.part_001" ftype="fastqsanger.gz"> | |
| 128 <assert_contents> | |
| 129 <has_n_lines n="4958"/> | |
| 130 </assert_contents> | |
| 131 </element> | |
| 132 <element name="seqkit_split2.part_002" ftype="fastqsanger.gz"> | |
| 133 <assert_contents> | |
| 134 <has_n_lines n="4949"/> | |
| 135 </assert_contents> | |
| 136 </element> | |
| 137 </output_collection> | |
| 138 </test> | |
| 139 | |
| 140 <!-- Test 02: for Seqkit Split with Paired FASTQ Collection; splitting by parts --> | |
| 141 <test expect_num_outputs="1"> | |
| 142 <conditional name="input_file_type"> | |
| 143 <param name="type" value="paired_collection"/> | |
| 144 <param name="input_1"> | |
| 145 <collection type="paired"> | |
| 146 <element name="forward" ftype="fastq.gz" value="reads_1.fq.gz"/> | |
| 147 <element name="reverse" ftype="fastq.gz" value="reads_2.fq.gz"/> | |
| 148 </collection> | |
| 149 </param> | |
| 150 </conditional> | |
| 151 <conditional name="split_type"> | |
| 152 <param name="split_selector" value="by_part"/> | |
| 153 <param name="by_part" value="2"/> | |
| 154 </conditional> | |
| 155 <output_collection name="outputs_files" type="list" count="2"> | |
| 156 <element name="seqkit_split2.part_001" ftype="fastqsanger.gz"> | |
| 157 <assert_contents> | |
| 158 <has_n_lines n="4958"/> | |
| 159 </assert_contents> | |
| 160 </element> | |
| 161 <element name="seqkit_split2.part_002" ftype="fastqsanger.gz"> | |
| 162 <assert_contents> | |
| 163 <has_n_lines n="4949"/> | |
| 164 </assert_contents> | |
| 165 </element> | |
| 166 </output_collection> | |
| 167 </test> | |
| 168 | |
| 169 <!-- Test 03: for Seqkit Split with Single End FASTA file; splitting by parts --> | |
| 170 <test expect_num_outputs="1"> | |
| 171 <conditional name="input_file_type"> | |
| 172 <param name="type" value="single"/> | |
| 173 <param name="input_1" value="hairpin.fa.gz"/> | |
| 174 </conditional> | |
| 175 <conditional name="split_type"> | |
| 176 <param name="split_selector" value="by_part"/> | |
| 177 <param name="by_part" value="2"/> | |
| 178 </conditional> | |
| 179 <output_collection name="outputs_files" type="list" count="2"> | |
| 180 <element name="seqkit_split2.part_001" ftype="fasta.gz"> | |
| 181 <assert_contents> | |
| 182 <has_n_lines n="2988"/> | |
| 183 </assert_contents> | |
| 184 </element> | |
| 185 <element name="seqkit_split2.part_002" ftype="fasta.gz"> | |
| 186 <assert_contents> | |
| 187 <has_n_lines n="2987"/> | |
| 188 </assert_contents> | |
| 189 </element> | |
| 190 </output_collection> | |
| 191 </test> | |
| 192 | |
| 193 <!-- Test 04: for Seqkit Split with Single End FASTA file; splitting by size --> | |
| 194 <test expect_num_outputs="1"> | |
| 195 <conditional name="input_file_type"> | |
| 196 <param name="type" value="single"/> | |
| 197 <param name="input_1" value="hairpin.fa.gz"/> | |
| 198 </conditional> | |
| 199 <conditional name="split_type"> | |
| 200 <param name="split_selector" value="by_size"/> | |
| 201 <param name="by_size" value="200"/> | |
| 202 </conditional> | |
| 203 <output_collection name="outputs_files" type="list" count="25"> | |
| 204 <element name="seqkit_split2.part_001" ftype="fasta.gz"> | |
| 205 <assert_contents> | |
| 206 <has_n_lines n="224"/> | |
| 207 </assert_contents> | |
| 208 </element> | |
| 209 <element name="seqkit_split2.part_002" ftype="fasta.gz"> | |
| 210 <assert_contents> | |
| 211 <has_n_lines n="281"/> | |
| 212 </assert_contents> | |
| 213 </element> | |
| 214 </output_collection> | |
| 215 </test> | |
| 216 | |
| 217 <!-- Test 05: for Seqkit Split with Single End FASTA file; splitting by length --> | |
| 218 <test expect_num_outputs="1"> | |
| 219 <conditional name="input_file_type"> | |
| 220 <param name="type" value="single"/> | |
| 221 <param name="input_1" value="hairpin.fa.gz"/> | |
| 222 </conditional> | |
| 223 <conditional name="split_type"> | |
| 224 <param name="split_selector" value="by_length"/> | |
| 225 <param name="by_length" value="50K"/> | |
| 226 </conditional> | |
| 227 <output_collection name="outputs_files" type="list" count="10"> | |
| 228 <element name="seqkit_split2.part_001" ftype="fasta.gz"> | |
| 229 <assert_contents> | |
| 230 <has_n_lines n="642"/> | |
| 231 </assert_contents> | |
| 232 </element> | |
| 233 <element name="seqkit_split2.part_002" ftype="fasta.gz"> | |
| 234 <assert_contents> | |
| 235 <has_n_lines n="589"/> | |
| 236 </assert_contents> | |
| 237 </element> | |
| 238 </output_collection> | |
| 239 </test> | |
| 240 </tests> | |
| 241 <help><![CDATA[ | |
| 242 | |
| 243 **Seqkit Split2** | |
| 244 | |
| 245 This tool splits FASTA or FASTQ files (single-end or paired-end) into multiple files based on the number of parts, sequences per part, or sequence length. It is designed for low memory usage and fast processing. | |
| 246 | |
| 247 **Input type**: Choose between single-end FASTA/FASTQ or paired-end FASTQ files. | |
| 248 | |
| 249 **Split sequences by**: | |
| 250 - **Number of parts**: Split into N parts using round-robin distribution. | |
| 251 - **Number of sequences per part**: Split into parts with N sequences each. | |
| 252 - **Length of sequences**: Split into chunks of >=N bases (supports K/M/G suffix, e.g., 10K, 1M). | |
| 253 | |
| 254 **Outputs** | |
| 255 | |
| 256 - A collection of split FASTA/FASTQ files | |
| 257 | |
| 258 For more details, see the Seqkit Split2 documentation_ | |
| 259 | |
| 260 .. _documentation: https://bioinf.shenwei.me/seqkit/usage/#split2 | |
| 261 | |
| 262 ]]></help> | |
| 263 <expand macro="citations"/> | |
| 264 <creator> | |
| 265 <person givenName="Saim" familyName="Momin" url="https://github.com/SaimMomin12" identifier="https://orcid.org/0009-0003-9935-828X"/> | |
| 266 <organization name="Galaxy Europe" url="https://galaxyproject.org/eu/"/> | |
| 267 </creator> | |
| 268 </tool> |
