comparison seqkit_split2.xml @ 0:c19015f577a5 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/seqkit commit 76c1a289f15cc9a9a7d9a49dc132af62cc1d5af2
author iuc
date Fri, 26 Sep 2025 16:48:57 +0000
parents
children 911de3a36b31
comparison
equal deleted inserted replaced
-1:000000000000 0:c19015f577a5
1 <tool id="seqkit_split2" name="Seqkit Split2" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
2 <description>Split sequences into files by part size, number of parts, or length</description>
3 <macros>
4 <import>macros.xml</import>
5 </macros> <!-- NOTE(review): macros.xml presumably defines the @...@ tokens and the bio_tools / requirements / citations macros used in this file; verify there. -->
6 <expand macro="bio_tools"/>
7 <expand macro="requirements"/>
8 <command detect_errors="exit_code"><![CDATA[
9 #import re
10 mkdir -p out &&
11 ## 'out' is the directory the split files are written to (see -O out at the end of the command).
12 ## The preprocessing steps below are adapted from the cutadapt.xml tool wrapper.
13 ## Set things up for handling inputs and outputs in single- vs paired-end modes
14 #set input_type = str($input_file_type.type)
15 #if $input_type == 'single':
16 #set paired = False
17 #else:
18 #set paired = True
19 #end if
20 ## Pick the input dataset(s) and build the link names: every character other than word characters, '-' and whitespace is replaced by '_'.
21 #if $input_type == 'paired_collection'
22 #set input_1 = $input_file_type.input_1.forward
23 #set input_2 = $input_file_type.input_1.reverse
24 #set read1 = re.sub('[^\w\-\s]', '_', str($input_file_type.input_1.name)) + "_1"
25 #set read2 = re.sub('[^\w\-\s]', '_', str($input_file_type.input_1.name)) + "_2"
26 #else
27 #set input_1 = $input_file_type.input_1
28 #set read1 = re.sub('[^\w\-\s]', '_', str($input_file_type.input_1.element_identifier))
29 #end if
30 ## Derive the link-name extension from the input datatype: fastqsanger for FASTQ, fasta otherwise, plus .gz for gzipped inputs.
31 #if $input_1.is_of_type("fastq", "fastq.gz"):
32 #set ext = ".fastqsanger"
33 #else
34 #set ext = ".fasta"
35 #end if
36 #if $input_1.ext.endswith(".gz"):
37 #set ext=ext+".gz"
38 #end if
39
40 #set read1 = $read1 + $ext
41 ## Same extension logic for the reverse mate (paired mode only).
42 #if $paired:
43 #if $input_2.is_of_type("fastq", "fastq.gz"):
44 #set ext2 = ".fastqsanger"
45 #else
46 #set ext2 = ".fasta"
47 #end if
48 #if $input_2.ext.endswith(".gz"):
49 #set ext2=ext2+".gz"
50 #end if
51 #set read2 = $read2 + $ext2
52 #end if
53
54 ## Link in the input files
55 ln -fs '$input_1' '$read1' &&
56 #if $paired:
57 ln -fs '$input_2' '$read2' &&
58 #end if
59 ## Run the split: exactly one of -p / -s / -l is passed, chosen by split_selector. by_length is a free-text parameter, so it is single-quoted.
60 seqkit split2
61 #if $paired:
62 -1 '$read1'
63 -2 '$read2'
64 #else:
65 '$read1'
66 #end if
67 #if str($split_type.split_selector) == 'by_part':
68 -p $split_type.by_part
69 #else if str($split_type.split_selector) == 'by_size':
70 -s $split_type.by_size
71 #else if str($split_type.split_selector) == 'by_length':
72 -l '$split_type.by_length'
73 #end if
74 -o seqkit_split2
75 -O out
76 -j "\${GALAXY_SLOTS:-4}"
77 ]]></command>
78 <inputs>
79 <conditional name="input_file_type"> <!-- Input mode: "single" takes one dataset, "paired_collection" takes one paired collection; both branches name their parameter input_1. -->
80 <param name="type" type="select" label="Single-end or Paired-end reads?">
81 <option value="single">Single-end</option>
82 <option value="paired_collection">Paired-end Collection</option>
83 </param>
84 <when value="single">
85 <param name="input_1" type="data" format="@FASTQ_TYPES@" label="Input FASTQ/A file" help="Select a single FASTA or FASTQ file (gzipped or uncompressed)"/>
86 </when>
87 <when value="paired_collection">
88 <param name="input_1" format="@FASTQ_TYPES@" type="data_collection" collection_type="paired" label="Paired Collection" help="Should be of datatype &quot;fastq.gz&quot; or &quot;fasta&quot;" />
89 </when>
90 </conditional>
91 <conditional name="split_type"> <!-- Split mode selector: each <when> branch exposes the single value passed to seqkit split2 for that mode (-p, -s or -l in the command). -->
92 <param name="split_selector" type="select" label="Split sequences by">
93 <option value="by_part" selected="true">Number of parts</option>
94 <option value="by_size">Number of sequences per part</option>
95 <option value="by_length">Length of sequences</option>
96 </param>
97 <when value="by_part">
98 <param name="by_part" type="integer" value="" min="1" label="Number of parts" help="Split sequences into N parts using round-robin distribution." />
99 </when>
100 <when value="by_size">
101 <param name="by_size" type="integer" value="" min="1" label="Number of sequences per part" help="Split sequences into parts with N sequences each." />
102 </when>
103 <when value="by_length"> <!-- Free text because of the optional K/M/G suffix; the validator below only accepts a positive integer followed by at most one suffix letter. -->
104 <param name="by_length" type="text" value="" label="Chunk size" help="Split sequences into chunks of >=N bases. Supports K/M/G suffix (e.g., 10K, 1M)">
105 <validator type="regex" message="Enter a positive whole number, optionally followed by one K, M or G suffix (e.g. 10K, 1M)">^[1-9][0-9]*[KMG]?$</validator>
106 </param>
107 </when>
108 </conditional>
109 </inputs>
110 <outputs> <!-- One list collection built from the files found in the "out" directory after the job finishes. -->
111 <collection name="outputs_files" type="list" label="${tool.name} on ${on_string}: Split files">
112 <discover_datasets pattern="(?P&lt;designation&gt;seqkit_split2\.part_\d+)\.(?P&lt;ext&gt;.+)" directory="out"/> <!-- designation (seqkit_split2.part_NNN) names the element; ext (everything after the following dot) selects the datatype. -->
113 </collection>
114 </outputs>
115 <tests>
116 <!-- Test 01: single-end gzipped FASTQ (reads_1.fq.gz) split into 2 parts; both parts are expected as fastqsanger.gz, with 4958 and 4949 lines. -->
117 <test expect_num_outputs="1">
118 <conditional name="input_file_type">
119 <param name="type" value="single"/>
120 <param name="input_1" value="reads_1.fq.gz"/>
121 </conditional>
122 <conditional name="split_type">
123 <param name="split_selector" value="by_part"/>
124 <param name="by_part" value="2"/>
125 </conditional>
126 <output_collection name="outputs_files" type="list" count="2">
127 <element name="seqkit_split2.part_001" ftype="fastqsanger.gz">
128 <assert_contents>
129 <has_n_lines n="4958"/>
130 </assert_contents>
131 </element>
132 <element name="seqkit_split2.part_002" ftype="fastqsanger.gz">
133 <assert_contents>
134 <has_n_lines n="4949"/>
135 </assert_contents>
136 </element>
137 </output_collection>
138 </test>
139
140 <!-- Test 02: paired collection of gzipped FASTQ (reads_1.fq.gz / reads_2.fq.gz) split into 2 parts. NOTE(review): only 2 elements are expected although both mates are passed with a single output prefix; confirm that the part files of the two mates do not collide. -->
141 <test expect_num_outputs="1">
142 <conditional name="input_file_type">
143 <param name="type" value="paired_collection"/>
144 <param name="input_1">
145 <collection type="paired">
146 <element name="forward" ftype="fastq.gz" value="reads_1.fq.gz"/>
147 <element name="reverse" ftype="fastq.gz" value="reads_2.fq.gz"/>
148 </collection>
149 </param>
150 </conditional>
151 <conditional name="split_type">
152 <param name="split_selector" value="by_part"/>
153 <param name="by_part" value="2"/>
154 </conditional>
155 <output_collection name="outputs_files" type="list" count="2">
156 <element name="seqkit_split2.part_001" ftype="fastqsanger.gz">
157 <assert_contents>
158 <has_n_lines n="4958"/>
159 </assert_contents>
160 </element>
161 <element name="seqkit_split2.part_002" ftype="fastqsanger.gz">
162 <assert_contents>
163 <has_n_lines n="4949"/>
164 </assert_contents>
165 </element>
166 </output_collection>
167 </test>
168
169 <!-- Test 03: single-end gzipped FASTA (hairpin.fa.gz) split into 2 parts; both parts are expected as fasta.gz, with 2988 and 2987 lines. -->
170 <test expect_num_outputs="1">
171 <conditional name="input_file_type">
172 <param name="type" value="single"/>
173 <param name="input_1" value="hairpin.fa.gz"/>
174 </conditional>
175 <conditional name="split_type">
176 <param name="split_selector" value="by_part"/>
177 <param name="by_part" value="2"/>
178 </conditional>
179 <output_collection name="outputs_files" type="list" count="2">
180 <element name="seqkit_split2.part_001" ftype="fasta.gz">
181 <assert_contents>
182 <has_n_lines n="2988"/>
183 </assert_contents>
184 </element>
185 <element name="seqkit_split2.part_002" ftype="fasta.gz">
186 <assert_contents>
187 <has_n_lines n="2987"/>
188 </assert_contents>
189 </element>
190 </output_collection>
191 </test>
192
193 <!-- Test 04: single-end gzipped FASTA (hairpin.fa.gz) split by size, 200 sequences per part; 25 elements are expected and only the first two are content-checked (224 and 281 lines). -->
194 <test expect_num_outputs="1">
195 <conditional name="input_file_type">
196 <param name="type" value="single"/>
197 <param name="input_1" value="hairpin.fa.gz"/>
198 </conditional>
199 <conditional name="split_type">
200 <param name="split_selector" value="by_size"/>
201 <param name="by_size" value="200"/>
202 </conditional>
203 <output_collection name="outputs_files" type="list" count="25">
204 <element name="seqkit_split2.part_001" ftype="fasta.gz">
205 <assert_contents>
206 <has_n_lines n="224"/>
207 </assert_contents>
208 </element>
209 <element name="seqkit_split2.part_002" ftype="fasta.gz">
210 <assert_contents>
211 <has_n_lines n="281"/>
212 </assert_contents>
213 </element>
214 </output_collection>
215 </test>
216
217 <!-- Test 05: single-end gzipped FASTA (hairpin.fa.gz) split by length with chunk size 50K; 10 elements are expected and only the first two are content-checked (642 and 589 lines). -->
218 <test expect_num_outputs="1">
219 <conditional name="input_file_type">
220 <param name="type" value="single"/>
221 <param name="input_1" value="hairpin.fa.gz"/>
222 </conditional>
223 <conditional name="split_type">
224 <param name="split_selector" value="by_length"/>
225 <param name="by_length" value="50K"/>
226 </conditional>
227 <output_collection name="outputs_files" type="list" count="10">
228 <element name="seqkit_split2.part_001" ftype="fasta.gz">
229 <assert_contents>
230 <has_n_lines n="642"/>
231 </assert_contents>
232 </element>
233 <element name="seqkit_split2.part_002" ftype="fasta.gz">
234 <assert_contents>
235 <has_n_lines n="589"/>
236 </assert_contents>
237 </element>
238 </output_collection>
239 </test>
240 </tests>
241 <help><![CDATA[
242
243 **Seqkit Split2**
244
245 This tool splits FASTA or FASTQ files (single-end or paired-end) into multiple files based on the number of parts, the number of sequences per part, or the sequence length. It is designed for low memory usage and fast processing.
246
247 **Input type**: Choose between a single FASTA/FASTQ file and a paired-end collection.
248
249 **Split sequences by**:

250 - **Number of parts**: Split into N parts using round-robin distribution.
251 - **Number of sequences per part**: Split into parts with N sequences each.
252 - **Length of sequences**: Split into chunks of >=N bases (supports K/M/G suffix, e.g., 10K, 1M).
253
254 **Outputs**
255
256 - A collection of split FASTA/FASTQ files
257
258 For more details, see the Seqkit Split2 documentation_
259
260 .. _documentation: https://bioinf.shenwei.me/seqkit/usage/#split2
261
262 ]]></help>
263 <expand macro="citations"/>
264 <creator> <!-- Creator metadata for this tool: one person and one organization. -->
265 <person givenName="Saim" familyName="Momin" url="https://github.com/SaimMomin12" identifier="https://orcid.org/0009-0003-9935-828X"/>
266 <organization name="Galaxy Europe" url="https://galaxyproject.org/eu/"/>
267 </creator>
268 </tool>