<tool id="sampler" name="Read sampling" version="1.0.1">
  <description> Tool for randomly sampling subsets of reads from large datasets</description>

  <!-- seqkit performs the actual sampling; it must be resolvable as a package. -->
  <requirements>
    <requirement type="package">seqkit</requirement>
  </requirements>

  <!-- Any non-zero exit code of the command line is reported as a fatal job error. -->
  <stdio>
    <exit_code range="1:" level="fatal" description="Error" />
  </stdio>

  <!--
    Command template (Cheetah, rendered to a single shell command line).

    Paired mode ($paired is "true"):
      1. deinterlacer.py takes the interlaced input and the names Afile and Bfile
         (presumably it writes the two mates there; the script is not part of
         this file, so verify against the script).
      2. The shell variable NUMBER is set to half of the requested read count
         (shell integer division, so an odd count is rounded down).
      3. seqkit samples NUMBER records from Afile and from Bfile using the same
         seed. NOTE(review): pairing of the two samples relies on seqkit picking
         the same record positions in both files for an identical seed; confirm.
      4. fasta_interlacer.py receives both samples and the output dataset path
         (the meaning of its -x tmpfile argument is not visible here).

    Single mode: seqkit samples the requested number of records directly from
    the input into the output dataset.

    Escaped dollars (backslash followed by a dollar sign) are passed through to
    the shell; plain dollar placeholders are substituted by Galaxy before the
    command runs. Dataset paths and the tool directory are single-quoted so
    that unusual characters in paths cannot break the command line.
    stdin of seqkit is redirected from /dev/null, presumably so that it never
    waits on the job's standard input.
  -->
  <command>
<![CDATA[
#if str($paired)=="true"
    '${__tool_directory__}/deinterlacer.py' '$input' Afile Bfile
    &&
    NUMBER=\$((${number} / 2))
    &&
    seqkit sample -2 --number \$NUMBER --rand-seed $seed -o Asample -w 0 Afile < /dev/null
    &&
    seqkit sample -2 --number \$NUMBER --rand-seed $seed -o Bsample -w 0 Bfile < /dev/null
    &&
    '${__tool_directory__}/fasta_interlacer.py' -a Asample -b Bsample -p '$output' -x tmpfile
#else
    seqkit sample -2 --number $number --rand-seed $seed -o '$output' -w 0 '$input' < /dev/null
#end if
]]>
  </command>

  <!--
    Inputs:
      input  : FASTA dataset to sample from.
      paired : selects the paired branch of the command (rendered as "true"/"false").
      number : requested number of reads (at least 1); halved per mate file in paired mode.
      seed   : seed handed to seqkit's random number generator (0 or greater).
  -->
  <inputs>
    <param format="fasta" type="data" name="input" label="Read file (FASTA)" />
    <param name="paired" type="boolean" truevalue="true" falsevalue="false" checked="True" label="Paired-end reads" help="If paired-end reads are sampled, left and right-hand reads must be interlaced and all pairs must be complete."/>
    <param name="number" type="integer" size="7" value="500000" min="1" label="Number of reads"/>
    <param name="seed" type="integer" size="10" value="10" min="0" label="Random number generator seed " />
  </inputs>

  <!-- Single FASTA output; the label records the source dataset and requested sample size. -->
  <outputs>
    <data format="fasta" name="output" label="Random selection from dataset ${input.hid}, sample size ${number}" />
  </outputs>

  <!-- Help text is reStructuredText; keep it unindented so it is not rendered as a block quote. -->
  <help>
**What it does**

This tool randomly samples the specified number of reads from larger datasets.
Using the same random number generator seed with the same dataset results in sampling the same set of reads, while
using different seeds generates different subsets of reads.
  </help>
</tool>