<?xml version="1.0" encoding="UTF-8"?>
<!--
  Galaxy tool wrapper around "seqkit sample".

  Draws a fixed number of reads (or read pairs) from a FASTA dataset using a
  user-supplied random seed. For paired input, the wrapper runs two helper
  scripts shipped next to this file (deinterlacer.py and fasta_interlacer.py)
  around two seqkit runs that share the same seed.
-->
<tool id="sampler" name="Read sampling" version="1.0.0">
  <description>Tool for random sampling of read subsets from a larger dataset</description>

  <requirements>
    <requirement type="package">seqkit</requirement>
  </requirements>

  <!-- Any non-zero exit status of the command line is reported as a fatal error. -->
  <stdio>
    <exit_code range="1:" level="fatal" description="Error"/>
  </stdio>

  <!--
    Cheetah template producing the shell command line. Dataset and script
    paths are single-quoted so that unusual characters in a path cannot split
    or alter the command.
  -->
  <command><![CDATA[
#if str($paired) == "true"
    ## Paired mode. NOTE(review): deinterlacer.py presumably splits the
    ## interlaced input into the two mate files Afile and Bfile - confirm
    ## against the script.
    '${__tool_directory__}/deinterlacer.py' '$input' Afile Bfile
    &&
    ## Both mate files are sampled with the same seed and the same number.
    ## NOTE(review): this presumably selects matching record positions in both
    ## files, which is why all pairs must be complete - confirm.
    seqkit sample -2 --number $number --rand-seed $seed -o Asample -w 0 Afile < /dev/null
    &&
    seqkit sample -2 --number $number --rand-seed $seed -o Bsample -w 0 Bfile < /dev/null
    &&
    ## NOTE(review): fasta_interlacer.py presumably merges the two samples back
    ## into one interlaced file; the meaning of "-x tmpfile" is not visible
    ## here - confirm against the script.
    '${__tool_directory__}/fasta_interlacer.py' -a Asample -b Bsample -p '$output' -x tmpfile
#else
    ## Single-end mode: sample straight from the input into the output.
    ## stdin is redirected from /dev/null exactly as in the paired branch, so
    ## both code paths invoke seqkit the same way.
    seqkit sample -2 --number $number --rand-seed $seed -o '$output' -w 0 '$input' < /dev/null
#end if
  ]]></command>

  <inputs>
    <param name="input" type="data" format="fasta" label="Read file (FASTA)"/>
    <param name="number" type="integer" size="7" value="500000" min="1"
           label="Number of reads or read pairs"/>
    <param name="seed" type="integer" size="10" value="10" min="0"
           label="Random number generator seed"/>
    <param name="paired" type="boolean" truevalue="true" falsevalue="false" checked="false"
           label="Paired-end reads"
           help="If paired-end reads are sampled, left and right-hand reads must be interlaced and all pairs must be complete."/>
  </inputs>

  <outputs>
    <data name="output" format="fasta"
          label="Random selection from dataset ${input.hid}, sample size ${number}"/>
  </outputs>

  <!-- Help text is reStructuredText; it is kept at column 0 because indentation is significant there. -->
  <help><![CDATA[
**What it does**

This tool creates a sample of sequences by taking a 'random' sample from a larger data set.
Using the same seed parameter makes the sampling reproducible.
  ]]></help>
</tool>