<?xml version="1.0" encoding="UTF-8"?>
<!--
  Galaxy tool wrapper: draws a sample of sequences from a FASTA dataset
  using `seqkit sample`.

  Parameters (see <inputs>):
    input  - FASTA dataset to sample from
    number - value passed to `seqkit sample -2 -n/number`
    seed   - value passed to `seqkit sample -s/rand-seed`
    paired - when true, the input is split into two mate files, each file is
             sampled separately and the two samples are interlaced again
  Output:
    output - FASTA dataset holding the sampled sequences
-->
<tool id="sampler" name="Sequence sampling" version="1.0.0">
    <description>Tool for creating samples of sequences from a larger dataset</description>
    <requirements>
        <requirement type="package">seqkit</requirement>
    </requirements>
    <stdio>
        <!-- Any non-zero exit code of the command line fails the job. -->
        <exit_code range="1:" level="fatal" description="Error" />
    </stdio>
    <!--
      The command is wrapped in CDATA because it contains `&&`, which is not
      legal in plain XML text. Each `&&` ends its line so the chain is valid
      shell whether or not newlines in the template are collapsed.
      Dataset paths and script paths are single-quoted so that paths with
      spaces or shell metacharacters are passed as one argument.
      Afile/Bfile/Asample/Bsample/tmpfile are scratch names relative to the
      working directory of the command.
      NOTE(review): both mate files are sampled with the same number and seed,
      presumably so that the same records are drawn from each file and the
      pairs stay in sync; confirm against seqkit's documented behaviour.
    -->
    <command><![CDATA[
        #if str($paired) == "true"
            '${__tool_directory__}/deinterlacer.py' '$input' Afile Bfile &&
            seqkit sample -2 --number $number --rand-seed $seed -o Asample -w 0 Afile &&
            seqkit sample -2 --number $number --rand-seed $seed -o Bsample -w 0 Bfile &&
            '${__tool_directory__}/fasta_interlacer.py' -a Asample -b Bsample -p '$output' -x tmpfile
        #else
            seqkit sample -2 --number $number --rand-seed $seed -o '$output' -w 0 '$input'
        #end if
    ]]></command>

    <inputs>
        <param name="input"
               type="data"
               format="fasta"
               label="Choose your fasta file" />
        <param name="number"
               type="integer"
               size="7"
               value="500000"
               min="1"
               label="Number of sequences or sequence pairs" />
        <param name="seed"
               type="integer"
               size="10"
               value="10"
               min="0"
               label="Random number generator seed" />
        <param name="paired"
               type="boolean"
               truevalue="true"
               falsevalue="false"
               checked="false"
               label="All sequence reads are paired"
               help="Check if you are using paired reads and the input sequences contain both read mates, with left mates alternating with their right mates" />
    </inputs>

    <outputs>
        <data name="output"
              format="fasta"
              label="Random selection from dataset ${input.hid}, sample size ${number}" />
    </outputs>

    <help>
**What it does**

This tool creates a sample of sequences by taking a 'random' sample from a larger data set.
Using the same seed parameter makes the sampling reproducible.
    </help>
</tool>