diff sampleFasta.xml @ 3:e320ef2d105a draft

Uploaded
author petr-novak
date Thu, 05 Sep 2019 09:04:56 -0400
parents
children c2c69c6090f0
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sampleFasta.xml	Thu Sep 05 09:04:56 2019 -0400
@@ -0,0 +1,47 @@
+<tool id="sampler" name="Sequence sampling" version="1.0.0">
+  <description>Tool for creating samples of sequences from a larger dataset</description>
+  <requirements>
+    <!-- seqkit supplies the `seqkit sample` calls used in the command section. -->
+    <requirement type="package">seqkit</requirement>
+  </requirements>
+  <!-- Any exit code of 1 or higher is reported as a fatal job error. -->
+  <stdio><exit_code level="fatal" range="1:" description="Error"/></stdio>
+  <command>
+    ## Dataset paths and the tool directory are single-quoted so that paths containing spaces or shell metacharacters stay one argument.
+    #if str($paired)=="true"
+      ## Paired input: deinterlace into Afile/Bfile, sample each with identical --number and --rand-seed, then interlace the two samples.
+      '${__tool_directory__}/deinterlacer.py' '$input' Afile Bfile &amp;&amp;
+      seqkit sample -2 --number $number --rand-seed $seed -o Asample -w 0 Afile &amp;&amp;
+      seqkit sample -2 --number $number --rand-seed $seed -o Bsample -w 0 Bfile &amp;&amp;
+      '${__tool_directory__}/fasta_interlacer.py' -a Asample -b Bsample -p '$output' -x tmpfile
+    #else
+      ## Unpaired input: sample straight from the input dataset into the output dataset.
+      seqkit sample -2 --number $number --rand-seed $seed -o '$output' -w 0 '$input'
+    #end if
+  </command>
+
+  <inputs>
+    <!-- "paired" selects the deinterlace/sample/interlace branch of the command; "number" and "seed" are passed to seqkit sample. -->
+    <param name="input" type="data" format="fasta" label="Choose your fasta file" />
+    <param name="number" type="integer" value="500000" min="1" label="number of sequences or sequence pairs" />
+    <param name="seed" type="integer" value="10" min="0" label="random number generator seed " />
+    <param name="paired" type="boolean" truevalue="true" falsevalue="false" checked="false" label="All sequence reads are paired"
+           help="Check if you are using paired reads and the input sequences contain both read mates, with left mates alternating with their right mates" />
+  </inputs>
+
+
+  <outputs>
+    <!-- Single FASTA dataset; the label records the source dataset's history id and the requested sample size. -->
+    <data format="fasta" name="output" label="Random selection from dataset ${input.hid}, sample size ${number}" />
+  </outputs>
+  <help>
+    **What it does**
+    
+    This tool creates a sample of sequences by taking a 'random' sample from a larger data set.
+    Using the same seed parameter makes the sampling reproducible.
+
+    
+  </help>
+
+  
+</tool>