view sampleFasta.xml @ 6:f224513123a1 draft

Uploaded
author petr-novak
date Mon, 02 Dec 2019 03:45:28 -0500
parents e320ef2d105a
children c2c69c6090f0
line wrap: on
line source

<tool id="sampler" name="Sequence sampling" version="1.0.0">
  <description>Tool for creating samples of sequences from a larger dataset</description>
  <!-- External packages that must be available when the job runs; the command template calls seqkit. -->
  <requirements>
    <!-- NOTE(review): no version attribute is set, so the seqkit release that gets resolved is not fixed; consider pinning one. -->
    <requirement type="package">seqkit</requirement>
  </requirements>
  <!-- Error detection: any exit code of 1 or higher marks the job as failed (level "fatal"). -->
  <stdio>
    <exit_code range="1:" level="fatal" description="Error" />
  </stdio>
  <command><![CDATA[
    ## Cheetah template that builds the shell command line for one job.
    ## All dataset paths and the tool directory are single-quoted so that
    ## spaces or shell metacharacters in a path cannot split or alter the command.
    ## The numeric parameters are typed as integers in the inputs section and are left bare.
    #if str($paired) == "true"
      ## Paired mode. NOTE(review): deinterlacer.py is expected to split the
      ## interleaved input into the two mate files Afile and Bfile; confirm against the script.
      '${__tool_directory__}/deinterlacer.py' '$input' Afile Bfile
      &&
      ## Both mate files are sampled with the same number and the same seed,
      ## presumably so that matching records are picked from each file.
      seqkit sample -2 --number $number --rand-seed $seed -o Asample -w 0 Afile
      &&
      seqkit sample -2 --number $number --rand-seed $seed -o Bsample -w 0 Bfile
      &&
      ## NOTE(review): fasta_interlacer.py is expected to merge the two samples
      ## back into one interleaved file; tmpfile receives its secondary output.
      '${__tool_directory__}/fasta_interlacer.py' -a Asample -b Bsample -p '$output' -x tmpfile
    #else
      ## Unpaired mode: sample the input directly into the output dataset.
      seqkit sample -2 --number $number --rand-seed $seed -o '$output' -w 0 '$input'
    #end if
  ]]></command>

  <!-- User-facing parameters; the name attributes are referenced by the command template and the output label. -->
  <inputs>
    <!-- FASTA dataset to sample from. -->
    <param format="fasta" type="data" name="input" label="Choose your fasta file" />
    <!-- Sample size passed to seqkit as the requested number of records (at least 1). -->
    <param name="number" type="integer" value="500000" min="1" label="number of sequences or sequence pairs"/>
    <!-- Seed passed to seqkit; must be non-negative. -->
    <param name="seed" type="integer" value="10" min="0" label="random number generator seed " />
    <!-- Switches the command template to the paired branch when checked; unchecked by default. -->
    <param name="paired"
           type="boolean"
           truevalue="true"
           falsevalue="false"
           checked="false"
           label="All sequence reads are paired"
           help="Check if you are using paired reads and the input sequences contain both read mates, with left mates alternating with their right mates"/>
  </inputs>


  <!-- Single FASTA output; the history label records the source dataset number and the requested sample size. -->
  <outputs>
    <data format="fasta" name="output" label="Random selection from dataset ${input.hid}, sample size ${number}" />
  </outputs>

  <help>
    **What it does**
    
    This tool is intended to create a sample of sequences by taking a 'random' sample from a larger data set.
    Using the same seed parameter makes the sampling reproducible.

    
  </help>

  
</tool>