<!--
  Changeset metadata carried over from the repository web view:
  sampleFasta.xml @ 12:6b7d31026d1c draft
  commit message: Uploaded
  author: petr-novak
  date: Tue, 04 Feb 2020 08:38:22 -0500
  parents: c2c69c6090f0
  children: a675b4534b19
-->

<tool id="sampler" name="Read sampling" version="1.0.0">
  <description>Tool for randomly sampling subsets of reads from a larger dataset</description>
  <!-- External dependency: the command template below calls the seqkit executable. -->
  <requirements>
    <requirement type="package">seqkit</requirement>
  </requirements>
  <!-- Any exit code of 1 or higher marks the job as failed (level "fatal"). -->
  <stdio>
    <exit_code description="Error" level="fatal" range="1:"/>
  </stdio>
  <!--
    Command template (Cheetah, rendered to a shell command line).

    Paired mode (the "paired" parameter renders as "true"):
      1. deinterlacer.py receives the input dataset and the names Afile and Bfile
         (presumably the two mate files it writes; confirm against the script).
      2. seqkit sample is run on Afile and on Bfile with the same read count and
         the same seed, producing Asample and Bsample.
      3. fasta_interlacer.py receives Asample and Bsample and the output dataset
         path (presumably re-interlacing the pairs; confirm against the script).
    Single mode: seqkit sample is run directly on the input dataset.

    Dataset paths and the tool directory are single-quoted so that the shell
    passes each of them as exactly one argument. No XML comments are placed
    inside the command element itself; notes inside the template use Cheetah
    comment lines instead.
  -->
  <command>
    <![CDATA[
    #if str($paired) == "true"
      ## Paired-end branch: intermediate files live in the job working directory.
      '${__tool_directory__}/deinterlacer.py' '$input' Afile Bfile
      &&
      seqkit sample -2 --number $number --rand-seed $seed -o Asample -w 0 Afile < /dev/null
      &&
      seqkit sample -2 --number $number --rand-seed $seed -o Bsample -w 0 Bfile < /dev/null
      &&
      '${__tool_directory__}/fasta_interlacer.py' -a Asample -b Bsample -p '$output' -x tmpfile
    #else
      ## Single-read branch: sample straight from the input into the output dataset.
      seqkit sample -2 --number $number --rand-seed $seed -o '$output' -w 0 '$input'
    #end if
    ]]>
  </command>

  <inputs>
    <!-- FASTA dataset the reads are sampled from. -->
    <param name="input" type="data" format="fasta" label="Read file (FASTA)"/>
    <!-- Passed to seqkit as the number of records to sample; in paired mode it is applied to each mate file. -->
    <param name="number" type="integer" value="500000" min="1" label="Number of reads or read pairs"/>
    <!-- Passed to seqkit as the random seed. -->
    <param name="seed" type="integer" value="10" min="0" label="Random number generator seed"/>
    <!-- Renders as "true"/"false" in the command template and selects the paired-end branch. -->
    <param name="paired" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Paired-end reads" help="If paired-end reads are sampled, left and right-hand reads must be interlaced and all pairs must be complete."/>
  </inputs>


  <outputs>
    <!-- Sampled reads in FASTA format; the history label names the source dataset number and the requested sample size. -->
    <data name="output" format="fasta" label="Random selection from dataset ${input.hid}, sample size ${number}"/>
  </outputs>

  <help>
    **What it does**
    
    This tool creates a sample of sequences by taking a 'random' sample from a larger data set.
    Using the same seed parameter makes sampling reproducible.

    
  </help>

  
</tool>