view nanostat.xml @ 0:e86d1c122ee7 draft

initial upload
author leomrtns
date Tue, 14 May 2019 05:36:34 -0400
parents
children 845458a694e0
line wrap: on
line source

<tool id="nanostat" name="NanoStat" version="0.1.0">
  <!-- One-line summary shown next to the tool name. -->
  <description>Calculate various statistics from a long read sequencing dataset in fastq, bam or albacore sequencing summary format</description>
  <!-- Software dependency providing the NanoStat executable called in the command section. -->
  <requirements>
    <requirement version="1.1.2" type="package">nanostat</requirement>
  </requirements>
  <command detect_errors="exit_code"><![CDATA[
    #import re
    ## ---------------------------------------------------------------------
    ## Stage inputs.
    ## Galaxy stores datasets as xyz.dat, but NanoStat relies on the file
    ## suffix to detect compressed fasta/fastq. Each dataset is therefore
    ## symlinked into the working directory under a name derived from its
    ## 'element_identifier'.
    ##   * characters outside [A-Za-z0-9_.-] are replaced by '_' so that the
    ##     name cannot break out of the shell quoting;
    ##   * the loop index is prepended so that two datasets sharing the same
    ##     identifier do not collide (and no name can start with '-');
    ##   * if the identifier lacks the compression suffix announced by the
    ##     Galaxy datatype (gz, bz2, bgz), that suffix is appended.
    ## ---------------------------------------------------------------------
    #set $input_kind = str($input_type.type)
    #set $staged_inputs = []
    #if $input_kind in ("fastq", "fasta")
      #for $idx, $i_file in enumerate($input_type.file)
        #set $safe_name = "%d_%s" % ($idx, re.sub('[^\w.-]', '_', str($i_file.element_identifier)))
        #set $compression = str($i_file.ext).rpartition('.')[2]
        #if $compression in ("gz", "bz2", "bgz") and not $safe_name.endswith("." + $compression)
          #set $safe_name = $safe_name + "." + $compression
        #end if
        ln -s '${i_file}' '${safe_name}' &&
        #silent $staged_inputs.append("'%s'" % $safe_name)
      #end for
    #else if $input_kind == "bam"
      ## Link every BAM together with its Galaxy-generated index so that the
      ## index is found next to the BAM file in the working directory.
      #for $idx, $i_file in enumerate($input_type.file)
        #set $bam_name = "input_%d.bam" % $idx
        ln -s '${i_file}' '${bam_name}' &&
        ln -s '${i_file.metadata.bam_index}' '${bam_name}.bai' &&
        #silent $staged_inputs.append("'%s'" % $bam_name)
      #end for
    #else
      ## Summary files are plain tabular text: pass the dataset paths directly,
      ## one shell argument per file.
      #for $i_file in $input_type.file
        #silent $staged_inputs.append("'%s'" % $i_file)
      #end for
    #end if
    #set $joined_inputs = ' '.join($staged_inputs)

    ## ---------------------------------------------------------------------
    ## Run NanoStat. The values of the 'type' select (fastq, fasta, bam,
    ## summary) are exactly the names of the corresponding NanoStat options.
    ## ---------------------------------------------------------------------
    NanoStat
    --threads \${GALAXY_SLOTS:-4}
    #if $input_kind == "summary"
      --readtype '${input_type.readtype}'
      #if $input_type.barcoded
        --barcoded
      #end if
    #end if
    --${input_kind} ${joined_inputs}
    -n '$output1'
    ]]></command>
  <inputs>
    <!--
      The selected file type decides which NanoStat input option is used.
      Every branch exposes its datasets under the same parameter name ("file")
      so the command template can treat them uniformly. The inputs are
      mandatory: NanoStat requires exactly one of its input options.
    -->
    <conditional name="input_type">
      <param name="type" type="select" label="File type of input read files" help="It is not possible to mix distinct file types.">
        <option value="fastq" selected="true">fastq (compressed or not)</option>
        <option value="fasta">fasta (compressed or not)</option>
        <option value="bam">sorted bam</option>
        <option value="summary">Use albacore or guppy summary file for quality scores</option>
      </param>
      <when value="fastq">
        <param name="file" type="data" format="fastqsanger,fastqsanger.gz,fastqsanger.bz2,fastqsanger.bgz" multiple="true" label="One or more (compressed) fastq file(s)."/>
      </when>
      <when value="fasta">
        <param name="file" type="data" format="fasta,fasta.gz,fasta.bz2,fasta.bgz" multiple="true" label="One or more (compressed) fasta file(s)."/>
      </when>
      <when value="bam">
        <param name="file" type="data" format="bam" multiple="true" label="One or more sorted bam file(s)."/>
      </when>
      <when value="summary">
        <param name="file" type="data" format="tabular" multiple="true" label="Summary file generated by albacore or guppy."/>
        <!-- truevalue spells out the full NanoStat flag name given in the argument attribute. -->
        <param name="barcoded" argument="--barcoded" type="boolean" truevalue="--barcoded" falsevalue="" checked="false" label="Do you want to split the summary file by barcode?" help="Default: No"/>
        <param name="readtype" argument="--readtype" type="select" label="Which read type to extract information about from summary?">
          <option value="1D" selected="true">1D</option>
          <option value="2D">2D</option>
          <option value="1D2">1D2</option>
        </param>
      </when>
    </conditional>
  </inputs>
  <!-- Single report dataset; the command section passes its path to NanoStat via the -n option. -->
  <outputs>
    <data format="tabular" name="output1"/>
  </outputs>
  <tests>
    <!-- Two differently compressed fastq files processed in a single run; the report is compared with out.txt. -->
    <test>
      <conditional name="input_type">
        <param name="type" value="fastq"/>
        <param name="file" value="input_1.fq.gz,input_2.fq.bz2"/>
      </conditional>
      <output name="output1" file="out.txt"/>
    </test>
  </tests>
  <help><![CDATA[
**What it does**

NanoStat calculates statistics of a long read sequencing dataset.

**Inputs**

Choose one file type; it is not possible to mix distinct file types in a single run.

- *fastq*: one or more fastq files, compressed or not
- *fasta*: one or more fasta files, compressed or not
- *bam*: one or more sorted bam files
- *summary*: one or more summary files generated by albacore or guppy. For this
  type the read type (1D, 2D or 1D2) can be selected, and the summary can
  optionally be split by barcode.

**Output**

A single report with the calculated statistics.

**Command line usage of the underlying tool**

::

    usage: NanoStat [-h] [-v] [-o OUTDIR] [-p PREFIX] [-n NAME] [-t N]
                    [--barcoded] [--readtype {1D,2D,1D2}]
                    (--fastq file [file ...] | --fasta file [file ...] | --summary file [file ...] | --bam file [file ...])

Example::

    NanoStat --fastq reads.fastq.gz --outdir statreports
    ]]></help>
  <citations>
    <!-- One reference per citation element. -->
    <!-- NanoStat source repository. -->
    <citation type="bibtex">
      @misc{githubnanostat,
      title = {NanoStat},
      url = {https://github.com/wdecoster/nanostat}
      }
    </citation>
    <!-- De Coster et al. (2018), NanoPack: visualizing and processing long-read sequencing data. Bioinformatics 34(15):2666-2669. -->
    <citation type="doi">10.1093/bioinformatics/bty149</citation>
  </citations>
</tool>