view dunovo.xml @ 2:a9cb6fb9ea94 draft

Update to Du Novo 0.7.6.
author nick
date Thu, 24 Aug 2017 02:13:33 -0400
parents ea832c221ec9
children 00dde366870a
line wrap: on
line source

<?xml version="1.0"?>
<tool id="dunovo" name="Du Novo: Make consensus reads" version="0.7.6">
  <description>from duplex sequencing alignments</description>
  <requirements>
    <!-- Pins the dunovo package at the same 0.7.6 release given in the tool's version attribute. -->
    <requirement version="0.7.6" type="package">dunovo</requirement>
    <!-- TODO: require Python 2.7 -->
  </requirements>
  <command detect_errors="exit_code"><![CDATA[
    ## Step 1: run dunovo.py on the input; its stdout is captured in duplex.fa.
    ## When keep_sscs is set, sscs.fa is requested as well.
    dunovo.py --galaxy $phone
      -r $min_reads
      -q $qual_thres
      -F $qual_format
      '$input'
      #if $keep_sscs
        --sscs-file sscs.fa
      #end if
      > duplex.fa
    ## Step 2: outconv.py turns each intermediate FASTA into the two per-mate output datasets.
    && outconv.py duplex.fa -1 '$dcs1' -2 '$dcs2'
    #if $keep_sscs
      && outconv.py sscs.fa -1 '$sscs1' -2 '$sscs2'
    #end if
  ]]></command>
  <inputs>
    <!-- Tabular dataset handed to dunovo.py as its positional argument. -->
    <param name="input"
           type="data"
           format="tabular"
           label="Aligned input reads" />
    <!-- Passed to dunovo.py as -r. -->
    <param name="min_reads"
           type="integer"
           min="1"
           value="3"
           label="Minimum reads per family"
           help="Single-strand families with fewer than this many reads will be skipped."/>
    <!-- Passed to dunovo.py as -q. -->
    <param name="qual_thres"
           type="integer"
           min="1"
           value="25"
           label="Minimum base quality"
           help="Bases with a PHRED score less than this will not be counted in the consensus making."/>
    <!-- Passed to dunovo.py as -F. -->
    <param name="qual_format"
           type="select"
           label="FASTQ format"
           help="Solexa should also work for Illumina 1.3+ and 1.5+, and Sanger should work for Illumina 1.8+">
      <option selected="true" value="sanger">Sanger (PHRED 0 = &quot;!&quot;)</option>
      <option value="solexa">Solexa (PHRED 0 = &quot;@&quot;)</option>
    </param>
    <!-- Switches on the sscs-file option in the command and the sscs1/sscs2 outputs. -->
    <param name="keep_sscs"
           type="boolean"
           truevalue="true"
           falsevalue=""
           label="Output single-strand consensus sequences as well" />
    <!-- Unchecked by default; when checked, the truevalue is inserted into the command line as a flag. -->
    <param name="phone"
           type="boolean"
           checked="False"
           truevalue="--phone-home"
           falsevalue=""
           label="Send anonymous usage data"
           help="Report helpful usage data to the developer, to better understand the use cases and performance of the tool. The only data which will be recorded is the name and version of the tool, the size of the input data, the time taken to process it, and the IP address of the machine running it. No parameters or filenames are sent."/>
  </inputs>
  <outputs>
    <!-- Duplex consensus sequences: always declared, one dataset per mate. -->
    <data name="dcs1" label="$tool.name on $on_string (mate 1)" format="fasta"/>
    <data name="dcs2" label="$tool.name on $on_string (mate 2)" format="fasta"/>
    <!-- Single-strand consensus sequences: filtered on the keep_sscs input. -->
    <data name="sscs1" label="$tool.name on $on_string (SSCS mate 1)" format="fasta">
      <filter>keep_sscs</filter>
    </data>
    <data name="sscs2" label="$tool.name on $on_string (SSCS mate 2)" format="fasta">
      <filter>keep_sscs</filter>
    </data>
  </outputs>
  <tests>
    <!-- Default parameters; only the two duplex consensus outputs are compared. -->
    <test>
      <param value="families.msa.tsv" name="input"/>
      <output file="families.cons_1.fa" name="dcs1"/>
      <output file="families.cons_2.fa" name="dcs2"/>
    </test>
  </tests>
  <citations>
    <!-- BibTeX entry for the Du Novo publication. -->
    <citation type="bibtex">@article{Stoler2016,
      author    = {Stoler, Nicholas and Arbeithuber, Barbara and Guiblet, Wilfried and Makova, Kateryna D and Nekrutenko, Anton},
      doi       = {10.1186/s13059-016-1039-4},
      issn      = {1474-760X},
      journal   = {Genome biology},
      number    = {1},
      pages     = {180},
      pmid      = {27566673},
      publisher = {Genome Biology},
      title     = {{Streamlined analysis of duplex sequencing data with Du Novo.}},
      url       = {http://www.ncbi.nlm.nih.gov/pubmed/27566673},
      volume    = {17},
      year      = {2016}
    }</citation>
  </citations>
  <help>

**What it does**

This is for processing duplex sequencing data. It creates single-strand and duplex consensus reads from aligned read families.

-----

**Input**

This expects the output format of the "Align families" tool.

-----

**Output**

This will output the final duplex consensus reads in two FASTA files (first and second reads in the pairs). Optionally, you can save the single-strand consensus reads too, in a separate pair of FASTA files.

    </help>
</tool>