changeset 4:5fbeaa41b223 draft

Update to Du Novo 2.0.6.
author nick
date Wed, 25 Oct 2017 19:26:43 -0400
parents 00dde366870a
children 000969829a5d
files align_families.xml correct_barcodes.xml dunovo.xml make_families.xml tool_dependencies.xml
diffstat 5 files changed, 95 insertions(+), 91 deletions(-) [+]
line wrap: on
line diff
--- a/align_families.xml	Mon Sep 11 16:59:44 2017 -0400
+++ b/align_families.xml	Wed Oct 25 19:26:43 2017 -0400
@@ -1,16 +1,20 @@
 <?xml version="1.0"?>
-<tool id="align_families" name="Du Novo: Align families" version="0.8.1">
+<tool id="align_families" name="Du Novo: Align families" version="2.0.6">
   <description>of duplex sequencing reads</description>
   <requirements>
     <requirement type="package" version="7.221">mafft</requirement>
-    <requirement type="package" version="0.8.1">dunovo</requirement>
+    <requirement type="package" version="2.0.6">dunovo</requirement>
     <!-- TODO: require Python 2.7 -->
   </requirements>
-  <command detect_errors="exit_code">align_families.py --galaxy $phone --processes \${GALAXY_SLOTS:-1} '$input' &gt; '$output'
+  <command detect_errors="exit_code">align_families.py --aligner $aligner --galaxy $phone --processes \${GALAXY_SLOTS:-1} '$input' &gt; '$output'
   </command>
   <inputs>
     <param name="input" type="data" format="tabular" label="Input reads" help="with barcodes, grouped by family"/>
-    <param name="phone" type="boolean" truevalue="--phone-home" falsevalue="" checked="False" label="Send anonymous usage data" help="Report helpful usage data to the developer, to better understand the use cases and performance of the tool. The only data which will be recorded is the name and version of the tool, the size of the input data, the time taken to process it, and the IP address of the machine running it. No parameters or filenames are sent."/>
+    <param name="aligner" type="select" value="mafft" label="Multiple sequence aligner" help="MAFFT is the original aligner Du Novo was published with in 2016. Kalign is much faster and has similar accuracy.">
+      <option value="kalign">Kalign2</option>
+      <option value="mafft">MAFFT</option>
+    </param>
+    <param name="phone" type="boolean" truevalue="--phone-home" falsevalue="" checked="False" label="Send usage data" help="Report helpful usage data to the developer, to better understand the use cases and performance of the tool. The only data which will be recorded is the name and version of the tool, the size of the input data, the number of processes used, the time and memory taken to process it, the alignment algorithm selected, and the IP address of the machine running it. Also, if the tool fails, it will report the name of the exception thrown and the line of code it occurred in. The names of the input and output datasets are not sent. All the reporting and recording code is available at https://github.com/NickSto/ET."/>
   </inputs>
   <outputs>
     <data name="output" format="tabular"/>
@@ -21,26 +25,10 @@
       <output name="output" file="smoke.families.aligned.tsv"/>
     </test>
     <test>
-      <param name="input" value="families.in.tsv"/>
-      <output name="output" file="families.sort.tsv"/>
+      <param name="input" value="families.sort.tsv"/>
+      <output name="output" file="families.msa.tsv"/>
     </test>
   </tests>
-  <citations>
-    <citation type="bibtex">@article{Stoler2016,
-      author = {Stoler, Nicholas and Arbeithuber, Barbara and Guiblet, Wilfried and Makova, Kateryna D and Nekrutenko, Anton},
-      doi = {10.1186/s13059-016-1039-4},
-      issn = {1474-760X},
-      journal = {Genome biology},
-      number = {1},
-      pages = {180},
-      pmid = {27566673},
-      publisher = {Genome Biology},
-      title = {{Streamlined analysis of duplex sequencing data with Du Novo.}},
-      url = {http://www.ncbi.nlm.nih.gov/pubmed/27566673},
-      volume = {17},
-      year = {2016}
-    }</citation>
-  </citations>
   <help>
 
 **What it does**
@@ -77,5 +65,21 @@
 
   $ mafft --nuc --quiet family.fa &gt; family.aligned.fa
 
-    </help>
+  </help>
+  <citations>
+    <citation type="bibtex">@article{Stoler2016,
+      author = {Stoler, Nicholas and Arbeithuber, Barbara and Guiblet, Wilfried and Makova, Kateryna D and Nekrutenko, Anton},
+      doi = {10.1186/s13059-016-1039-4},
+      issn = {1474-760X},
+      journal = {Genome biology},
+      number = {1},
+      pages = {180},
+      pmid = {27566673},
+      publisher = {Genome Biology},
+      title = {{Streamlined analysis of duplex sequencing data with Du Novo.}},
+      url = {http://www.ncbi.nlm.nih.gov/pubmed/27566673},
+      volume = {17},
+      year = {2016}
+    }</citation>
+  </citations>
 </tool>
--- a/correct_barcodes.xml	Mon Sep 11 16:59:44 2017 -0400
+++ b/correct_barcodes.xml	Wed Oct 25 19:26:43 2017 -0400
@@ -1,16 +1,16 @@
 <?xml version="1.0"?>
-<tool id="correct_barcodes" name="Du Novo: Correct barcodes" version="0.8.1">
+<tool id="correct_barcodes" name="Du Novo: Correct barcodes" version="2.0.6">
   <description>of duplex sequencing reads</description>
   <requirements>
     <requirement type="package" version="2.2.5">bowtie2</requirement>
     <requirement type="package" version="0.1.18">samtools</requirement>
-    <requirement type="package" version="1.9">networkx</requirement>
-    <requirement type="package" version="0.8.1">dunovo</requirement>
+    <requirement type="package" version="1.11">networkx</requirement>
+    <requirement type="package" version="2.0.6">dunovo</requirement>
     <!-- TODO: require Python 2.7 -->
   </requirements>
   <command detect_errors="exit_code"><![CDATA[
-    baralign.sh '$input' refdir barcodes.bam
-    && samtools view -f 256 barcodes.bam
+    baralign.sh '$input' refdir
+    | samtools view -S -f 256 -
     | correct.py --galaxy $phone --dist $dist --mapq $mapq --pos $pos '$input' refdir/barcodes.fa
     | sort
     > '$output'
@@ -21,27 +21,11 @@
     <param name="dist" type="integer" value="1" min="1" label="Maximum edit distance" help="Only use alignments where the barcodes differ by at most these many errors."/>
     <param name="mapq" type="integer" value="20" min="0" label="Minimum mapping quality" help="Only use alignments whose MAPQ is at least this."/>
     <param name="pos" type="integer" value="2" min="0" label="Minimum start offset" help="Ignore alignments where the start positions differ by more than this."/>
-    <param name="phone" type="boolean" truevalue="--phone-home" falsevalue="" checked="False" label="Send anonymous usage data" help="Report helpful usage data to the developer, to better understand the use cases and performance of the tool. The only data which will be recorded is the name and version of the tool, the size of the input data, the time taken to process it, and the IP address of the machine running it. No parameters or filenames are sent."/>
+    <param name="phone" type="boolean" truevalue="--phone-home" falsevalue="" checked="False" label="Send usage data" help="Report helpful usage data to the developer, to better understand the use cases and performance of the tool. The only data which will be recorded is the name and version of the tool, the size of the input data, the time and memory taken to process it, and the IP address of the machine running it. Also, if the tool fails, it will report the name of the exception thrown and the line of code it occurred in. The parameters and input/output dataset names are not sent. All the reporting and recording code is available at https://github.com/NickSto/ET."/>
   </inputs>
   <outputs>
     <data name="output" format="tabular"/>
   </outputs>
-  <citations>
-    <citation type="bibtex">@article{Stoler2016,
-      author = {Stoler, Nicholas and Arbeithuber, Barbara and Guiblet, Wilfried and Makova, Kateryna D and Nekrutenko, Anton},
-      doi = {10.1186/s13059-016-1039-4},
-      issn = {1474-760X},
-      journal = {Genome biology},
-      number = {1},
-      pages = {180},
-      pmid = {27566673},
-      publisher = {Genome Biology},
-      title = {{Streamlined analysis of duplex sequencing data with Du Novo.}},
-      url = {http://www.ncbi.nlm.nih.gov/pubmed/27566673},
-      volume = {17},
-      year = {2016}
-    }</citation>
-  </citations>
   <help>
 
 **What it does**
@@ -60,5 +44,21 @@
 
 The output format is the same as the input format, ready to be consumed by the "Align families" tool.
 
-    </help>
+  </help>
+  <citations>
+    <citation type="bibtex">@article{Stoler2016,
+      author = {Stoler, Nicholas and Arbeithuber, Barbara and Guiblet, Wilfried and Makova, Kateryna D and Nekrutenko, Anton},
+      doi = {10.1186/s13059-016-1039-4},
+      issn = {1474-760X},
+      journal = {Genome biology},
+      number = {1},
+      pages = {180},
+      pmid = {27566673},
+      publisher = {Genome Biology},
+      title = {{Streamlined analysis of duplex sequencing data with Du Novo.}},
+      url = {http://www.ncbi.nlm.nih.gov/pubmed/27566673},
+      volume = {17},
+      year = {2016}
+    }</citation>
+  </citations>
 </tool>
--- a/dunovo.xml	Mon Sep 11 16:59:44 2017 -0400
+++ b/dunovo.xml	Wed Oct 25 19:26:43 2017 -0400
@@ -1,8 +1,8 @@
 <?xml version="1.0"?>
-<tool id="dunovo" name="Du Novo: Make consensus reads" version="0.8.1">
+<tool id="dunovo" name="Du Novo: Make consensus reads" version="2.0.6">
   <description>from duplex sequencing alignments</description>
   <requirements>
-    <requirement type="package" version="0.8.1">dunovo</requirement>
+    <requirement type="package" version="2.0.6">dunovo</requirement>
     <!-- TODO: require Python 2.7 -->
   </requirements>
   <command detect_errors="exit_code">
@@ -14,15 +14,15 @@
   <inputs>
     <param name="input" type="data" format="tabular" label="Aligned input reads" />
     <param name="min_reads" type="integer" value="3" min="1" label="Minimum reads per family" help="Single-strand families with fewer than this many reads will be skipped."/>
-    <param name="cons_thres" type="float" value="0.5" min="0.5" max="1.0" label="The threshold to use when making consensus sequences. The consensus base must be present in more than this fraction of the reads, or &quot;N&quot; will be used."/>
-    <param name="min_cons_reads" type="integer" value="0" min="0" label="The minimum number of reads a base must appear in to be used as the consensus base. If no base at the position appears in at least this many reads, &quot;N&quot; will be used."/>
+    <param name="cons_thres" type="float" value="0.5" min="0.5" max="1.0" label="Consensus % threshold" help="The consensus base must be present in more than this fraction of the reads, or &quot;N&quot; will be used."/>
+    <param name="min_cons_reads" type="integer" value="0" min="0" label="Minimum number of reads for a consensus base" help="If no base at the position appears in at least this many reads, &quot;N&quot; will be used."/>
     <param name="qual_thres" type="integer" value="25" min="1" label="Minimum base quality" help="Bases with a PHRED score less than this will not be counted in the consensus making."/>
     <param name="qual_format" type="select" label="FASTQ format" help="Solexa should also work for Illumina 1.3+ and 1.5+, and Sanger should work for Illumina 1.8+">
       <option value="sanger" selected="true">Sanger (PHRED 0 = &quot;!&quot;)</option>
       <option value="solexa">Solexa (PHRED 0 = &quot;@&quot;)</option>
     </param>
     <param name="keep_sscs" type="boolean" truevalue="true" falsevalue="" label="Output single-strand consensus sequences as well" />
-    <param name="phone" type="boolean" truevalue="--phone-home" falsevalue="" checked="False" label="Send anonymous usage data" help="Report helpful usage data to the developer, to better understand the use cases and performance of the tool. The only data which will be recorded is the name and version of the tool, the size of the input data, the time taken to process it, and the IP address of the machine running it. No parameters or filenames are sent."/>
+    <param name="phone" type="boolean" truevalue="--phone-home" falsevalue="" checked="False" label="Send usage data" help="Report helpful usage data to the developer, to better understand the use cases and performance of the tool. The only data which will be recorded is the name and version of the tool, the size of the input data, the number of processes used, the time and memory taken to process it, and the IP address of the machine running it. Also, if the tool fails, it will report the name of the exception thrown and the line of code it occurred in. The parameters and input/output dataset names are not sent. All the reporting and recording code is available at https://github.com/NickSto/ET."/>
   </inputs>
   <outputs>
     <data name="dcs1" format="fasta" label="$tool.name on $on_string (mate 1)"/>
@@ -41,22 +41,6 @@
       <output name="dcs2" file="families.dcs_2.fa"/>
     </test>
   </tests>
-  <citations>
-    <citation type="bibtex">@article{Stoler2016,
-      author = {Stoler, Nicholas and Arbeithuber, Barbara and Guiblet, Wilfried and Makova, Kateryna D and Nekrutenko, Anton},
-      doi = {10.1186/s13059-016-1039-4},
-      issn = {1474-760X},
-      journal = {Genome biology},
-      number = {1},
-      pages = {180},
-      pmid = {27566673},
-      publisher = {Genome Biology},
-      title = {{Streamlined analysis of duplex sequencing data with Du Novo.}},
-      url = {http://www.ncbi.nlm.nih.gov/pubmed/27566673},
-      volume = {17},
-      year = {2016}
-    }</citation>
-  </citations>
   <help>
 
 **What it does**
@@ -75,5 +59,21 @@
 
 This will output final, duplex consensus reads in two FASTA files (first and second reads in the pairs). Optionally, you can save the single-strand reads too, in a separate FASTA file.
 
-    </help>
+  </help>
+  <citations>
+    <citation type="bibtex">@article{Stoler2016,
+      author = {Stoler, Nicholas and Arbeithuber, Barbara and Guiblet, Wilfried and Makova, Kateryna D and Nekrutenko, Anton},
+      doi = {10.1186/s13059-016-1039-4},
+      issn = {1474-760X},
+      journal = {Genome biology},
+      number = {1},
+      pages = {180},
+      pmid = {27566673},
+      publisher = {Genome Biology},
+      title = {{Streamlined analysis of duplex sequencing data with Du Novo.}},
+      url = {http://www.ncbi.nlm.nih.gov/pubmed/27566673},
+      volume = {17},
+      year = {2016}
+    }</citation>
+  </citations>
 </tool>
--- a/make_families.xml	Mon Sep 11 16:59:44 2017 -0400
+++ b/make_families.xml	Wed Oct 25 19:26:43 2017 -0400
@@ -1,8 +1,8 @@
 <?xml version="1.0"?>
-<tool id="make_families" name="Du Novo: Make families" version="0.8.1">
+<tool id="make_families" name="Du Novo: Make families" version="2.0.6">
   <description>of duplex sequencing reads</description>
   <requirements>
-    <requirement type="package" version="0.8.1">dunovo</requirement>
+    <requirement type="package" version="2.0.6">dunovo</requirement>
   </requirements>
   <!-- TODO: Add dependency on coreutils to get paste? -->
   <command detect_errors="exit_code">make-families.sh -t $taglen -i $invariant '$fastq1' '$fastq2' &gt; '$output'
@@ -32,22 +32,6 @@
       <output name="output" file="smoke.families.i0.tsv"/>
     </test>
   </tests>
-  <citations>
-    <citation type="bibtex">@article{Stoler2016,
-      author = {Stoler, Nicholas and Arbeithuber, Barbara and Guiblet, Wilfried and Makova, Kateryna D and Nekrutenko, Anton},
-      doi = {10.1186/s13059-016-1039-4},
-      issn = {1474-760X},
-      journal = {Genome biology},
-      number = {1},
-      pages = {180},
-      pmid = {27566673},
-      publisher = {Genome Biology},
-      title = {{Streamlined analysis of duplex sequencing data with Du Novo.}},
-      url = {http://www.ncbi.nlm.nih.gov/pubmed/27566673},
-      volume = {17},
-      year = {2016}
-    }</citation>
-  </citations>
   <help>
 
 **What it does**
@@ -91,5 +75,21 @@
   |  CCT  |  ATG  |  ba   | ATGCCT  |
   +-------+-------+-------+---------+
 
-    </help>
+  </help>
+  <citations>
+    <citation type="bibtex">@article{Stoler2016,
+      author = {Stoler, Nicholas and Arbeithuber, Barbara and Guiblet, Wilfried and Makova, Kateryna D and Nekrutenko, Anton},
+      doi = {10.1186/s13059-016-1039-4},
+      issn = {1474-760X},
+      journal = {Genome biology},
+      number = {1},
+      pages = {180},
+      pmid = {27566673},
+      publisher = {Genome Biology},
+      title = {{Streamlined analysis of duplex sequencing data with Du Novo.}},
+      url = {http://www.ncbi.nlm.nih.gov/pubmed/27566673},
+      volume = {17},
+      year = {2016}
+    }</citation>
+  </citations>
 </tool>
--- a/tool_dependencies.xml	Mon Sep 11 16:59:44 2017 -0400
+++ b/tool_dependencies.xml	Wed Oct 25 19:26:43 2017 -0400
@@ -9,13 +9,13 @@
   <package name="mafft" version="7.221">
     <repository changeset_revision="15974dd17515" name="mafft" owner="rnateam" toolshed="https://toolshed.g2.bx.psu.edu" />
   </package>
-  <package name="networkx" version="1.9">
-    <repository changeset_revision="83df321ad85e" name="package_networkx_1_9" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />
+  <package name="networkx" version="1.11">
+    <repository changeset_revision="e761775277c1" name="package_networkx_1_10" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />
   </package>
-  <package name="dunovo" version="0.8.1">
+  <package name="dunovo" version="2.0.6">
     <install version="1.0">
       <actions>
-        <action sha256sum="f85fd35ef67c8f76af0d556a4babe9acf7b8abdd8a77232d4f4763cc7de60eed" type="download_by_url">https://github.com/galaxyproject/dunovo/archive/v0.8.1.tar.gz</action>
+        <action sha256sum="3d628d297767f9836ab57ef738b2b29f588c36df8c43ec6814ea97e29da1d5ec" type="download_by_url">https://github.com/galaxyproject/dunovo/archive/v2.0.6.tar.gz</action>
         <action type="shell_command">make</action>
         <action type="move_directory_files">
           <source_directory>.</source_directory>