changeset 1:01a241476407 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaspades commit 9401451df4a985ef5686864eaadafa077ffc0877
author iuc
date Thu, 02 Mar 2017 16:04:56 -0500
parents e93c1a0678cd
children 05c394313b1c
files CHANGE README.md metaspades.xml spades.pl tool_dependencies.xml
diffstat 5 files changed, 100 insertions(+), 259 deletions(-) [+]
line wrap: on
line diff
--- a/CHANGE	Tue Aug 09 10:46:28 2016 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,4 +0,0 @@
-1.0
-===
- - Supports SPades 3.9
- - CHANGE: Improved memory consumption in metagenomic pipeline.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/README.md	Thu Mar 02 16:04:56 2017 -0500
@@ -0,0 +1,17 @@
+**License**
+
+SPAdes is developed by and copyrighted to Saint-Petersburg Academic University, and is released under GPLv2.
+
+This wrapper is copyrighted by Philip Mabon and is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with this program.  If not, see http://www.gnu.org/licenses/.
+
+**Acknowledgments**
+
+Original wrapper developed by Lionel Guy.
+
+Anton Korobeynikov greatly helped in understanding how SPAdes works, and integrated handy features into SPAdes.
+
+Nicola Soranzo fixed various bugs.
--- a/metaspades.xml	Tue Aug 09 10:46:28 2016 -0400
+++ b/metaspades.xml	Thu Mar 02 16:04:56 2017 -0500
@@ -1,139 +1,97 @@
-<tool id="metaspades" name="metaspades" version="1.0">
-  <description>genome assembler for metagenomics datasets</description>
-  <requirements>
-    <requirement type="package" version="3.9.0">spades</requirement>
-  </requirements>
-  <command interpreter="perl">spades.pl 
-    $out_contigs 
-    $out_contig_stats 
-    $out_scaffolds 
-    $out_scaffold_stats 
-    $out_log
-
-    ## if the first fileset is a paired-collection, use the key as the name
-    #if $files[0].file_type.type == "paired-collection":
-        $files[0].file_type.fastq_collection.name
-    #else:
-        NODE
-    #end if
+<tool id="metaspades" name="metaSPAdes" version="3.9.0">
+    <description>assembler for metagenomics datasets</description>
+    <requirements>
+        <requirement type="package" version="3.9.0">spades</requirement>
+    </requirements>
+    <stdio>
+        <exit_code range="1:" />
+    </stdio>
+    <command>
+    <![CDATA[
     ## A real command looks like: spades.py -k 21,33,55,77,99,127 --careful -1 Y.fastq.gz -2 X.fastq.gz -t 24 -o output
-    spades.py
-    ## Forces unzipped output, faster
-    --disable-gzip-output
-    --meta
-    $onlyassembler
-
-    -t \${GALAXY_SLOTS:-16}
-
+    spades.py -o . --disable-gzip-output --meta $onlyassembler -t \${GALAXY_SLOTS:-16}
     #if not $kmer_choice.auto_kmer_choice:
-    -k "$kmer_choice.kmers"    
+        -k "$kmer_choice.kmers"
     #end if
-
     ## Sequence files
-      #set num=1
-      #if str( $lib_type ) == "paired_end":
+    #set num=1
+    #if str( $lib_type ) == "paired_end":
         #set prefix = 'pe'
-      #end if
-      --$prefix$num-$orientation    
-      #for $file in $files
-	#if $file.file_type.type == "separate"
-          --$prefix$num-1 fastq:$file.file_type.fwd_reads
-          --$prefix$num-2 fastq:$file.file_type.rev_reads
+    #end if
+    --$prefix$num-$orientation
+    #for $file in $files
+        #if $file.file_type.type == "separate"
+            --$prefix$num-1 fastq:$file.file_type.fwd_reads
+            --$prefix$num-2 fastq:$file.file_type.rev_reads
         #elif $file.file_type.type == "interleaved"
-          --$prefix$num-12 fastq:$file.file_type.interleaved_reads
+            --$prefix$num-12 fastq:$file.file_type.interleaved_reads
         #elif $file.file_type.type == "paired-collection"
-        --$prefix$num-1 fastq:$file.file_type.fastq_collection.forward
-        --$prefix$num-2 fastq:$file.file_type.fastq_collection.reverse
+            --$prefix$num-1 fastq:$file.file_type.fastq_collection.forward
+            --$prefix$num-2 fastq:$file.file_type.fastq_collection.reverse
         #end if
-      #end for
-
-
-  </command>
-  <inputs>
-    <param name="onlyassembler" type="boolean" truevalue="--only-assembler" falsevalue="" checked="False" label="Run only assembly? (without read error correction)" />
-    <conditional name="kmer_choice">
-      <param name="auto_kmer_choice" type="boolean" checked="False" truevalue="true" falsevalue="false" label="Automatically choose k-mer values" help="k-mer choices can be chosen by SPAdes instead of being entered manually" />
-      <when value="false">
-        <param name="kmers" type="text" label="K-mers to use, separated by commas" value="21,33,55" help="Comma-separated list of k-mer sizes to be used (all values must be odd, less than 128, listed in ascending order, and smaller than the read length). The default value is 21,33,55." />
-      </when>
-      <when value="true"> </when>
-    </conditional>    
-
-    <!-- Reads -->
-
-      <param name="lib_type" type="select" label="Library type">
-	<option value="paired_end">Paired-end</option>
-      </param>
-      <param name="orientation" type="select" label="Orientation">
-	<option value="fr" selected="true">-> &lt;- (fr)</option>
-	<option value="rf">&lt;- -> (rf)</option>
-	<option value="ff">-> -> (ff)</option>
-      </param>
-      <repeat name="files" title="Files" min="1">
-	<conditional name="file_type">
-	  <param name="type" type="select" label="Select file format">
-	    <option value="separate">Separate input files</option>
-	    <option value="interleaved">Interleaved files</option>
-            <option value="paired-collection">Paired List Collection</option>
-	  </param>
-	  <when value="separate">
-	    <param name="fwd_reads" type="data" format="fastq" label="Forward reads" help="FASTQ format" />
-	    <param name="rev_reads" type="data" format="fastq" label="Reverse reads" help="FASTQ format" />
-	  </when>
-	  <when value="interleaved">
-	    <param name="interleaved_reads" type="data" format="fastq" label="Interleaved paired reads" help="FASTQ format" />
-	  </when>
-          <when value="paired-collection">
-            <param name="fastq_collection" type="data_collection" label="Paired-end reads collection" optional="false" format="fastq" collection_type="paired" help="FASTQ format" /> 
-          </when>
-	</conditional>
-      </repeat>
-
-
-  </inputs>
-  <outputs>
-    <data name="out_contigs" format="fasta" label="SPAdes contigs (fasta)" />
-    <data name="out_contig_stats" format="tabular" label="SPAdes contig stats" />
-    <data name="out_scaffolds" format="fasta" label="SPAdes scaffolds (fasta)" />
-    <data name="out_scaffold_stats" format="tabular" label="SPAdes scaffold stats" />
-    <data name="out_log" format="txt" label="SPAdes log" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="sc" value="false" />
-      <param name="careful" value="false" />
-      <param name="kmers" value="33,55" />
-      <param name="lib_type" value="paired_end" />
-      <param name="fwd_reads" value="ecoli_1K_1.fq" ftype="fastq" />
-      <param name="rev_reads" value="ecoli_1K_2.fq" ftype="fastq" />
-      <output name="out_contigs" file="reference_1K.fa" ftype="fasta" compare="re_match" lines_diff="1" />
-    </test> 
- </tests>
-  <help>
+    #end for
+    ]]>
+    </command>
+    <inputs>
+        <param name="onlyassembler" type="boolean" truevalue="--only-assembler" falsevalue="" checked="False" label="Run only assembly? (without read error correction)" />
+        <conditional name="kmer_choice">
+            <param name="auto_kmer_choice" type="boolean" checked="False" truevalue="true" falsevalue="false" label="Automatically choose k-mer values" help="k-mer choices can be chosen by SPAdes instead of being entered manually" />
+            <when value="false">
+                <param name="kmers" type="text" label="K-mers to use, separated by commas" value="21,33,55" help="Comma-separated list of k-mer sizes to be used (all values must be odd, less than 128, listed in ascending order, and smaller than the read length). The default value is 21,33,55." />
+            </when>
+            <when value="true" />
+        </conditional>
+        <param name="lib_type" type="select" label="Library type">
+            <option value="paired_end">Paired-end</option>
+        </param>
+        <param label="Orientation" name="orientation" type="select">
+            <option selected="true" value="fr"><![CDATA[-> <- (fr)]]></option>
+            <option value="rf"><![CDATA[<- -> (rf)]]></option>
+            <option value="ff"><![CDATA[-> -> (ff)]]></option>
+        </param>
+        <repeat name="files" title="Files" min="1">
+            <conditional name="file_type">
+                <param name="type" type="select" label="Select file format">
+                    <option value="separate">Separate input files</option>
+                    <option value="interleaved">Interleaved files</option>
+                    <option value="paired-collection">Paired List Collection</option>
+                </param>
+                <when value="separate">
+                    <param name="fwd_reads" type="data" format="fastq" label="Forward reads" help="FASTQ format" />
+                    <param name="rev_reads" type="data" format="fastq" label="Reverse reads" help="FASTQ format" />
+                </when>
+                <when value="interleaved">
+                    <param name="interleaved_reads" type="data" format="fastq" label="Interleaved paired reads" help="FASTQ format" />
+                </when>
+                <when value="paired-collection">
+                    <param name="fastq_collection" type="data_collection" label="Paired-end reads collection" format="fastq" collection_type="paired" help="FASTQ format" />
+                </when>
+            </conditional>
+        </repeat>
+    </inputs>
+    <outputs>
+        <data name="out_contigs" format="fasta" from_work_dir="contigs.fasta" label="SPAdes contigs (fasta)" />
+        <data name="out_scaffolds" format="fasta" from_work_dir="scaffolds.fasta" label="SPAdes scaffolds (fasta)" />
+        <data name="out_log" format="txt" from_work_dir="spades.log" label="SPAdes log" />
+    </outputs>
+    <tests>
+        <test>
+            <param name="sc" value="false" />
+            <param name="careful" value="false" />
+            <param name="kmers" value="33,55" />
+            <param name="lib_type" value="paired_end" />
+            <param name="fwd_reads" value="ecoli_1K_1.fq" ftype="fastq" />
+            <param name="rev_reads" value="ecoli_1K_2.fq" ftype="fastq" />
+            <output name="out_contigs" file="reference_1K.fa" ftype="fasta" compare="re_match" lines_diff="1" />
+        </test>
+    </tests>
+    <help>
+<![CDATA[
 **What it does**
 
 SPAdes – St. Petersburg genome assembler – is intended for both standard isolates and single-cell MDA bacteria assemblies. See http://bioinf.spbau.ru/en/spades for more details on SPAdes.
-
-This wrapper runs SPAdes 3.9, collects the output, and throws away all the temporary files. It also produces a tab file with contig names, length and coverage. 
-
-**License**
-
-SPAdes is developed by and copyrighted to Saint-Petersburg Academic University, and is released under GPLv2.
-
-This wrapper is copyrighted by Philip Mabon and is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with this program.  If not, see http://www.gnu.org/licenses/.
-
-** Acknowledgments **
-
-Original wrapper developed by Lionel Guy.
-
-Anton Korobeynikov greatlty helped understanding how SPAdes work, and integrated handy features into SPAdes.
-
-Nicola Soranzo fixed various bugs.
-  </help>
+]]>
+    </help>
     <citations>
         <citation type="doi">10.1089/cmb.2012.0021</citation>
     </citations>
--- a/spades.pl	Tue Aug 09 10:46:28 2016 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,124 +0,0 @@
-#!/usr/bin/env perl
-## A wrapper script to call spades.py and collect its output
-use strict;
-use warnings;
-use File::Temp qw/ tempfile tempdir /;
-use File::Copy;
-use Getopt::Long;
-
-# Parse arguments
-my ($out_contigs_file,
-    $out_contigs_stats,
-    $out_scaffolds_file,
-    $out_scaffolds_stats,
-    $out_log_file,
-    $new_name,
-    @sysargs) = @ARGV;
-
-
-my $output_dir = 'output_dir';
-
-# Create log handle
-open my $log, '>', $out_log_file or die "Cannot write to $out_log_file: $?\n";
-
-# Run program
-runSpades(@sysargs);
-collectOutput($new_name);
-extractCoverageLength($out_contigs_file, $out_contigs_stats);
-extractCoverageLength($out_scaffolds_file, $out_scaffolds_stats);
-print $log "Done\n";
-close $log;
-exit 0;
-
-# Run spades
-sub runSpades {
-    my $cmd = join(" ", @_) . " -o $output_dir";
-    my $return_code = system($cmd);
-    if ($return_code) {
-	print $log "Failed with code $return_code\nCommand $cmd\nMessage: $?\n";
-	die "Failed with code $return_code\nCommand $cmd\nMessage: $?\n";
-    }
-    return 0;
-}
-
-# Collect output
-sub collectOutput{
-    my ($new_name) = @_;
-    
-    # To do: check that the files are there
-    # Collects output
-    if ( not -e "$output_dir/contigs.fasta") {
-        die "Could not find contigs.fasta file\n";
-    }
-    if ( not -e "$output_dir/scaffolds.fasta") {
-        die "Could not find scaffolds.fasta file\n";
-    }
-
-    #if a new name is given for the contigs and scaffolds, change them before moving them
-    if ( $new_name ne 'NODE') {
-        renameContigs($new_name);
-    }
-    else {
-        move "$output_dir/contigs.fasta", $out_contigs_file;
-        move "$output_dir/scaffolds.fasta", $out_scaffolds_file;        
-    }
-
-    
-
-    open LOG, '<', "$output_dir/spades.log" 
-	or die "Cannot open log file $output_dir/spades.log: $?";
-    print $log $_ while (<LOG>);
-    return 0;
-}
-
-#Change name in contig and scaffolds file
-sub renameContigs{
-    my ($name) = @_;
-
-    open my $in, '<',"$output_dir/contigs.fasta" or die $!;
-    open my $out,'>', $out_contigs_file;
-
-    while ( my $line = <$in>) {
-        #remove the NODE_ so we can rebuilt the display_id with our contig name with the contig number.
-        #also move the remainder of the length
-        if ( $line =~ />NODE_(\d+)_(.+)/) {
-            $line = ">$name" . "_$1 $2\n";
-        }
-        print $out $line;
-    }
-    close $in;
-    close $out;
-    
-
-    open $in, '<',"$output_dir/scaffolds.fasta" or die $!;
-    open $out,'>', $out_scaffolds_file;
-
-    while ( my $line = <$in>) {
-        #remove the NODE_ so we can rebuilt the display_id with our contig name with the contig number.
-        #also move the remainder of the length
-        if ( $line =~ />NODE_(\d+)_(.+)/) {
-            $line = ">$name" . "_$1 $2\n";
-        }
-        print $out $line;
-    }
-    close $in;
-    close $out;
-
-}
-
-
-# Extract
-sub extractCoverageLength{
-    my ($in, $out) = @_;
-    open FASTA, '<', $in or die $!;
-    open TAB, '>', $out or die $!;
-    print TAB "#name\tlength\tcoverage\n";
-    while (<FASTA>){
-	next unless /^>/;
-	chomp;
-	die "Not all elements found in $_\n" if (! m/^>(NODE|\S+)_(\d+)(?:_|\s)length_(\d+)_cov_(\d+\.*\d*)/);
-	my ($name,$n, $l, $cov) = ($1,$2, $3, $4);
-	print TAB "$name" . "_$n\t$l\t$cov\n";
-    }
-    close TAB;
-}
--- a/tool_dependencies.xml	Tue Aug 09 10:46:28 2016 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,6 +0,0 @@
-<?xml version="1.0"?>
-<tool_dependency>
-    <package name="spades" version="3.9.0">
-        <repository changeset_revision="d8c8c3dc8f9a" name="package_spades_3_9_0" owner="nml" toolshed="https://toolshed.g2.bx.psu.edu" />
-    </package>
-</tool_dependency>