Mercurial > repos > lionelguy > spades
changeset 10:7d72faef9af0 draft
Uploaded
author | takadonet |
---|---|
date | Fri, 10 Oct 2014 14:40:43 -0400 |
parents | 1f1943b29266 |
children | 1027f26f52d5 |
files | CHANGE tools/spades_3_0/filter_spades_output.pl tools/spades_3_0/filter_spades_output.xml tools/spades_3_0/plot_spades_stats.xml tools/spades_3_0/r_wrapper.sh tools/spades_3_0/spades.pl tools/spades_3_0/spades.xml tools/spades_3_0/tool_dependencies.xml tools/spades_3_1_1/filter_spades_output.pl tools/spades_3_1_1/filter_spades_output.xml tools/spades_3_1_1/plot_spades_stats.xml tools/spades_3_1_1/r_wrapper.sh tools/spades_3_1_1/spades.pl tools/spades_3_1_1/spades.xml tools/spades_3_1_1/tool_dependencies.xml |
diffstat | 15 files changed, 632 insertions(+), 553 deletions(-) [+] |
line wrap: on
line diff
--- a/CHANGE Wed Feb 05 05:21:29 2014 -0500 +++ b/CHANGE Fri Oct 10 14:40:43 2014 -0400 @@ -1,3 +1,10 @@ +0.9 +=== +- Supports SPAdes 3.1.1 +- Supports Mate-pair only assembly with high-quality libraries +- Added support for List Paired Collection for files in a library. +- Renaming of contig names in scaffolds and contig files if List Paired Collection given + 0.7 === (thanks to Nicola Soranzo for fixing these bugs) @@ -14,4 +21,4 @@ new input format - Adds licensing information -(started at version 0.6) \ No newline at end of file +(started at version 0.6)
--- a/tools/spades_3_0/filter_spades_output.pl Wed Feb 05 05:21:29 2014 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,106 +0,0 @@ -#!/usr/bin/perl -w - -=head1 SYNOPSIS - -filter_spades_output.pl - Filters contigs or scaffolds based on contig length and coverage. - -=head1 USAGE - -filter_spades_output.pl [-c|--coverage-cutoff] [-l|--length-cutoff] [-o|--filtered-out out.fasta] -t|--tab stats.tab seqs.fasta - -=head1 INPUT - -=head2 [-c|--coverage-cutoff] - -Mininum coverage. Contigs with lower coverage will be discarded. Default 10. - -=head2 [-l|--length-cutoff] - -Mininum coverage. Smaller ontigs will be discarded. Default 500. - -=head2 -t|--tab stats.tab - -A tabular file, with three columns: contig name, length, and coverage: - -NODE_1 31438 24.5116 -NODE_2 31354 2316.96 -NODE_3 26948 82.3294 - -Such a file is produced by spades.xml. Contigs should be in the same order as in the fasta file. - -=head2 [-o|--filtered-out out.fasta] - -If specified, filtered out sequences will be written to this file. - -=head2 seqs.fasta - -Sequences in fasta format. Start of IDs must match ids in the tabular file. - -=head1 OUTPUT - -A fasta file on stdout. - -=head1 AUTHOR - -Lionel Guy (lionel.guy@icm.uu.se) - -=head1 DATE - -Thu Aug 29 13:51:13 CEST 2013 - -=cut - -# libraries -use strict; -use Getopt::Long; -use Bio::SeqIO; - -my $coverage_co = 10; -my $length_co = 500; -my $out_filtered; -my $tab_file; - -GetOptions( - 'c|coverage-cutoff=s' => \$coverage_co, - 'l|length-cutoff=s' => \$length_co, - 'o|filtered-out=s' => \$out_filtered, - 't|tab=s' => \$tab_file, -); -my $fasta_file = shift(@ARGV); -die ("No tab file specified") unless ($tab_file); -die ("No fasta file specified") unless ($fasta_file); - -## Read tab file, discard rows with comments -open TAB, '<', $tab_file or die "$?"; -my @stats; -while (<TAB>){ - chomp; - push @stats, $_ unless (/^#/); -} - -## Read fasta -my $seq_in = Bio::SeqIO->new(-file => $fasta_file, - -format => 'fasta'); -my $seq_out = Bio::SeqIO->new(-fh => \*STDOUT, - -format => 'fasta'); -my $seq_out_filt = Bio::SeqIO->new(-file => ">$out_filtered", - -format => 'fasta') if ($out_filtered); -while (my $seq = $seq_in->next_seq){ - my $stat = shift @stats; - die "Less rows in tab than sequences in seq file" unless $stat; - my ($id_tab, $length, $coverage) = split(/\t+/, $stat); - die "id, length or coverate not defined at $stat\n" - unless ($id_tab && $length && $coverage); - my $id_seq = $seq->id; - die "Unmatched ids $id_seq and $id_tab\n" unless ($id_seq =~ /^$id_tab/i); - if ($length >= $length_co && $coverage >= $coverage_co){ - $seq_out->write_seq($seq); - } elsif ($out_filtered){ - $seq_out_filt->write_seq($seq); - } else { - # do nothing - } -} -die "More rows in tab than sequences in seq file" if (scalar(@stats) > 0); -exit 0; -
--- a/tools/spades_3_0/filter_spades_output.xml Wed Feb 05 05:21:29 2014 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,33 +0,0 @@ -<tool id="filter_spades_output" name="Filter SPAdes output" version="0.1"> - <description>remove low coverage and short contigs/scaffolds</description> - <command interpreter="perl">filter_spades_output.pl - --coverage-cutoff $coverage_co - --length-cutoff $length_co - #if $keep_leftover - --filtered-out $filtered_out - #end if - --tab $stats_in - $fasta_in > $fasta_output - </command> - - <inputs> - <param name="fasta_in" type="data" format="fasta" label="Sequences" help="Contigs or scaffolds. Make sure you input the corresponding stat file" /> - <param name="stats_in" type="data" format="tabular" label="Contig stats" /> - <param name="length_co" type="integer" value="1000" min="0" label="Length cut-off" help="Contigs with length under that value are shown in red" /> - <param name="coverage_co" type="integer" value="10" min="0" label="Coverage cut-off" help="Contigs with length under that value are shown in red" /> - <param name="keep_leftover" type="boolean" checked="false" label="Save filtered-out sequences?" /> - </inputs> - <outputs> - <data format="fasta" name="fasta_output" label="Filtered sequences" /> - <data format="fasta" name="filtered_out" label="Discarded sequences"> - <filter>keep_leftover == "true"</filter> - </data> - </outputs> - <help> -**What it does** - -Using the output of SPAdes (a fasta and a stats file, either from contigs or scaffolds), it filters the fasta files, discarding all sequences that are under a given length or under a given coverage. - -Typically, this is used to discard short contigs, or contaminations. To display a coverage vs. length plot, use the "SPAdes stats" tool in the same package. - </help> -</tool>
--- a/tools/spades_3_0/plot_spades_stats.xml Wed Feb 05 05:21:29 2014 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,80 +0,0 @@ -<tool id="plot_spades_stats" name="SPAdes stats" version="0.1"> - <description>coverage vs. length plot</description> - <requirements> - <requirement type="package">R</requirement> - </requirements> - <command interpreter="bash">r_wrapper.sh $script_file</command> - - <inputs> - <param name="input_scaffolds" type="data" format="tabular" label="Scaffold stats"/> - <param name="input_contigs" type="data" format="tabular" label="Contig stats"/> - <param name="length_co" type="integer" value="1000" min="0" label="Length cut-off" help="Contigs with length under that value are shown in red"/> - <param name="coverage_co" type="integer" value="10" min="0" label="Coverage cut-off" help="Contigs with length under that value are shown in red"/> - </inputs> - <configfiles> - <configfile name="script_file"> -## Setup R error handling to go to stderr -options( show.error.messages=F, - error = function () { - cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) -} ) -files = c("${input_contigs}", "${input_scaffolds}") -types = c("Contigs", "Scaffolds") - -## Start plotting device -png("${out_file}", w=500, h=1000) -par(mfrow=c(2,1)) - -## Loop over the two files -for (i in 1:length(types)){ - seqs = read.table(files[i], header=FALSE, comment.char="#") - colnames = c("name", "length", "coverage") - names(seqs) = colnames - - ## Stats over all sequences - sl_all = sort(seqs\$length, decreasing=TRUE) - cs_all = cumsum(sl_all) - s_all = sum(seqs\$length) - n50_idx_all = which.min(sl_all[cs_all < 0.5*s_all]) - n90_idx_all = which.min(sl_all[cs_all < 0.9*s_all]) - n50_all = sl_all[n50_idx_all] - n90_all = sl_all[n90_idx_all] - - ## Filter short seqs, redo stats - seqs_filt = seqs[seqs\$length >= ${length_co} & seqs\$coverage >= ${coverage_co},] - if (nrow(seqs_filt) > 0){ - sl_filt = sort(seqs_filt\$length, decreasing=TRUE) - cs_filt = cumsum(sl_filt) - s_filt = sum(seqs_filt\$length) - n50_idx_filt = which.min(sl_filt[cs_filt < 0.5*s_filt]) - n90_idx_filt = which.min(sl_filt[cs_filt < 0.9*s_filt]) - n50_filt = sl_filt[n50_idx_filt] - n90_filt = sl_filt[n90_idx_filt] - } - seqs_bad = seqs[seqs\$length < ${length_co} | seqs\$coverage < ${coverage_co},] - - ## Length vs coverage - plot(length~coverage, data=seqs, log="xy", type="n", main=paste(types[i], ": coverage vs. length", sep=""), xlab="Coverage", ylab="Length") - if (nrow(seqs_bad) > 0){ - points(length~coverage, data=seqs_bad, cex=0.5, col="red") - } - if (nrow(seqs_filt) > 0){ - points(length~coverage, data=seqs_filt, cex=0.5, col="black") - } - abline(v=${coverage_co}, h=${length_co}, lty=2, col=grey(0.3)) - legend(x="topleft", legend=c("Before/after filtering", paste(c("N50: ", "N90: ", "Median cov.: "), c(n50_all, n90_all, round(median(seqs\$coverage))), rep("/", 3), c(n50_filt, n90_filt, round(median(seqs_filt\$coverage))), sep="")), cex=0.8) -} -dev.off() - </configfile> - </configfiles> - <outputs> - <data format="png" name="out_file" /> - </outputs> - <help> -**What it does** - -Using the output of SPAdes (a pair of fasta file and stat file for each of the contigs and scaffolds), it produces a coverage vs. contig plot. Each dot represent a contig/scaffold. Given a coverage and a length cutoff, sequences that do not meet those criteria are shown in red. Some statistics are also given (N50, N90, median contig/scaffold length) both before and after filtering. - -Use the "filter SPAdes output" tool to actually filter sequences. - </help> -</tool> \ No newline at end of file
--- a/tools/spades_3_0/r_wrapper.sh Wed Feb 05 05:21:29 2014 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,23 +0,0 @@ -#!/bin/sh - -### Run R providing the R script in $1 as standard input and passing -### the remaining arguments on the command line - -# Function that writes a message to stderr and exits -function fail -{ - echo "$@" >&2 - exit 1 -} - -# Ensure R executable is found -which R > /dev/null || fail "'R' is required by this tool but was not found on path" - -# Extract first argument -infile=$1; shift - -# Ensure the file exists -test -f $infile || fail "R input file '$infile' does not exist" - -# Invoke R passing file named by first argument to stdin -R --vanilla --slave $* < $infile
--- a/tools/spades_3_0/spades.pl Wed Feb 05 05:21:29 2014 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,84 +0,0 @@ -#!/usr/bin/env perl -## A wrapper script to call spades.py and collect its output -use strict; -use warnings; -use File::Temp qw/ tempfile tempdir /; -use File::Copy; -use Getopt::Long; - -# Parse arguments -my ($out_contigs_file, - $out_contigs_stats, - $out_scaffolds_file, - $out_scaffolds_stats, - $out_log_file, - @sysargs) = @ARGV; - -## GetOptions not compatible with parsing the rest of the arguments in an array. -## Keeping the not-so-nice parse-in-one-go method, without named arguments. -# GetOptions( -# 'contigs-file=s' => \$out_contigs_file, -# 'contigs-stats=s' => \$out_contigs_stats, -# 'scaffolds-file=s' => \$out_scaffolds_file, -# 'scaffolds-stats=s' => \$out_scaffolds_stats, -# 'out_log_file=s' => \$out_log_file, -# ); - -# my @sysargs = @ARGV; - -# Create temporary folder to store files, delete after use -#my $output_dir = tempdir( CLEANUP => 0 ); -my $output_dir = 'output_dir'; -# Link "dat" files as fastq, otherwise spades complains about file format - -# Create log handle -open my $log, '>', $out_log_file or die "Cannot write to $out_log_file: $?\n"; - -# Run program -# To do: record time -&runSpades(@sysargs); -&collectOutput(); -&extractCoverageLength($out_contigs_file, $out_contigs_stats); -&extractCoverageLength($out_scaffolds_file, $out_scaffolds_stats); -print $log "Done\n"; -close $log; -exit 0; - -# Run spades -sub runSpades { - my $cmd = join(" ", @_) . " -o $output_dir"; - my $return_code = system($cmd); - if ($return_code) { - print $log "Failed with code $return_code\nCommand $cmd\nMessage: $?\n"; - die "Failed with code $return_code\nCommand $cmd\nMessage: $?\n"; - } - return 0; -} - -# Collect output -sub collectOutput{ - # To do: check that the files are there - # Collects output - move "$output_dir/contigs.fasta", $out_contigs_file; - move "$output_dir/scaffolds.fasta", $out_scaffolds_file; - open LOG, '<', "$output_dir/spades.log" - or die "Cannot open log file $output_dir/spades.log: $?"; - print $log $_ while (<LOG>); - return 0; -} - -# Extract -sub extractCoverageLength{ - my ($in, $out) = @_; - open FASTA, '<', $in or die $!; - open TAB, '>', $out or die $!; - print TAB "#name\tlength\tcoverage\n"; - while (<FASTA>){ - next unless /^>/; - chomp; - die "Not all elements found in $_\n" if (! m/^>NODE_(\d+)_length_(\d+)_cov_(\d+\.*\d*)/); - my ($n, $l, $cov) = ($1, $2, $3); - print TAB "NODE_$n\t$l\t$cov\n"; - } - close TAB; -}
--- a/tools/spades_3_0/spades.xml Wed Feb 05 05:21:29 2014 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,193 +0,0 @@ -<tool id="spades" name="spades" version="0.8"> - <description>SPAdes genome assembler for regular and single-cell projects</description> - <requirements> - <requirement type="package" version="3.0.0">spades</requirement> - </requirements> - <command interpreter="perl">spades.pl - $out_contigs - $out_contig_stats - $out_scaffolds - $out_scaffold_stats - $out_log - ## A real command looks like: spades.py -k 21,33,55,77,99,127 --careful -1 Y.fastq.gz -2 X.fastq.gz -t 24 -o output - spades.py - ## Forces unzipped output, faster - --disable-gzip-output - $sc - $onlyassembler - $careful - -t \${GALAXY_SLOTS:-16} - -k "$kmers" - $iontorrent - ## Sequence files, libraries - #for $i, $library in enumerate( $libraries ) - #set num=$i+1 - #if str( $library.lib_type ) == "paired_end": - #set prefix = 'pe' - #else: - #set prefix = 'mp' - #end if - --$prefix$num-$library.orientation - #for $file in $library.files - #if $file.file_type.type == "separate" - --$prefix$num-1 fastq:$file.file_type.fwd_reads - --$prefix$num-2 fastq:$file.file_type.rev_reads - #elif $file.file_type.type == "interleaved" - --$prefix$num-12 fastq:$file.file_type.interleaved_reads - #elif $file.file_type.type == "unpaired" - --$prefix$num-s fastq:$file.file_type.unpaired_reads - #end if - #end for - #end for - ## PacBio reads - #for $i, $pacbiolib in enumerate( $pacbio ) - --pacbio fastq:$pacbiolib.pacbio_reads - #end for - ## Sanger - #for $i, $sangerlib in enumerate( $sanger ) - --sanger $sangerlib.file_type.type:$sangerlib.file_type.sanger_reads - #end for - ## Contigs - #for $i, $trustedcontigs in enumerate( $trustedcontigs ) - --trusted-contigs $trustedcontigs.file_type.type:$trustedcontigs.file_type.trusted_contigs - #end for - #for $i, $untrustedcontigs in enumerate( $untrustedcontigs ) - --untrusted-contigs $untrustedcontigs.file_type.type:$untrustedcontigs.file_type.untrusted_contigs - #end for - </command> - <inputs> - <param name="sc" type="boolean" truevalue="--sc" falsevalue="" label="Single-cell?" help="This option is required for MDA (single-cell) data."> - <option value="false">No</option> - <option value="true">Yes</option> - </param> - <param name="onlyassembler" type="boolean" truevalue="--only-assembler" falsevalue="" checked="False" label="Run only assembly? (without read error correction)" /> - <param name="careful" type="boolean" truevalue="--careful" falsevalue="" checked="True" label="Careful correction?" help="Tries to reduce number of mismatches and short indels. Also runs MismatchCorrector – a post processing tool, which uses BWA tool (comes with SPAdes)." /> - <param name="kmers" type="text" label="K-mers to use, separated by commas" value="21,33,55" help="Comma-separated list of k-mer sizes to be used (all values must be odd, less than 128, listed in ascending order, and smaller than the read length). The default value is 21,33,55." > - </param> - <param name="iontorrent" type="boolean" truevalue="--iontorrent" falsevalue="" checked="False" label="Libraries are IonTorrent reads?" /> - <!-- Reads --> - <repeat name="libraries" title="Libraries" min="1" help="It is not possible to specify only mate-pair libraries. Scaffolds are not produced if neither a paired-end nor a mate-pair library is provided."> - <param name="lib_type" type="select" label="Library type"> - <option value="paired_end">Paired-end / Single reads</option> - <option value="mate_paired">Mate pairs</option> - </param> - <param name="orientation" type="select" label="Orientation"> - <option value="fr" selected="true">-> <- (fr)</option> - <option value="rf"><- -> (rf)</option> - <option value="ff">-> -> (ff)</option> - </param> - <repeat name="files" title="Files" min="1"> - <conditional name="file_type"> - <param name="type" type="select" label="Select file format"> - <option value="separate">Separate input files</option> - <option value="interleaved">Interleaved files</option> - <option value="unpaired">Unpaired/Single reads</option> - </param> - <when value="separate"> - <param name="fwd_reads" type="data" format="fastq" label="Forward reads" help="FASTQ format" /> - <param name="rev_reads" type="data" format="fastq" label="Reverse reads" help="FASTQ format" /> - </when> - <when value="interleaved"> - <param name="interleaved_reads" type="data" format="fastq" label="Interleaved paired reads" help="FASTQ format" /> - </when> - <when value="unpaired"> - <param name="unpaired_reads" type="data" format="fastq" label="Unpaired reads" help="FASTQ format" /> - </when> - </conditional> - </repeat> - </repeat> - <!-- PacBio --> - <repeat name="pacbio" title="PacBio CLR reads"> - <param name="pacbio_reads" type="data" format="fastq" label="PacBio CLR reads." help="FASTQ format. For PacBio pre-corrected or CCS reads, use single reads above." /> - </repeat> - <!-- Sanger --> - <repeat name="sanger" title="Sanger reads"> - <conditional name="file_type"> - <param name="type" type="select" label="Select file format" help="No read correction is done on Sanger reads, no need to provide quality information."> - <option value="fasta">fasta</option> - <option value="fastq">fastq</option> - </param> - <when value="fasta"> - <param name="sanger_reads" type="data" format="fasta" label="Sanger reads" help="FASTA format" /> - </when> - <when value="fastq"> - <param name="sanger_reads" type="data" format="fastq" label="Sanger reads" help="FASTQ format" /> - </when> - </conditional> - </repeat> - <!-- Contigs --> - <repeat name="trustedcontigs" title="Trusted contigs" help="Reliable contigs of the same genome, which are likely to have no misassemblies and small rate of other errors (e.g. mismatches and indels). This option is not intended for contigs of the related species."> - <conditional name="file_type"> - <param name="type" type="select" label="Select file format"> - <option value="fasta">fasta</option> - <option value="fastq">fastq</option> - </param> - <when value="fasta"> - <param name="trusted_contigs" type="data" format="fasta" label="Trusted contigs" help="FASTA format" /> - </when> - <when value="fastq"> - <param name="trusted_contigs" type="data" format="fastq" label="Trusted contigs" help="FASTQ format" /> - </when> - </conditional> - </repeat> - <repeat name="untrustedcontigs" title="Untrusted contigs" help="Contigs of the same genome, quality of which is average or unknown. Contigs of poor quality can be used but may introduce errors in the assembly. This option is also not intended for contigs of the related species."> - <conditional name="file_type"> - <param name="type" type="select" label="Select file format"> - <option value="fasta">fasta</option> - <option value="fastq">fastq</option> - </param> - <when value="fasta"> - <param name="untrusted_contigs" type="data" format="fasta" label="Untrusted contigs" help="FASTA format" /> - </when> - <when value="fastq"> - <param name="untrusted_contigs" type="data" format="fastq" label="Untrusted contigsz" help="FASTQ format" /> - </when> - </conditional> - </repeat> - </inputs> - <outputs> - <data name="out_contigs" format="fasta" label="SPAdes contigs (fasta)" /> - <data name="out_contig_stats" format="tabular" label="SPAdes contig stats" /> - <data name="out_scaffolds" format="fasta" label="SPAdes scaffolds (fasta)" /> - <data name="out_scaffold_stats" format="tabular" label="SPAdes scaffold stats" /> - <data name="out_log" format="txt" label="SPAdes log" /> - </outputs> - <!-- <tests> - <test> - <param name="sc" value="false" /> - <param name="careful" value="false" /> - <param name="kmers" value="33,55" /> - <param name="type" value="pairedend" /> - <param name="fwd_reads" value="ecoli_1K_1.fq" ftype="fastq" /> - <param name="rev_reads" value="ecoli_1K_2.fq" ftype="fastq" /> - <output name="out_contigs" file="reference_1K.fa" ftype="fasta" compare="re_match" lines_diff="1" /> - </test> - </tests> --> - <help> -**What it does** - -SPAdes – St. Petersburg genome assembler – is intended for both standard isolates and single-cell MDA bacteria assemblies. See http://bioinf.spbau.ru/en/spades for more details on SPAdes. - -This wrapper runs SPAdes 3.0.0, collects the output, and throws away all the temporary files. It also produces a tab file with contig names, length and coverage. - -**SPAdes citation** - -Anton Bankevich, Sergey Nurk, Dmitry Antipov, Alexey A. Gurevich, Mikhail Dvorkin, Alexander S. Kulikov, Valery M. Lesin, Sergey I. Nikolenko, Son Pham, Andrey D. Prjibelski, Alexey V. Pyshkin, Alexander V. Sirotkin, Nikolay Vyahhi, Glenn Tesler, Max A. Alekseyev, and Pavel A. Pevzner. Journal of Computational Biology. May 2012, 19(5): 455-477. doi:10.1089/cmb.2012.0021. - -**License** - -SPAdes is developed by and copyrighted to Saint-Petersburg Academic University, and is released under GPLv2. - -This wrapper is copyrighted by Lionel Guy, and is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. - -This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with this program. If not, see http://www.gnu.org/licenses/. - -** Acknowledgments ** - -Anton Korobeynikov greatlty helped understanding how SPAdes work, and integrated handy features into SPAdes. - -Nicola Soranzo fixed various bugs. - </help> -</tool>
--- a/tools/spades_3_0/tool_dependencies.xml Wed Feb 05 05:21:29 2014 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,33 +0,0 @@ -<?xml version="1.0"?> -<tool_dependency> - <package name="spades" version="3.0.0"> - <install version="1.0"> - <actions> - <action type="download_by_url">http://spades.bioinf.spbau.ru/release3.0.0/SPAdes-3.0.0-Linux.tar.gz</action> - <!-- Copying the whole bin folder --> - <action type="make_directory">$INSTALL_DIR/bin</action> - <action type="make_directory">$INSTALL_DIR/share</action> - <action type="move_directory_files"> - <source_directory>bin</source_directory> - <destination_directory>$INSTALL_DIR/bin</destination_directory> - </action> - <action type="move_directory_files"> - <source_directory>share</source_directory> - <destination_directory>$INSTALL_DIR/share</destination_directory> - </action> - <!-- This is required to have spades accept .dat files --> - <!-- <action type="shell_command">sed -i -e "s/\('\.fa', '\.fasta', '\.fq', '\.fastq', '\.gz'\)/\\1, '.dat'/" $INSTALL_DIR/share/spades/spades_pipeline/support.py</action> --> - <!-- <action type="set_environment"> --> - <!-- <environment_variable name="PATH" action="prepend_to">$INSTALL_DIR/bin</environment_variable> --> - <!-- </action> --> - </actions> - </install> - <readme> -This installs SPAdes 3.0.0. - -See manual here http://spades.bioinf.spbau.ru/release3.0.0/manual.html -See also here http://bioinf.spbau.ru/en/spades - </readme> - </package> -</tool_dependency> -
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/spades_3_1_1/filter_spades_output.pl Fri Oct 10 14:40:43 2014 -0400 @@ -0,0 +1,106 @@ +#!/usr/bin/env perl -w + +=head1 SYNOPSIS + +filter_spades_output.pl - Filters contigs or scaffolds based on contig length and coverage. + +=head1 USAGE + +filter_spades_output.pl [-c|--coverage-cutoff] [-l|--length-cutoff] [-o|--filtered-out out.fasta] -t|--tab stats.tab seqs.fasta + +=head1 INPUT + +=head2 [-c|--coverage-cutoff] + +Mininum coverage. Contigs with lower coverage will be discarded. Default 10. + +=head2 [-l|--length-cutoff] + +Mininum coverage. Smaller ontigs will be discarded. Default 500. + +=head2 -t|--tab stats.tab + +A tabular file, with three columns: contig name, length, and coverage: + +NODE_1 31438 24.5116 +NODE_2 31354 2316.96 +NODE_3 26948 82.3294 + +Such a file is produced by spades.xml. Contigs should be in the same order as in the fasta file. + +=head2 [-o|--filtered-out out.fasta] + +If specified, filtered out sequences will be written to this file. + +=head2 seqs.fasta + +Sequences in fasta format. Start of IDs must match ids in the tabular file. + +=head1 OUTPUT + +A fasta file on stdout. + +=head1 AUTHOR + +Lionel Guy (lionel.guy@icm.uu.se) + +=head1 DATE + +Thu Aug 29 13:51:13 CEST 2013 + +=cut + +# libraries +use strict; +use Getopt::Long; +use Bio::SeqIO; + +my $coverage_co = 10; +my $length_co = 500; +my $out_filtered; +my $tab_file; + +GetOptions( + 'c|coverage-cutoff=s' => \$coverage_co, + 'l|length-cutoff=s' => \$length_co, + 'o|filtered-out=s' => \$out_filtered, + 't|tab=s' => \$tab_file, +); +my $fasta_file = shift(@ARGV); +die ("No tab file specified") unless ($tab_file); +die ("No fasta file specified") unless ($fasta_file); + +## Read tab file, discard rows with comments +open TAB, '<', $tab_file or die "$?"; +my @stats; +while (<TAB>){ + chomp; + push @stats, $_ unless (/^#/); +} + +## Read fasta +my $seq_in = Bio::SeqIO->new(-file => $fasta_file, + -format => 'fasta'); +my $seq_out = Bio::SeqIO->new(-fh => \*STDOUT, + -format => 'fasta'); +my $seq_out_filt = Bio::SeqIO->new(-file => ">$out_filtered", + -format => 'fasta') if ($out_filtered); +while (my $seq = $seq_in->next_seq){ + my $stat = shift @stats; + die "Less rows in tab than sequences in seq file" unless $stat; + my ($id_tab, $length, $coverage) = split(/\t+/, $stat); + die "id, length or coverate not defined at $stat\n" + unless ($id_tab && $length && $coverage); + my $id_seq = $seq->id; + die "Unmatched ids $id_seq and $id_tab\n" unless ($id_seq =~ /^$id_tab/i); + if ($length >= $length_co && $coverage >= $coverage_co){ + $seq_out->write_seq($seq); + } elsif ($out_filtered){ + $seq_out_filt->write_seq($seq); + } else { + # do nothing + } +} +die "More rows in tab than sequences in seq file" if (scalar(@stats) > 0); +exit 0; +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/spades_3_1_1/filter_spades_output.xml Fri Oct 10 14:40:43 2014 -0400 @@ -0,0 +1,33 @@ +<tool id="filter_spades_output" name="Filter SPAdes output" version="0.1"> + <description>remove low coverage and short contigs/scaffolds</description> + <command interpreter="perl">filter_spades_output.pl + --coverage-cutoff $coverage_co + --length-cutoff $length_co + #if $keep_leftover + --filtered-out $filtered_out + #end if + --tab $stats_in + $fasta_in > $fasta_output + </command> + + <inputs> + <param name="fasta_in" type="data" format="fasta" label="Sequences" help="Contigs or scaffolds. Make sure you input the corresponding stat file" /> + <param name="stats_in" type="data" format="tabular" label="Contig stats" /> + <param name="length_co" type="integer" value="1000" min="0" label="Length cut-off" help="Contigs with length under that value are shown in red" /> + <param name="coverage_co" type="integer" value="10" min="0" label="Coverage cut-off" help="Contigs with length under that value are shown in red" /> + <param name="keep_leftover" type="boolean" checked="false" label="Save filtered-out sequences?" /> + </inputs> + <outputs> + <data format="fasta" name="fasta_output" label="Filtered sequences" /> + <data format="fasta" name="filtered_out" label="Discarded sequences"> + <filter>keep_leftover == "true"</filter> + </data> + </outputs> + <help> +**What it does** + +Using the output of SPAdes (a fasta and a stats file, either from contigs or scaffolds), it filters the fasta files, discarding all sequences that are under a given length or under a given coverage. + +Typically, this is used to discard short contigs, or contaminations. To display a coverage vs. length plot, use the "SPAdes stats" tool in the same package. + </help> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/spades_3_1_1/plot_spades_stats.xml Fri Oct 10 14:40:43 2014 -0400 @@ -0,0 +1,80 @@ +<tool id="plot_spades_stats" name="SPAdes stats" version="0.1"> + <description>coverage vs. length plot</description> + <requirements> + <requirement type="package">R</requirement> + </requirements> + <command interpreter="bash">r_wrapper.sh $script_file</command> + + <inputs> + <param name="input_scaffolds" type="data" format="tabular" label="Scaffold stats"/> + <param name="input_contigs" type="data" format="tabular" label="Contig stats"/> + <param name="length_co" type="integer" value="1000" min="0" label="Length cut-off" help="Contigs with length under that value are shown in red"/> + <param name="coverage_co" type="integer" value="10" min="0" label="Coverage cut-off" help="Contigs with length under that value are shown in red"/> + </inputs> + <configfiles> + <configfile name="script_file"> +## Setup R error handling to go to stderr +options( show.error.messages=F, + error = function () { + cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) +} ) +files = c("${input_contigs}", "${input_scaffolds}") +types = c("Contigs", "Scaffolds") + +## Start plotting device +png("${out_file}", w=500, h=1000) +par(mfrow=c(2,1)) + +## Loop over the two files +for (i in 1:length(types)){ + seqs = read.table(files[i], header=FALSE, comment.char="#") + colnames = c("name", "length", "coverage") + names(seqs) = colnames + + ## Stats over all sequences + sl_all = sort(seqs\$length, decreasing=TRUE) + cs_all = cumsum(sl_all) + s_all = sum(seqs\$length) + n50_idx_all = which.min(sl_all[cs_all < 0.5*s_all]) + n90_idx_all = which.min(sl_all[cs_all < 0.9*s_all]) + n50_all = sl_all[n50_idx_all] + n90_all = sl_all[n90_idx_all] + + ## Filter short seqs, redo stats + seqs_filt = seqs[seqs\$length >= ${length_co} & seqs\$coverage >= ${coverage_co},] + if (nrow(seqs_filt) > 0){ + sl_filt = sort(seqs_filt\$length, decreasing=TRUE) + cs_filt = cumsum(sl_filt) + s_filt = sum(seqs_filt\$length) + n50_idx_filt = which.min(sl_filt[cs_filt < 0.5*s_filt]) + n90_idx_filt = which.min(sl_filt[cs_filt < 0.9*s_filt]) + n50_filt = sl_filt[n50_idx_filt] + n90_filt = sl_filt[n90_idx_filt] + } + seqs_bad = seqs[seqs\$length < ${length_co} | seqs\$coverage < ${coverage_co},] + + ## Length vs coverage + plot(length~coverage, data=seqs, log="xy", type="n", main=paste(types[i], ": coverage vs. length", sep=""), xlab="Coverage", ylab="Length") + if (nrow(seqs_bad) > 0){ + points(length~coverage, data=seqs_bad, cex=0.5, col="red") + } + if (nrow(seqs_filt) > 0){ + points(length~coverage, data=seqs_filt, cex=0.5, col="black") + } + abline(v=${coverage_co}, h=${length_co}, lty=2, col=grey(0.3)) + legend(x="topleft", legend=c("Before/after filtering", paste(c("N50: ", "N90: ", "Median cov.: "), c(n50_all, n90_all, round(median(seqs\$coverage))), rep("/", 3), c(n50_filt, n90_filt, round(median(seqs_filt\$coverage))), sep="")), cex=0.8) +} +dev.off() + </configfile> + </configfiles> + <outputs> + <data format="png" name="out_file" /> + </outputs> + <help> +**What it does** + +Using the output of SPAdes (a pair of fasta file and stat file for each of the contigs and scaffolds), it produces a coverage vs. contig plot. Each dot represent a contig/scaffold. Given a coverage and a length cutoff, sequences that do not meet those criteria are shown in red. Some statistics are also given (N50, N90, median contig/scaffold length) both before and after filtering. + +Use the "filter SPAdes output" tool to actually filter sequences. + </help> +</tool> \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/spades_3_1_1/r_wrapper.sh Fri Oct 10 14:40:43 2014 -0400 @@ -0,0 +1,23 @@ +#!/bin/sh + +### Run R providing the R script in $1 as standard input and passing +### the remaining arguments on the command line + +# Function that writes a message to stderr and exits +function fail +{ + echo "$@" >&2 + exit 1 +} + +# Ensure R executable is found +which R > /dev/null || fail "'R' is required by this tool but was not found on path" + +# Extract first argument +infile=$1; shift + +# Ensure the file exists +test -f $infile || fail "R input file '$infile' does not exist" + +# Invoke R passing file named by first argument to stdin +R --vanilla --slave $* < $infile
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/spades_3_1_1/spades.pl Fri Oct 10 14:40:43 2014 -0400 @@ -0,0 +1,139 @@ +#!/usr/bin/env perl +## A wrapper script to call spades.py and collect its output +use strict; +use warnings; +use File::Temp qw/ tempfile tempdir /; +use File::Copy; +use Getopt::Long; + +# Parse arguments +my ($out_contigs_file, + $out_contigs_stats, + $out_scaffolds_file, + $out_scaffolds_stats, + $out_log_file, + $new_name, + @sysargs) = @ARGV; + +## GetOptions not compatible with parsing the rest of the arguments in an array. +## Keeping the not-so-nice parse-in-one-go method, without named arguments. +# GetOptions( +# 'contigs-file=s' => \$out_contigs_file, +# 'contigs-stats=s' => \$out_contigs_stats, +# 'scaffolds-file=s' => \$out_scaffolds_file, +# 'scaffolds-stats=s' => \$out_scaffolds_stats, +# 'out_log_file=s' => \$out_log_file, +# ); + +# my @sysargs = @ARGV; + +# Create temporary folder to store files, delete after use +#my $output_dir = tempdir( CLEANUP => 0 ); +my $output_dir = 'output_dir'; +# Link "dat" files as fastq, otherwise spades complains about file format + +# Create log handle +open my $log, '>', $out_log_file or die "Cannot write to $out_log_file: $?\n"; + +# Run program +# To do: record time +runSpades(@sysargs); +collectOutput($new_name); +extractCoverageLength($out_contigs_file, $out_contigs_stats); +extractCoverageLength($out_scaffolds_file, $out_scaffolds_stats); +print $log "Done\n"; +close $log; +exit 0; + +# Run spades +sub runSpades { + my $cmd = join(" ", @_) . " -o $output_dir"; + my $return_code = system($cmd); + if ($return_code) { + print $log "Failed with code $return_code\nCommand $cmd\nMessage: $?\n"; + die "Failed with code $return_code\nCommand $cmd\nMessage: $?\n"; + } + return 0; +} + +# Collect output +sub collectOutput{ + my ($new_name) = @_; + + # To do: check that the files are there + # Collects output + if ( not -e "$output_dir/contigs.fasta") { + die "Could not find contigs.fasta file\n"; + } + if ( not -e "$output_dir/scaffolds.fasta") { + die "Could not find scaffolds.fasta file\n"; + } + + #if a new name is given for the contigs and scaffolds, change them before moving them + if ( $new_name ne 'NODE') { + renameContigs($new_name); + } + else { + move "$output_dir/contigs.fasta", $out_contigs_file; + move "$output_dir/scaffolds.fasta", $out_scaffolds_file; + } + + + + open LOG, '<', "$output_dir/spades.log" + or die "Cannot open log file $output_dir/spades.log: $?"; + print $log $_ while (<LOG>); + return 0; +} + +#Change name in contig and scaffolds file +sub renameContigs{ + my ($name) = @_; + + open my $in, '<',"$output_dir/contigs.fasta" or die $!; + open my $out,'>', $out_contigs_file; + + while ( my $line = <$in>) { + #remove the NODE_ so we can rebuilt the display_id with our contig name with the contig number. + #also move the remainder of the length + if ( $line =~ />NODE_(\d+)_(.+)/) { + $line = ">$name" . "_$1 $2\n"; + } + print $out $line; + } + close $in; + close $out; + + + open $in, '<',"$output_dir/scaffolds.fasta" or die $!; + open $out,'>', $out_scaffolds_file; + + while ( my $line = <$in>) { + #remove the NODE_ so we can rebuilt the display_id with our contig name with the contig number. + #also move the remainder of the length + if ( $line =~ />NODE_(\d+)_(.+)/) { + $line = ">$name" . "_$1 $2\n"; + } + print $out $line; + } + close $in; + close $out; + +} + + +# Extract +sub extractCoverageLength{ + my ($in, $out) = @_; + open FASTA, '<', $in or die $!; + open TAB, '>', $out or die $!; + print TAB "#name\tlength\tcoverage\n"; + while (<FASTA>){ + next unless /^>/; + chomp; + die "Not all elements found in $_\n" if (! m/^>(NODE|\S+)_(\d+)(?:_|\s)length_(\d+)_cov_(\d+\.*\d*)/); + my ($name,$n, $l, $cov) = ($1,$2, $3, $4); + print TAB "$name" . "_$n\t$l\t$cov\n"; + } + close TAB; +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/spades_3_1_1/spades.xml Fri Oct 10 14:40:43 2014 -0400 @@ -0,0 +1,210 @@ +<tool id="spades" name="spades" version="0.9"> + <description>SPAdes genome assembler for regular and single-cell projects</description> + <requirements> + <requirement type="package" version="3.1.1">spades</requirement> + </requirements> + <command interpreter="perl">spades.pl + $out_contigs + $out_contig_stats + $out_scaffolds + $out_scaffold_stats + $out_log + + ## if the first library file is a paired-collection, use the key as the name + #if $libraries[0].files[0].file_type.type == "paired-collection": + $libraries[0].files[0].file_type.fastq_collection.name + #else: + NODE + #end if + ## A real command looks like: spades.py -k 21,33,55,77,99,127 --careful -1 Y.fastq.gz -2 X.fastq.gz -t 24 -o output + spades.py + ## Forces unzipped output, faster + --disable-gzip-output + $sc + $onlyassembler + $careful + -t \${GALAXY_SLOTS:-16} + -k "$kmers" + $iontorrent + ## Sequence files, libraries + #for $i, $library in enumerate( $libraries ) + #set num=$i+1 + #if str( $library.lib_type ) == "paired_end": + #set prefix = 'pe' + #elif str( $library.lib_type ) == "mate_paired": + #set prefix = 'mp' + #else: + $set prefix = 'hqmp' + #end if + --$prefix$num-$library.orientation + #for $file in $library.files + #if $file.file_type.type == "separate" + --$prefix$num-1 fastq:$file.file_type.fwd_reads + --$prefix$num-2 fastq:$file.file_type.rev_reads + #elif $file.file_type.type == "interleaved" + --$prefix$num-12 fastq:$file.file_type.interleaved_reads + #elif $file.file_type.type == "unpaired" + --$prefix$num-s fastq:$file.file_type.unpaired_reads + #elif $file.file_type.type == "paired-collection" + --$prefix$num-1 fastq:$file.file_type.fastq_collection.forward + --$prefix$num-2 fastq:$file.file_type.fastq_collection.reverse + #end if + #end for + #end for + ## PacBio reads + #for $i, $pacbiolib in enumerate( $pacbio ) + --pacbio fastq:$pacbiolib.pacbio_reads + #end for + ## Sanger + #for $i, $sangerlib in enumerate( $sanger ) + --sanger $sangerlib.file_type.type:$sangerlib.file_type.sanger_reads + #end for + ## Contigs + #for $i, $trustedcontigs in enumerate( $trustedcontigs ) + --trusted-contigs $trustedcontigs.file_type.type:$trustedcontigs.file_type.trusted_contigs + #end for + #for $i, $untrustedcontigs in enumerate( $untrustedcontigs ) + --untrusted-contigs $untrustedcontigs.file_type.type:$untrustedcontigs.file_type.untrusted_contigs + #end for + </command> + <inputs> + <param name="sc" type="boolean" truevalue="--sc" falsevalue="" label="Single-cell?" help="This option is required for MDA (single-cell) data."> + <option value="false">No</option> + <option value="true">Yes</option> + </param> + <param name="onlyassembler" type="boolean" truevalue="--only-assembler" falsevalue="" checked="False" label="Run only assembly? (without read error correction)" /> + <param name="careful" type="boolean" truevalue="--careful" falsevalue="" checked="True" label="Careful correction?" help="Tries to reduce number of mismatches and short indels. Also runs MismatchCorrector – a post processing tool, which uses BWA tool (comes with SPAdes)." /> + <param name="kmers" type="text" label="K-mers to use, separated by commas" value="21,33,55" help="Comma-separated list of k-mer sizes to be used (all values must be odd, less than 128, listed in ascending order, and smaller than the read length). The default value is 21,33,55." > + </param> + <param name="iontorrent" type="boolean" truevalue="--iontorrent" falsevalue="" checked="False" label="Libraries are IonTorrent reads?" /> + <!-- Reads --> + <repeat name="libraries" title="Libraries" min="1" help="It is not possible to specify only mate-pair libraries. Scaffolds are not produced if neither a paired-end nor a mate-pair library is provided."> + <param name="lib_type" type="select" label="Library type"> + <option value="paired_end">Paired-end / Single reads</option> + <option value="mate_paired">Mate pairs</option> + <option value="high_mate_paired">High Quality Mate pairs</option> + </param> + <param name="orientation" type="select" label="Orientation"> + <option value="fr" selected="true">-> <- (fr)</option> + <option value="rf"><- -> (rf)</option> + <option value="ff">-> -> (ff)</option> + </param> + <repeat name="files" title="Files" min="1"> + <conditional name="file_type"> + <param name="type" type="select" label="Select file format"> + <option value="separate">Separate input files</option> + <option value="interleaved">Interleaved files</option> + <option value="unpaired">Unpaired/Single reads</option> + <option value="paired-collection">Paired List Collection</option> + </param> + <when value="separate"> + <param name="fwd_reads" type="data" format="fastq" label="Forward reads" help="FASTQ format" /> + <param name="rev_reads" type="data" format="fastq" label="Reverse reads" help="FASTQ format" /> + </when> + <when value="interleaved"> + <param name="interleaved_reads" type="data" format="fastq" label="Interleaved paired reads" help="FASTQ format" /> + </when> + <when value="unpaired"> + <param name="unpaired_reads" type="data" format="fastq" label="Unpaired reads" help="FASTQ format" /> + </when> + <when value="paired-collection"> + <param name="fastq_collection" type="data_collection" label="Paired-end reads collection" optional="false" format="fastq" collection_type="paired" help="FASTQ format" /> + </when> + </conditional> + </repeat> + </repeat> + <!-- PacBio --> + <repeat name="pacbio" title="PacBio CLR reads"> + <param name="pacbio_reads" type="data" format="fastq" label="PacBio CLR reads." help="FASTQ format. For PacBio pre-corrected or CCS reads, use single reads above." /> + </repeat> + <!-- Sanger --> + <repeat name="sanger" title="Sanger reads"> + <conditional name="file_type"> + <param name="type" type="select" label="Select file format" help="No read correction is done on Sanger reads, no need to provide quality information."> + <option value="fasta">fasta</option> + <option value="fastq">fastq</option> + </param> + <when value="fasta"> + <param name="sanger_reads" type="data" format="fasta" label="Sanger reads" help="FASTA format" /> + </when> + <when value="fastq"> + <param name="sanger_reads" type="data" format="fastq" label="Sanger reads" help="FASTQ format" /> + </when> + </conditional> + </repeat> + <!-- Contigs --> + <repeat name="trustedcontigs" title="Trusted contigs" help="Reliable contigs of the same genome, which are likely to have no misassemblies and small rate of other errors (e.g. mismatches and indels). This option is not intended for contigs of the related species."> + <conditional name="file_type"> + <param name="type" type="select" label="Select file format"> + <option value="fasta">fasta</option> + <option value="fastq">fastq</option> + </param> + <when value="fasta"> + <param name="trusted_contigs" type="data" format="fasta" label="Trusted contigs" help="FASTA format" /> + </when> + <when value="fastq"> + <param name="trusted_contigs" type="data" format="fastq" label="Trusted contigs" help="FASTQ format" /> + </when> + </conditional> + </repeat> + <repeat name="untrustedcontigs" title="Untrusted contigs" help="Contigs of the same genome, quality of which is average or unknown. Contigs of poor quality can be used but may introduce errors in the assembly. This option is also not intended for contigs of the related species."> + <conditional name="file_type"> + <param name="type" type="select" label="Select file format"> + <option value="fasta">fasta</option> + <option value="fastq">fastq</option> + </param> + <when value="fasta"> + <param name="untrusted_contigs" type="data" format="fasta" label="Untrusted contigs" help="FASTA format" /> + </when> + <when value="fastq"> + <param name="untrusted_contigs" type="data" format="fastq" label="Untrusted contigsz" help="FASTQ format" /> + </when> + </conditional> + </repeat> + </inputs> + <outputs> + <data name="out_contigs" format="fasta" label="SPAdes contigs (fasta)" /> + <data name="out_contig_stats" format="tabular" label="SPAdes contig stats" /> + <data name="out_scaffolds" format="fasta" label="SPAdes scaffolds (fasta)" /> + <data name="out_scaffold_stats" format="tabular" label="SPAdes scaffold stats" /> + <data name="out_log" format="txt" label="SPAdes log" /> + </outputs> + <!-- <tests> + <test> + <param name="sc" value="false" /> + <param name="careful" value="false" /> + <param name="kmers" value="33,55" /> + <param name="type" value="pairedend" /> + <param name="fwd_reads" value="ecoli_1K_1.fq" ftype="fastq" /> + <param name="rev_reads" value="ecoli_1K_2.fq" ftype="fastq" /> + <output name="out_contigs" file="reference_1K.fa" ftype="fasta" compare="re_match" lines_diff="1" /> + </test> + </tests> --> + <help> +**What it does** + +SPAdes – St. Petersburg genome assembler – is intended for both standard isolates and single-cell MDA bacteria assemblies. See http://bioinf.spbau.ru/en/spades for more details on SPAdes. + +This wrapper runs SPAdes 3.1.1, collects the output, and throws away all the temporary files. It also produces a tab file with contig names, length and coverage. + +**SPAdes citation** + +Anton Bankevich, Sergey Nurk, Dmitry Antipov, Alexey A. Gurevich, Mikhail Dvorkin, Alexander S. Kulikov, Valery M. Lesin, Sergey I. Nikolenko, Son Pham, Andrey D. Prjibelski, Alexey V. Pyshkin, Alexander V. Sirotkin, Nikolay Vyahhi, Glenn Tesler, Max A. Alekseyev, and Pavel A. Pevzner. Journal of Computational Biology. May 2012, 19(5): 455-477. doi:10.1089/cmb.2012.0021. + +**License** + +SPAdes is developed by and copyrighted to Saint-Petersburg Academic University, and is released under GPLv2. + +This wrapper is copyrighted by Lionel Guy,Philip Mabon and is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with this program. If not, see http://www.gnu.org/licenses/. + +** Acknowledgments ** + +Anton Korobeynikov greatlty helped understanding how SPAdes work, and integrated handy features into SPAdes. + +Nicola Soranzo fixed various bugs. + </help> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/spades_3_1_1/tool_dependencies.xml Fri Oct 10 14:40:43 2014 -0400 @@ -0,0 +1,33 @@ +<?xml version="1.0"?> +<tool_dependency> + <package name="spades" version="3.1.1"> + <install version="1.0"> + <actions> + <action type="download_by_url">http://spades.bioinf.spbau.ru/release3.1.1/SPAdes-3.1.1-Linux.tar.gz</action> + <!-- Copying the whole bin folder --> + <action type="make_directory">$INSTALL_DIR/bin</action> + <action type="make_directory">$INSTALL_DIR/share</action> + <action type="move_directory_files"> + <source_directory>bin</source_directory> + <destination_directory>$INSTALL_DIR/bin</destination_directory> + </action> + <action type="move_directory_files"> + <source_directory>share</source_directory> + <destination_directory>$INSTALL_DIR/share</destination_directory> + </action> + <!-- This is required to have spades accept .dat files --> + <!-- <action type="shell_command">sed -i -e "s/\('\.fa', '\.fasta', '\.fq', '\.fastq', '\.gz'\)/\\1, '.dat'/" $INSTALL_DIR/share/spades/spades_pipeline/support.py</action> --> + <!-- <action type="set_environment"> --> + <!-- <environment_variable name="PATH" action="prepend_to">$INSTALL_DIR/bin</environment_variable> --> + <!-- </action> --> + </actions> + </install> + <readme> +This installs SPAdes 3.1.1. + +See manual here http://spades.bioinf.spbau.ru/release3.1.1/manual.html +See also here http://bioinf.spbau.ru/en/spades + </readme> + </package> +</tool_dependency> +