# HG changeset patch # User lionelguy # Date 1376917577 14400 # Node ID 0f8b2da62d7d6a3b718d66fe9ae5d83c15e974cf # Parent a407a6ed437dd714746545339b8f58658d0023b5 Support for SPAdes 2.5.0. Added a tab-separated output with coverage vs. length info for each contig. diff -r a407a6ed437d -r 0f8b2da62d7d tools/spades_2_4/spades.pl --- a/tools/spades_2_4/spades.pl Wed Jul 17 06:07:29 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,47 +0,0 @@ -#!/usr/bin/env perl -## A wrapper script to call spades.py and collect its output -use strict; -use warnings; -use File::Temp qw/ tempfile tempdir /; -use File::Copy; - -# Parse arguments -my ($out_contigs_file, $out_scaffolds_file, $out_log_file, @sysargs) = @ARGV; - -# Create temporary folder to store files, delete after use -#my $output_dir = tempdir( CLEANUP => 0 ); -my $output_dir = tempdir( CLEANUP => 1 ); -# Link "dat" files as fastq, otherwise spades complains about file format - -# Create log handle -open my $log, '>', $out_log_file or die "Cannot write to $out_log_file: $?\n"; - -# Run program -# To do: record time -&runSpades(@sysargs); -&collectOutput(); -print $log "Done\n"; -close $log; -exit 0; - -# Run spades -sub runSpades { - my $cmd = join(" ", @_) . 
" -o $output_dir"; - my $return_code = system($cmd); - if ($return_code) { - print $log "Failed with code $return_code\nCommand $cmd\nMessage: $?\n"; - die "Failed with code $return_code\nCommand $cmd\nMessage: $?\n"; - } - return 0; -} -# Collect output -sub collectOutput{ - # To do: check that the files are there - # Collects output - move "$output_dir/contigs.fasta", $out_contigs_file; - move "$output_dir/scaffolds.fasta", $out_scaffolds_file; - open LOG, '<', "$output_dir/spades.log" - or die "Cannot open log file $output_dir/spades.log: $?"; - print $log $_ while (); - return 0; -} diff -r a407a6ed437d -r 0f8b2da62d7d tools/spades_2_4/spades.xml --- a/tools/spades_2_4/spades.xml Wed Jul 17 06:07:29 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,104 +0,0 @@ - - SPAdes genome assembler for regular and single-cell projects - - spades - - spades.pl $out_contigs $out_scaffolds $out_log - ## A real command looks like: spades.py -k 21,33,55,77,99,127 --careful -1 Y.fastq.gz -2 X.fastq.gz -t 24 -o output - spades.py - ## TODO: kmers, threads, other options (-sc for single-cell) - #if $sc == "true": - --sc - #end if - #if $careful == "true": - --careful - #end if - #if $rectangle == "true" - --rectangle - #end if - -t $threads - -k $kmers - -i $iterations - ##--phred-offset - ## Sequence files - #for $i, $s in enumerate( $reads ) - #if $s.read_type.type == "pairedend" - -1 $s.read_type.fwd_reads - -2 $s.read_type.rev_reads - #elif $s.read_type.type == "interleaved" - --12 $s.read_type.interleaved_reads - #elif $s.read_type.type == "unpaired" - -s $s.read_type.unpaired_reads - #end if - #end for - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -**What it does** - -Runs SPAdes 2.4.0, collects the output, and throws away all the temporary files. - -**Citation** - -Anton Bankevich, Sergey Nurk, Dmitry Antipov, Alexey A. Gurevich, Mikhail Dvorkin, Alexander S. Kulikov, Valery M. 
Lesin, Sergey I. Nikolenko, Son Pham, Andrey D. Prjibelski, Alexey V. Pyshkin, Alexander V. Sirotkin, Nikolay Vyahhi, Glenn Tesler, Max A. Alekseyev, and Pavel A. Pevzner. Journal of Computational Biology. May 2012, 19(5): 455-477. doi:10.1089/cmb.2012.0021. - - - diff -r a407a6ed437d -r 0f8b2da62d7d tools/spades_2_4/tool_dependencies.xml --- a/tools/spades_2_4/tool_dependencies.xml Wed Jul 17 06:07:29 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,33 +0,0 @@ - - - - - - http://spades.bioinf.spbau.ru/release2.4.0/SPAdes-2.4.0-Linux.tar.gz - - $INSTALL_DIR/bin - $INSTALL_DIR/share - - bin - $INSTALL_DIR/bin - - - share - $INSTALL_DIR/share - - - sed -i -e "s/\('\.fa', '\.fasta', '\.fq', '\.fastq', '\.gz'\)/\\1, '.dat'/" $INSTALL_DIR/bin/spades.py - - $INSTALL_DIR/bin - - - - -This installs SPAdes 2.4.0. - -See manual here http://spades.bioinf.spbau.ru/release2.4.0/manual.html -See also here http://bioinf.spbau.ru/en/spades - - - - diff -r a407a6ed437d -r 0f8b2da62d7d tools/spades_2_5/spades.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/spades_2_5/spades.pl Mon Aug 19 09:06:17 2013 -0400 @@ -0,0 +1,84 @@ +#!/usr/bin/env perl +## A wrapper script to call spades.py and collect its output +use strict; +use warnings; +use File::Temp qw/ tempfile tempdir /; +use File::Copy; +use Getopt::Long; + +# Parse arguments +my ($out_contigs_file, + $out_contigs_stats, + $out_scaffolds_file, + $out_scaffolds_stats, + $out_log_file, + @sysargs) = @ARGV; + +## GetOptions not compatible with parsing the rest of the arguments in an array. +## Keeping the not-so-nice parse-in-one-go method, without named arguments. 
+# GetOptions(
+#     'contigs-file=s' => \$out_contigs_file,
+#     'contigs-stats=s' => \$out_contigs_stats,
+#     'scaffolds-file=s' => \$out_scaffolds_file,
+#     'scaffolds-stats=s' => \$out_scaffolds_stats,
+#     'out_log_file=s' => \$out_log_file,
+#     );
+
+# my @sysargs = @ARGV;
+
+# Create temporary folder to store files, delete after use (CLEANUP => 0 keeps it)
+my $output_dir = tempdir( CLEANUP => 1 );
+# "dat" inputs need no fastq link: tool_dependencies.xml patches SPAdes to accept '.dat'
+
+# Create log handle
+open my $log, '>', $out_log_file or die "Cannot write to $out_log_file: $!\n";
+
+# Run program (to do: record time)
+runSpades(@sysargs);
+collectOutput();
+extractCoverageLength($out_contigs_file, $out_contigs_stats);
+extractCoverageLength($out_scaffolds_file, $out_scaffolds_stats);
+print $log "Done\n";
+close $log;
+exit 0;
+
+# Run spades: execute the wrapped command with "-o $output_dir" appended; die on failure
+sub runSpades {
+    my @cmd = (@_, '-o', $output_dir);    # list form: no shell, so odd file names are safe
+    return 0 if system(@cmd) == 0;
+    my $why = $? == -1 ? "could not execute: $!" : 'exit status ' . ($? >> 8);
+    my $msg = "Failed with code $?\nCommand @cmd\nMessage: $why\n";
+    print $log $msg;
+    die $msg;
+}
+
+# Collect output: copy spades.log into the log first, then move both assemblies out
+sub collectOutput{
+    open my $spades_log, '<', "$output_dir/spades.log"
+        or die "Cannot open log file $output_dir/spades.log: $!";
+    print $log $_ while (<$spades_log>);
+    close $spades_log;
+    for my $f (['contigs.fasta', $out_contigs_file], ['scaffolds.fasta', $out_scaffolds_file]) {
+        move("$output_dir/$f->[0]", $f->[1]) or die "Cannot move $f->[0] to $f->[1]: $!\n";
+    }
+    return 0;
+}
+
+# Extract: write "node<TAB>length<TAB>coverage" to $out for each ">NODE_n_length_l_cov_c" header in $in
+sub extractCoverageLength{
+    my ($in, $out) = @_;
+    open my $fasta, '<', $in or die "Cannot read $in: $!\n";
+    open my $tab, '>', $out or die "Cannot write to $out: $!\n";
+    while (my $line = <$fasta>){
+        next unless $line =~ /^>/;
+        chomp $line;
+        my ($name) = split(/\s/, $line);
+        my ($n, $len, $cov) = (split(/_/, $name))[1, 3, 5];
+        # Check presence rather than truth so that a legitimate "0" value is accepted
+        die "Not all elements found in $line\n"
+            unless 3 == grep { defined($_) && length($_) } $n, $len, $cov;
+        print $tab "$n\t$len\t$cov\n";
+    }
+    close $fasta;
+    close $tab or die "Cannot close $out: $!\n";
+}
diff -r a407a6ed437d -r 0f8b2da62d7d tools/spades_2_5/spades.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/spades_2_5/spades.xml Mon Aug 19 09:06:17 2013 -0400 @@ -0,0 +1,111 @@ + + SPAdes genome assembler for regular and single-cell projects + + spades + + spades.pl + $out_contigs + $out_contig_stats + $out_scaffolds + $out_scaffold_stats + $out_log + ## A real command looks like: spades.py -k 21,33,55,77,99,127 --careful -1 Y.fastq.gz -2 X.fastq.gz -t 24 -o output + spades.py + ## TODO: kmers, threads, other options (-sc for single-cell) + #if $sc == "true": + --sc + #end if + #if $careful == "true": + --careful + #end if + #if $rectangle == "true" + --rectangle + #end if + -t $threads + -k $kmers + -i $iterations + ##--phred-offset + ## Sequence files + #for $i, $s in enumerate( $reads ) + #if $s.read_type.type == "pairedend" + -1 $s.read_type.fwd_reads + -2 $s.read_type.rev_reads + #elif $s.read_type.type == "interleaved" + --12 $s.read_type.interleaved_reads + #elif $s.read_type.type == "unpaired" + -s $s.read_type.unpaired_reads + #end if + #end for + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +**What it does** + +Runs SPAdes 2.5.0, collects the output, and throws away all the temporary files. It also produces a tab file with contig names, length and coverage. + +**Citation** + +Anton Bankevich, Sergey Nurk, Dmitry Antipov, Alexey A. Gurevich, Mikhail Dvorkin, Alexander S. Kulikov, Valery M. Lesin, Sergey I. Nikolenko, Son Pham, Andrey D. Prjibelski, Alexey V. Pyshkin, Alexander V. Sirotkin, Nikolay Vyahhi, Glenn Tesler, Max A. Alekseyev, and Pavel A. Pevzner. Journal of Computational Biology. May 2012, 19(5): 455-477. doi:10.1089/cmb.2012.0021. 
+ + + diff -r a407a6ed437d -r 0f8b2da62d7d tools/spades_2_5/tool_dependencies.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/spades_2_5/tool_dependencies.xml Mon Aug 19 09:06:17 2013 -0400 @@ -0,0 +1,33 @@ + + + + + + http://spades.bioinf.spbau.ru/release2.5.0/SPAdes-2.5.0-Linux.tar.gz + + $INSTALL_DIR/bin + $INSTALL_DIR/share + + bin + $INSTALL_DIR/bin + + + share + $INSTALL_DIR/share + + + sed -i -e "s/\('\.fa', '\.fasta', '\.fq', '\.fastq', '\.gz'\)/\\1, '.dat'/" $INSTALL_DIR/share/spades/spades_pipeline/support.py + + $INSTALL_DIR/bin + + + + +This installs SPAdes 2.5.0. + +See manual here http://spades.bioinf.spbau.ru/release2.5.0/manual.html +See also here http://bioinf.spbau.ru/en/spades + + + +