# HG changeset patch # User nml # Date 1465240386 14400 # Node ID 27b90e43e2d838791d4d434508100d7edeefd16b planemo upload commit 6cd8dfa9e518c63a0b0e3fd5167424cffd3829fc diff -r 000000000000 -r 27b90e43e2d8 CHANGE --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/CHANGE Mon Jun 06 15:13:06 2016 -0400 @@ -0,0 +1,4 @@ +1.0 +=== + - Supports SPAdes 3.8 + - NEW: Added plasmidSPAdes – a pipeline designed for extracting and assembling plasmids from WGS data sets. diff -r 000000000000 -r 27b90e43e2d8 plasmidSPAdes.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/plasmidSPAdes.xml Mon Jun 06 15:13:06 2016 -0400 @@ -0,0 +1,243 @@ + + genome assembler for plasmids + + spades + + spades.pl + $out_contigs + $out_contig_stats + $out_scaffolds + $out_scaffold_stats + $out_log + + ## if the first fileset is a paired-collection, use the key as the name + #if $files[0].file_type.type == "paired-collection": + $files[0].file_type.fastq_collection.name + #else: + NODE + #end if + ## A real command looks like: spades.py -k 21,33,55,77,99,127 --careful -1 Y.fastq.gz -2 X.fastq.gz -t 24 -o output + spades.py + ## Forces unzipped output, faster + --disable-gzip-output + --plasmid + $onlyassembler + $careful + -t \${GALAXY_SLOTS:-16} + -k "$kmers" + + #if $cov.state == "auto": + --cov-cutoff 'auto' + #elif $cov.state == "value": + --cov-cutoff '$cov.cutoff' + #end if + + $iontorrent + + ## Sequence files + #set num=1 + #if str( $lib_type ) == "paired_end": + #set prefix = 'pe' + #elif str( $lib_type ) == "mate_paired": + #set prefix = 'mp' + #elif str( $lib_type ) == "nxmate_paired": + #set prefix = 'nxmate' + #else: + #set prefix = 'hqmp' + #end if + --$prefix$num-$orientation + #for $file in $files + #if $file.file_type.type == "separate" + --$prefix$num-1 fastq:$file.file_type.fwd_reads + --$prefix$num-2 fastq:$file.file_type.rev_reads + #elif $file.file_type.type == "interleaved" + --$prefix$num-12 fastq:$file.file_type.interleaved_reads + #elif $file.file_type.type == 
"unpaired" + --$prefix$num-s fastq:$file.file_type.unpaired_reads + #elif $file.file_type.type == "paired-collection" + --$prefix$num-1 fastq:$file.file_type.fastq_collection.forward + --$prefix$num-2 fastq:$file.file_type.fastq_collection.reverse + #end if + #end for + + ## PacBio reads + #for $i, $pacbiolib in enumerate( $pacbio ) + --pacbio fastq:$pacbiolib.pacbio_reads + #end for + ## Nanopore + #for $i, $nanoporelib in enumerate( $nanopore ) + --nanopore fastq:$nanoporelib.nanopore_reads + #end for + ## Sanger + #for $i, $sangerlib in enumerate( $sanger ) + --sanger $sangerlib.file_type.type:$sangerlib.file_type.sanger_reads + #end for + ## Contigs + #for $i, $trustedcontigs in enumerate( $trustedcontigs ) + --trusted-contigs $trustedcontigs.file_type.type:$trustedcontigs.file_type.trusted_contigs + #end for + #for $i, $untrustedcontigs in enumerate( $untrustedcontigs ) + --untrusted-contigs $untrustedcontigs.file_type.type:$untrustedcontigs.file_type.untrusted_contigs + #end for + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +**What it does** + +SPAdes – St. Petersburg genome assembler – is intended for both standard isolates and single-cell MDA bacteria assemblies. See http://bioinf.spbau.ru/en/spades for more details on SPAdes. + +This wrapper runs SPAdes 3.8, collects the output, and throws away all the temporary files. It also produces a tab file with contig names, length and coverage. + +**License** + +SPAdes is developed by and copyrighted to Saint-Petersburg Academic University, and is released under GPLv2. 
+
+This wrapper is copyrighted by Philip Mabon and is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with this program. If not, see http://www.gnu.org/licenses/.
+
+**Acknowledgments**
+
+Original wrapper developed by Lionel Guy.
+
+Anton Korobeynikov greatly helped in understanding how SPAdes works, and integrated handy features into SPAdes.
+
+Nicola Soranzo fixed various bugs.
+
+
+ 10.1089/cmb.2012.0021
+
+
diff -r 000000000000 -r 27b90e43e2d8 spades.pl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/spades.pl Mon Jun 06 15:13:06 2016 -0400
@@ -0,0 +1,142 @@
+#!/usr/bin/env perl
+## A wrapper script to call spades.py and collect its output
+use strict;
+use warnings;
+use File::Temp qw/ tempfile tempdir /;
+use File::Copy;
+use Getopt::Long;
+
+# Parse arguments: five output paths, the contig name prefix ('NODE' keeps
+# the names SPAdes assigned), then the spades.py command line to run.
+die "Usage: $0 contigs contig_stats scaffolds scaffold_stats log name command...\n"
+    if @ARGV < 7;
+my ($out_contigs_file,
+    $out_contigs_stats,
+    $out_scaffolds_file,
+    $out_scaffolds_stats,
+    $out_log_file,
+    $new_name,
+    @sysargs) = @ARGV;
+
+# Directory passed to spades.py with -o; every result is collected from it
+my $output_dir = 'output_dir';
+
+# Create log handle (the reason an open failed is in $!, not $?)
+open my $log, '>', $out_log_file or die "Cannot write to $out_log_file: $!\n";
+
+# Run program
+runSpades(@sysargs);
+collectOutput($new_name);
+extractCoverageLength($out_contigs_file, $out_contigs_stats);
+extractCoverageLength($out_scaffolds_file, $out_scaffolds_stats);
+print $log "Done\n";
+close $log or die "Cannot close $out_log_file: $!\n";
+exit 0;
+
+# Run spades
+# Arguments: the spades.py command line, one word per element.
+# Appends "-o $output_dir" and runs the command directly (no shell), so each
+# argument reaches spades.py exactly as it was received.
+# Returns 0 on success; on failure writes the reason to the log and dies.
+sub runSpades {
+    my @cmd = (@_, '-o', $output_dir);
+    my $cmd = join(" ", @cmd);
+    my $return_code = system(@cmd);
+    if ($return_code) {
+        # system() returns -1 when the program could not be started (reason in
+        # $!); otherwise it returns the wait status: the low 7 bits hold a
+        # terminating signal and the high byte holds the exit code.
+        my $reason = $return_code == -1    ? "could not execute: $!"
+                   : ($return_code & 127) ? "killed by signal " . ($return_code & 127)
+                   :                        "exit status " . ($return_code >> 8);
+        my $message = "Failed with code $return_code\nCommand $cmd\nMessage: $reason\n";
+        print $log $message;
+        die $message;
+    }
+    return 0;
+}
+
+# Collect output
+# Argument: the contig name prefix; 'NODE' keeps the names SPAdes assigned.
+# Puts contigs and scaffolds at their output paths (renamed when a prefix is
+# given) and appends the SPAdes log to the wrapper log.
+# Returns 0; dies when a file is missing or cannot be moved or read.
+sub collectOutput{
+    my ($new_name) = @_;
+
+    # Both assemblies must exist before anything is renamed or moved
+    for my $fasta ('contigs.fasta', 'scaffolds.fasta') {
+        die "Could not find $fasta file\n" if not -e "$output_dir/$fasta";
+    }
+
+    #if a new name is given for the contigs and scaffolds, change them before moving them
+    if ( $new_name ne 'NODE') {
+        renameContigs($new_name);
+    }
+    else {
+        move("$output_dir/contigs.fasta", $out_contigs_file)
+            or die "Cannot move contigs.fasta to $out_contigs_file: $!\n";
+        move("$output_dir/scaffolds.fasta", $out_scaffolds_file)
+            or die "Cannot move scaffolds.fasta to $out_scaffolds_file: $!\n";
+    }
+
+    # Append the SPAdes log to the wrapper log
+    open my $spades_log, '<', "$output_dir/spades.log"
+        or die "Cannot open log file $output_dir/spades.log: $!";
+    print $log $_ while (<$spades_log>);
+    close $spades_log;
+    return 0;
+}
+
+#Change name in contig and scaffolds file
+# Argument: the prefix that replaces "NODE" in the sequence headers.
+# Reads contigs.fasta and scaffolds.fasta from the output directory and writes
+# the renamed copies to the contig and scaffold output paths.
+# Dies when a file cannot be opened or the written copy cannot be closed.
+sub renameContigs{
+    my ($name) = @_;
+
+    my @jobs = (
+        [ "$output_dir/contigs.fasta",   $out_contigs_file ],
+        [ "$output_dir/scaffolds.fasta", $out_scaffolds_file ],
+    );
+    for my $job (@jobs) {
+        my ($source, $target) = @$job;
+        open my $in,  '<', $source or die "Cannot read $source: $!\n";
+        open my $out, '>', $target or die "Cannot write to $target: $!\n";
+
+        while ( my $line = <$in>) {
+            #remove the NODE_ so we can rebuild the display_id from our contig name and the contig number.
+            #the remainder of the header (length, coverage, ...) follows after a space
+            if ( $line =~ />NODE_(\d+)_(.+)/) {
+                $line = ">$name" . "_$1 $2\n";
+            }
+            print $out $line;
+        }
+        close $in;
+        close $out or die "Cannot close $target: $!\n";
+    }
+    return;
+}
+
+# Extract
+# Arguments: the FASTA file to read and the tab-separated file to write.
+# Writes a "#name length coverage" heading, then one row per FASTA header:
+# name_number_component, length and coverage. Other lines are skipped.
+# Dies when a header lacks the length/cov/component fields or on I/O errors.
+sub extractCoverageLength{
+    my ($in, $out) = @_;
+    open my $fasta, '<', $in or die $!;
+    open my $tab, '>', $out or die $!;
+    print $tab "#name\tlength\tcoverage\n";
+    while (my $header = <$fasta>){
+        next unless $header =~ /^>/;
+        chomp $header;
+        die "Not all elements found in $header\n" if ($header !~ m/^>(NODE|\S+)_(\d+)(?:_|\s)length_(\d+)_cov_(\d+\.*\d*)_(component_\d+)/);
+        my ($name, $n, $length, $cov, $component) = ($1, $2, $3, $4, $5);
+        print $tab "$name" . "_$n" . "_$component\t$length\t$cov\n";
+    }
+    close $fasta;
+    close $tab or die "Cannot close $out: $!\n";
+    return;
+}
diff -r 000000000000 -r 27b90e43e2d8 tool_dependencies.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml Mon Jun 06 15:13:06 2016 -0400
@@ -0,0 +1,6 @@
+
+
+
+
+
+