# HG changeset patch
# User nml
# Date 1465240386 14400
# Node ID 27b90e43e2d838791d4d434508100d7edeefd16b
planemo upload commit 6cd8dfa9e518c63a0b0e3fd5167424cffd3829fc
diff -r 000000000000 -r 27b90e43e2d8 CHANGE
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/CHANGE Mon Jun 06 15:13:06 2016 -0400
@@ -0,0 +1,4 @@
+1.0
+===
+ - Supports SPAdes 3.8
+ - NEW: Added plasmidSPAdes – a pipeline designed for extracting and assembling plasmids from WGS data sets.
diff -r 000000000000 -r 27b90e43e2d8 plasmidSPAdes.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/plasmidSPAdes.xml Mon Jun 06 15:13:06 2016 -0400
@@ -0,0 +1,243 @@
+
+ genome assembler for plasmids
+
+ spades
+
+ spades.pl
+ $out_contigs
+ $out_contig_stats
+ $out_scaffolds
+ $out_scaffold_stats
+ $out_log
+
+ ## if the first fileset is a paired-collection, use the key as the name
+ #if $files[0].file_type.type == "paired-collection":
+ $files[0].file_type.fastq_collection.name
+ #else:
+ NODE
+ #end if
+ ## A real command looks like: spades.py -k 21,33,55,77,99,127 --careful -1 Y.fastq.gz -2 X.fastq.gz -t 24 -o output
+ spades.py
+ ## Forces unzipped output, faster
+ --disable-gzip-output
+ --plasmid
+ $onlyassembler
+ $careful
+ -t \${GALAXY_SLOTS:-16}
+ -k "$kmers"
+
+ #if $cov.state == "auto":
+ --cov-cutoff 'auto'
+ #elif $cov.state == "value":
+ --cov-cutoff '$cov.cutoff'
+ #end if
+
+ $iontorrent
+
+ ## Sequence files
+ #set num=1
+ #if str( $lib_type ) == "paired_end":
+ #set prefix = 'pe'
+ #elif str( $lib_type ) == "mate_paired":
+ #set prefix = 'mp'
+ #elif str( $lib_type ) == "nxmate_paired":
+ #set prefix = 'nxmate'
+ #else:
+ #set prefix = 'hqmp'
+ #end if
+ --$prefix$num-$orientation
+ #for $file in $files
+ #if $file.file_type.type == "separate"
+ --$prefix$num-1 fastq:$file.file_type.fwd_reads
+ --$prefix$num-2 fastq:$file.file_type.rev_reads
+ #elif $file.file_type.type == "interleaved"
+ --$prefix$num-12 fastq:$file.file_type.interleaved_reads
+ #elif $file.file_type.type == "unpaired"
+ --$prefix$num-s fastq:$file.file_type.unpaired_reads
+ #elif $file.file_type.type == "paired-collection"
+ --$prefix$num-1 fastq:$file.file_type.fastq_collection.forward
+ --$prefix$num-2 fastq:$file.file_type.fastq_collection.reverse
+ #end if
+ #end for
+
+ ## PacBio reads
+ #for $i, $pacbiolib in enumerate( $pacbio )
+ --pacbio fastq:$pacbiolib.pacbio_reads
+ #end for
+ ## Nanopore
+ #for $i, $nanoporelib in enumerate( $nanopore )
+ --nanopore fastq:$nanoporelib.nanopore_reads
+ #end for
+ ## Sanger
+ #for $i, $sangerlib in enumerate( $sanger )
+ --sanger $sangerlib.file_type.type:$sangerlib.file_type.sanger_reads
+ #end for
+ ## Contigs
+ #for $i, $trustedcontigs in enumerate( $trustedcontigs )
+ --trusted-contigs $trustedcontigs.file_type.type:$trustedcontigs.file_type.trusted_contigs
+ #end for
+ #for $i, $untrustedcontigs in enumerate( $untrustedcontigs )
+ --untrusted-contigs $untrustedcontigs.file_type.type:$untrustedcontigs.file_type.untrusted_contigs
+ #end for
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+**What it does**
+
+SPAdes – St. Petersburg genome assembler – is intended for both standard isolates and single-cell MDA bacteria assemblies. See http://bioinf.spbau.ru/en/spades for more details on SPAdes.
+
+This wrapper runs SPAdes 3.8, collects the output, and throws away all the temporary files. It also produces a tab file with contig names, length and coverage.
+
+**License**
+
+SPAdes is developed by and copyrighted to Saint-Petersburg Academic University, and is released under GPLv2.
+
+This wrapper is copyrighted by Philip Mabon and is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with this program. If not, see http://www.gnu.org/licenses/.
+
+**Acknowledgments**
+
+Original wrapper developed by Lionel Guy.
+
+Anton Korobeynikov greatly helped with understanding how SPAdes works, and integrated handy features into SPAdes.
+
+Nicola Soranzo fixed various bugs.
+
+
+ 10.1089/cmb.2012.0021
+
+
diff -r 000000000000 -r 27b90e43e2d8 spades.pl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/spades.pl Mon Jun 06 15:13:06 2016 -0400
@@ -0,0 +1,124 @@
+#!/usr/bin/env perl
+## A wrapper script to call spades.py and collect its output
+use strict;
+use warnings;
+use File::Temp qw/ tempfile tempdir /;
+use File::Copy;
+use Getopt::Long;
+
+# Parse arguments: six output/naming values followed by the spades.py command line.
+die "Usage: $0 contigs contig_stats scaffolds scaffold_stats log name spades_command...\n" if @ARGV < 7;
+my ($out_contigs_file,
+    $out_contigs_stats,
+    $out_scaffolds_file,
+    $out_scaffolds_stats,
+    $out_log_file,
+    $new_name,
+    @sysargs) = @ARGV;
+
+# Directory passed to spades.py with -o; the subs below read the results from it.
+my $output_dir = 'output_dir';
+# Create log handle; $! (not $?) holds the reason when open() fails.
+open my $log, '>', $out_log_file or die "Cannot write to $out_log_file: $!\n";
+
+# Run program
+runSpades(@sysargs);
+collectOutput($new_name);
+extractCoverageLength($out_contigs_file, $out_contigs_stats);
+extractCoverageLength($out_scaffolds_file, $out_scaffolds_stats);
+print $log "Done\n";
+close $log or die "Cannot close $out_log_file: $!\n";
+exit 0;
+
+# Run spades: execute the joined arguments plus "-o $output_dir" through the shell; returns 0 on success, logs and dies on failure.
+sub runSpades {
+    my $cmd = join(" ", @_) . " -o $output_dir";
+    my $return_code = system($cmd);
+    return 0 if $return_code == 0;
+    # system() gives -1 (reason in $!) when the command cannot start; otherwise a wait status: exit code in the high byte, signal in the low 7 bits.
+    my $reason = $return_code == -1 ? "could not execute: $!" : sprintf('exit status %d, signal %d', $return_code >> 8, $return_code & 127);
+    print $log "Failed with code $return_code\nCommand $cmd\nMessage: $reason\n";
+    die "Failed with code $return_code\nCommand $cmd\nMessage: $reason\n";
+}
+
+# Collect output: verify that contigs.fasta and scaffolds.fasta exist in $output_dir,
+# deliver them to the output paths (with renamed headers unless $new_name is 'NODE'),
+# then append spades.log to the wrapper log. Dies on any failure; returns 0.
+sub collectOutput{
+    my ($new_name) = @_;
+
+    # Fail early when either assembly file is missing.
+    for my $fasta ('contigs.fasta', 'scaffolds.fasta') {
+        die "Could not find $fasta file\n" if not -e "$output_dir/$fasta";
+    }
+
+    # If a new name is given for the contigs and scaffolds, change them before moving them.
+    if ( $new_name ne 'NODE') {
+        renameContigs($new_name);
+    }
+    else {
+        move("$output_dir/contigs.fasta", $out_contigs_file)
+            or die "Cannot move contigs.fasta to $out_contigs_file: $!\n";
+        move("$output_dir/scaffolds.fasta", $out_scaffolds_file)
+            or die "Cannot move scaffolds.fasta to $out_scaffolds_file: $!\n";
+    }
+
+    # Append the SPAdes log to our own log; $! (not $?) explains a failed open().
+    open my $spades_log, '<', "$output_dir/spades.log"
+        or die "Cannot open log file $output_dir/spades.log: $!";
+    print $log $_ while <$spades_log>;
+    close $spades_log;
+    return 0;
+}
+
+# Change name in contig and scaffolds file: copy contigs.fasta and scaffolds.fasta
+# from $output_dir to their output paths, rewriting every ">NODE_<n>_<rest>" header
+# as "><name>_<n> <rest>". Dies if a file cannot be opened or fully written.
+sub renameContigs{
+    my ($name) = @_;
+
+    # Both FASTA files get the same treatment, so drive one loop from (source, target) pairs.
+    my @jobs = (
+        [ "$output_dir/contigs.fasta",   $out_contigs_file ],
+        [ "$output_dir/scaffolds.fasta", $out_scaffolds_file ],
+    );
+
+    for my $job (@jobs) {
+        my ($source, $target) = @$job;
+        open my $in,  '<', $source or die "Cannot read $source: $!\n";
+        open my $out, '>', $target or die "Cannot write to $target: $!\n";
+
+        while ( my $line = <$in> ) {
+            # Remove the NODE_ prefix so the display id can be rebuilt from our contig
+            # name plus the contig number; the remainder of the header (length, ...)
+            # is kept after a space.
+            if ( $line =~ />NODE_(\d+)_(.+)/ ) {
+                $line = ">$name" . "_$1 $2\n";
+            }
+            print $out $line;
+        }
+
+        close $in;
+        # Buffered write errors only surface at close, so check it.
+        close $out or die "Cannot finish writing $target: $!\n";
+    }
+
+    return 1;
+}
+
+
+# Extract: write a tab-separated table (name, length, coverage) to $out with one row per FASTA header in $in; dies on I/O errors or a malformed header.
+sub extractCoverageLength{
+    my ($in, $out) = @_;
+    open my $fasta, '<', $in  or die "Cannot read $in: $!\n";
+    open my $tab,   '>', $out or die "Cannot write to $out: $!\n";
+    print $tab "#name\tlength\tcoverage\n";
+    while (my $header = <$fasta>) {
+        next unless $header =~ /^>/;    # only header lines are parsed
+        chomp $header;
+        my ($name, $number, $length, $coverage, $component) =
+            $header =~ m/^>(NODE|\S+)_(\d+)(?:_|\s)length_(\d+)_cov_(\d+\.*\d*)_(component_\d+)/ or die "Not all elements found in $header\n";
+        print $tab "${name}_${number}_$component\t$length\t$coverage\n";
+    }
+    close $tab or die "Cannot finish writing $out: $!\n";    # the lexical $fasta handle is closed when it goes out of scope
+}
diff -r 000000000000 -r 27b90e43e2d8 tool_dependencies.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml Mon Jun 06 15:13:06 2016 -0400
@@ -0,0 +1,6 @@
+
+
+
+
+
+