Next changeset 1:a415d510332b (2016-11-07) |
Commit message:
planemo upload commit 6cd8dfa9e518c63a0b0e3fd5167424cffd3829fc |
added:
CHANGE plasmidSPAdes.xml spades.pl tool_dependencies.xml |
b |
diff -r 000000000000 -r 27b90e43e2d8 CHANGE --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/CHANGE Mon Jun 06 15:13:06 2016 -0400 |
b |
@@ -0,0 +1,4 @@ +1.0 +=== + - Supports SPAdes 3.8 + - NEW: Added plasmidSPAdes – a pipeline designed for extracting and assembling plasmids from WGS data sets. |
b |
diff -r 000000000000 -r 27b90e43e2d8 plasmidSPAdes.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/plasmidSPAdes.xml Mon Jun 06 15:13:06 2016 -0400 |
[ |
b'@@ -0,0 +1,243 @@\n+<tool id="plasmidspades" name="plasmidspades" version="1.0">\n+ <description>genome assembler for plasmids</description>\n+ <requirements>\n+ <requirement type="package" version="3.8.0">spades</requirement>\n+ </requirements>\n+ <command interpreter="perl">spades.pl \n+ $out_contigs \n+ $out_contig_stats \n+ $out_scaffolds \n+ $out_scaffold_stats \n+ $out_log\n+\n+ ## if the first fileset is a paired-collection, use the key as the name\n+ #if $files[0].file_type.type == "paired-collection":\n+ $files[0].file_type.fastq_collection.name\n+ #else:\n+ NODE\n+ #end if\n+ ## A real command looks like: spades.py -k 21,33,55,77,99,127 --careful -1 Y.fastq.gz -2 X.fastq.gz -t 24 -o output\n+ spades.py\n+ ## Forces unzipped output, faster\n+ --disable-gzip-output\n+ --plasmid\n+ $onlyassembler\n+ $careful\n+ -t \\${GALAXY_SLOTS:-16}\n+ -k "$kmers"\n+\n+ #if $cov.state == "auto":\n+ --cov-cutoff \'auto\'\n+ #elif $cov.state == "value":\n+ --cov-cutoff \'$cov.cutoff\'\n+ #end if\n+ \n+ $iontorrent\n+ \n+ ## Sequence files\n+ #set num=1\n+ #if str( $lib_type ) == "paired_end":\n+ #set prefix = \'pe\'\n+ #elif str( $lib_type ) == "mate_paired":\n+ #set prefix = \'mp\'\n+ #elif str( $lib_type ) == "nxmate_paired":\n+ #set prefix = \'nxmate\'\n+ #else:\n+ #set prefix = \'hqmp\'\n+ #end if\n+ --$prefix$num-$orientation \n+ #for $file in $files\n+\t#if $file.file_type.type == "separate"\n+ --$prefix$num-1 fastq:$file.file_type.fwd_reads\n+ --$prefix$num-2 fastq:$file.file_type.rev_reads\n+ #elif $file.file_type.type == "interleaved"\n+ --$prefix$num-12 fastq:$file.file_type.interleaved_reads\n+ #elif $file.file_type.type == "unpaired"\n+ --$prefix$num-s fastq:$file.file_type.unpaired_reads\n+ #elif $file.file_type.type == "paired-collection"\n+ --$prefix$num-1 fastq:$file.file_type.fastq_collection.forward\n+ --$prefix$num-2 fastq:$file.file_type.fastq_collection.reverse\n+ #end if\n+ #end for\n+\n+ ## PacBio reads\n+ #for $i, $pacbiolib in enumerate( $pacbio 
)\n+ --pacbio fastq:$pacbiolib.pacbio_reads\n+ #end for\n+ ## Nanopore\n+ #for $i, $nanoporelib in enumerate( $nanopore )\n+ --nanopore fastq:$nanoporelib.nanopore_reads\n+ #end for\n+ ## Sanger\n+ #for $i, $sangerlib in enumerate( $sanger )\n+ --sanger $sangerlib.file_type.type:$sangerlib.file_type.sanger_reads\n+ #end for \n+ ## Contigs\n+ #for $i, $trustedcontigs in enumerate( $trustedcontigs )\n+ --trusted-contigs $trustedcontigs.file_type.type:$trustedcontigs.file_type.trusted_contigs\n+ #end for\n+ #for $i, $untrustedcontigs in enumerate( $untrustedcontigs )\n+ --untrusted-contigs $untrustedcontigs.file_type.type:$untrustedcontigs.file_type.untrusted_contigs\n+ #end for\n+ </command>\n+ <inputs>\n+ <param name="onlyassembler" type="boolean" truevalue="--only-assembler" falsevalue="" checked="False" label="Run only assembly? (without read error correction)" />\n+ <param name="careful" type="boolean" truevalue="--careful" falsevalue="" checked="True" label="Careful correction?" help="Tries to reduce number of mismatches and short indels. Also runs MismatchCorrector \xe2\x80\x93 a post processing tool, which uses BWA tool (comes with SPAdes)." />\n+ <param name="kmers" type="text" label="K-mers to use, separated by commas" value="21,33,55" help="Comma-separated list of k-mer sizes to be used (all values must be odd, less than 128, listed in ascending order, and smaller than the read length). The default value is 21,33,55." >\n+\n+ </param>\n+ <conditional name="cov">\n+ <param name="state" type="select" label="Coverage Cutoff">\n+ <option value="off">Off</option>\n+ <option value="value">User Specific</option>\n+ <option value="auto">Auto</option>\n+ </param>\n+ <when value="off">\n+ </when>\n+ <when '..b'epeat name="trustedcontigs" title="Trusted contigs" help="Reliable contigs of the same genome, which are likely to have no misassemblies and small rate of other errors (e.g. mismatches and indels). 
This option is not intended for contigs of the related species.">\n+ <conditional name="file_type">\n+\t<param name="type" type="select" label="Select file format">\n+\t <option value="fasta">fasta</option>\n+\t <option value="fastq">fastq</option>\n+\t</param>\n+\t<when value="fasta">\n+\t <param name="trusted_contigs" type="data" format="fasta" label="Trusted contigs" help="FASTA format" />\n+\t</when>\n+\t<when value="fastq">\n+\t <param name="trusted_contigs" type="data" format="fastq" label="Trusted contigs" help="FASTQ format" />\n+\t</when>\n+ </conditional>\n+ </repeat>\n+ <repeat name="untrustedcontigs" title="Untrusted contigs" help="Contigs of the same genome, quality of which is average or unknown. Contigs of poor quality can be used but may introduce errors in the assembly. This option is also not intended for contigs of the related species.">\n+ <conditional name="file_type">\n+\t<param name="type" type="select" label="Select file format">\n+\t <option value="fasta">fasta</option>\n+\t <option value="fastq">fastq</option>\n+\t</param>\n+\t<when value="fasta">\n+\t <param name="untrusted_contigs" type="data" format="fasta" label="Untrusted contigs" help="FASTA format" />\n+\t</when>\n+\t<when value="fastq">\n+\t <param name="untrusted_contigs" type="data" format="fastq" label="Untrusted contigs" help="FASTQ format" />\n+\t</when>\n+ </conditional>\n+ </repeat>\n+ </inputs>\n+ <outputs>\n+ <data name="out_contigs" format="fasta" label="SPAdes contigs (fasta)" />\n+ <data name="out_contig_stats" format="tabular" label="SPAdes contig stats" />\n+ <data name="out_scaffolds" format="fasta" label="SPAdes scaffolds (fasta)" />\n+ <data name="out_scaffold_stats" format="tabular" label="SPAdes scaffold stats" />\n+ <data name="out_log" format="txt" label="SPAdes log" />\n+ </outputs>\n+ <tests>\n+ <test>\n+ <param name="sc" value="false" />\n+ <param name="careful" value="false" />\n+ <param name="kmers" value="33,55" />\n+ <param name="lib_type" 
value="paired_end" />\n+ <param name="fwd_reads" value="ecoli_1K_1.fq" ftype="fastq" />\n+ <param name="rev_reads" value="ecoli_1K_2.fq" ftype="fastq" />\n+ <output name="out_contigs" file="reference_1K.fa" ftype="fasta" compare="re_match" lines_diff="1" />\n+ </test> \n+ </tests>\n+ <help>\n+**What it does**\n+\n+SPAdes \xe2\x80\x93 St. Petersburg genome assembler \xe2\x80\x93 is intended for both standard isolates and single-cell MDA bacteria assemblies. See http://bioinf.spbau.ru/en/spades for more details on SPAdes.\n+\n+This wrapper runs SPAdes 3.8, collects the output, and throws away all the temporary files. It also produces a tab file with contig names, length and coverage. \n+\n+**License**\n+\n+SPAdes is developed by and copyrighted to Saint-Petersburg Academic University, and is released under GPLv2.\n+\n+This wrapper is copyrighted by Philip Mabon and is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.\n+\n+This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.\n+\n+You should have received a copy of the GNU General Public License along with this program. If not, see http://www.gnu.org/licenses/.\n+\n+**Acknowledgments**\n+\n+Original wrapper developed by Lionel Guy.\n+\n+Anton Korobeynikov greatly helped in understanding how SPAdes works, and integrated handy features into SPAdes.\n+\n+Nicola Soranzo fixed various bugs.\n+ </help>\n+ <citations>\n+ <citation type="doi">10.1089/cmb.2012.0021</citation>\n+ </citations>\n+</tool>\n' |
b |
diff -r 000000000000 -r 27b90e43e2d8 spades.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/spades.pl Mon Jun 06 15:13:06 2016 -0400 |
b |
#!/usr/bin/env perl
## A wrapper script to call spades.py and collect its output.
##
## Usage:
##   spades.pl <out_contigs> <out_contig_stats> <out_scaffolds>
##             <out_scaffold_stats> <out_log> <name> <command> [args...]
##
## <name> is the prefix given to the sequence identifiers in the collected
## FASTA files; the literal value 'NODE' keeps the identifiers unchanged.
## Everything after <name> is the command to run; "-o output_dir" is
## appended to it.
use strict;
use warnings;
use File::Temp qw/ tempfile tempdir /;
use File::Copy;
use Getopt::Long;

# Parse arguments: six fixed positional values, then the command to run.
die "Usage: $0 out_contigs out_contig_stats out_scaffolds "
    . "out_scaffold_stats out_log name command [args...]\n"
    if @ARGV < 7;

my ($out_contigs_file,
    $out_contigs_stats,
    $out_scaffolds_file,
    $out_scaffolds_stats,
    $out_log_file,
    $new_name,
    @sysargs) = @ARGV;

# Directory handed to the command through -o; all results are read from it.
my $output_dir = 'output_dir';

# Create log handle ($! holds the reason an open failed, $? does not).
open my $log, '>', $out_log_file
    or die "Cannot write to $out_log_file: $!\n";

# Run program
runSpades(@sysargs);
collectOutput($new_name);
extractCoverageLength($out_contigs_file, $out_contigs_stats);
extractCoverageLength($out_scaffolds_file, $out_scaffolds_stats);
print {$log} "Done\n";
close $log or die "Cannot close $out_log_file: $!\n";
exit 0;

# Run the command given in @_ with "-o $output_dir" appended.
#
# The command is executed in list form, so the arguments reach the program
# exactly as this script received them instead of being parsed by a second
# shell (which would split paths on spaces and interpret metacharacters).
#
# Returns 0 on success. On failure the reason is written to the log and the
# script dies with the same message.
sub runSpades {
    my @cmd      = (@_, '-o', $output_dir);
    my $cmd_text = join(' ', @cmd);

    system(@cmd);
    my $status   = $?;
    my $os_error = "$!";    # captured right away, before anything resets it
    return 0 if $status == 0;

    # $? packs the exit code in the high byte and the signal in the low bits;
    # -1 means the program could not be started at all.
    my $reason;
    if ($status == -1) {
        $reason = "could not be started: $os_error";
    }
    elsif ($status & 127) {
        $reason = sprintf('was killed by signal %d', $status & 127);
    }
    else {
        $reason = sprintf('exited with code %d', $status >> 8);
    }

    my $message = "Failed with code $status\nCommand $cmd_text\n"
                . "Message: command $reason\n";
    print {$log} $message;
    die $message;
}

# Collect output: put contigs.fasta and scaffolds.fasta from $output_dir at
# their destinations (renaming the sequence identifiers unless $new_name is
# 'NODE') and copy spades.log into the wrapper log.
#
# Dies if an expected file is missing or cannot be moved/read. Returns 0.
sub collectOutput {
    my ($new_name) = @_;

    for my $fasta ('contigs.fasta', 'scaffolds.fasta') {
        die "Could not find $fasta file\n" if not -e "$output_dir/$fasta";
    }

    # If a new name is given for the contigs and scaffolds, rewrite the
    # identifiers while copying; otherwise the files are moved untouched.
    if ($new_name ne 'NODE') {
        renameContigs($new_name);
    }
    else {
        move("$output_dir/contigs.fasta", $out_contigs_file)
            or die "Cannot move $output_dir/contigs.fasta to $out_contigs_file: $!\n";
        move("$output_dir/scaffolds.fasta", $out_scaffolds_file)
            or die "Cannot move $output_dir/scaffolds.fasta to $out_scaffolds_file: $!\n";
    }

    my $spades_log = "$output_dir/spades.log";
    open my $spades_fh, '<', $spades_log
        or die "Cannot open log file $spades_log: $!";
    print {$log} $_ while <$spades_fh>;
    close $spades_fh;
    return 0;
}

# Change name in contig and scaffolds file: both FASTA files are rewritten to
# their destinations with $name replacing the NODE prefix.
sub renameContigs {
    my ($name) = @_;

    renameFastaHeaders("$output_dir/contigs.fasta",   $out_contigs_file,   $name);
    renameFastaHeaders("$output_dir/scaffolds.fasta", $out_scaffolds_file, $name);
    return;
}

# Copy $source to $target, turning every ">NODE_<n>_<rest>" header into
# ">$name_<n> <rest>". All other lines are copied unchanged.
# Dies if either file cannot be opened or the target cannot be flushed.
sub renameFastaHeaders {
    my ($source, $target, $name) = @_;

    open my $in,  '<', $source or die "Cannot read $source: $!\n";
    open my $out, '>', $target or die "Cannot write to $target: $!\n";

    while (my $line = <$in>) {
        # Remove the NODE_ prefix so the identifier can be rebuilt from our
        # name plus the contig number; the remainder (length, coverage, ...)
        # is kept after a space.
        if ($line =~ />NODE_(\d+)_(.+)/) {
            $line = ">$name" . "_$1 $2\n";
        }
        print {$out} $line;
    }

    close $in;
    close $out or die "Cannot close $target: $!\n";
    return;
}

# Extract name, length and coverage from every header of FASTA file $in and
# write them to $out as a tab-separated table with a "#name length coverage"
# header row. Dies if a header does not contain all expected elements.
sub extractCoverageLength {
    my ($in, $out) = @_;

    open my $fasta, '<', $in  or die "Cannot read $in: $!\n";
    open my $tab,   '>', $out or die "Cannot write to $out: $!\n";

    print {$tab} "#name\tlength\tcoverage\n";
    while (my $header = <$fasta>) {
        next unless $header =~ /^>/;
        chomp $header;

        # The separator after the number is "_" in untouched headers and a
        # space in headers rewritten by renameFastaHeaders.
        my ($name, $number, $length, $coverage, $component) =
            $header =~ m/^>(NODE|\S+)_(\d+)(?:_|\s)length_(\d+)_cov_(\d+\.*\d*)_(component_\d+)/
            or die "Not all elements found in $header\n";

        print {$tab} "${name}_${number}_$component\t$length\t$coverage\n";
    }

    close $fasta;
    close $tab or die "Cannot close $out: $!\n";
    return;
}
b |
diff -r 000000000000 -r 27b90e43e2d8 tool_dependencies.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_dependencies.xml Mon Jun 06 15:13:06 2016 -0400 |
b |
@@ -0,0 +1,6 @@ +<?xml version="1.0"?> +<tool_dependency> + <package name="spades" version="3.8.0"> + <repository changeset_revision="c9eab59c0bc2" name="package_spades_3_8_0" owner="nml" toolshed="https://toolshed.g2.bx.psu.edu" /> + </package> +</tool_dependency> |