Mercurial > repos > nml > metaspades
changeset 1:01a241476407 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaspades commit 9401451df4a985ef5686864eaadafa077ffc0877
author | iuc |
---|---|
date | Thu, 02 Mar 2017 16:04:56 -0500 |
parents | e93c1a0678cd |
children | 05c394313b1c |
files | CHANGE README.md metaspades.xml spades.pl tool_dependencies.xml |
diffstat | 5 files changed, 100 insertions(+), 259 deletions(-) [+] |
line wrap: on
line diff
--- a/CHANGE Tue Aug 09 10:46:28 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,4 +0,0 @@ -1.0 -=== - - Supports SPades 3.9 - - CHANGE: Improved memory consumption in metagenomic pipeline.
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/README.md Thu Mar 02 16:04:56 2017 -0500 @@ -0,0 +1,17 @@ +**License** + +SPAdes is developed by and copyrighted to Saint-Petersburg Academic University, and is released under GPLv2. + +This wrapper is copyrighted by Philip Mabon and is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with this program. If not, see http://www.gnu.org/licenses/. + +**Acknowledgments** + +Original wrapper developed by Lionel Guy. + +Anton Korobeynikov greatly helped in understanding how SPAdes works, and integrated handy features into SPAdes. + +Nicola Soranzo fixed various bugs.
--- a/metaspades.xml Tue Aug 09 10:46:28 2016 -0400 +++ b/metaspades.xml Thu Mar 02 16:04:56 2017 -0500 @@ -1,139 +1,97 @@ -<tool id="metaspades" name="metaspades" version="1.0"> - <description>genome assembler for metagenomics datasets</description> - <requirements> - <requirement type="package" version="3.9.0">spades</requirement> - </requirements> - <command interpreter="perl">spades.pl - $out_contigs - $out_contig_stats - $out_scaffolds - $out_scaffold_stats - $out_log - - ## if the first fileset is a paired-collection, use the key as the name - #if $files[0].file_type.type == "paired-collection": - $files[0].file_type.fastq_collection.name - #else: - NODE - #end if +<tool id="metaspades" name="metaSPAdes" version="3.9.0"> + <description>assembler for metagenomics datasets</description> + <requirements> + <requirement type="package" version="3.9.0">spades</requirement> + </requirements> + <stdio> + <exit_code range="1:" /> + </stdio> + <command> + <![CDATA[ ## A real command looks like: spades.py -k 21,33,55,77,99,127 --careful -1 Y.fastq.gz -2 X.fastq.gz -t 24 -o output - spades.py - ## Forces unzipped output, faster - --disable-gzip-output - --meta - $onlyassembler - - -t \${GALAXY_SLOTS:-16} - + spades.py -o . 
--disable-gzip-output --meta $onlyassembler -t \${GALAXY_SLOTS:-16} #if not $kmer_choice.auto_kmer_choice: - -k "$kmer_choice.kmers" + -k "$kmer_choice.kmers" #end if - ## Sequence files - #set num=1 - #if str( $lib_type ) == "paired_end": + #set num=1 + #if str( $lib_type ) == "paired_end": #set prefix = 'pe' - #end if - --$prefix$num-$orientation - #for $file in $files - #if $file.file_type.type == "separate" - --$prefix$num-1 fastq:$file.file_type.fwd_reads - --$prefix$num-2 fastq:$file.file_type.rev_reads + #end if + --$prefix$num-$orientation + #for $file in $files + #if $file.file_type.type == "separate" + --$prefix$num-1 fastq:$file.file_type.fwd_reads + --$prefix$num-2 fastq:$file.file_type.rev_reads #elif $file.file_type.type == "interleaved" - --$prefix$num-12 fastq:$file.file_type.interleaved_reads + --$prefix$num-12 fastq:$file.file_type.interleaved_reads #elif $file.file_type.type == "paired-collection" - --$prefix$num-1 fastq:$file.file_type.fastq_collection.forward - --$prefix$num-2 fastq:$file.file_type.fastq_collection.reverse + --$prefix$num-1 fastq:$file.file_type.fastq_collection.forward + --$prefix$num-2 fastq:$file.file_type.fastq_collection.reverse #end if - #end for - - - </command> - <inputs> - <param name="onlyassembler" type="boolean" truevalue="--only-assembler" falsevalue="" checked="False" label="Run only assembly? (without read error correction)" /> - <conditional name="kmer_choice"> - <param name="auto_kmer_choice" type="boolean" checked="False" truevalue="true" falsevalue="false" label="Automatically choose k-mer values" help="k-mer choices can be chosen by SPAdes instead of being entered manually" /> - <when value="false"> - <param name="kmers" type="text" label="K-mers to use, separated by commas" value="21,33,55" help="Comma-separated list of k-mer sizes to be used (all values must be odd, less than 128, listed in ascending order, and smaller than the read length). The default value is 21,33,55." 
/> - </when> - <when value="true"> </when> - </conditional> - - <!-- Reads --> - - <param name="lib_type" type="select" label="Library type"> - <option value="paired_end">Paired-end</option> - </param> - <param name="orientation" type="select" label="Orientation"> - <option value="fr" selected="true">-> <- (fr)</option> - <option value="rf"><- -> (rf)</option> - <option value="ff">-> -> (ff)</option> - </param> - <repeat name="files" title="Files" min="1"> - <conditional name="file_type"> - <param name="type" type="select" label="Select file format"> - <option value="separate">Separate input files</option> - <option value="interleaved">Interleaved files</option> - <option value="paired-collection">Paired List Collection</option> - </param> - <when value="separate"> - <param name="fwd_reads" type="data" format="fastq" label="Forward reads" help="FASTQ format" /> - <param name="rev_reads" type="data" format="fastq" label="Reverse reads" help="FASTQ format" /> - </when> - <when value="interleaved"> - <param name="interleaved_reads" type="data" format="fastq" label="Interleaved paired reads" help="FASTQ format" /> - </when> - <when value="paired-collection"> - <param name="fastq_collection" type="data_collection" label="Paired-end reads collection" optional="false" format="fastq" collection_type="paired" help="FASTQ format" /> - </when> - </conditional> - </repeat> - - - </inputs> - <outputs> - <data name="out_contigs" format="fasta" label="SPAdes contigs (fasta)" /> - <data name="out_contig_stats" format="tabular" label="SPAdes contig stats" /> - <data name="out_scaffolds" format="fasta" label="SPAdes scaffolds (fasta)" /> - <data name="out_scaffold_stats" format="tabular" label="SPAdes scaffold stats" /> - <data name="out_log" format="txt" label="SPAdes log" /> - </outputs> - <tests> - <test> - <param name="sc" value="false" /> - <param name="careful" value="false" /> - <param name="kmers" value="33,55" /> - <param name="lib_type" value="paired_end" /> - <param 
name="fwd_reads" value="ecoli_1K_1.fq" ftype="fastq" /> - <param name="rev_reads" value="ecoli_1K_2.fq" ftype="fastq" /> - <output name="out_contigs" file="reference_1K.fa" ftype="fasta" compare="re_match" lines_diff="1" /> - </test> - </tests> - <help> + #end for + ]]> + </command> + <inputs> + <param name="onlyassembler" type="boolean" truevalue="--only-assembler" falsevalue="" checked="False" label="Run only assembly? (without read error correction)" /> + <conditional name="kmer_choice"> + <param name="auto_kmer_choice" type="boolean" checked="False" truevalue="true" falsevalue="false" label="Automatically choose k-mer values" help="k-mer choices can be chosen by SPAdes instead of being entered manually" /> + <when value="false"> + <param name="kmers" type="text" label="K-mers to use, separated by commas" value="21,33,55" help="Comma-separated list of k-mer sizes to be used (all values must be odd, less than 128, listed in ascending order, and smaller than the read length). The default value is 21,33,55." 
/> + </when> + <when value="true" /> + </conditional> + <param name="lib_type" type="select" label="Library type"> + <option value="paired_end">Paired-end</option> + </param> + <param label="Orientation" name="orientation" type="select"> + <option selected="true" value="fr"><![CDATA[-> <- (fr)]]></option> + <option value="rf"><![CDATA[<- -> (rf)]]></option> + <option value="ff"><![CDATA[-> -> (ff)]]></option> + </param> + <repeat name="files" title="Files" min="1"> + <conditional name="file_type"> + <param name="type" type="select" label="Select file format"> + <option value="separate">Separate input files</option> + <option value="interleaved">Interleaved files</option> + <option value="paired-collection">Paired List Collection</option> + </param> + <when value="separate"> + <param name="fwd_reads" type="data" format="fastq" label="Forward reads" help="FASTQ format" /> + <param name="rev_reads" type="data" format="fastq" label="Reverse reads" help="FASTQ format" /> + </when> + <when value="interleaved"> + <param name="interleaved_reads" type="data" format="fastq" label="Interleaved paired reads" help="FASTQ format" /> + </when> + <when value="paired-collection"> + <param name="fastq_collection" type="data_collection" label="Paired-end reads collection" format="fastq" collection_type="paired" help="FASTQ format" /> + </when> + </conditional> + </repeat> + </inputs> + <outputs> + <data name="out_contigs" format="fasta" from_work_dir="contigs.fasta" label="SPAdes contigs (fasta)" /> + <data name="out_scaffolds" format="fasta" from_work_dir="scaffolds.fasta" label="SPAdes scaffolds (fasta)" /> + <data name="out_log" format="txt" from_work_dir="spades.log" label="SPAdes log" /> + </outputs> + <tests> + <test> + <param name="sc" value="false" /> + <param name="careful" value="false" /> + <param name="kmers" value="33,55" /> + <param name="lib_type" value="paired_end" /> + <param name="fwd_reads" value="ecoli_1K_1.fq" ftype="fastq" /> + <param name="rev_reads" 
value="ecoli_1K_2.fq" ftype="fastq" /> + <output name="out_contigs" file="reference_1K.fa" ftype="fasta" compare="re_match" lines_diff="1" /> + </test> + </tests> + <help> +<![CDATA[ **What it does** SPAdes – St. Petersburg genome assembler – is intended for both standard isolates and single-cell MDA bacteria assemblies. See http://bioinf.spbau.ru/en/spades for more details on SPAdes. - -This wrapper runs SPAdes 3.9, collects the output, and throws away all the temporary files. It also produces a tab file with contig names, length and coverage. - -**License** - -SPAdes is developed by and copyrighted to Saint-Petersburg Academic University, and is released under GPLv2. - -This wrapper is copyrighted by Philip Mabon and is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. - -This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with this program. If not, see http://www.gnu.org/licenses/. - -** Acknowledgments ** - -Original wrapper developed by Lionel Guy. - -Anton Korobeynikov greatlty helped understanding how SPAdes work, and integrated handy features into SPAdes. - -Nicola Soranzo fixed various bugs. - </help> +]]> + </help> <citations> <citation type="doi">10.1089/cmb.2012.0021</citation> </citations>
--- a/spades.pl Tue Aug 09 10:46:28 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,124 +0,0 @@ -#!/usr/bin/env perl -## A wrapper script to call spades.py and collect its output -use strict; -use warnings; -use File::Temp qw/ tempfile tempdir /; -use File::Copy; -use Getopt::Long; - -# Parse arguments -my ($out_contigs_file, - $out_contigs_stats, - $out_scaffolds_file, - $out_scaffolds_stats, - $out_log_file, - $new_name, - @sysargs) = @ARGV; - - -my $output_dir = 'output_dir'; - -# Create log handle -open my $log, '>', $out_log_file or die "Cannot write to $out_log_file: $?\n"; - -# Run program -runSpades(@sysargs); -collectOutput($new_name); -extractCoverageLength($out_contigs_file, $out_contigs_stats); -extractCoverageLength($out_scaffolds_file, $out_scaffolds_stats); -print $log "Done\n"; -close $log; -exit 0; - -# Run spades -sub runSpades { - my $cmd = join(" ", @_) . " -o $output_dir"; - my $return_code = system($cmd); - if ($return_code) { - print $log "Failed with code $return_code\nCommand $cmd\nMessage: $?\n"; - die "Failed with code $return_code\nCommand $cmd\nMessage: $?\n"; - } - return 0; -} - -# Collect output -sub collectOutput{ - my ($new_name) = @_; - - # To do: check that the files are there - # Collects output - if ( not -e "$output_dir/contigs.fasta") { - die "Could not find contigs.fasta file\n"; - } - if ( not -e "$output_dir/scaffolds.fasta") { - die "Could not find scaffolds.fasta file\n"; - } - - #if a new name is given for the contigs and scaffolds, change them before moving them - if ( $new_name ne 'NODE') { - renameContigs($new_name); - } - else { - move "$output_dir/contigs.fasta", $out_contigs_file; - move "$output_dir/scaffolds.fasta", $out_scaffolds_file; - } - - - - open LOG, '<', "$output_dir/spades.log" - or die "Cannot open log file $output_dir/spades.log: $?"; - print $log $_ while (<LOG>); - return 0; -} - -#Change name in contig and scaffolds file -sub renameContigs{ - my ($name) = @_; - - open my $in, 
'<',"$output_dir/contigs.fasta" or die $!; - open my $out,'>', $out_contigs_file; - - while ( my $line = <$in>) { - #remove the NODE_ so we can rebuilt the display_id with our contig name with the contig number. - #also move the remainder of the length - if ( $line =~ />NODE_(\d+)_(.+)/) { - $line = ">$name" . "_$1 $2\n"; - } - print $out $line; - } - close $in; - close $out; - - - open $in, '<',"$output_dir/scaffolds.fasta" or die $!; - open $out,'>', $out_scaffolds_file; - - while ( my $line = <$in>) { - #remove the NODE_ so we can rebuilt the display_id with our contig name with the contig number. - #also move the remainder of the length - if ( $line =~ />NODE_(\d+)_(.+)/) { - $line = ">$name" . "_$1 $2\n"; - } - print $out $line; - } - close $in; - close $out; - -} - - -# Extract -sub extractCoverageLength{ - my ($in, $out) = @_; - open FASTA, '<', $in or die $!; - open TAB, '>', $out or die $!; - print TAB "#name\tlength\tcoverage\n"; - while (<FASTA>){ - next unless /^>/; - chomp; - die "Not all elements found in $_\n" if (! m/^>(NODE|\S+)_(\d+)(?:_|\s)length_(\d+)_cov_(\d+\.*\d*)/); - my ($name,$n, $l, $cov) = ($1,$2, $3, $4); - print TAB "$name" . "_$n\t$l\t$cov\n"; - } - close TAB; -}
--- a/tool_dependencies.xml Tue Aug 09 10:46:28 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,6 +0,0 @@ -<?xml version="1.0"?> -<tool_dependency> - <package name="spades" version="3.9.0"> - <repository changeset_revision="d8c8c3dc8f9a" name="package_spades_3_9_0" owner="nml" toolshed="https://toolshed.g2.bx.psu.edu" /> - </package> -</tool_dependency>