view tools/spades_2_5/spades.pl @ 1:0f8b2da62d7d draft

Support for SPAdes 2.5.0. Added a tab-separated output with coverage vs. length info for each contig.
author lionelguy
date Mon, 19 Aug 2013 09:06:17 -0400
parents
children b5ce24f34dd7
line wrap: on
line source

#!/usr/bin/env perl
## A wrapper script to call spades.py and collect its output
use strict;
use warnings;
use File::Temp qw/ tempfile tempdir /;
use File::Copy;
use Getopt::Long;

# Parse arguments
my ($out_contigs_file,
    $out_contigs_stats,
    $out_scaffolds_file,
    $out_scaffolds_stats,
    $out_log_file,
    @sysargs) = @ARGV;

## GetOptions not compatible with parsing the rest of the arguments in an array.
## Keeping the not-so-nice parse-in-one-go method, without named arguments.
# GetOptions(
#     'contigs-file=s'    => \$out_contigs_file,
#     'contigs-stats=s'   => \$out_contigs_stats,
#     'scaffolds-file=s'  => \$out_scaffolds_file,
#     'scaffolds-stats=s' => \$out_scaffolds_stats,
#     'out_log_file=s'    => \$out_log_file,
# );

# my @sysargs = @ARGV;

# Create temporary folder to store files, delete after use
#my $output_dir = tempdir( CLEANUP => 0 );
my $output_dir = tempdir( CLEANUP => 1 );
# Link "dat" files as fastq, otherwise spades complains about file format

# Create log handle
open my $log, '>', $out_log_file or die "Cannot write to $out_log_file: $?\n";

# Run program
# To do: record time
&runSpades(@sysargs);
&collectOutput();
&extractCoverageLength($out_contigs_file, $out_contigs_stats);
&extractCoverageLength($out_scaffolds_file, $out_scaffolds_stats);
print $log "Done\n";
close $log;
exit 0;

# Run spades
sub runSpades {
    my $cmd = join(" ", @_) . " -o $output_dir";
    my $return_code = system($cmd);
    if ($return_code) {
	print $log "Failed with code $return_code\nCommand $cmd\nMessage: $?\n";
	die "Failed with code $return_code\nCommand $cmd\nMessage: $?\n";
    }
    return 0;
}

# Collect output
sub collectOutput{
    # To do: check that the files are there
    # Collects output
    move "$output_dir/contigs.fasta", $out_contigs_file;
    move "$output_dir/scaffolds.fasta", $out_scaffolds_file;
    open LOG, '<', "$output_dir/spades.log" 
	or die "Cannot open log file $output_dir/spades.log: $?";
    print $log $_ while (<LOG>);
    return 0;
}

# Extract
sub extractCoverageLength{
    my ($in, $out) = @_;
    open FASTA, '<', $in or die $!;
    open TAB, '>', $out or die $!;
    while (<FASTA>){
	next unless /^>/;
	chomp;
	my @a = split(/\s/, $_);
	my ($NODE, $n, $LENGTH, $l, $COV, $cov) = split(/_/, $a[0]);
	die "Not all elements found in $_\n" unless ($n && $l && $cov);
	print TAB "$n\t$l\t$cov\n";
    }
    close TAB;
}