view tools/spades_2_5/spades.pl @ 3:d82f18c76309 draft

Uploaded spades wrapper 0.6. Supports spades 2.5.1. Also removes the need for the hack at installation time, by fixing the problem with input files. Shows the license for the tool as well.
author lionelguy
date Thu, 12 Sep 2013 07:46:54 -0400
parents b5ce24f34dd7
children 95ddc2380130
line wrap: on
line source

#!/usr/bin/env perl
## A wrapper script to call spades.py and collect its output
use strict;
use warnings;
use File::Temp qw/ tempfile tempdir /;
use File::Copy;
use Getopt::Long;

# Parse arguments
my ($out_contigs_file,
    $out_contigs_stats,
    $out_scaffolds_file,
    $out_scaffolds_stats,
    $out_log_file,
    @sysargs) = @ARGV;

## GetOptions not compatible with parsing the rest of the arguments in an array.
## Keeping the not-so-nice parse-in-one-go method, without named arguments.
# GetOptions(
#     'contigs-file=s'    => \$out_contigs_file,
#     'contigs-stats=s'   => \$out_contigs_stats,
#     'scaffolds-file=s'  => \$out_scaffolds_file,
#     'scaffolds-stats=s' => \$out_scaffolds_stats,
#     'out_log_file=s'    => \$out_log_file,
# );

# my @sysargs = @ARGV;

# Create temporary folder to store files, delete after use
#my $output_dir = tempdir( CLEANUP => 0 );
my $output_dir = 'output_dir';
# Link "dat" files as fastq, otherwise spades complains about file format

# Create log handle
open my $log, '>', $out_log_file or die "Cannot write to $out_log_file: $?\n";

# Run program
# To do: record time
&runSpades(@sysargs);
&collectOutput();
&extractCoverageLength($out_contigs_file, $out_contigs_stats);
&extractCoverageLength($out_scaffolds_file, $out_scaffolds_stats);
print $log "Done\n";
close $log;
exit 0;

# Run spades
sub runSpades {
    my $cmd = join(" ", @_) . " -o $output_dir";
    my $return_code = system($cmd);
    if ($return_code) {
	print $log "Failed with code $return_code\nCommand $cmd\nMessage: $?\n";
	die "Failed with code $return_code\nCommand $cmd\nMessage: $?\n";
    }
    return 0;
}

# Collect output
sub collectOutput{
    # To do: check that the files are there
    # Collects output
    move "$output_dir/contigs.fasta", $out_contigs_file;
    move "$output_dir/scaffolds.fasta", $out_scaffolds_file;
    open LOG, '<', "$output_dir/spades.log" 
	or die "Cannot open log file $output_dir/spades.log: $?";
    print $log $_ while (<LOG>);
    return 0;
}

# Extract
sub extractCoverageLength{
    my ($in, $out) = @_;
    open FASTA, '<', $in or die $!;
    open TAB, '>', $out or die $!;
    print TAB "#name\tlength\tcoverage\n";
    while (<FASTA>){
	next unless /^>/;
	chomp;
	my @a = split(/\s/, $_);
	my ($NODE, $n, $LENGTH, $l, $COV, $cov) = split(/_/, $a[0]);
	die "Not all elements found in $_\n" unless ($n && $l && $cov);
	print TAB "NODE_$n\t$l\t$cov\n";
    }
    close TAB;
}