Mercurial > repos > lionelguy > spades
comparison tools/spades_2_5/spades.pl @ 1:0f8b2da62d7d draft
Support for SPAdes 2.5.0. Added a tab-separated output with coverage vs. length info for each contig.
author | lionelguy |
---|---|
date | Mon, 19 Aug 2013 09:06:17 -0400 |
parents | |
children | b5ce24f34dd7 |
comparison
equal
deleted
inserted
replaced
0:a407a6ed437d | 1:0f8b2da62d7d |
---|---|
1 #!/usr/bin/env perl | |
2 ## A wrapper script to call spades.py and collect its output | |
3 use strict; | |
4 use warnings; | |
5 use File::Temp qw/ tempfile tempdir /; | |
6 use File::Copy; | |
7 use Getopt::Long; | |
8 | |
9 # Parse arguments | |
10 my ($out_contigs_file, | |
11 $out_contigs_stats, | |
12 $out_scaffolds_file, | |
13 $out_scaffolds_stats, | |
14 $out_log_file, | |
15 @sysargs) = @ARGV; | |
16 | |
17 ## GetOptions not compatible with parsing the rest of the arguments in an array. | |
18 ## Keeping the not-so-nice parse-in-one-go method, without named arguments. | |
19 # GetOptions( | |
20 # 'contigs-file=s' => \$out_contigs_file, | |
21 # 'contigs-stats=s' => \$out_contigs_stats, | |
22 # 'scaffolds-file=s' => \$out_scaffolds_file, | |
23 # 'scaffolds-stats=s' => \$out_scaffolds_stats, | |
24 # 'out_log_file=s' => \$out_log_file, | |
25 # ); | |
26 | |
27 # my @sysargs = @ARGV; | |
28 | |
29 # Create temporary folder to store files, delete after use | |
30 #my $output_dir = tempdir( CLEANUP => 0 ); | |
31 my $output_dir = tempdir( CLEANUP => 1 ); | |
32 # Link "dat" files as fastq, otherwise spades complains about file format | |
33 | |
34 # Create log handle | |
35 open my $log, '>', $out_log_file or die "Cannot write to $out_log_file: $?\n"; | |
36 | |
37 # Run program | |
38 # To do: record time | |
39 &runSpades(@sysargs); | |
40 &collectOutput(); | |
41 &extractCoverageLength($out_contigs_file, $out_contigs_stats); | |
42 &extractCoverageLength($out_scaffolds_file, $out_scaffolds_stats); | |
43 print $log "Done\n"; | |
44 close $log; | |
45 exit 0; | |
46 | |
47 # Run spades | |
48 sub runSpades { | |
49 my $cmd = join(" ", @_) . " -o $output_dir"; | |
50 my $return_code = system($cmd); | |
51 if ($return_code) { | |
52 print $log "Failed with code $return_code\nCommand $cmd\nMessage: $?\n"; | |
53 die "Failed with code $return_code\nCommand $cmd\nMessage: $?\n"; | |
54 } | |
55 return 0; | |
56 } | |
57 | |
58 # Collect output | |
59 sub collectOutput{ | |
60 # To do: check that the files are there | |
61 # Collects output | |
62 move "$output_dir/contigs.fasta", $out_contigs_file; | |
63 move "$output_dir/scaffolds.fasta", $out_scaffolds_file; | |
64 open LOG, '<', "$output_dir/spades.log" | |
65 or die "Cannot open log file $output_dir/spades.log: $?"; | |
66 print $log $_ while (<LOG>); | |
67 return 0; | |
68 } | |
69 | |
70 # Extract | |
71 sub extractCoverageLength{ | |
72 my ($in, $out) = @_; | |
73 open FASTA, '<', $in or die $!; | |
74 open TAB, '>', $out or die $!; | |
75 while (<FASTA>){ | |
76 next unless /^>/; | |
77 chomp; | |
78 my @a = split(/\s/, $_); | |
79 my ($NODE, $n, $LENGTH, $l, $COV, $cov) = split(/_/, $a[0]); | |
80 die "Not all elements found in $_\n" unless ($n && $l && $cov); | |
81 print TAB "$n\t$l\t$cov\n"; | |
82 } | |
83 close TAB; | |
84 } |