Mercurial > repos > lionelguy > spades
annotate tools/spades_2_5/spades.pl @ 6:1b1af74a54ae draft
Uploaded
author | lionelguy |
---|---|
date | Thu, 12 Sep 2013 07:49:07 -0400 |
parents | b5ce24f34dd7 |
children | 95ddc2380130 |
rev | line source |
---|---|
1
0f8b2da62d7d
Support for SPAdes 2.5.0. Added a tab-separated output with coverage vs. length info for each contig.
lionelguy
parents:
diff
changeset
|
1 #!/usr/bin/env perl |
0f8b2da62d7d
Support for SPAdes 2.5.0. Added a tab-separated output with coverage vs. length info for each contig.
lionelguy
parents:
diff
changeset
|
2 ## A wrapper script to call spades.py and collect its output |
0f8b2da62d7d
Support for SPAdes 2.5.0. Added a tab-separated output with coverage vs. length info for each contig.
lionelguy
parents:
diff
changeset
|
3 use strict; |
0f8b2da62d7d
Support for SPAdes 2.5.0. Added a tab-separated output with coverage vs. length info for each contig.
lionelguy
parents:
diff
changeset
|
4 use warnings; |
0f8b2da62d7d
Support for SPAdes 2.5.0. Added a tab-separated output with coverage vs. length info for each contig.
lionelguy
parents:
diff
changeset
|
5 use File::Temp qw/ tempfile tempdir /; |
0f8b2da62d7d
Support for SPAdes 2.5.0. Added a tab-separated output with coverage vs. length info for each contig.
lionelguy
parents:
diff
changeset
|
6 use File::Copy; |
0f8b2da62d7d
Support for SPAdes 2.5.0. Added a tab-separated output with coverage vs. length info for each contig.
lionelguy
parents:
diff
changeset
|
7 use Getopt::Long; |
0f8b2da62d7d
Support for SPAdes 2.5.0. Added a tab-separated output with coverage vs. length info for each contig.
lionelguy
parents:
diff
changeset
|
8 |
0f8b2da62d7d
Support for SPAdes 2.5.0. Added a tab-separated output with coverage vs. length info for each contig.
lionelguy
parents:
diff
changeset
|
# Parse arguments
# The first five positional arguments are output paths; everything after
# them is the command line handed to runSpades().
my ($out_contigs_file,
    $out_contigs_stats,
    $out_scaffolds_file,
    $out_scaffolds_stats,
    $out_log_file,
    @sysargs) = @ARGV;

# Refuse to run with an incomplete argument list: without this check an
# undefined log path would be passed straight to open().
die "Usage: $0 <contigs.fasta> <contigs.tab> <scaffolds.fasta> "
    . "<scaffolds.tab> <log> <command> [args...]\n"
    unless defined $out_log_file && @sysargs;

## GetOptions not compatible with parsing the rest of the arguments in an array.
## Keeping the not-so-nice parse-in-one-go method, without named arguments.

# Folder in which the command stores its files. A fixed relative name is
# used (not File::Temp::tempdir), so it is created in the current working
# directory and is not removed by this script.
my $output_dir = 'output_dir';
# TODO: link "dat" files as fastq, otherwise spades complains about file
# format (not implemented in this script).

# Create log handle
open my $log, '>', $out_log_file
    or die "Cannot write to $out_log_file: $!\n";

# Run program
# To do: record time
runSpades(@sysargs);
collectOutput();
extractCoverageLength($out_contigs_file, $out_contigs_stats);
extractCoverageLength($out_scaffolds_file, $out_scaffolds_stats);
print {$log} "Done\n";
# Buffered write errors only surface when the handle is closed.
close $log or die "Cannot close $out_log_file: $!\n";
exit 0;
0f8b2da62d7d
Support for SPAdes 2.5.0. Added a tab-separated output with coverage vs. length info for each contig.
lionelguy
parents:
diff
changeset
|
46 |
0f8b2da62d7d
Support for SPAdes 2.5.0. Added a tab-separated output with coverage vs. length info for each contig.
lionelguy
parents:
diff
changeset
|
# Run spades
#
# Arguments: the command and its arguments. They are joined with single
# spaces, " -o $output_dir" is appended, and the resulting string is run
# with system().
# Returns 0 on success. On failure the reason is written to the $log
# handle and the script dies with the same message.
sub runSpades {
    die "No command given to runSpades\n" unless @_;

    # NOTE(review): the single-string form of system() is kept for backward
    # compatibility, so the command may be re-parsed by a shell. Arguments
    # containing spaces or shell metacharacters are not protected.
    my $cmd = join(" ", @_) . " -o $output_dir";
    my $status = system($cmd);
    if ($status != 0) {
        # Decode the wait status: -1 means the command could not be
        # started (reason in $!), the low 7 bits hold a terminating
        # signal, the high byte holds the exit code.
        my $reason
            = $status == -1 ? "could not be executed: $!"
            : $status & 127 ? "died with signal " . ($status & 127)
            :                 "exited with code " . ($status >> 8);
        my $message
            = "Failed with code $status\nCommand $cmd\nMessage: $reason\n";
        print {$log} $message;
        die $message;
    }
    return 0;
}
0f8b2da62d7d
Support for SPAdes 2.5.0. Added a tab-separated output with coverage vs. length info for each contig.
lionelguy
parents:
diff
changeset
|
57 |
0f8b2da62d7d
Support for SPAdes 2.5.0. Added a tab-separated output with coverage vs. length info for each contig.
lionelguy
parents:
diff
changeset
|
# Collect output
#
# Moves contigs.fasta and scaffolds.fasta from $output_dir to
# $out_contigs_file and $out_scaffolds_file, then appends the content of
# $output_dir/spades.log to the $log handle.
# A file that cannot be moved is reported in the log and does not stop
# the script. Dies if spades.log cannot be opened.
# Returns 0.
sub collectOutput {
    my @moves = (
        [ "$output_dir/contigs.fasta",   $out_contigs_file ],
        [ "$output_dir/scaffolds.fasta", $out_scaffolds_file ],
    );
    for my $pair (@moves) {
        my ($from, $to) = @{$pair};
        # move() returns false on failure and leaves the reason in $!.
        move($from, $to)
            or print {$log} "Warning: cannot move $from to $to: $!\n";
    }

    my $spades_log = "$output_dir/spades.log";
    open my $spades_log_fh, '<', $spades_log
        or die "Cannot open log file $spades_log: $!";
    while (my $line = <$spades_log_fh>) {
        print {$log} $line;
    }
    close $spades_log_fh;
    return 0;
}
0f8b2da62d7d
Support for SPAdes 2.5.0. Added a tab-separated output with coverage vs. length info for each contig.
lionelguy
parents:
diff
changeset
|
69 |
0f8b2da62d7d
Support for SPAdes 2.5.0. Added a tab-separated output with coverage vs. length info for each contig.
lionelguy
parents:
diff
changeset
|
# Extract name, length and coverage from FASTA headers into a table.
#
# Arguments:
#   $in  - path of the FASTA file to read
#   $out - path of the tab-separated file to write (overwritten)
# The output starts with the line "#name<TAB>length<TAB>coverage". For
# every header line (starting with ">") the first whitespace-delimited
# token is split on "_"; fields 2, 4 and 6 are written as "NODE_<field 2>",
# length and coverage, e.g. ">NODE_1_length_1000_cov_12.5".
# Dies if a file cannot be opened or closed, or if one of the three fields
# is missing or empty in a header.
# Returns 1 on success.
sub extractCoverageLength {
    my ($in, $out) = @_;
    open my $fasta, '<', $in  or die "Cannot read $in: $!\n";
    open my $tab,   '>', $out or die "Cannot write to $out: $!\n";
    print {$tab} "#name\tlength\tcoverage\n";
    while (my $line = <$fasta>) {
        next unless $line =~ /^>/;
        chomp $line;
        my ($id) = split /\s/, $line;
        my (undef, $node, undef, $length, undef, $coverage)
            = split /_/, $id;
        # Test for presence, not truth, so that a value of "0" is accepted.
        for my $field ($node, $length, $coverage) {
            die "Not all elements found in $line\n"
                unless defined $field && length $field;
        }
        print {$tab} "NODE_$node\t$length\t$coverage\n";
    }
    close $fasta;
    # Buffered write errors only surface when the handle is closed.
    close $tab or die "Cannot close $out: $!\n";
    return 1;
}