annotate tools/spades_2_5/spades.pl @ 6:1b1af74a54ae draft

Uploaded
author lionelguy
date Thu, 12 Sep 2013 07:49:07 -0400
parents b5ce24f34dd7
children 95ddc2380130
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
0f8b2da62d7d Support for SPAdes 2.5.0. Added a tab-separated output with coverage vs. length info for each contig.
lionelguy
parents:
diff changeset
1 #!/usr/bin/env perl
0f8b2da62d7d Support for SPAdes 2.5.0. Added a tab-separated output with coverage vs. length info for each contig.
lionelguy
parents:
diff changeset
2 ## A wrapper script to call spades.py and collect its output
0f8b2da62d7d Support for SPAdes 2.5.0. Added a tab-separated output with coverage vs. length info for each contig.
lionelguy
parents:
diff changeset
3 use strict;
0f8b2da62d7d Support for SPAdes 2.5.0. Added a tab-separated output with coverage vs. length info for each contig.
lionelguy
parents:
diff changeset
4 use warnings;
0f8b2da62d7d Support for SPAdes 2.5.0. Added a tab-separated output with coverage vs. length info for each contig.
lionelguy
parents:
diff changeset
5 use File::Temp qw/ tempfile tempdir /;
0f8b2da62d7d Support for SPAdes 2.5.0. Added a tab-separated output with coverage vs. length info for each contig.
lionelguy
parents:
diff changeset
6 use File::Copy;
0f8b2da62d7d Support for SPAdes 2.5.0. Added a tab-separated output with coverage vs. length info for each contig.
lionelguy
parents:
diff changeset
7 use Getopt::Long;
0f8b2da62d7d Support for SPAdes 2.5.0. Added a tab-separated output with coverage vs. length info for each contig.
lionelguy
parents:
diff changeset
8
0f8b2da62d7d Support for SPAdes 2.5.0. Added a tab-separated output with coverage vs. length info for each contig.
lionelguy
parents:
diff changeset
# Parse arguments
# The first five positional arguments are the destination paths for the
# collected outputs and the log; everything after them is the command line
# that runSpades() executes.
die "Usage: $0 <contigs_file> <contigs_stats> <scaffolds_file> "
    . "<scaffolds_stats> <log_file> <command> [command arguments...]\n"
    unless @ARGV >= 6;

my ($out_contigs_file,
    $out_contigs_stats,
    $out_scaffolds_file,
    $out_scaffolds_stats,
    $out_log_file,
    @sysargs) = @ARGV;

## GetOptions not compatible with parsing the rest of the arguments in an array.
## Keeping the not-so-nice parse-in-one-go method, without named arguments.
# GetOptions(
#     'contigs-file=s'    => \$out_contigs_file,
#     'contigs-stats=s'   => \$out_contigs_stats,
#     'scaffolds-file=s'  => \$out_scaffolds_file,
#     'scaffolds-stats=s' => \$out_scaffolds_stats,
#     'out_log_file=s'    => \$out_log_file,
# );

# my @sysargs = @ARGV;

# Directory passed to the command through "-o" (see runSpades) and read back
# by collectOutput. A fixed relative name is used rather than a temporary
# directory, so it is not removed by this script.
#my $output_dir = tempdir( CLEANUP => 0 );
my $output_dir = 'output_dir';
# Link "dat" files as fastq, otherwise spades complains about file format
# NOTE(review): no linking is actually performed here -- confirm whether it
# is still needed.

# Create log handle
# $! (not $?) carries the operating-system reason for a failed open.
open my $log, '>', $out_log_file
    or die "Cannot write to $out_log_file: $!\n";

# Run program
# To do: record time
runSpades(@sysargs);
collectOutput();
extractCoverageLength($out_contigs_file, $out_contigs_stats);
extractCoverageLength($out_scaffolds_file, $out_scaffolds_stats);
print $log "Done\n";
# Buffered write errors only surface when the handle is closed.
close $log or die "Cannot close $out_log_file: $!\n";
exit 0;
0f8b2da62d7d Support for SPAdes 2.5.0. Added a tab-separated output with coverage vs. length info for each contig.
lionelguy
parents:
diff changeset
46
0f8b2da62d7d Support for SPAdes 2.5.0. Added a tab-separated output with coverage vs. length info for each contig.
lionelguy
parents:
diff changeset
47 # Run spades
0f8b2da62d7d Support for SPAdes 2.5.0. Added a tab-separated output with coverage vs. length info for each contig.
lionelguy
parents:
diff changeset
# Builds a single command string from the given words plus "-o $output_dir",
# runs it with system(), and reports any failure.
#
# Arguments: the words of the command line to run (program name first).
# Returns:   0 when the command exits with status 0.
# Dies:      when the command cannot be started, is killed by a signal, or
#            exits with a non-zero status; the same message is written to
#            the log handle first.
# Uses the file-level $output_dir and $log.
sub runSpades {
    # NOTE(review): the words are joined into one string, so system() may
    # hand it to the shell and arguments containing spaces or shell
    # metacharacters get re-interpreted. Kept as-is so existing callers
    # keep working -- confirm before switching to the list form.
    my $cmd = join(" ", @_) . " -o $output_dir";
    my $return_code = system($cmd);
    # Capture the OS error right away, before anything else can reset it.
    my $os_error = "$!";
    if ($return_code) {
        # The value returned by system() is the raw wait status, not the
        # exit code: decode it into something a reader can act on.
        my $reason;
        if ($return_code == -1) {
            $reason = "failed to execute: $os_error";
        }
        elsif ($return_code & 127) {
            $reason = "died with signal " . ($return_code & 127);
        }
        else {
            $reason = "exited with status " . ($return_code >> 8);
        }
        my $message =
            "Failed with code $return_code\nCommand $cmd\nMessage: $reason\n";
        print $log $message;
        die $message;
    }
    return 0;
}
0f8b2da62d7d Support for SPAdes 2.5.0. Added a tab-separated output with coverage vs. length info for each contig.
lionelguy
parents:
diff changeset
57
0f8b2da62d7d Support for SPAdes 2.5.0. Added a tab-separated output with coverage vs. length info for each contig.
lionelguy
parents:
diff changeset
58 # Collect output
0f8b2da62d7d Support for SPAdes 2.5.0. Added a tab-separated output with coverage vs. length info for each contig.
lionelguy
parents:
diff changeset
# Moves contigs.fasta and scaffolds.fasta out of $output_dir to their
# destination paths, then appends the contents of $output_dir/spades.log to
# the log handle.
#
# Arguments: none.
# Returns:   0.
# Dies:      when $output_dir/spades.log cannot be opened.
# Uses the file-level $output_dir, $out_contigs_file, $out_scaffolds_file
# and $log.
sub collectOutput{
    # To do: check that the files are there
    # A failed move is recorded in the log instead of being ignored
    # silently; it is deliberately not fatal, so the log below is still
    # collected.
    my @moves = (
        [ "$output_dir/contigs.fasta",   $out_contigs_file ],
        [ "$output_dir/scaffolds.fasta", $out_scaffolds_file ],
    );
    for my $pair (@moves) {
        my ($from, $to) = @{$pair};
        move($from, $to)
            or print $log "Warning: cannot move $from to $to: $!\n";
    }

    # Lexical handle: scoped to this sub and closed explicitly.
    # $! (not $?) carries the operating-system reason for a failed open.
    my $spades_log = "$output_dir/spades.log";
    open my $spades_log_fh, '<', $spades_log
        or die "Cannot open log file $spades_log: $!";
    while (my $line = <$spades_log_fh>) {
        print $log $line;
    }
    close $spades_log_fh;
    return 0;
}
0f8b2da62d7d Support for SPAdes 2.5.0. Added a tab-separated output with coverage vs. length info for each contig.
lionelguy
parents:
diff changeset
69
0f8b2da62d7d Support for SPAdes 2.5.0. Added a tab-separated output with coverage vs. length info for each contig.
lionelguy
parents:
diff changeset
70 # Extract
0f8b2da62d7d Support for SPAdes 2.5.0. Added a tab-separated output with coverage vs. length info for each contig.
lionelguy
parents:
diff changeset
# Writes a tab-separated table (name, length, coverage) with one row per
# FASTA header line found in the input file.
#
# The first whitespace-delimited word of each header is split on "_" and
# fields 2, 4 and 6 are used as node number, length and coverage, i.e.
# headers shaped like ">NODE_<n>_length_<l>_cov_<c>[...]".
#
# Arguments: $in  - path of the FASTA file to read
#            $out - path of the table to write (overwritten)
# Returns:   1 on success.
# Dies:      when either file cannot be opened, when the output cannot be
#            closed, or when a header lacks one of the three fields.
sub extractCoverageLength{
    my ($in, $out) = @_;
    open my $fasta_fh, '<', $in  or die "Cannot read $in: $!\n";
    open my $tab_fh,   '>', $out or die "Cannot write to $out: $!\n";
    print {$tab_fh} "#name\tlength\tcoverage\n";
    while (my $line = <$fasta_fh>) {
        next unless $line =~ /^>/;
        chomp $line;
        my @words = split(/\s/, $line);
        my (undef, $node_id, undef, $length, undef, $coverage) =
            split(/_/, $words[0]);
        # Test for presence rather than truth, so that a legitimate field
        # value of "0" (e.g. "cov_0") is not reported as missing.
        die "Not all elements found in $line\n"
            if grep { !defined $_ || $_ eq '' }
                ($node_id, $length, $coverage);
        print {$tab_fh} "NODE_$node_id\t$length\t$coverage\n";
    }
    close $fasta_fh;
    # Buffered write errors only surface when the handle is closed.
    close $tab_fh or die "Cannot close $out: $!\n";
    return 1;
}