Mercurial > repos > nml > spades
annotate spades.pl @ 1:80f079961dc9 draft
planemo upload commit 769074e10bbc1bc1ad0a820978cbedcebac412d5-dirty
author | nml |
---|---|
date | Mon, 18 Jan 2016 10:02:57 -0500 |
parents | 009c00203195 |
children |
rev | line source |
---|---|
0
009c00203195
planemo upload commit 3fd243b12e91a3fe71083376f40e85647c8b892b-dirty
nml
parents:
diff
changeset
|
1 #!/usr/bin/env perl |
009c00203195
planemo upload commit 3fd243b12e91a3fe71083376f40e85647c8b892b-dirty
nml
parents:
diff
changeset
|
2 ## A wrapper script to call spades.py and collect its output |
009c00203195
planemo upload commit 3fd243b12e91a3fe71083376f40e85647c8b892b-dirty
nml
parents:
diff
changeset
|
3 use strict; |
009c00203195
planemo upload commit 3fd243b12e91a3fe71083376f40e85647c8b892b-dirty
nml
parents:
diff
changeset
|
4 use warnings; |
009c00203195
planemo upload commit 3fd243b12e91a3fe71083376f40e85647c8b892b-dirty
nml
parents:
diff
changeset
|
5 use File::Temp qw/ tempfile tempdir /; |
009c00203195
planemo upload commit 3fd243b12e91a3fe71083376f40e85647c8b892b-dirty
nml
parents:
diff
changeset
|
6 use File::Copy; |
009c00203195
planemo upload commit 3fd243b12e91a3fe71083376f40e85647c8b892b-dirty
nml
parents:
diff
changeset
|
7 use Getopt::Long; |
009c00203195
planemo upload commit 3fd243b12e91a3fe71083376f40e85647c8b892b-dirty
nml
parents:
diff
changeset
|
8 |
009c00203195
planemo upload commit 3fd243b12e91a3fe71083376f40e85647c8b892b-dirty
nml
parents:
diff
changeset
|
9 # Parse arguments |
009c00203195
planemo upload commit 3fd243b12e91a3fe71083376f40e85647c8b892b-dirty
nml
parents:
diff
changeset
|
# Parse arguments.
# Positional layout: four output paths (contigs FASTA, contigs stats,
# scaffolds FASTA, scaffolds stats), the wrapper log path, the name prefix
# for the output sequences, then the command to run (program name first).
die "Usage: $0 <contigs.fasta> <contigs.stats> <scaffolds.fasta> "
  . "<scaffolds.stats> <log> <name prefix> <command> [args...]\n"
  if @ARGV < 7;

my ($out_contigs_file,
    $out_contigs_stats,
    $out_scaffolds_file,
    $out_scaffolds_stats,
    $out_log_file,
    $new_name,
    @sysargs) = @ARGV;

# Directory passed to the assembler via -o; the subs below read their
# input files from it.
my $output_dir = 'output_dir';

# Create log handle. $! holds the OS error for a failed open; $? is only
# meaningful after running a child process.
open my $log, '>', $out_log_file
  or die "Cannot write to $out_log_file: $!\n";

# Run program
runSpades(@sysargs);
collectOutput($new_name);
extractCoverageLength($out_contigs_file, $out_contigs_stats);
extractCoverageLength($out_scaffolds_file, $out_scaffolds_stats);
print {$log} "Done\n";

# Buffered write errors only surface at close, so do not ignore its result.
close $log or die "Cannot close $out_log_file: $!\n";
exit 0;
009c00203195
planemo upload commit 3fd243b12e91a3fe71083376f40e85647c8b892b-dirty
nml
parents:
diff
changeset
|
32 |
009c00203195
planemo upload commit 3fd243b12e91a3fe71083376f40e85647c8b892b-dirty
nml
parents:
diff
changeset
|
33 # Run spades |
009c00203195
planemo upload commit 3fd243b12e91a3fe71083376f40e85647c8b892b-dirty
nml
parents:
diff
changeset
|
# Run the assembler command.
#
# Arguments: the command to execute, program name first, followed by its
#            options. "-o $output_dir" is appended so that the results are
#            written to the directory the rest of this script reads from.
# Returns:   0 on success.
# Dies:      when no command is given, or when the command cannot be started,
#            is killed by a signal, or exits non-zero. The failure text is
#            also written to the wrapper log before dying.
sub runSpades {
    die "No command given to runSpades\n" if !@_;

    my @command = (@_, '-o', $output_dir);

    # Only used for reporting; the command itself is not run from this string.
    my $cmd = join(" ", @command);

    # The list form of system() bypasses the shell, so arguments containing
    # spaces or shell metacharacters reach the program unchanged.
    my $return_code = system(@command);
    if ($return_code != 0) {
        my $reason;
        if ($return_code == -1) {
            # -1 means the program could not be started; $! says why.
            $reason = "could not be started: $!";
        }
        elsif ($return_code & 127) {
            # Low seven bits of the wait status hold the terminating signal.
            $reason = sprintf "killed by signal %d", $return_code & 127;
        }
        else {
            # The real exit value lives in the high byte of the wait status.
            $reason = sprintf "exit status %d", $return_code >> 8;
        }

        my $message
            = "Failed with code $return_code\nCommand $cmd\nMessage: $reason\n";
        print {$log} $message;
        die $message;
    }
    return 0;
}
009c00203195
planemo upload commit 3fd243b12e91a3fe71083376f40e85647c8b892b-dirty
nml
parents:
diff
changeset
|
43 |
009c00203195
planemo upload commit 3fd243b12e91a3fe71083376f40e85647c8b892b-dirty
nml
parents:
diff
changeset
|
44 # Collect output |
009c00203195
planemo upload commit 3fd243b12e91a3fe71083376f40e85647c8b892b-dirty
nml
parents:
diff
changeset
|
# Collect the assembler output.
#
# Argument: $new_name - name prefix for the output sequences. The value
#           'NODE' means "keep the original headers": the FASTA files are
#           moved as they are. Any other value is handed to renameContigs(),
#           which writes renamed copies to the output paths.
# Returns:  0 on success.
# Dies:     when contigs.fasta or scaffolds.fasta is missing from
#           $output_dir, when a file cannot be moved, or when spades.log
#           cannot be opened.
#
# The contents of $output_dir/spades.log are appended to the wrapper log.
sub collectOutput {
    my ($new_name) = @_;

    # Both result files must exist before anything is moved or rewritten.
    for my $fasta (qw(contigs.fasta scaffolds.fasta)) {
        die "Could not find $fasta file\n" if not -e "$output_dir/$fasta";
    }

    # If a new name is given for the contigs and scaffolds, change the
    # headers while copying; otherwise just move the files into place.
    if ($new_name ne 'NODE') {
        renameContigs($new_name);
    }
    else {
        # move() returns false on failure; do not continue without the files.
        move("$output_dir/contigs.fasta", $out_contigs_file)
            or die "Cannot move $output_dir/contigs.fasta to $out_contigs_file: $!\n";
        move("$output_dir/scaffolds.fasta", $out_scaffolds_file)
            or die "Cannot move $output_dir/scaffolds.fasta to $out_scaffolds_file: $!\n";
    }

    # Append the assembler's own log to the wrapper log.
    my $spades_log = "$output_dir/spades.log";
    open my $spades_fh, '<', $spades_log
        or die "Cannot open log file $spades_log: $!";
    while (my $line = <$spades_fh>) {
        print {$log} $line;
    }
    close $spades_fh;

    return 0;
}
009c00203195
planemo upload commit 3fd243b12e91a3fe71083376f40e85647c8b892b-dirty
nml
parents:
diff
changeset
|
73 |
009c00203195
planemo upload commit 3fd243b12e91a3fe71083376f40e85647c8b892b-dirty
nml
parents:
diff
changeset
|
74 #Change name in contig and scaffolds file |
009c00203195
planemo upload commit 3fd243b12e91a3fe71083376f40e85647c8b892b-dirty
nml
parents:
diff
changeset
|
# Change the sequence names in the contigs and scaffolds files.
#
# Argument: $name - prefix that replaces "NODE" in every FASTA header.
# Reads contigs.fasta and scaffolds.fasta from $output_dir and writes the
# renamed copies to $out_contigs_file and $out_scaffolds_file.
# Returns:  0 on success.
# Dies:     when a file cannot be opened, or an output file cannot be closed.
sub renameContigs {
    my ($name) = @_;

    _renameFastaHeaders($name, "$output_dir/contigs.fasta",   $out_contigs_file);
    _renameFastaHeaders($name, "$output_dir/scaffolds.fasta", $out_scaffolds_file);

    return 0;
}

# Copy one FASTA file from $source to $target, rewriting each header of the
# form ">NODE_<number>_<rest>" to "><name>_<number> <rest>". All other lines
# are copied unchanged.
sub _renameFastaHeaders {
    my ($name, $source, $target) = @_;

    open my $in, '<', $source or die "Cannot read $source: $!\n";
    open my $out, '>', $target or die "Cannot write to $target: $!\n";

    while (my $line = <$in>) {
        # Remove the NODE_ prefix so the display id can be rebuilt from our
        # name plus the sequence number; the remainder of the header (length,
        # coverage, ...) is moved behind a space.
        if ($line =~ /^>NODE_(\d+)_(.+)/) {
            $line = ">${name}_$1 $2\n";
        }
        print {$out} $line;
    }

    close $in;
    # Buffered write errors only surface at close.
    close $out or die "Cannot close $target: $!\n";

    return;
}
009c00203195
planemo upload commit 3fd243b12e91a3fe71083376f40e85647c8b892b-dirty
nml
parents:
diff
changeset
|
108 |
009c00203195
planemo upload commit 3fd243b12e91a3fe71083376f40e85647c8b892b-dirty
nml
parents:
diff
changeset
|
109 |
009c00203195
planemo upload commit 3fd243b12e91a3fe71083376f40e85647c8b892b-dirty
nml
parents:
diff
changeset
|
110 # Extract |
009c00203195
planemo upload commit 3fd243b12e91a3fe71083376f40e85647c8b892b-dirty
nml
parents:
diff
changeset
|
# Extract name, length and coverage from the headers of a FASTA file.
#
# Arguments: $in  - path of the FASTA file to read.
#            $out - path of the tab-separated table to write.
# The table starts with the line "#name<TAB>length<TAB>coverage" and gets one
# row per header. Headers must look like
#   ><prefix>_<number>_length_<length>_cov_<coverage>
# where the separator after <number> may also be whitespace.
# Returns:   0 on success.
# Dies:      when a file cannot be opened, the table cannot be closed, or a
#            header does not contain all of the expected elements.
sub extractCoverageLength {
    my ($in, $out) = @_;

    open my $fasta, '<', $in  or die "Cannot read $in: $!\n";
    open my $tab,   '>', $out or die "Cannot write to $out: $!\n";

    print {$tab} "#name\tlength\tcoverage\n";

    while (my $header = <$fasta>) {
        # Only header lines carry the statistics; skip sequence lines.
        next unless $header =~ /^>/;
        chomp $header;

        # A list assignment in scalar context yields the number of captures,
        # which is 0 (false) when the pattern does not match.
        my ($name, $number, $length, $cov)
            = $header =~ m/^>(NODE|\S+)_(\d+)(?:_|\s)length_(\d+)_cov_(\d+\.*\d*)/
            or die "Not all elements found in $header\n";

        print {$tab} "${name}_$number\t$length\t$cov\n";
    }

    close $fasta;
    # Buffered write errors only surface at close.
    close $tab or die "Cannot close $out: $!\n";

    return 0;
}