annotate spades.pl @ 1:80f079961dc9 draft

planemo upload commit 769074e10bbc1bc1ad0a820978cbedcebac412d5-dirty
author nml
date Mon, 18 Jan 2016 10:02:57 -0500
parents 009c00203195
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
009c00203195 planemo upload commit 3fd243b12e91a3fe71083376f40e85647c8b892b-dirty
nml
parents:
diff changeset
1 #!/usr/bin/env perl
009c00203195 planemo upload commit 3fd243b12e91a3fe71083376f40e85647c8b892b-dirty
nml
parents:
diff changeset
2 ## A wrapper script to call spades.py and collect its output
009c00203195 planemo upload commit 3fd243b12e91a3fe71083376f40e85647c8b892b-dirty
nml
parents:
diff changeset
3 use strict;
009c00203195 planemo upload commit 3fd243b12e91a3fe71083376f40e85647c8b892b-dirty
nml
parents:
diff changeset
4 use warnings;
009c00203195 planemo upload commit 3fd243b12e91a3fe71083376f40e85647c8b892b-dirty
nml
parents:
diff changeset
5 use File::Temp qw/ tempfile tempdir /;
009c00203195 planemo upload commit 3fd243b12e91a3fe71083376f40e85647c8b892b-dirty
nml
parents:
diff changeset
6 use File::Copy;
009c00203195 planemo upload commit 3fd243b12e91a3fe71083376f40e85647c8b892b-dirty
nml
parents:
diff changeset
7 use Getopt::Long;
009c00203195 planemo upload commit 3fd243b12e91a3fe71083376f40e85647c8b892b-dirty
nml
parents:
diff changeset
8
009c00203195 planemo upload commit 3fd243b12e91a3fe71083376f40e85647c8b892b-dirty
nml
parents:
diff changeset
9 # Parse arguments
009c00203195 planemo upload commit 3fd243b12e91a3fe71083376f40e85647c8b892b-dirty
nml
parents:
diff changeset
# Parse arguments.
# Positional layout: four result paths, the log path, the name prefix to use
# for the sequences ('NODE' keeps the original headers), and finally the
# command line (program plus its arguments) that is handed to runSpades().
my ($out_contigs_file,
    $out_contigs_stats,
    $out_scaffolds_file,
    $out_scaffolds_stats,
    $out_log_file,
    $new_name,
    @sysargs) = @ARGV;

# Fail early with a usage message instead of tripping over undef values later.
if (!defined $new_name || !@sysargs) {
    die "Usage: $0 <contigs_file> <contigs_stats> <scaffolds_file> "
      . "<scaffolds_stats> <log_file> <new_name> <command> [args...]\n";
}

# Directory passed to the wrapped command via -o; the subs below read it.
my $output_dir = 'output_dir';

# Create log handle ($! holds the OS error for a failed open, not $?).
open my $log, '>', $out_log_file
    or die "Cannot write to $out_log_file: $!\n";

# Run program
runSpades(@sysargs);
collectOutput($new_name);
extractCoverageLength($out_contigs_file, $out_contigs_stats);
extractCoverageLength($out_scaffolds_file, $out_scaffolds_stats);
print {$log} "Done\n";

# Buffered write errors only surface at close time, so check it.
close $log or die "Cannot close $out_log_file: $!\n";
exit 0;
009c00203195 planemo upload commit 3fd243b12e91a3fe71083376f40e85647c8b892b-dirty
nml
parents:
diff changeset
32
009c00203195 planemo upload commit 3fd243b12e91a3fe71083376f40e85647c8b892b-dirty
nml
parents:
diff changeset
33 # Run spades
009c00203195 planemo upload commit 3fd243b12e91a3fe71083376f40e85647c8b892b-dirty
nml
parents:
diff changeset
# Run the wrapped command.
#
# Arguments: the command and its arguments; they are joined with single
#            spaces and " -o $output_dir" is appended.
# Returns:   0 on success.
# Dies:      when the command cannot be started, is killed by a signal, or
#            exits non-zero. The same message is written to the log handle.
#
# NOTE(review): the command is run as a single string, so it may be interpreted
# by the shell and arguments containing whitespace or metacharacters are
# re-parsed. Kept as-is for compatibility with existing callers.
sub runSpades {
    my $cmd = join(" ", @_) . " -o $output_dir";
    my $return_code = system($cmd);

    if ($return_code) {
        # Decode the wait status as described in perlfunc for system():
        # -1 means the command never started and the reason is in $!.
        my $reason;
        if ($return_code == -1) {
            $reason = "failed to execute: $!";
        }
        elsif ($return_code & 127) {
            $reason = sprintf "died with signal %d", $return_code & 127;
        }
        else {
            $reason = sprintf "exited with value %d", $return_code >> 8;
        }

        my $message =
            "Failed with code $return_code\nCommand $cmd\nMessage: $reason\n";
        print {$log} $message;
        die $message;
    }
    return 0;
}
009c00203195 planemo upload commit 3fd243b12e91a3fe71083376f40e85647c8b892b-dirty
nml
parents:
diff changeset
43
009c00203195 planemo upload commit 3fd243b12e91a3fe71083376f40e85647c8b892b-dirty
nml
parents:
diff changeset
44 # Collect output
009c00203195 planemo upload commit 3fd243b12e91a3fe71083376f40e85647c8b892b-dirty
nml
parents:
diff changeset
# Collect output
#
# Checks that contigs.fasta and scaffolds.fasta exist in $output_dir, delivers
# them to $out_contigs_file / $out_scaffolds_file (renaming the sequence
# headers on the way unless the requested name is 'NODE'), then appends
# $output_dir/spades.log to the wrapper's log handle.
#
# Arguments: $new_name - name prefix for the sequences; 'NODE' means keep the
#                        headers untouched and simply move the files.
# Returns:   0 on success.
# Dies:      if an expected file is missing, cannot be moved, or the spades
#            log cannot be opened.
sub collectOutput{
    my ($new_name) = @_;

    # Both result files must exist before anything is moved or rewritten.
    for my $fasta (qw(contigs.fasta scaffolds.fasta)) {
        if ( not -e "$output_dir/$fasta") {
            die "Could not find $fasta file\n";
        }
    }

    # If a new name is given for the contigs and scaffolds, change them
    # before moving them.
    if ( $new_name ne 'NODE') {
        renameContigs($new_name);
    }
    else {
        # File::Copy::move returns false on failure; do not ignore it.
        move("$output_dir/contigs.fasta", $out_contigs_file)
            or die "Cannot move $output_dir/contigs.fasta to $out_contigs_file: $!\n";
        move("$output_dir/scaffolds.fasta", $out_scaffolds_file)
            or die "Cannot move $output_dir/scaffolds.fasta to $out_scaffolds_file: $!\n";
    }

    # Append the spades log to our own log.
    my $spades_log = "$output_dir/spades.log";
    open my $spades_fh, '<', $spades_log
        or die "Cannot open log file $spades_log: $!";
    while (my $line = <$spades_fh>) {
        print {$log} $line;
    }
    close $spades_fh;

    return 0;
}
009c00203195 planemo upload commit 3fd243b12e91a3fe71083376f40e85647c8b892b-dirty
nml
parents:
diff changeset
73
009c00203195 planemo upload commit 3fd243b12e91a3fe71083376f40e85647c8b892b-dirty
nml
parents:
diff changeset
74 #Change name in contig and scaffolds file
009c00203195 planemo upload commit 3fd243b12e91a3fe71083376f40e85647c8b892b-dirty
nml
parents:
diff changeset
# Change the name in the contigs and scaffolds files.
#
# Copies $output_dir/contigs.fasta to $out_contigs_file and
# $output_dir/scaffolds.fasta to $out_scaffolds_file, rewriting every
# ">NODE_<number>_<rest>" header as ">$name_<number> <rest>".
#
# Arguments: $name - prefix that replaces NODE in the headers.
# Returns:   1 on success.
# Dies:      if a file cannot be opened or the output cannot be closed.
sub renameContigs{
    my ($name) = @_;

    _renameFastaHeaders("$output_dir/contigs.fasta",   $out_contigs_file,   $name);
    _renameFastaHeaders("$output_dir/scaffolds.fasta", $out_scaffolds_file, $name);

    return 1;
}

# Copy one FASTA file from $src to $dst, replacing the NODE prefix of each
# header with $name. Lines that do not match are copied unchanged.
sub _renameFastaHeaders {
    my ($src, $dst, $name) = @_;

    open my $in,  '<', $src or die "Cannot open $src: $!\n";
    open my $out, '>', $dst or die "Cannot write to $dst: $!\n";

    while ( my $line = <$in>) {
        # Remove the NODE_ so we can rebuild the display_id from our contig
        # name and the contig number; the remainder (length, coverage, ...)
        # is moved after a space.
        if ( $line =~ />NODE_(\d+)_(.+)/) {
            $line = ">$name" . "_$1 $2\n";
        }
        print {$out} $line;
    }

    close $in;
    # Buffered write errors only surface at close time.
    close $out or die "Cannot close $dst: $!\n";

    return 1;
}
009c00203195 planemo upload commit 3fd243b12e91a3fe71083376f40e85647c8b892b-dirty
nml
parents:
diff changeset
108
009c00203195 planemo upload commit 3fd243b12e91a3fe71083376f40e85647c8b892b-dirty
nml
parents:
diff changeset
109
009c00203195 planemo upload commit 3fd243b12e91a3fe71083376f40e85647c8b892b-dirty
nml
parents:
diff changeset
110 # Extract
009c00203195 planemo upload commit 3fd243b12e91a3fe71083376f40e85647c8b892b-dirty
nml
parents:
diff changeset
# Extract name, length and coverage from the headers of a FASTA file and
# write them as a tab-separated table.
#
# Arguments: $in  - path of the FASTA file to read.
#            $out - path of the table to write; it starts with the header
#                   line "#name\tlength\tcoverage".
# Returns:   1 on success.
# Dies:      if either file cannot be opened, the table cannot be closed, or
#            a header line does not carry number, length and coverage.
sub extractCoverageLength{
    my ($in, $out) = @_;

    # Lexical handles: they cannot clash with handles used elsewhere and are
    # released even when we die half way through.
    open my $fasta, '<', $in  or die "Cannot open $in: $!\n";
    open my $tab,   '>', $out or die "Cannot write to $out: $!\n";

    print {$tab} "#name\tlength\tcoverage\n";
    while (my $line = <$fasta>){
        # Only header lines carry the statistics.
        next unless $line =~ /^>/;
        chomp $line;

        # Accepts both the original "NODE_<n>_length_..." headers and
        # renamed "<name>_<n> length_..." headers.
        die "Not all elements found in $line\n"
            if ($line !~ m/^>(NODE|\S+)_(\d+)(?:_|\s)length_(\d+)_cov_(\d+\.*\d*)/);
        my ($name, $number, $length, $cov) = ($1, $2, $3, $4);

        print {$tab} "$name" . "_$number\t$length\t$cov\n";
    }

    close $fasta;
    # Buffered write errors only surface at close time.
    close $tab or die "Cannot close $out: $!\n";

    return 1;
}