annotate spades.pl @ 1:a415d510332b draft default tip

Changing version
author nml
date Mon, 07 Nov 2016 16:31:18 -0500
parents 27b90e43e2d8
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
27b90e43e2d8 planemo upload commit 6cd8dfa9e518c63a0b0e3fd5167424cffd3829fc
nml
parents:
diff changeset
1 #!/usr/bin/env perl
27b90e43e2d8 planemo upload commit 6cd8dfa9e518c63a0b0e3fd5167424cffd3829fc
nml
parents:
diff changeset
2 ## A wrapper script to call spades.py and collect its output
27b90e43e2d8 planemo upload commit 6cd8dfa9e518c63a0b0e3fd5167424cffd3829fc
nml
parents:
diff changeset
3 use strict;
27b90e43e2d8 planemo upload commit 6cd8dfa9e518c63a0b0e3fd5167424cffd3829fc
nml
parents:
diff changeset
4 use warnings;
27b90e43e2d8 planemo upload commit 6cd8dfa9e518c63a0b0e3fd5167424cffd3829fc
nml
parents:
diff changeset
5 use File::Temp qw/ tempfile tempdir /;
27b90e43e2d8 planemo upload commit 6cd8dfa9e518c63a0b0e3fd5167424cffd3829fc
nml
parents:
diff changeset
6 use File::Copy;
27b90e43e2d8 planemo upload commit 6cd8dfa9e518c63a0b0e3fd5167424cffd3829fc
nml
parents:
diff changeset
7 use Getopt::Long;
27b90e43e2d8 planemo upload commit 6cd8dfa9e518c63a0b0e3fd5167424cffd3829fc
nml
parents:
diff changeset
8
27b90e43e2d8 planemo upload commit 6cd8dfa9e518c63a0b0e3fd5167424cffd3829fc
nml
parents:
diff changeset
# Parse arguments
# Positional layout: five output paths, the contig name prefix, then the
# command line to execute (everything left over goes to runSpades).
if (@ARGV < 7) {
    # Without this guard a missing log path reaches open() as undef, which
    # Perl treats as "anonymous temporary file" instead of an error.
    die "Usage: $0 <contigs.fasta> <contigs.stats> <scaffolds.fasta> "
      . "<scaffolds.stats> <log> <contig_prefix> <spades command...>\n";
}

my ($out_contigs_file,
    $out_contigs_stats,
    $out_scaffolds_file,
    $out_scaffolds_stats,
    $out_log_file,
    $new_name,
    @sysargs) = @ARGV;

# Directory handed to the assembler via "-o"; the subs below read results from it.
my $output_dir = 'output_dir';

# Create log handle
# $! (OS error) is the meaningful variable after a failed open; $? is the
# status of the last child process and says nothing about this failure.
open my $log, '>', $out_log_file
    or die "Cannot write to $out_log_file: $!\n";

# Run program
runSpades(@sysargs);
collectOutput($new_name);
extractCoverageLength($out_contigs_file, $out_contigs_stats);
extractCoverageLength($out_scaffolds_file, $out_scaffolds_stats);
print {$log} "Done\n";
# Buffered write errors only surface when the handle is closed.
close $log or die "Cannot finish writing $out_log_file: $!\n";
exit 0;
27b90e43e2d8 planemo upload commit 6cd8dfa9e518c63a0b0e3fd5167424cffd3829fc
nml
parents:
diff changeset
32
27b90e43e2d8 planemo upload commit 6cd8dfa9e518c63a0b0e3fd5167424cffd3829fc
nml
parents:
diff changeset
# Run spades
# Joins the given words into one shell command, appends "-o $output_dir"
# and runs it. Returns 0 on success; on failure writes the diagnostic to
# the wrapper log and dies with the same text.
sub runSpades {
    my @command_words = @_;

    # Kept as a single shell string (not list-form system) so callers that
    # rely on the shell splitting/expanding the arguments keep working.
    my $cmd = join(" ", @command_words) . " -o $output_dir";

    my $return_code = system($cmd);
    my $os_error    = $!;    # capture before anything else can overwrite it
    return 0 if $return_code == 0;

    # Decode the wait status; the raw number alone is not informative.
    my $reason;
    if ($return_code == -1) {
        $reason = "failed to execute: $os_error";
    }
    elsif ($return_code & 127) {
        $reason = sprintf 'terminated by signal %d', $return_code & 127;
    }
    else {
        $reason = sprintf 'exited with status %d', $return_code >> 8;
    }

    my $message = "Failed with code $return_code\nCommand $cmd\nMessage: $reason\n";
    print {$log} $message;
    die $message;
}
27b90e43e2d8 planemo upload commit 6cd8dfa9e518c63a0b0e3fd5167424cffd3829fc
nml
parents:
diff changeset
43
27b90e43e2d8 planemo upload commit 6cd8dfa9e518c63a0b0e3fd5167424cffd3829fc
nml
parents:
diff changeset
# Collect output
# Checks that the assembler produced contigs.fasta and scaffolds.fasta in
# $output_dir, delivers them to the requested output paths (renaming the
# sequence headers unless the prefix is the default 'NODE'), and appends
# spades.log to the wrapper log. Returns 0; dies on any missing file or
# failed file operation.
sub collectOutput {
    my ($new_name) = @_;

    for my $fasta (qw(contigs.fasta scaffolds.fasta)) {
        die "Could not find $fasta file\n" if not -e "$output_dir/$fasta";
    }

    # If a new name is given for the contigs and scaffolds, rewrite the
    # headers while copying; otherwise the files can be moved untouched.
    if ($new_name ne 'NODE') {
        renameContigs($new_name);
    }
    else {
        # File::Copy::move returns false on failure; do not ignore it.
        move("$output_dir/contigs.fasta", $out_contigs_file)
            or die "Cannot move $output_dir/contigs.fasta to $out_contigs_file: $!\n";
        move("$output_dir/scaffolds.fasta", $out_scaffolds_file)
            or die "Cannot move $output_dir/scaffolds.fasta to $out_scaffolds_file: $!\n";
    }

    # Append the assembler's own log to the wrapper log.
    my $spades_log = "$output_dir/spades.log";
    open my $spades_log_fh, '<', $spades_log
        or die "Cannot open log file $spades_log: $!";
    while (my $line = <$spades_log_fh>) {
        print {$log} $line;
    }
    close $spades_log_fh;

    return 0;
}
27b90e43e2d8 planemo upload commit 6cd8dfa9e518c63a0b0e3fd5167424cffd3829fc
nml
parents:
diff changeset
73
27b90e43e2d8 planemo upload commit 6cd8dfa9e518c63a0b0e3fd5167424cffd3829fc
nml
parents:
diff changeset
# Change name in contig and scaffolds file
# Copies contigs.fasta and scaffolds.fasta from $output_dir to
# $out_contigs_file and $out_scaffolds_file, replacing the "NODE" prefix
# of every header with $name. Returns a true value; dies if a file cannot
# be opened or written.
sub renameContigs {
    my ($name) = @_;

    _renameFastaHeaders($name, "$output_dir/contigs.fasta",   $out_contigs_file);
    _renameFastaHeaders($name, "$output_dir/scaffolds.fasta", $out_scaffolds_file);

    return 1;
}

# Copy $source to $destination, rewriting ">NODE_<n>_<rest>" header lines
# as "><name>_<n> <rest>"; every other line is copied unchanged.
sub _renameFastaHeaders {
    my ($name, $source, $destination) = @_;

    open my $in, '<', $source
        or die "Cannot read $source: $!\n";
    open my $out, '>', $destination
        or die "Cannot write to $destination: $!\n";

    while (my $line = <$in>) {
        # Drop the NODE_ prefix so the display id becomes our contig name
        # plus the contig number; the remainder (length, coverage, ...)
        # moves after a space. Anchored so only real header lines match.
        if ($line =~ /^>NODE_(\d+)_(.+)/) {
            $line = ">$name" . "_$1 $2\n";
        }
        print {$out} $line;
    }

    close $in;
    # Buffered write errors only surface at close time.
    close $out or die "Cannot finish writing $destination: $!\n";
    return;
}
27b90e43e2d8 planemo upload commit 6cd8dfa9e518c63a0b0e3fd5167424cffd3829fc
nml
parents:
diff changeset
108
27b90e43e2d8 planemo upload commit 6cd8dfa9e518c63a0b0e3fd5167424cffd3829fc
nml
parents:
diff changeset
109
27b90e43e2d8 planemo upload commit 6cd8dfa9e518c63a0b0e3fd5167424cffd3829fc
nml
parents:
diff changeset
# Extract
# Reads the FASTA file $in and writes a tab-separated table to $out with
# one row per header line: "<name>_<number>_<component>", length, coverage,
# preceded by a "#name<TAB>length<TAB>coverage" header row. Dies if a file
# cannot be opened or written, or if a header lacks any expected field.
# Returns a true value.
sub extractCoverageLength {
    my ($in, $out) = @_;

    # Lexical handles: closed reliably and cannot clash with other globals.
    open my $fasta_fh, '<', $in
        or die "Cannot read $in: $!\n";
    open my $tab_fh, '>', $out
        or die "Cannot write to $out: $!\n";

    print {$tab_fh} "#name\tlength\tcoverage\n";

    while (my $header = <$fasta_fh>) {
        next unless $header =~ /^>/;    # only header lines carry the stats
        chomp $header;

        # The separator after the number is "_" in original headers and a
        # space in headers rewritten by renameContigs.
        my ($name, $number, $length, $coverage, $component) =
            $header =~ m/^>(NODE|\S+)_(\d+)(?:_|\s)length_(\d+)_cov_(\d+\.*\d*)_(component_\d+)/
            or die "Not all elements found in $header\n";

        print {$tab_fh} "${name}_${number}_$component\t$length\t$coverage\n";
    }

    close $fasta_fh;
    # Buffered write errors only surface at close time.
    close $tab_fh or die "Cannot finish writing $out: $!\n";
    return 1;
}