comparison spades.pl @ 0:27b90e43e2d8 draft

planemo upload commit 6cd8dfa9e518c63a0b0e3fd5167424cffd3829fc
author nml
date Mon, 06 Jun 2016 15:13:06 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:27b90e43e2d8
1 #!/usr/bin/env perl
2 ## A wrapper script to call spades.py and collect its output
3 use strict;
4 use warnings;
5 use File::Temp qw/ tempfile tempdir /;
6 use File::Copy;
7 use Getopt::Long;
8
9 # Parse arguments
10 my ($out_contigs_file,
11 $out_contigs_stats,
12 $out_scaffolds_file,
13 $out_scaffolds_stats,
14 $out_log_file,
15 $new_name,
16 @sysargs) = @ARGV;
17
18
19 my $output_dir = 'output_dir';
20
21 # Create log handle
22 open my $log, '>', $out_log_file or die "Cannot write to $out_log_file: $?\n";
23
24 # Run program
25 runSpades(@sysargs);
26 collectOutput($new_name);
27 extractCoverageLength($out_contigs_file, $out_contigs_stats);
28 extractCoverageLength($out_scaffolds_file, $out_scaffolds_stats);
29 print $log "Done\n";
30 close $log;
31 exit 0;
32
33 # Run spades
34 sub runSpades {
35 my $cmd = join(" ", @_) . " -o $output_dir";
36 my $return_code = system($cmd);
37 if ($return_code) {
38 print $log "Failed with code $return_code\nCommand $cmd\nMessage: $?\n";
39 die "Failed with code $return_code\nCommand $cmd\nMessage: $?\n";
40 }
41 return 0;
42 }
43
44 # Collect output
45 sub collectOutput{
46 my ($new_name) = @_;
47
48 # To do: check that the files are there
49 # Collects output
50 if ( not -e "$output_dir/contigs.fasta") {
51 die "Could not find contigs.fasta file\n";
52 }
53 if ( not -e "$output_dir/scaffolds.fasta") {
54 die "Could not find scaffolds.fasta file\n";
55 }
56
57 #if a new name is given for the contigs and scaffolds, change them before moving them
58 if ( $new_name ne 'NODE') {
59 renameContigs($new_name);
60 }
61 else {
62 move "$output_dir/contigs.fasta", $out_contigs_file;
63 move "$output_dir/scaffolds.fasta", $out_scaffolds_file;
64 }
65
66
67
68 open LOG, '<', "$output_dir/spades.log"
69 or die "Cannot open log file $output_dir/spades.log: $?";
70 print $log $_ while (<LOG>);
71 return 0;
72 }
73
74 #Change name in contig and scaffolds file
75 sub renameContigs{
76 my ($name) = @_;
77
78 open my $in, '<',"$output_dir/contigs.fasta" or die $!;
79 open my $out,'>', $out_contigs_file;
80
81 while ( my $line = <$in>) {
82 #remove the NODE_ so we can rebuilt the display_id with our contig name with the contig number.
83 #also move the remainder of the length
84 if ( $line =~ />NODE_(\d+)_(.+)/) {
85 $line = ">$name" . "_$1 $2\n";
86 }
87 print $out $line;
88 }
89 close $in;
90 close $out;
91
92
93 open $in, '<',"$output_dir/scaffolds.fasta" or die $!;
94 open $out,'>', $out_scaffolds_file;
95
96 while ( my $line = <$in>) {
97 #remove the NODE_ so we can rebuilt the display_id with our contig name with the contig number.
98 #also move the remainder of the length
99 if ( $line =~ />NODE_(\d+)_(.+)/) {
100 $line = ">$name" . "_$1 $2\n";
101 }
102 print $out $line;
103 }
104 close $in;
105 close $out;
106
107 }
108
109
110 # Extract
111 sub extractCoverageLength{
112 my ($in, $out) = @_;
113 open FASTA, '<', $in or die $!;
114 open TAB, '>', $out or die $!;
115 print TAB "#name\tlength\tcoverage\n";
116 while (<FASTA>){
117 next unless /^>/;
118 chomp;
119 die "Not all elements found in $_\n" if (! m/^>(NODE|\S+)_(\d+)(?:_|\s)length_(\d+)_cov_(\d+\.*\d*)_(component_\d+)/);
120 my ($name,$n, $l, $cov,$component) = ($1,$2, $3, $4,$5);
121 print TAB "$name" . "_$n" . "_$component\t$l\t$cov\n";
122 }
123 close TAB;
124 }