Mercurial > repos > nml > plasmidspades
view spades.pl @ 0:27b90e43e2d8 draft
planemo upload commit 6cd8dfa9e518c63a0b0e3fd5167424cffd3829fc
author | nml |
---|---|
date | Mon, 06 Jun 2016 15:13:06 -0400 |
parents | |
children |
line wrap: on
line source
#!/usr/bin/env perl ## A wrapper script to call spades.py and collect its output use strict; use warnings; use File::Temp qw/ tempfile tempdir /; use File::Copy; use Getopt::Long; # Parse arguments my ($out_contigs_file, $out_contigs_stats, $out_scaffolds_file, $out_scaffolds_stats, $out_log_file, $new_name, @sysargs) = @ARGV; my $output_dir = 'output_dir'; # Create log handle open my $log, '>', $out_log_file or die "Cannot write to $out_log_file: $?\n"; # Run program runSpades(@sysargs); collectOutput($new_name); extractCoverageLength($out_contigs_file, $out_contigs_stats); extractCoverageLength($out_scaffolds_file, $out_scaffolds_stats); print $log "Done\n"; close $log; exit 0; # Run spades sub runSpades { my $cmd = join(" ", @_) . " -o $output_dir"; my $return_code = system($cmd); if ($return_code) { print $log "Failed with code $return_code\nCommand $cmd\nMessage: $?\n"; die "Failed with code $return_code\nCommand $cmd\nMessage: $?\n"; } return 0; } # Collect output sub collectOutput{ my ($new_name) = @_; # To do: check that the files are there # Collects output if ( not -e "$output_dir/contigs.fasta") { die "Could not find contigs.fasta file\n"; } if ( not -e "$output_dir/scaffolds.fasta") { die "Could not find scaffolds.fasta file\n"; } #if a new name is given for the contigs and scaffolds, change them before moving them if ( $new_name ne 'NODE') { renameContigs($new_name); } else { move "$output_dir/contigs.fasta", $out_contigs_file; move "$output_dir/scaffolds.fasta", $out_scaffolds_file; } open LOG, '<', "$output_dir/spades.log" or die "Cannot open log file $output_dir/spades.log: $?"; print $log $_ while (<LOG>); return 0; } #Change name in contig and scaffolds file sub renameContigs{ my ($name) = @_; open my $in, '<',"$output_dir/contigs.fasta" or die $!; open my $out,'>', $out_contigs_file; while ( my $line = <$in>) { #remove the NODE_ so we can rebuilt the display_id with our contig name with the contig number. #also move the remainder of the length if ( $line =~ />NODE_(\d+)_(.+)/) { $line = ">$name" . "_$1 $2\n"; } print $out $line; } close $in; close $out; open $in, '<',"$output_dir/scaffolds.fasta" or die $!; open $out,'>', $out_scaffolds_file; while ( my $line = <$in>) { #remove the NODE_ so we can rebuilt the display_id with our contig name with the contig number. #also move the remainder of the length if ( $line =~ />NODE_(\d+)_(.+)/) { $line = ">$name" . "_$1 $2\n"; } print $out $line; } close $in; close $out; } # Extract sub extractCoverageLength{ my ($in, $out) = @_; open FASTA, '<', $in or die $!; open TAB, '>', $out or die $!; print TAB "#name\tlength\tcoverage\n"; while (<FASTA>){ next unless /^>/; chomp; die "Not all elements found in $_\n" if (! m/^>(NODE|\S+)_(\d+)(?:_|\s)length_(\d+)_cov_(\d+\.*\d*)_(component_\d+)/); my ($name,$n, $l, $cov,$component) = ($1,$2, $3, $4,$5); print TAB "$name" . "_$n" . "_$component\t$l\t$cov\n"; } close TAB; }