view minimus2/minimus2_wrapper.pl @ 0:937ba44abdb7 default tip

Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
author edward-kirton
date Tue, 07 Jun 2011 17:29:43 -0400
parents
children
line wrap: on
line source

#!/usr/bin/env perl

use strict;
use Getopt::Long;
use Env qw(TMPDIR TEMPDIR);

my $usage=<<'ENDHERE';
NAME:
    minimus2_wrapper.pl
PURPOSE:
    To combine two sets of assembled contig sequences. This script wraps Minimus2, part of the AMOS package.
REQUIRED ARGUMENTS:
    $1 : infile1 in Fasta format
    $2 : infile2 in Fasta format
    $4 : outfile of combined assembly in Fasta format
    $5 : outfile of singletons in Fasta format
OPTIONS:
    -tmpdir <dir> : path of temporary directory to use (optional); tempfiles will be discarded upon completion
    -prefix1 <string> : rename reads in infile1 using <prefix>.<counter> format
    -prefix2 <string> : rename reads in infile2 using <prefix>.<counter> format
NOTE:
    - Minimus2 will fail if there are duplicate IDs between infile1 and infile2; use prefix options to avoid this.
ENDHERE

# OPTIONS
our $tmpdir;
my ($help,$prefix1,$prefix2);
GetOptions(
    'tmpdir=s' => \$tmpdir,
    'prefix1=s' => \$prefix1,
    'prefix2=s' => \$prefix2,
    'help' => \$help
);
if ($help) { print $usage; exit; }  

# VALIDATE
die("Expect exactly four arguments\n") unless @ARGV == 4;
my ($infile1,$infile2,$contigs_outfile,$singletons_outfile)=@ARGV;
if ($tmpdir) {
    unless (-d $tmpdir) {
        mkdir($tmpdir) or die("Unable to create tmpdir, $tmpdir\n");
    }
} elsif ($TMPDIR and -d $TMPDIR) {
    $tmpdir=$TMPDIR;
} elsif ($TEMPDIR and -d $TEMPDIR) {
    $tmpdir=$TEMPDIR;
} elsif (-d "/tmp") {
    $tmpdir="/tmp";
} elsif (-d "/scratch") {
    $tmpdir="/scratch";
} else {
    die("Tmpdir required\n");
}
$tmpdir .= "/$$";
mkdir($tmpdir) or die("Unable to mkdir $tmpdir\n");

# CHECK EXECUTABLES
my $toAmos=`which toAmos`;
chomp $toAmos;
dienice("toAmos executable not found\n") unless $toAmos and -f $toAmos;
my $minimus2=`which minimus2`;
chomp $minimus2;
dienice("minimus2 executable not found\n") unless $minimus2 and -f $minimus2;
my $deltafilter=`which delta-filter`;
chomp $deltafilter;
dienice("delta-filter executable not found\n") unless $deltafilter and -f $deltafilter;
my $showcoords=`which show-coords`;
chomp $showcoords;
dienice("show-coords executable not found\n") unless $showcoords;

# CONCATENATE INFILES
open(IN1, "<$infile1") or dienice("Unable to open infile1, $infile1\n");
my $infile="$tmpdir/infile.seq";
open(OUT, ">$infile") or dienice("Unable to open tmpfile, $infile\n");
my $n1=0;
while (<IN1>) {
    if (/^>/) {
        ++$n1;
        if ($prefix1) {
            print OUT ">$prefix1.$n1\n";
        } else {
            print OUT;
        }
    } else {
        print OUT;
    }
}
close IN1;
dienice("Infile 1 contains no sequences or is not in Fasta format\n") unless $n1;
my $n2=0;
open(IN2, "<$infile2") or dienice("Unable to open infile2, $infile2\n");
while (<IN2>) {
    if (/^>/) {
        ++$n2; 
        if ($prefix2) {
            print OUT ">$prefix2.$n2\n";
        } else {
            print OUT;
        }
    } else {
        print OUT;
    }
}
close IN2;
close OUT;
dienice("Infile 2 contains no sequences or is not in Fasta format\n") unless $n2;

# CONVERT FORMAT
eval { `toAmos -s $infile -o $tmpdir/infile.afg` };
dienice("ERROR CONVERTING TO AMOS FORMAT\n") if $@;

# CO-ASSEMBLY
# explicitly defining the delta-filter and show-coords executables is more robust
eval { `minimus2 $tmpdir/infile -D REFCOUNT=$n1 -D DELTAFILTER=$deltafilter -D SHOWCOORDS=$showcoords` };
dienice("ERROR EXECUTING MINIMUS2\n") if $@;

# MOVE FILES AND CLEANUP TMPDIR
eval { `mv $tmpdir/infile.fasta $contigs_outfile` };
dienice("ERROR MOVING CONTIGS OUTFILE\n") if $@;
eval { `mv $tmpdir/infile.singletons.seq $singletons_outfile` };
dienice("ERROR MOVING SINGLETONS OUTFILE\n") if $@;
eval { `rm -rf $tmpdir` };
die("ERROR CLEANING UP TEMP DIR\n") if $@;
exit;

# CLEANUP TEMPFILES BEFORE QUITTING
sub dienice {
    my $msg=shift;
    `rm -rf $tmpdir`;
    die($msg);
}
__END__