changeset 0:937ba44abdb7 default tip

Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
author edward-kirton
date Tue, 07 Jun 2011 17:29:43 -0400
parents
children
files minimus2/minimus2.xml minimus2/minimus2_wrapper.pl
diffstat 2 files changed, 187 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/minimus2/minimus2.xml	Tue Jun 07 17:29:43 2011 -0400
@@ -0,0 +1,55 @@
+<tool id="minimus2" name="Minimus2" version='1.0.1'>
+<description>Merge two sets of assembled contig sequences</description>
+<command interpreter='perl'>minimus2_wrapper.pl -tmpdir $contigs_outfile.extra_files_path $infile1 $infile2 $contigs_outfile $singletons_outfile
+#if $prefix1.select == 'y':
+-prefix1 $prefix1.prefix
+#end if
+#if $prefix2.select == 'y':
+-prefix2 $prefix2.prefix
+#end if
+</command>
+<inputs>
+    <param name="infile1" type="data" format="fasta" label="Contig sequences file 1"/> <!-- first positional argument of the wrapper -->
+    <conditional name='prefix1'> <!-- choosing 'y' appends "-prefix1 value" to the command line -->
+        <param name='select' type='select' label='Rename contigs in file 1 by adding prefix?'>
+            <option value='y'>yes, add prefix</option>
+            <option value='n'>no, contig IDs are unique</option>
+        </param>
+        <when value='y'>
+            <param name="prefix" type="text" value="1" label="Prefix for sequences in file 1" />
+        </when>
+        <when value='n'> <!-- no extra parameters for this choice -->
+        </when>
+    </conditional>
+    <param name="infile2" type="data" format="fasta" label="Contig sequences file 2"/> <!-- second positional argument of the wrapper -->
+    <conditional name='prefix2'> <!-- choosing 'y' appends "-prefix2 value" to the command line -->
+        <param name='select' type='select' label='Rename contigs in file 2 by adding prefix?'>
+            <option value='y'>yes, add prefix</option>
+            <option value='n'>no, contig IDs are unique</option>
+        </param>
+        <when value='y'>
+            <param name="prefix" type="text" value="2" label="Prefix for sequences in file 2" />
+        </when>
+        <when value='n'> <!-- no extra parameters for this choice -->
+        </when>
+    </conditional>
+</inputs>
+<outputs>
+    <data name="contigs_outfile" format="fasta" label="contigs" /> <!-- third positional argument; its extra_files_path is passed as -tmpdir -->
+    <data name="singletons_outfile" format="fasta" label="singletons" /> <!-- fourth positional argument of the wrapper -->
+</outputs>
+<help>
+**What it does**
+
+minimus2 is part of the AMOS assembler package, designed for merging one or two sets of contig sequences.
+
+This tool preprocesses the Fasta input files prior to coassembly with minimus2 and separately returns the new contigs
+and the sequences which did not co-assemble (singletons).
+
+Sequence IDs must be unique across both input datasets; the optional rename-with-prefix option avoids ID clashes by renaming every sequence to prefix.counter (e.g. 1.1, 1.2, ...), provided the two prefixes differ.
+
+**Documentation**
+
+http://sourceforge.net/apps/mediawiki/amos/index.php?title=Minimus2
+</help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/minimus2/minimus2_wrapper.pl	Tue Jun 07 17:29:43 2011 -0400
@@ -0,0 +1,132 @@
+#!/usr/bin/env perl
+
+use strict;
+use Getopt::Long;
+use Env qw(TMPDIR TEMPDIR);
+
+my $usage=<<'ENDHERE';    # help text shown for -help; single-quoted heredoc, so $1..$4 are printed literally
+NAME:
+    minimus2_wrapper.pl
+PURPOSE:
+    To combine two sets of assembled contig sequences. This script wraps Minimus2, part of the AMOS package.
+REQUIRED ARGUMENTS:
+    $1 : infile1 in Fasta format
+    $2 : infile2 in Fasta format
+    $3 : outfile of combined assembly in Fasta format
+    $4 : outfile of singletons in Fasta format
+OPTIONS:
+    -tmpdir <dir> : path of temporary directory to use (optional); tempfiles will be discarded upon completion
+    -prefix1 <string> : rename reads in infile1 using <prefix>.<counter> format
+    -prefix2 <string> : rename reads in infile2 using <prefix>.<counter> format
+NOTE:
+    - Minimus2 will fail if there are duplicate IDs between infile1 and infile2; use prefix options to avoid this.
+ENDHERE
+
+# OPTIONS: parse the command-line switches; positional arguments are left in @ARGV
+our $tmpdir;    # package variable so that dienice() can remove the directory on failure
+my ($help,$prefix1,$prefix2);
+GetOptions(
+    'tmpdir=s' => \$tmpdir,
+    'prefix1=s' => \$prefix1,
+    'prefix2=s' => \$prefix2,
+    'help' => \$help
+) or die($usage);    # GetOptions returns false for unknown or malformed switches
+if ($help) { print $usage; exit; }
+
+# VALIDATE: four positional arguments are required, then a private scratch directory is set up
+if (@ARGV != 4) {
+    die("Expect exactly four arguments\n");
+}
+my ($infile1,$infile2,$contigs_outfile,$singletons_outfile)=@ARGV;
+if ($tmpdir) {
+    # an explicit -tmpdir is created on demand (one level only, no parents)
+    -d $tmpdir or mkdir($tmpdir) or die("Unable to create tmpdir, $tmpdir\n");
+} else {
+    # otherwise take the first existing directory among the usual candidates
+    for my $candidate ($TMPDIR, $TEMPDIR, "/tmp", "/scratch") {
+        next unless $candidate and -d $candidate;
+        $tmpdir=$candidate;
+        last;
+    }
+    die("Tmpdir required\n") unless $tmpdir;
+}
+# all work happens in a subdirectory named after this process id
+$tmpdir=join("/", $tmpdir, $$);
+mkdir($tmpdir) or die("Unable to mkdir $tmpdir\n");
+
+# CHECK EXECUTABLES: every external program must resolve to a regular file on PATH
+my %path_of;    # program name => location reported by `which`
+for my $program (qw(toAmos minimus2 delta-filter show-coords)) {
+    my $found=`which $program`;
+    chomp $found;
+    # the -f test applies to all four programs, show-coords included
+    dienice("$program executable not found\n") unless $found and -f $found;
+    $path_of{$program}=$found;
+}
+# keep the established variable names: the delta-filter and show-coords paths
+# are handed to minimus2 further down
+my ($toAmos,$minimus2,$deltafilter,$showcoords)
+    =@path_of{qw(toAmos minimus2 delta-filter show-coords)};
+
+# CONCATENATE INFILES into one Fasta file, renaming records to <prefix>.<counter> on request
+open(my $in1, '<', $infile1) or dienice("Unable to open infile1, $infile1\n");    # three-arg open: the path is never parsed as a mode
+my $infile="$tmpdir/infile.seq";
+open(my $out, '>', $infile) or dienice("Unable to open tmpfile, $infile\n");
+my $n1=0;    # number of records in infile1; later given to minimus2 as REFCOUNT
+while (my $line=<$in1>) {
+    if ($line =~ /^>/) {
+        ++$n1;
+        if (defined $prefix1 and length $prefix1) {    # tested by length so that a prefix of "0" is honoured
+            print $out ">$prefix1.$n1\n";
+        } else {
+            print $out $line;
+        }
+    } else {
+        print $out $line;
+    }
+}
+close $in1;
+dienice("Infile 1 contains no sequences or is not in Fasta format\n") unless $n1;
+my $n2=0;    # number of records in infile2
+open(my $in2, '<', $infile2) or dienice("Unable to open infile2, $infile2\n");
+while (my $line=<$in2>) {
+    if ($line =~ /^>/) {
+        ++$n2;
+        if (defined $prefix2 and length $prefix2) {    # same rule as for prefix1
+            print $out ">$prefix2.$n2\n";
+        } else {
+            print $out $line;
+        }
+    } else {
+        print $out $line;
+    }
+}
+close $in2;
+close $out or dienice("Unable to write tmpfile, $infile\n");    # buffered write errors only surface at close
+dienice("Infile 2 contains no sequences or is not in Fasta format\n") unless $n2;
+
+# CONVERT FORMAT (backticks never die, so the child exit status in $? is what gets checked)
+`toAmos -s $infile -o $tmpdir/infile.afg`;
+dienice("ERROR CONVERTING TO AMOS FORMAT\n") if $?;
+
+# CO-ASSEMBLY
+# explicitly defining the delta-filter and show-coords executables is more robust
+`minimus2 $tmpdir/infile -D REFCOUNT=$n1 -D DELTAFILTER=$deltafilter -D SHOWCOORDS=$showcoords`;
+dienice("ERROR EXECUTING MINIMUS2\n") if $?;
+
+# MOVE FILES AND CLEANUP TMPDIR
+`mv $tmpdir/infile.fasta $contigs_outfile`;
+dienice("ERROR MOVING CONTIGS OUTFILE\n") if $?;
+`mv $tmpdir/infile.singletons.seq $singletons_outfile`;
+dienice("ERROR MOVING SINGLETONS OUTFILE\n") if $?;
+`rm -rf $tmpdir`;
+die("ERROR CLEANING UP TEMP DIR\n") if $?;    # plain die: the directory could not be removed anyway
+exit;
+
+# CLEANUP TEMPFILES BEFORE QUITTING: delete the directory named by the global $tmpdir, then die
+sub dienice {
+    my ($reason)=@_;    # message handed on to die
+    qx{rm -rf $tmpdir};    # best effort: the command's exit status is not inspected
+    die $reason;
+}
+__END__