# HG changeset patch
# User edward-kirton
# Date 1307482183 14400
# Node ID 937ba44abdb72a4b784a42d568ef40d0d386071e
Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository

diff -r 000000000000 -r 937ba44abdb7 minimus2/minimus2.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/minimus2/minimus2.xml	Tue Jun 07 17:29:43 2011 -0400
@@ -0,0 +1,55 @@
+
+Merge two sets of assembled contig sequences
+minimus2_wrapper.pl -tmpdir $contigs_outfile.extra_files_path $infile1 $infile2 $contigs_outfile $singletons_outfile
+#if $prefix1.select == 'y':
+-prefix1 $prefix1.prefix
+#end if
+#if $prefix2.select == 'y':
+-prefix2 $prefix2.prefix
+#end if
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+**What it does**
+
+minimus2 is part of the AMOS assembler package, designed for merging one or two sets of contig sequences.
+
+This tool preprocesses the Fasta input files prior to coassembly with minimus2 and separately returns the new contigs
+and the sequences which did not co-assemble (singletons).
+
+The input sequences in each dataset must have unique IDs; use of the optional rename with prefix option avoids this.
+
+**Documentation**
+
+http://sourceforge.net/apps/mediawiki/amos/index.php?title=Minimus2
+
+
diff -r 000000000000 -r 937ba44abdb7 minimus2/minimus2_wrapper.pl
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/minimus2/minimus2_wrapper.pl	Tue Jun 07 17:29:43 2011 -0400
@@ -0,0 +1,157 @@
+#!/usr/bin/env perl
+
+# Wrapper around Minimus2 (AMOS): concatenates two Fasta files, optionally
+# renaming their records, co-assembles them with minimus2 and delivers the merged
+# contigs and the leftover singletons as two separate Fasta files.
+
+use strict;
+use warnings;
+use Getopt::Long;
+use Env qw(TMPDIR TEMPDIR);
+use File::Copy qw(move);
+use File::Path qw(rmtree);
+
+my $usage=<<'ENDHERE';
+NAME:
+    minimus2_wrapper.pl
+PURPOSE:
+    To combine two sets of assembled contig sequences. This script wraps Minimus2, part of the AMOS package.
+REQUIRED ARGUMENTS:
+    $1 : infile1 in Fasta format
+    $2 : infile2 in Fasta format
+    $3 : outfile of combined assembly in Fasta format
+    $4 : outfile of singletons in Fasta format
+OPTIONS:
+    -tmpdir : path of temporary directory to use (optional); tempfiles will be discarded upon completion
+    -prefix1 : rename reads in infile1 using <prefix1>.<n> format
+    -prefix2 : rename reads in infile2 using <prefix2>.<n> format
+NOTE:
+    - Minimus2 will fail if there are duplicate IDs between infile1 and infile2; use prefix options to avoid this.
+ENDHERE
+
+# OPTIONS
+# $tmpdir is file-scoped because dienice() needs it for cleanup.
+my $tmpdir;
+my ($help,$prefix1,$prefix2);
+GetOptions(
+    'tmpdir=s' => \$tmpdir,
+    'prefix1=s' => \$prefix1,
+    'prefix2=s' => \$prefix2,
+    'help' => \$help
+) or die($usage);
+if ($help) { print $usage; exit; }
+
+# VALIDATE
+die("Expect exactly four arguments\n") unless @ARGV == 4;
+my ($infile1,$infile2,$contigs_outfile,$singletons_outfile)=@ARGV;
+# Identical prefixes would recreate the duplicate IDs the renaming is meant to prevent.
+die("prefix1 and prefix2 must differ\n")
+    if defined($prefix1) and defined($prefix2) and length($prefix1) and $prefix1 eq $prefix2;
+
+# Parent directory for temporary files: -tmpdir (created on demand), else the
+# first existing one of $TMPDIR, $TEMPDIR, /tmp and /scratch.
+if ($tmpdir) {
+    unless (-d $tmpdir) {
+        mkdir($tmpdir) or die("Unable to create tmpdir, $tmpdir\n");
+    }
+} elsif ($TMPDIR and -d $TMPDIR) {
+    $tmpdir=$TMPDIR;
+} elsif ($TEMPDIR and -d $TEMPDIR) {
+    $tmpdir=$TEMPDIR;
+} elsif (-d "/tmp") {
+    $tmpdir="/tmp";
+} elsif (-d "/scratch") {
+    $tmpdir="/scratch";
+} else {
+    die("Tmpdir required\n");
+}
+# Work in a per-process subdirectory so that cleanup only removes our own files.
+$tmpdir .= "/$$";
+mkdir($tmpdir) or die("Unable to mkdir $tmpdir\n");
+
+# CHECK EXECUTABLES
+my $toAmos=find_executable('toAmos');
+my $minimus2=find_executable('minimus2');
+my $deltafilter=find_executable('delta-filter');
+my $showcoords=find_executable('show-coords');
+
+# CONCATENATE INFILES
+my $infile="$tmpdir/infile.seq";
+open(my $out, '>', $infile) or dienice("Unable to open tmpfile, $infile\n");
+my $n1=append_fasta($infile1, 1, $prefix1, $out);
+append_fasta($infile2, 2, $prefix2, $out);
+close($out) or dienice("Unable to write tmpfile, $infile\n");
+
+# CONVERT FORMAT
+run_or_die("ERROR CONVERTING TO AMOS FORMAT\n",
+    $toAmos, '-s', $infile, '-o', "$tmpdir/infile.afg");
+
+# CO-ASSEMBLY
+# REFCOUNT is the number of records read from infile1;
+# explicitly defining the delta-filter and show-coords executables is more robust
+run_or_die("ERROR EXECUTING MINIMUS2\n",
+    $minimus2, "$tmpdir/infile",
+    '-D', "REFCOUNT=$n1",
+    '-D', "DELTAFILTER=$deltafilter",
+    '-D', "SHOWCOORDS=$showcoords");
+
+# MOVE FILES AND CLEANUP TMPDIR
+move("$tmpdir/infile.fasta", $contigs_outfile)
+    or dienice("ERROR MOVING CONTIGS OUTFILE\n");
+move("$tmpdir/infile.singletons.seq", $singletons_outfile)
+    or dienice("ERROR MOVING SINGLETONS OUTFILE\n");
+rmtree($tmpdir);
+die("ERROR CLEANING UP TEMP DIR\n") if -d $tmpdir;
+exit;
+
+# Resolve a program name to its full path via `which`; aborts through dienice()
+# when the program is not found or the result is not a plain file.
+sub find_executable {
+    my ($name)=@_;
+    my $path=`which $name`;
+    $path='' unless defined $path;
+    chomp $path;
+    dienice("$name executable not found\n") unless $path and -f $path;
+    return $path;
+}
+
+# Append the Fasta records of $path to the open filehandle $out and return the
+# number of records.  When $prefix is a non-empty string, each header line is
+# replaced by ">$prefix.<n>", <n> being the 1-based index of the record within
+# this file.  $which (1 or 2) only selects the wording of the error messages.
+sub append_fasta {
+    my ($path,$which,$prefix,$out)=@_;
+    my $rename=(defined($prefix) && length($prefix));
+    open(my $in, '<', $path) or dienice("Unable to open infile$which, $path\n");
+    my $n=0;
+    while (my $line=<$in>) {
+        if ($line =~ /^>/) {
+            ++$n;
+            $line=">$prefix.$n\n" if $rename;
+        }
+        # a file lacking a final newline must not run into the next file's header
+        $line .= "\n" unless $line =~ /\n\z/;
+        print {$out} $line;
+    }
+    close($in);
+    dienice("Infile $which contains no sequences or is not in Fasta format\n") unless $n;
+    return $n;
+}
+
+# Run an external command without a shell, discarding its STDOUT, and abort
+# through dienice($msg) if it cannot be started or exits with a non-zero status.
+sub run_or_die {
+    my ($msg,@cmd)=@_;
+    open(my $pipe, '-|', @cmd) or dienice($msg);
+    while (my $discarded=<$pipe>) { }   # drain; the output is not needed
+    close($pipe) or dienice($msg);      # close() is false on non-zero exit status
+    return;
+}
+
+# CLEANUP TEMPFILES BEFORE QUITTING
+sub dienice {
+    my $msg=shift;
+    rmtree($tmpdir) if defined $tmpdir and -d $tmpdir;
+    die($msg);
+}
+__END__