changeset 0:4d237a31970b default tip

Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
author konradpaszkiewicz
date Tue, 07 Jun 2011 17:42:21 -0400
parents
children
files README_PREPILLUMINAREADS prepare_pe_reads_for_velvet.sh prepare_pe_reads_for_velvet_fasta.sh preppereads.py preppereads.xml preppereads_fasta.py preppereads_fasta.xml shuffleSequences_fasta.pl shuffleSequences_fastq.pl
diffstat 9 files changed, 288 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/README_PREPILLUMINAREADS	Tue Jun 07 17:42:21 2011 -0400
@@ -0,0 +1,12 @@
+#Created 07/01/2011
+#Konrad Paszkiewicz, Exeter Sequencing Service, University of Exeter
+
+The enclosed tools are designed to prepare paired-end Illumina reads for use with the Galaxy velvet or velvetoptimiser tools. Immediately after sequencing, the Illumina forward and reverse read files are always in the correct order. However, after filtering and other QC steps they can quite often become out of sync. These tools operate on either FASTA or FASTQ data sets and output two files: one contains the paired reads in Velvet's required order (i.e. read 1 followed by read 2), and the other contains the reads which have lost their partner. Both files can be input to Velvet using the -shortPaired and -short2 flags respectively.
+
+Prerequisites:
+
+
+
+Improvements:
+
+1. Integration of fastq and fasta into a single tool.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/prepare_pe_reads_for_velvet.sh	Tue Jun 07 17:42:21 2011 -0400
@@ -0,0 +1,7 @@
+#!/bin/bash
+## Usage: <fastq1 in> <fastq1 out> <fastq2 in> <fastq2 out> <singleton reads out> <pe reads out>
+## $2 and $4 are intermediates consumed by the shuffle step; $5 and $6 are the outputs.
+## Arguments are quoted so paths with spaces survive; a failing step aborts with a non-zero status.
+trap 'rm -f "$2" "$4"' EXIT   # remove the intermediates however the script ends
+perl /users/galaxy/galaxyscripts/fastq_pe_even.pl "$1" "$2" "$3" "$4" "$5" || exit 1
+perl /usr/local/velvet/shuffleSequences_fastq.pl "$2" "$4" "$6" || exit 1
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/prepare_pe_reads_for_velvet_fasta.sh	Tue Jun 07 17:42:21 2011 -0400
@@ -0,0 +1,7 @@
+#!/bin/bash
+## Usage: <fasta1 in> <fasta1 out> <fasta2 in> <fasta2 out> <singleton reads out> <pe reads out>
+## $2 and $4 are intermediates consumed by the shuffle step; $5 and $6 are the outputs.
+## Arguments are quoted so paths with spaces survive; a failing step aborts with a non-zero status.
+trap 'rm -f "$2" "$4"' EXIT   # remove the intermediates however the script ends
+perl /usr/local/velvet/contrib/select_paired/select_paired.pl "$1" "$2" "$3" "$4" "$5" || exit 1
+perl /usr/local/velvet/shuffleSequences_fasta.pl "$2" "$4" "$6" || exit 1
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/preppereads.py	Tue Jun 07 17:42:21 2011 -0400
@@ -0,0 +1,64 @@
+#!/usr/bin/env python
+
+"""
+Classes encapsulating decypher tool.
+James E Johnson - University of Minnesota
+"""
+import pkg_resources;
+import logging, os, string, sys, tempfile, glob, shutil, types, urllib
+import shlex, subprocess
+from optparse import OptionParser, OptionGroup
+from stat import *
+
+
+log = logging.getLogger( __name__ )  # module-level logger named after this module
+# Refuse to run on interpreters older than Python 2.4.
+assert sys.version_info[:2] >= ( 2, 4 )
+
+def stop_err( msg ):
+    sys.stderr.write( "%s\n" % msg )
+    sys.exit()
+
+def __main__():
+    #Parse Command Line
+    s = 'preppereads.py:  argv = %s\n' % (sys.argv)
+    # print >> sys.stderr, s # so will appear as blurb for file
+    argcnt = len(sys.argv)
+    working_dir = sys.argv[1]
+    input1 = sys.argv[2]
+    input2 = sys.argv[3]
+    outpe = sys.argv[4]
+    outsingletons = sys.argv[5]
+    cmdline = '/users/galaxy/galaxyscripts/prepare_pe_reads_for_velvet.sh %s %s %s %s %s %s > /dev/null' % (input1, '1.fastq', input2, '2.fastq', 'single.fq', 'pe.fq')
+    #print >> sys.stderr, cmdline # so will appear as blurb for file
+    try:
+        proc = subprocess.Popen( args=cmdline, shell=True, stderr=subprocess.PIPE )
+        returncode = proc.wait()
+        # get stderr, allowing for case where it's very large
+        stderr = ''
+        buffsize = 1048576
+        try:
+            while True:
+                stderr += proc.stderr.read( buffsize )
+                if not stderr or len( stderr ) % buffsize != 0:
+                    break
+        except OverflowError:
+            pass
+        if returncode != 0:
+            raise Exception, stderr
+    except Exception, e:
+        stop_err( 'Error running preppereads.sh ' + str( e ) )
+
+    out = open(outpe,'w')
+    #outpe_path = os.path.join(working_dir,'')
+    for line in open('pe.fq'):
+        out.write( "%s" % (line) )
+    out.close()
+
+    out = open(outsingletons,'w')
+    #singletons_path = os.path.join(working_dir,'')
+    for line in open('single.fq'):
+        out.write( "%s" % (line) )
+    out.close()
+
+if __name__ == "__main__": __main__()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/preppereads.xml	Tue Jun 07 17:42:21 2011 -0400
@@ -0,0 +1,32 @@
+<tool id="preppereadsforvelet" name="Prepare paired-end reads for use with Velvet (FASTQ)"  version="1.0.0">
+	<!-- Galaxy tool definition: runs preppereads.py on two illuminafastq datasets and declares two illuminafastq outputs (paired reads and singletons). -->
+	<description>Prepare paired-end reads for use with Velvet</description>
+	<!-- The trailing "exit 0" makes the command line finish with status 0 whatever the script itself returned. -->
+	<command interpreter="python">
+	  preppereads.py 
+           '$__app__.config.new_file_path'
+	   '$input1'
+           '$input2'
+           '$outpe'
+	   '$outsingletons'; exit 0
+	</command>
+	<inputs>
+		<param name="input1" type="data" format="illuminafastq" label="Read 1 of paired-end dataset"/>
+		<param name="input2" type="data" format="illuminafastq" label="Read 2 of paired-end dataset"/>
+	</inputs>
+	<outputs>
+		<data format="illuminafastq" name="outpe" label="Merged paired-end reads ready for velvet"/>
+		<data format="illuminafastq" name="outsingletons" label="Singleton reads for velvet"/>
+	</outputs>
+	<requirements></requirements>
+	<help>
+**Paired-end reads preparation for velvet**
+
+Velvet requires paired-end reads to be in the same file in an interleaved FASTQ format. This format specifies that read 1 should be followed immediately by read 2. 
+
+This script is useful to ensure reads are in the correct order prior to passing the data to Velvet.
+
+Singleton reads can also be produced if quality trimming or other filtering criteria have removed the read's mate. These can also be passed to Velvet using a second short read channel. 
+
+	</help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/preppereads_fasta.py	Tue Jun 07 17:42:21 2011 -0400
@@ -0,0 +1,64 @@
+#!/usr/bin/env python
+
+"""
+Classes encapsulating decypher tool.
+James E Johnson - University of Minnesota
+"""
+import pkg_resources;
+import logging, os, string, sys, tempfile, glob, shutil, types, urllib
+import shlex, subprocess
+from optparse import OptionParser, OptionGroup
+from stat import *
+
+
+log = logging.getLogger( __name__ )  # module-level logger named after this module
+# Refuse to run on interpreters older than Python 2.4.
+assert sys.version_info[:2] >= ( 2, 4 )
+
+def stop_err( msg ):
+    sys.stderr.write( "%s\n" % msg )
+    sys.exit()
+
+def __main__():
+    #Parse Command Line
+    s = 'preppereads.py:  argv = %s\n' % (sys.argv)
+    # print >> sys.stderr, s # so will appear as blurb for file
+    argcnt = len(sys.argv)
+    working_dir = sys.argv[1]
+    input1 = sys.argv[2]
+    input2 = sys.argv[3]
+    outpe = sys.argv[4]
+    outsingletons = sys.argv[5]
+    cmdline = '/users/galaxy/galaxyscripts/prepare_pe_reads_for_velvet_fasta.sh %s %s %s %s %s %s > /dev/null' % (input1, '1.fastq', input2, '2.fastq', 'single.fq', 'pe.fq')
+    #print >> sys.stderr, cmdline # so will appear as blurb for file
+    try:
+        proc = subprocess.Popen( args=cmdline, shell=True, stderr=subprocess.PIPE )
+        returncode = proc.wait()
+        # get stderr, allowing for case where it's very large
+        stderr = ''
+        buffsize = 1048576
+        try:
+            while True:
+                stderr += proc.stderr.read( buffsize )
+                if not stderr or len( stderr ) % buffsize != 0:
+                    break
+        except OverflowError:
+            pass
+        if returncode != 0:
+            raise Exception, stderr
+    except Exception, e:
+        stop_err( 'Error running preppereads.sh ' + str( e ) )
+
+    out = open(outpe,'w')
+    #outpe_path = os.path.join(working_dir,'')
+    for line in open('pe.fq'):
+        out.write( "%s" % (line) )
+    out.close()
+
+    out = open(outsingletons,'w')
+    #singletons_path = os.path.join(working_dir,'')
+    for line in open('single.fq'):
+        out.write( "%s" % (line) )
+    out.close()
+
+if __name__ == "__main__": __main__()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/preppereads_fasta.xml	Tue Jun 07 17:42:21 2011 -0400
@@ -0,0 +1,32 @@
+<tool id="preppereadsforveletfasta" name="Prepare reads for use with velvet (FASTA)" version="1.0.0">
+	<!-- Galaxy tool definition: runs preppereads_fasta.py on two fasta datasets and declares two fasta outputs (paired reads and singletons). -->
+	<description>Prepare paired-end reads for use with Velvet</description>
+	<!-- The trailing "exit 0" makes the command line finish with status 0 whatever the script itself returned. -->
+	<command interpreter="python">
+	  preppereads_fasta.py 
+           '$__app__.config.new_file_path'
+	   '$input1'
+           '$input2'
+           '$outpe'
+	   '$outsingletons'; exit 0
+	</command>
+	<inputs>
+		<param name="input1" type="data" format="fasta" label="Read 1 of paired-end dataset"/>
+		<param name="input2" type="data" format="fasta" label="Read 2 of paired-end dataset"/>
+	</inputs>
+	<outputs>
+		<data format="fasta" name="outpe" label="Merged paired-end reads ready for velvet"/>
+		<data format="fasta" name="outsingletons" label="Singleton reads for velvet"/>
+	</outputs>
+	<requirements></requirements>
+	<help>
+**Paired-end reads preparation for velvet**
+
+Velvet requires paired-end reads to be in the same file in an interleaved FASTA format. This format specifies that read 1 should be followed immediately by read 2. 
+
+This script is useful to ensure reads are in the correct order prior to passing the data to Velvet.
+
+Singleton reads can also be produced if quality trimming or other filtering criteria have removed the read's mate. These can also be passed to Velvet using a second short read channel. 
+
+	</help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/shuffleSequences_fasta.pl	Tue Jun 07 17:42:21 2011 -0400
@@ -0,0 +1,41 @@
+#!/usr/bin/perl
+# Interleave two FASTA files record by record (A1, B1, A2, B2, ...) into one output file.
+use strict;
+use warnings;
+
+if (@ARGV < 3) {
+	print "Usage: $0 forward_reads.fa reverse_reads.fa outfile.fa\n";
+	print "\tforward_reads.fa / reverse_reads.fa : paired reads to be merged\n";
+	print "\toutfile.fa : outfile to be created\n";
+	exit(@ARGV ? 1 : 0);	# no arguments is a plain help request; too few is an error
+}
+
+my ($filenameA, $filenameB, $filenameOut) = @ARGV;
+
+# Three-argument open with explicit checks: a failed open must not be silently ignored.
+open my $fileA, '<', $filenameA or die "Could not open $filenameA: $!";
+open my $fileB, '<', $filenameB or die "Could not open $filenameB: $!";
+open my $outfile, '>', $filenameOut or die "Could not open $filenameOut: $!";
+
+my $lineA = <$fileA>;
+my $lineB = <$fileB>;
+
+# Each pass copies one record from A (its first line plus every following line
+# that does not start with '>'), then one record from B in the same way.
+while (defined $lineA) {
+	print {$outfile} $lineA;
+	$lineA = <$fileA>;
+	while (defined $lineA && $lineA !~ m/^>/) {
+		print {$outfile} $lineA;
+		$lineA = <$fileA>;
+	}
+
+	next unless defined $lineB;	# B ran out of records: nothing left to pair
+	print {$outfile} $lineB;
+	$lineB = <$fileB>;
+	while (defined $lineB && $lineB !~ m/^>/) {
+		print {$outfile} $lineB;
+		$lineB = <$fileB>;
+	}
+}
+close $outfile or die "Could not write $filenameOut: $!";
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/shuffleSequences_fastq.pl	Tue Jun 07 17:42:21 2011 -0400
@@ -0,0 +1,29 @@
+#!/usr/bin/perl
+# Interleave two FASTQ files four lines at a time: four from A, four from B, and so on.
+use strict;
+use warnings;
+
+die "Usage: $0 forward_reads.fq reverse_reads.fq outfile.fq\n" if @ARGV < 3;
+my ($filenameA, $filenameB, $filenameOut) = @ARGV;
+
+open my $fileA, '<', $filenameA or die "Could not open $filenameA: $!";
+open my $fileB, '<', $filenameB or die "Could not open $filenameB: $!";
+open my $outfile, '>', $filenameOut or die "Could not open $filenameOut: $!";
+
+# Copy up to $count lines from $in to $out; returns the number actually copied.
+sub copy_lines {
+	my ($in, $out, $count) = @_;
+	for my $copied (0 .. $count - 1) {
+		my $line = <$in>;
+		return $copied unless defined $line;	# input exhausted: never print undef
+		print {$out} $line;
+	}
+	return $count;
+}
+
+# Keeps the original fixed grouping of four lines per record; runs until A is exhausted.
+until (eof $fileA) {
+	copy_lines($fileA, $outfile, 4);
+	copy_lines($fileB, $outfile, 4);
+}
+close $outfile or die "Could not write $filenameOut: $!";