changeset 30:c92ea5a8dec6

planemo upload for repository https://github.com/lparsons/galaxy_tools/tree/master/tools/trtr commit 6f76316d57fa4ec6b16aeed61e197b2c117f006d-dirty
author lparsons
date Mon, 14 Mar 2016 16:18:55 -0400
parents 4e6b9640989f
children 13cad5d36301
files tool-dependencies/trtrR-1.0.0/Makefile tool-dependencies/trtrR-1.0.0/trtr tool-dependencies/trtrR-1.0.0/trtr.c tool-dependencies/v1.0.0.tar.gz trtr-4e6b9640989f.tar.gz trtr.xml
diffstat 6 files changed, 266 insertions(+), 35 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-dependencies/trtrR-1.0.0/Makefile	Mon Mar 14 16:18:55 2016 -0400
@@ -0,0 +1,10 @@
+# Build the trtr executable from its single C source file.
+trtr : trtr.c
+	cc trtr.c -o trtr
+
+# "clean" names an action, not a file; -f keeps it from failing when trtr was never built.
+.PHONY : clean
+clean :
+	rm -f trtr
+
+
Binary file tool-dependencies/trtrR-1.0.0/trtr has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-dependencies/trtrR-1.0.0/trtr.c	Mon Mar 14 16:18:55 2016 -0400
@@ -0,0 +1,221 @@
+/**********************************************************************
+* Author: Jonathan Richards
+* Date: 12/11/2014
+*
+* This tool removes tandem repeats from ends of unaligned sequencing
+* reads (leaving one copy). This prevents reads that don't span the
+* repeated region from overlapping and leading to inaccurate SNP
+* calls.
+* 
+* The maximum repeat length is adjustable (use 1 to trim only
+* homopolymers).
+*
+* The "aggressive" option should not be touched in general. Setting to
+* 0 will prevent the program from trimming to exactly 1 copy of the
+* repeat, instead leaving between 1 and 2 copies. Why this would be
+* useful, I don't know.
+* 
+* This program could also be a useful first step before assembly. More
+* testing needs to be done.
+*
+* Special thanks to my advisor, Professor Alison Gammie, for bringing
+* this problem to my attention and to my research partner, Mitchell
+* Vollger, for help testing and finalizing.
+**********************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <sys/types.h>
+#include <assert.h>
+#include <errno.h>
+
+//own getline for portability: reads one line from stream into *lineptr (capacity tracked in *n, grown with realloc), skipping every '\r'
+//adapted from getline.c written by Jan Brittenson, bson@gnu.ai.mit.edu
+//http://www.opensource.apple.com/source/cvs/cvs-19/cvs/lib/getline.c
+ssize_t getline(char** lineptr, size_t* n, FILE* stream) { // returns chars stored (a '\n' is kept and counted), or -1 on bad arguments, allocation failure, read error, or EOF with nothing read
+	size_t nchars_avail; // bytes still free in the buffer, counted from read_pos
+	char* read_pos; // where the next character will be stored
+	int save_errno; // errno as it was right after getc()
+	ssize_t ret;
+	register int c;
+
+	if (!lineptr || !n || !stream) { // reject NULL arguments
+		errno = EINVAL;
+		return -1;
+	}
+	if (!*lineptr) { // caller supplied no buffer: start with 128 bytes
+		*n = 128;
+		*lineptr = malloc(*n);
+		if (!*lineptr) {
+			errno = ENOMEM;
+			return -1;
+		}
+	}
+	nchars_avail = *n;
+	read_pos = *lineptr;
+	for (;;) {
+		c = getc(stream);
+		save_errno = errno;
+		if (c != '\r') { //for portability: a '\r' is never stored, so CRLF input reads like LF
+			assert((*lineptr+*n)==(read_pos+nchars_avail)); // invariant: buffer end == read_pos + free bytes
+			if (nchars_avail < 2) { // keep room for this char plus the final '\0'
+				*n *= 2; // double the capacity
+				nchars_avail = *n + *lineptr - read_pos; // free bytes under the new capacity
+				*lineptr = realloc(*lineptr, *n); // NOTE(review): on failure the old buffer is not freed and *lineptr is overwritten with NULL
+				if (!*lineptr) {
+					errno = ENOMEM;
+					return -1;
+				}
+				read_pos = *n - nchars_avail + *lineptr; // re-point into the (possibly moved) buffer
+				assert((*lineptr+*n) == (read_pos+nchars_avail));
+			}
+			if (ferror(stream)) {
+				errno = save_errno; // report the errno captured right after getc()
+				return -1;
+			}
+			if (c == EOF) {
+				if (read_pos == *lineptr) // nothing was read: signal end of input
+					return -1;
+				else
+					break; // final line without '\n': return what was read
+			}
+			*read_pos++ = c;
+			nchars_avail--;
+			if (c == '\n') // line complete; the newline stays in the buffer
+				break;
+		}
+	}
+	*read_pos = '\0';
+	ret = read_pos - *lineptr; // length excludes the terminating '\0'
+	return ret;
+}
+
+int main(int argc, char *argv[]) { // usage: trtr <fastq> [max_repeat_len] [aggressive]; trimmed records are written to stdout
+	char *line = NULL; // buffer allocated by getline(); freed after the read loop
+	size_t len = 0; // capacity of line, maintained by getline()
+	ssize_t line_length; // length of the current line as returned by getline()
+
+	int count = 0; // position inside the current 4-line record (1..4)
+	size_t leftTrim = 0; // chars dropped from the start of the sequence and quality lines
+	size_t rightTrim = 0; // index at which the trimmed line gets its new '\n'
+	size_t i; // candidate repeat-unit length
+	size_t i_max = 10; // largest repeat-unit length tried; overridden by argv[2]
+	size_t j; // offset inside the repeat unit being compared
+	size_t r; // complete extra copies of the unit matched so far
+	size_t length; // trim length implied by the current candidate
+	size_t longest_region; // best trim length found over all unit lengths
+	char *ptr; // strtol end pointer; never inspected
+	bool matched = false;
+	bool aggressive_trim = true; // true: trim length is r*i+j; false: r*i
+	// NOTE(review): fopen() below runs before any argc check, so argv[1] is used even when no file argument was given
+	FILE *file = fopen(argv[1], "r");
+
+	if (argc >= 3) {
+		i_max = strtol(argv[2], &ptr, 10); // stored without validation; NOTE(review): a negative value converts to a huge size_t
+		if (argc >= 4) {
+			aggressive_trim = strtol(argv[3], &ptr, 10); // any non-zero value enables aggressive trimming
+		}
+	}
+	if (file != NULL) {
+		while ((line_length = getline(&line, &len, file)) != -1) { // one input line per iteration
+			count++;
+			switch (count) {
+
+				//read name, copied unchanged
+				case 1:
+					fputs(line, stdout);
+					break;
+				
+				//read sequence: compute both trims, then print the trimmed line
+				case 2:
+					//find leftTrim: longest run of repeated copies at the start of the line
+					longest_region = 0;
+					for (i=1; i<=i_max && i<=line_length/2; i++) { //i = size of repeat unit, at most half the line
+						if (line[0] == line[i]) { // the unit can only repeat if its first char recurs i chars later
+							matched = true;
+							j=1; // offset 0 was just compared
+							r=0;
+							while (matched == true) {
+								if (j == i) { // a whole extra copy matched
+									r++;
+									j=0;
+								} else if (line[j] != line[(r+1)*i+j]) { // first copy vs. copy r+1
+								//no length comparison needed: the \n at the end of the line is relied on to end the scan
+									matched = false;
+									if (aggressive_trim) {
+										length = r*i+j; // every char matched beyond the first copy
+									} else {
+										length = r*i; // whole extra copies only
+									}
+									if (length > longest_region && r>0) { // needs at least one full extra copy
+										longest_region = length;
+									}
+								} else {
+									j++;
+								}
+							}
+							
+						}
+					}
+					leftTrim = longest_region;
+
+					//find rightTrim: same scan, mirrored, walking backwards from the last char before the '\n'
+					longest_region = 0;
+					for (i=1; i<=i_max && i<=line_length/2; i++) { //i = size of repeat unit
+						if (line[line_length-2] == line[line_length-2-i]) { // NOTE(review): assumes the line ends in '\n'; for a 1-char read (line_length == 2) this index appears to fall before the buffer - confirm
+							matched = true;
+							j=1;
+							r=0;
+							while (matched == true) {
+								if (j == i) {
+									r++;
+									j=0;
+								} else if ((line[line_length-2-j] != line[line_length-2-(r+1)*i-j]) 
+									|| line_length-2-(r+1)*i-j == leftTrim) { // mismatch, or the compared index reached leftTrim
+									matched = false; // NOTE(review): the stop test is an exact ==; if the first compared index is already below leftTrim the scan is not stopped by it - confirm
+									if (aggressive_trim) {
+										length = r*i+j;
+									} else {
+										length = r*i;
+									}
+									if (length > longest_region && r>0) {
+										longest_region = length;
+									}
+								} else {
+									j++;
+								}
+							}
+						}
+					}
+					rightTrim = line_length-longest_region-1; // line_length-1 (the '\n' slot when the line ends in '\n') moved left by the trim length
+					
+					//print trimmed line: terminate at rightTrim, start output at leftTrim
+					line[rightTrim] = '\n';
+					line[rightTrim+1] = '\0';
+					fputs(line+leftTrim, stdout);
+					break;
+				
+				//separator line ("+"), copied unchanged
+				case 3:
+					fputs(line, stdout);
+					break;
+				
+				//read qualities: cut with the leftTrim/rightTrim computed for the sequence line
+				case 4:
+					count = 0; //reset so the next line is read as a title
+					line[rightTrim] = '\n'; // NOTE(review): assumes the quality line is as long as the sequence line - confirm
+					line[rightTrim+1] = '\0';
+					fputs(line+leftTrim, stdout);
+					break;
+				default:
+					break; 
+			}
+		}
+		free(line);
+		fclose(file);
+	} else {
+		perror(argv[1]); // the input file could not be opened
+	}
+    return 0; // NOTE(review): exit status is 0 even when the file could not be opened
+}
\ No newline at end of file
Binary file tool-dependencies/v1.0.0.tar.gz has changed
Binary file trtr-4e6b9640989f.tar.gz has changed
--- a/trtr.xml	Sat Mar 12 14:50:15 2016 -0500
+++ b/trtr.xml	Mon Mar 14 16:18:55 2016 -0400
@@ -1,43 +1,43 @@
-<tool id="trtr" name="TRTR">
-	<requirements>
-		    <requirement type="package" version="1.0">trtr</requirement>
-	</requirements>
+<tool id="trtr" name="TRTR" version="1.0galaxy1">
+    <description>Trim Reads of Tandem Repeat in a fastq file. </description>
+
+    <requirements>
+        <requirement type="package" version="1.0">trtr</requirement>
+    </requirements>
 
-	<description>Trim Reads of Tandem Repeat in a fastq file. </description>
-	
-	<command>
-		trtr $input $max_repeat $aggressive > $output
-	</command>
-	
-	<inputs>
-		<param format="fastq" name="input" type="data" label="Source file"/>
-		<param name="max_repeat" type="integer" value="10" label="Maximum repeat length" />
-		<param name="aggressive" type="integer" value="1" label="Aggressive? See description."/>
-	</inputs>
-	
-	<outputs>
-		<data format_source="input" name="output" />
-	</outputs>
+    <command>
+        trtr $input $max_repeat $aggressive > $output
+    </command>
+
+    <inputs>
+        <param format="fastq" name="input" type="data" label="Source file"/>
+        <param name="max_repeat" type="integer" value="10" label="Maximum repeat length" />
+        <param name="aggressive" type="integer" value="1" label="Aggressive? See description."/>
+    </inputs>
+
+    <outputs>
+        <data format_source="input" name="output" />
+    </outputs>
 
-  <tests>
-    <test>
-      <param name="input" value="small.fastq"/>
-      <output name="output" file="smallTrimmed.fastq"/>
-    </test>
-    <test>
-      <param name="input" value="medium.fastq"/>
-      <output name="output" file="mediumTrimmed.fastq"/>
-    </test>
-  </tests>
+    <tests>
+        <test>
+            <param name="input" value="small.fastq"/>
+            <output name="output" file="smallTrimmed.fastq"/>
+        </test>
+        <test>
+            <param name="input" value="medium.fastq"/>
+            <output name="output" file="mediumTrimmed.fastq"/>
+        </test>
+    </tests>
 
-  <help>
-This tool removes tandem repeats from ends of unaligned sequencing reads (leaving one copy). This prevents reads that don't span the repeated region from overlapping and leading to innaccurate SNPs calls.
+    <help>
+        This tool removes tandem repeats from ends of unaligned sequencing reads (leaving one copy). This prevents reads that don't span the repeated region from overlapping and leading to inaccurate SNP calls.
 
-The maximimum repeat length is adjustable (use 1 to trim only homopolymers).
+        The maximum repeat length is adjustable (use 1 to trim only homopolymers).
 
-The "aggressive" option should not be touched in general. Setting to 0 will prevent the program from trimming to exactly 1 copy of the repeat, instead leaving between 1 and 2 copies.
+        The "aggressive" option should not be touched in general. Setting to 0 will prevent the program from trimming to exactly 1 copy of the repeat, instead leaving between 1 and 2 copies.
 
-This could also be a useful first step before assembly. More testing needs to be done.
-  </help>
+        This could also be a useful first step before assembly. More testing needs to be done.
+    </help>
 
 </tool>