# HG changeset patch
# User devteam
# Date 1390832852 18000
# Node ID 0b9feb0ed6286666d885fcb38b10e203cee0d537
Imported from capsule None
diff -r 000000000000 -r 0b9feb0ed628 fastq_trimmer.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/fastq_trimmer.py Mon Jan 27 09:27:32 2014 -0500
@@ -0,0 +1,66 @@
+#Dan Blankenberg
+import sys
+from galaxy_utils.sequence.fastq import fastqReader, fastqWriter
+
+def main():
+    """Trim bases from both ends of every read in a FASTQ file.
+
+    Command-line arguments:
+      1. input FASTQ path
+      2. output FASTQ path
+      3. left offset (base count, or percent of the read length)
+      4. right offset (base count, or percent of the read length)
+      5. 'offsets_percent' to treat the offsets as percentages; any other
+         value treats them as absolute base counts
+      6. FASTQ variant handed to the reader and writer as format; an empty
+         string falls back to 'sanger'
+      7. 'keep_zero_length' to still write reads that are empty after
+         trimming; any other value drops and counts them
+
+    A summary of processed and excluded reads is printed to stdout.
+    """
+    input_filename = sys.argv[1]
+    output_filename = sys.argv[2]
+    left_offset = sys.argv[3]
+    right_offset = sys.argv[4]
+    percent_offsets = sys.argv[5] == 'offsets_percent'
+    input_type = sys.argv[6] or 'sanger'
+    keep_zero_length = sys.argv[7] == 'keep_zero_length'
+
+    num_reads_excluded = 0
+    num_reads = None  # stays None when the input yields no reads at all
+    out = fastqWriter( open( output_filename, 'wb' ), format = input_type )
+    try:
+        # Close the input deterministically instead of leaking the handle.
+        with open( input_filename ) as input_file:
+            for num_reads, fastq_read in enumerate( fastqReader( input_file, format = input_type ) ):
+                if percent_offsets:
+                    read_length = float( len( fastq_read ) )
+                    left_column_offset = int( round( float( left_offset ) / 100.0 * read_length ) )
+                    right_column_offset = int( round( float( right_offset ) / 100.0 * read_length ) )
+                else:
+                    left_column_offset = int( left_offset )
+                    right_column_offset = int( right_offset )
+                # The right offset is passed as a negative end index; 0 (or less)
+                # maps to None so nothing is trimmed from the right.
+                # NOTE(review): assumes slice() follows Python slice semantics - confirm.
+                if right_column_offset > 0:
+                    right_column_offset = -right_column_offset
+                else:
+                    right_column_offset = None
+                fastq_read = fastq_read.slice( left_column_offset, right_column_offset )
+                if keep_zero_length or len( fastq_read ):
+                    out.write( fastq_read )
+                else:
+                    num_reads_excluded += 1
+    finally:
+        # Close the output even if reading or trimming fails part-way.
+        out.close()
+    # Parenthesized single-argument print behaves the same on Python 2 and 3.
+    if num_reads is None:
+        print( "No valid fastq reads could be processed." )
+    else:
+        print( "%i fastq reads were processed." % ( num_reads + 1 ) )
+    if num_reads_excluded:
+        print( "%i reads of zero length were excluded from the output." % num_reads_excluded )
+
+if __name__ == "__main__": main()
diff -r 000000000000 -r 0b9feb0ed628 fastq_trimmer.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/fastq_trimmer.xml Mon Jan 27 09:27:32 2014 -0500
@@ -0,0 +1,123 @@
+
+ by column
+
+ galaxy_sequence_utils
+
+ fastq_trimmer.py '$input_file' '$output_file' '${offset_type['left_column_offset']}' '${offset_type['right_column_offset']}' '${offset_type['base_offset_type']}' '${input_file.extension[len( 'fastq' ):]}' '$keep_zero_length'
+
+
+
+
+
+
+
+
+
+
+ int( float( value ) ) == float( value )
+
+
+
+ int( float( value ) ) == float( value )
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+This tool allows you to trim the ends of reads.
+
+You can specify either absolute or percent-based offsets. Offsets are calculated, starting at 0, from the respective end to be trimmed. When using the percent-based method, offsets are rounded to the nearest integer.
+
+For example, if you have a read of length 36::
+
+ @Some FASTQ Sanger Read
+ CAATATGTNCTCACTGATAAGTGGATATNAGCNCCA
+ +
+ =@@.@;B-%?8>CBA@>7@7BBCA4-48%<;;%<B@
+
+If you set absolute offsets of 2 and 9, the result is::
+
+ @Some FASTQ Sanger Read
+ ATATGTNCTCACTGATAAGTGGATA
+ +
+ @.@;B-%?8>CBA@>7@7BBCA4-4
+
+Or, if you set percent offsets of 6% and 20% (corresponding to absolute offsets of 2 and 7 for a read length of 36), the result is::
+
+ @Some FASTQ Sanger Read
+ ATATGTNCTCACTGATAAGTGGATATN
+ +
+ @.@;B-%?8>CBA@>7@7BBCA4-48%
+
+-----
+
+.. class:: warningmark
+
+Trimming a color space read will cause any adapter base to be lost.
+
+------
+
+**Citation**
+
+If you use this tool, please cite `Blankenberg D, Gordon A, Von Kuster G, Coraor N, Taylor J, Nekrutenko A; Galaxy Team. Manipulation of FASTQ data with Galaxy. Bioinformatics. 2010 Jul 15;26(14):1783-5. <http://www.ncbi.nlm.nih.gov/pubmed/20562416>`_
+
+
+
+
diff -r 000000000000 -r 0b9feb0ed628 test-data/empty_file.dat
diff -r 000000000000 -r 0b9feb0ed628 test-data/fastq_trimmer_out1.fastqsanger
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/fastq_trimmer_out1.fastqsanger Mon Jan 27 09:27:32 2014 -0500
@@ -0,0 +1,8 @@
+@FAKE0001 Original version has PHRED scores from 0 to 93 inclusive (in that order)
+CGTA
++
+NOPQ
+@FAKE0002 Original version has PHRED scores from 93 to 0 inclusive (in that order)
+ATGC
++
+QPON
diff -r 000000000000 -r 0b9feb0ed628 test-data/sanger_full_range_original_sanger.fastqsanger
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sanger_full_range_original_sanger.fastqsanger Mon Jan 27 09:27:32 2014 -0500
@@ -0,0 +1,8 @@
+@FAKE0001 Original version has PHRED scores from 0 to 93 inclusive (in that order)
+ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC
++
+!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~
+@FAKE0002 Original version has PHRED scores from 93 to 0 inclusive (in that order)
+CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCA
++
+~}|{zyxwvutsrqponmlkjihgfedcba`_^]\[ZYXWVUTSRQPONMLKJIHGFEDCBA@?>=<;:9876543210/.-,+*)('&%$#"!
diff -r 000000000000 -r 0b9feb0ed628 tool_dependencies.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml Mon Jan 27 09:27:32 2014 -0500
@@ -0,0 +1,6 @@
+
+
+
+
+
+