changeset 4:c70137414dcd draft

sickle v1.33
author nikhil-joshi
date Wed, 23 Jul 2014 18:35:10 -0400
parents f6ebdaca9925
children 91f11cf03fa3
files LICENSE Makefile README.md sickle.xml sickle/LICENSE sickle/README.md sickle/sickle sickle/sickle.xml src/kseq.h src/print_record.c src/print_record.h src/sickle.c src/sickle.h src/sliding.c src/trim_paired.c src/trim_single.c
diffstat 16 files changed, 1766 insertions(+), 241 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/LICENSE	Wed Jul 23 18:35:10 2014 -0400
@@ -0,0 +1,19 @@
+Permission is hereby granted, free of charge, to any person
+obtaining a copy of this software and associated documentation
+files (the "Software"), to deal in the Software without
+restriction, including without limitation the rights to use, copy,
+modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Makefile	Wed Jul 23 18:35:10 2014 -0400
@@ -0,0 +1,45 @@
+PROGRAM_NAME = sickle
+VERSION = 1.33
+CC = gcc
+CFLAGS = -Wall -pedantic -DVERSION=$(VERSION)
+DEBUG = -g
+OPT = -O3
+ARCHIVE = $(PROGRAM_NAME)_$(VERSION)
+LDFLAGS=
+LIBS = -lz
+SDIR = src
+# Targets below are command names, not files; plain `make` runs default, which runs build.
+.PHONY: clean default build distclean dist debug
+
+default: build
+# Object rules: compile one source from $(SDIR) into ./<name>.o; $< is the .c file listed first.
+sliding.o: $(SDIR)/sliding.c $(SDIR)/kseq.h $(SDIR)/sickle.h
+	$(CC) $(CFLAGS) $(OPT) -c $<
+
+trim_single.o: $(SDIR)/trim_single.c $(SDIR)/sickle.h $(SDIR)/kseq.h
+	$(CC) $(CFLAGS) $(OPT) -c $<
+
+trim_paired.o: $(SDIR)/trim_paired.c $(SDIR)/sickle.h $(SDIR)/kseq.h
+	$(CC) $(CFLAGS) $(OPT) -c $<
+
+sickle.o: $(SDIR)/sickle.c $(SDIR)/sickle.h
+	$(CC) $(CFLAGS) $(OPT) -c $<
+
+print_record.o: $(SDIR)/print_record.c $(SDIR)/print_record.h
+	$(CC) $(CFLAGS) $(OPT) -c $<
+# Remove object files, precompiled headers and the linked binary.
+clean:
+	rm -rf *.o $(SDIR)/*.gch ./sickle
+# Everything clean removes, plus any tarballs in this directory.
+distclean: clean
+	rm -rf *.tar.gz
+# Pack the sources and docs into $(ARCHIVE).tar.gz.
+dist:
+	tar -zcf $(ARCHIVE).tar.gz src Makefile README.md sickle.xml LICENSE
+# Link all objects: $^ is every prerequisite, whereas $? is only those newer than the target.
+build: sliding.o trim_single.o trim_paired.o sickle.o print_record.o
+	$(CC) $(CFLAGS) $(LDFLAGS) $(OPT) $^ -o sickle $(LIBS)
+# Sub-make with debug CFLAGS; -DVERSION is repeated because this replaces CFLAGS wholesale.
+debug:
+	$(MAKE) build "CFLAGS=-Wall -pedantic $(DEBUG) -DDEBUG -DVERSION=$(VERSION)"
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/README.md	Wed Jul 23 18:35:10 2014 -0400
@@ -0,0 +1,136 @@
+# sickle - A windowed adaptive trimming tool for FASTQ files using quality
+
+## About
+
+Most modern sequencing technologies produce reads that have
+deteriorating quality towards the 3'-end and some towards the 5'-end
+as well. Incorrectly called bases in both regions negatively impact
+assemblies, mapping, and downstream bioinformatics analyses.
+
+Sickle is a tool that uses sliding windows along with quality and
+length thresholds to determine when quality is sufficiently low to
+trim the 3'-end of reads and also determines when the quality is
+sufficiently high to trim the 5'-end of reads.  It will also
+discard reads based upon the length threshold.  It takes the quality
+values and slides a window across them whose length is 0.1 times the
+length of the read.  If this length is less than 1, then the window is
+set to be equal to the length of the read.  Otherwise, the window
+slides along the quality values until the average quality in the
+window rises above the threshold, at which point the algorithm
+determines where within the window the rise occurs and cuts the read
+and quality there for the 5'-end cut.  Then when the average quality
+in the window drops below the threshold, the algorithm determines
+where in the window the drop occurs and cuts both the read and quality
+strings there for the 3'-end cut.  However, if the length of the
+remaining sequence is less than the minimum length threshold, then the
+read is discarded entirely (or replaced with an "N" record). 5'-end 
+trimming can be disabled.
+
+Sickle supports three types of quality values: Illumina, Solexa, and
+Sanger. Note that the Solexa quality setting is an approximation (the
+actual conversion is a non-linear transformation). The resulting
+approximation is close. Illumina quality refers to qualities encoded
+with the CASAVA pipeline between versions 1.3 and 1.7.  Illumina
+quality using CASAVA >= 1.8 is Sanger encoded.
+
+Note that Sickle will remove the 2nd fastq record header (on the "+"
+line) and replace it with simply a "+". This is the default format for
+CASAVA >= 1.8.
+
+Sickle also supports gzipped file inputs and optional gzipped outputs. By default,
+Sickle will produce regular (i.e. not gzipped) output, regardless of the input.
+Sickle also has an option to truncate reads with Ns at the first N position.
+
+There is also a sickle.xml file included in the package that can be used to add sickle to your
+local [Galaxy](http://galaxy.psu.edu/) server.
+
+## Citation
+Sickle doesn't have a paper, but you can cite it like this:
+
+    Joshi NA, Fass JN. (2011). Sickle: A sliding-window, adaptive, quality-based trimming tool for FastQ files 
+    (Version 1.33) [Software].  Available at https://github.com/najoshi/sickle.
+
+## Requirements 
+
+Sickle requires a C compiler; GCC or clang are recommended. Sickle
+relies on Heng Li's kseq.h, which is bundled with the source.
+
+Sickle also requires Zlib, which can be obtained at
+<http://www.zlib.net/>.
+
+## Building and Installing Sickle
+
+To build Sickle, enter:
+
+    make
+
+Then, copy or move "sickle" to a directory in your $PATH.
+
+## Usage
+
+Sickle has two modes to work with both paired-end and single-end
+reads: `sickle se` and `sickle pe`.
+
+Running sickle by itself will print the help:
+
+    sickle
+
+Running sickle with either the "se" or "pe" commands will give help
+specific to those commands:
+
+    sickle se
+    sickle pe
+
+### Sickle Single End (`sickle se`)
+
+`sickle se` takes an input fastq file and outputs a trimmed version of
+that file.  It also has options to change the length and quality
+thresholds for trimming, as well as disabling 5'-trimming and enabling
+truncation of sequences with Ns.
+
+#### Examples
+
+    sickle se -f input_file.fastq -t illumina -o trimmed_output_file.fastq
+    sickle se -f input_file.fastq -t illumina -o trimmed_output_file.fastq -q 33 -l 40
+    sickle se -f input_file.fastq -t illumina -o trimmed_output_file.fastq -x -n
+    sickle se -t sanger -g -f input_file.fastq -o trimmed_output_file.fastq.gz
+
+### Sickle Paired End (`sickle pe`)
+
+`sickle pe` can operate with two types of input.  First, it can take
+two paired-end files as input and outputs two trimmed paired-end files
+as well as a "singles" file.  The second form starts with a single
+combined input file of reads where you have already interleaved the
+reads from the sequencer.  In this form, you also supply a single
+output file name as well as a "singles" file.  The "singles" file
+contains reads that passed filter in either the forward or reverse
+direction, but not the other.  Finally, there is an option (-M) to only 
+produce one interleaved output file where any reads that did not pass 
+filter will be output as a FastQ record with a single "N" (whose quality 
+value is the lowest possible based upon the quality type), thus 
+preserving the paired nature of the data.  You can also change the length 
+and quality thresholds for trimming, as well as disable 5'-trimming and 
+enable truncation of sequences with Ns.
+
+#### Examples
+
+    sickle pe -f input_file1.fastq -r input_file2.fastq -t sanger \
+    -o trimmed_output_file1.fastq -p trimmed_output_file2.fastq \
+    -s trimmed_singles_file.fastq
+
+    sickle pe -f input_file1.fastq -r input_file2.fastq -t sanger \
+    -o trimmed_output_file1.fastq -p trimmed_output_file2.fastq \
+    -s trimmed_singles_file.fastq -q 12 -l 15
+
+    sickle pe -f input_file1.fastq -r input_file2.fastq -t sanger \
+    -o trimmed_output_file1.fastq -p trimmed_output_file2.fastq \
+    -s trimmed_singles_file.fastq -n
+
+    sickle pe -c combo.fastq -t sanger -m combo_trimmed.fastq \
+    -s trimmed_singles_file.fastq -n
+
+    sickle pe -t sanger -g -f input_file1.fastq -r input_file2.fastq \
+    -o trimmed_output_file1.fastq.gz -p trimmed_output_file2.fastq.gz \
+    -s trimmed_singles_file.fastq.gz
+
+    sickle pe -c combo.fastq -t sanger -M combo_trimmed_all.fastq
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sickle.xml	Wed Jul 23 18:35:10 2014 -0400
@@ -0,0 +1,253 @@
+<tool id="sickle" name="Sickle" version="1.33">
+	<description>Windowed Adaptive Trimming of FastQ data</description>
+
+	<command>
+	sickle
+	## Assemble the sickle command line: mode-specific arguments first, shared options last.
+        #if str($readtype.single_or_paired) == "se":
+        se -f $input_single -o $output_single
+            ## -t (quality type) is picked from the input dataset's datatype extension.
+            #if $input_single.ext == "fastq":
+            -t sanger
+            #else if $input_single.ext == "fastqsanger":
+            -t sanger
+            #else if $input_single.ext == "fastqillumina":
+            -t illumina
+            #else if $input_single.ext == "fastqsolexa":
+            -t solexa
+            #end if
+
+		#end if
+        ## Interleaved paired-end input: -M writes one combined file, otherwise -m plus a -s singles file.
+        #if str($readtype.single_or_paired) == "pe_combo":
+            #if $readtype.output_n:
+            pe -c $input_combo -M $output_combo
+            #else
+            pe -c $input_combo -m $output_combo -s $output_combo_single
+            #end if
+
+            #if $input_combo.ext == "fastq":
+            -t sanger
+            #else if $input_combo.ext == "fastqsanger":
+            -t sanger
+            #else if $input_combo.ext == "fastqillumina":
+            -t illumina
+            #else if $input_combo.ext == "fastqsolexa":
+            -t solexa
+            #end if
+
+        #end if
+		## Two separate paired-end inputs: -f/-r inputs, -o/-p paired outputs, -s singles output.
+		#if str($readtype.single_or_paired) == "pe_sep":
+		pe -f $input_paired1 -r $input_paired2 -o $output_paired1 -p $output_paired2 -s $output_paired_single
+
+                #if $input_paired1.ext == "fastq":
+                -t sanger
+                #else if $input_paired1.ext == "fastqsanger":
+                -t sanger
+                #else if $input_paired1.ext == "fastqillumina":
+                -t illumina
+                #else if $input_paired1.ext == "fastqsolexa":
+                -t solexa
+                #end if
+
+		#end if
+		## Options shared by every mode; each threshold is passed only when its field is non-empty.
+		#if str($qual_threshold) != "":
+		-q $qual_threshold
+		#end if
+
+		#if str($length_threshold) != "":
+		-l $length_threshold
+		#end if
+
+		#if $no_five_prime:
+		-x
+		#end if
+
+		#if $trunc_n:
+		-n
+		#end if
+		## --quiet is passed unconditionally.
+		--quiet
+	</command>
+
+	<inputs> <!-- Form parameters; their names are the $variables read by the command template. -->
+		<conditional name="readtype">
+			<param name="single_or_paired" type="select" optional="false" label="Single-End or Paired-End reads?" help="Note: Sickle will infer the quality type of the file from its datatype.  I.e., if the datatype is fastqsanger, then the quality type is sanger.  The default is fastqsanger.">
+				<option value="se" selected="true">Single-End</option>
+				<option value="pe_combo">Paired-End (one interleaved input file)</option>
+                <option value="pe_sep">Paired-End (two separate input files)</option>
+			</param>
+			<!-- Each when-branch declares the input datasets for one read-type choice. -->
+			<when value="se">
+				<param format="fastq, fastqsanger, fastqillumina, fastqsolexa" name="input_single" type="data" optional="false" label="Single-End FastQ Reads"/>
+			</when>
+
+            <when value="pe_combo">
+                <param format="fastq, fastqsanger, fastqillumina, fastqsolexa" name="input_combo" type="data" optional="false" label="Paired-End Interleaved FastQ Reads"/>
+                <param name="output_n" type="boolean" label="Output only one file with all reads" help="This will output only one file with all the reads, where the reads that did not pass filter will be replaced with a single 'N', rather than discarded."/>
+            </when>
+
+			<when value="pe_sep">
+				<param format="fastq, fastqsanger, fastqillumina, fastqsolexa" name="input_paired1" type="data" optional="false" label="Paired-End Forward Strand FastQ Reads"/>
+				<param format="fastq, fastqsanger, fastqillumina, fastqsolexa" name="input_paired2" type="data" optional="false" label="Paired-End Reverse Strand FastQ Reads"/>
+			</when>
+		</conditional>
+		<!-- Optional integer thresholds: both are pre-filled with 20 and validated with a minimum of 0. -->
+		<param name="qual_threshold" value="20" type="integer" optional="true" label="Quality Threshold">
+			<validator type="in_range" min="0" message="Minimum value is 0"/>
+		</param>
+
+		<param name="length_threshold" value="20" type="integer" optional="true" label="Length Threshold">
+			<validator type="in_range" min="0" message="Minimum value is 0"/>
+		</param>
+		<!-- Checkboxes; when set, the command template adds -x and -n respectively. -->
+		<param name="no_five_prime" type="boolean" label="Don't do 5' trimming"/>
+		<param name="trunc_n" type="boolean" label="Truncate sequences with Ns at first N position"/>
+	</inputs>
+
+	<outputs> <!-- Each dataset copies its format from the named input; its filter expressions test the readtype selection. -->
+		<data format_source="input_single" name="output_single" label="Single-End output of ${tool.name} on ${on_string}">
+		<filter>(readtype['single_or_paired'] == 'se')</filter>
+		</data>
+
+        <data format_source="input_combo" name="output_combo" label="Paired-End interleaved output of ${tool.name} on ${on_string}">
+        <filter>(readtype['single_or_paired'] == 'pe_combo')</filter>
+        </data>
+        <!-- The singletons dataset carries a second filter on output_n, the "Output only one file with all reads" checkbox. -->
+        <data format_source="input_combo" name="output_combo_single" label="Singletons from Paired-End interleaved output of ${tool.name} on ${on_string}">
+        <filter>(readtype['single_or_paired'] == 'pe_combo')</filter>
+        <filter>(readtype['output_n'] == False)</filter>
+        </data>
+
+		<data format_source="input_paired1" name="output_paired1" label="Paired-End forward strand output of ${tool.name} on ${on_string}">
+		<filter>(readtype['single_or_paired'] == 'pe_sep')</filter>
+		</data>
+
+		<data format_source="input_paired2" name="output_paired2" label="Paired-End reverse strand output of ${tool.name} on ${on_string}">
+		<filter>(readtype['single_or_paired'] == 'pe_sep')</filter>
+		</data>
+
+		<data format_source="input_paired1" name="output_paired_single" label="Singletons from Paired-End output of ${tool.name} on ${on_string}">
+		<filter>(readtype['single_or_paired'] == 'pe_sep')</filter>
+		</data>
+	</outputs>
+
+	<help>
+**Sickle - A windowed adaptive trimming tool for FASTQ files using quality**
+
+.. class:: infomark
+
+**About**
+
+Most modern sequencing technologies produce reads that have
+deteriorating quality towards the 3'-end and some towards the 5'-end
+as well. Incorrectly called bases in both regions negatively impact
+assemblies, mapping, and downstream bioinformatics analyses.
+
+Sickle is a tool that uses sliding windows along with quality and
+length thresholds to determine when quality is sufficiently low to
+trim the 3'-end of reads and also determines when the quality is
+sufficiently high to trim the 5'-end of reads.  It will also
+discard reads based upon the length threshold.  It takes the quality
+values and slides a window across them whose length is 0.1 times the
+length of the read.  If this length is less than 1, then the window is
+set to be equal to the length of the read.  Otherwise, the window
+slides along the quality values until the average quality in the
+window rises above the threshold, at which point the algorithm
+determines where within the window the rise occurs and cuts the read
+and quality there for the 5'-end cut.  Then when the average quality
+in the window drops below the threshold, the algorithm determines
+where in the window the drop occurs and cuts both the read and quality
+strings there for the 3'-end cut.  However, if the length of the
+remaining sequence is less than the minimum length threshold, then the
+read is discarded entirely (or replaced with an "N" record). 5'-end
+trimming can be disabled. Sickle also has an option to truncate reads 
+with Ns at the first N position.
+
+Sickle supports three types of quality values: Illumina, Solexa, and
+Sanger. Note that the Solexa quality setting is an approximation (the
+actual conversion is a non-linear transformation). The resulting
+approximation is close. Illumina quality refers to qualities encoded
+with the CASAVA pipeline between versions 1.3 and 1.7.  Illumina
+quality using CASAVA >= 1.8 is Sanger encoded. The quality value will
+be determined from the datatype of the data, i.e. a fastqsanger datatype
+is assumed to be Sanger encoded.
+
+Note that Sickle will remove the 2nd fastq record header (on the "+"
+line) and replace it with simply a "+". This is the default format for
+CASAVA >= 1.8.
+
+-----
+
+.. class:: infomark
+
+**Options**
+
+**Single-end**
+
+This option takes one single-end input file and outputs one single-end 
+output file of reads that passed the filters.
+
+**Paired-End (one interleaved input file)**
+
+This option takes as input one interleaved paired-end file. If you then
+check the "Output only one file with all reads" checkbox, it will output
+one interleaved file where any read that did not pass filter will be replaced 
+with a FastQ record where the sequence is a single "N" and the quality is the 
+lowest quality possible for that quality type. This will preserve the paired 
+nature of the data. If you leave the checkbox unchecked, it will output two files,
+one interleaved file with all the passed pairs and one singletons file where only 
+one of the pair passed filter.
+
+**Paired-End (two separate input files)**
+
+This option takes two separate (forward and reverse) paired-end files as input. 
+The output is three files: Two paired-end files with pairs that passed filter and
+a singletons file where only one of the pair passed filter.
+
+**Quality threshold**
+
+Input your desired quality threshold. This threshold is Phred-scaled; quality values
+typically range from 0 to 41 for FastQ data.
+
+**Length threshold**
+
+Input your desired length threshold. This is the threshold to determine if a read is kept
+after all the trimming steps are done.
+
+**Disable 5-prime trimming**
+
+An option to disable trimming the read on the 5-prime end. When enabled, 5-prime trimming 
+removes the start of the read where the average quality is below the quality threshold.
+
+**Truncate sequences with Ns**
+
+This option will trim a read at the first "N" base in the read after doing quality trimming. 
+It is then still subject to the length threshold.
+
+-----
+
+.. class:: infomark
+
+**Citation**
+
+Sickle doesn't have a paper, but you can cite it like this::
+
+    Joshi NA, Fass JN. (2011). Sickle: A sliding-window, adaptive, quality-based trimming tool for FastQ files
+    (Version 1.33) [Software].  Available at https://github.com/najoshi/sickle.
+
+-----
+
+Copyright: Nikhil Joshi
+
+http://bioinformatics.ucdavis.edu
+
+http://github.com/ucdavis-bioinformatics
+
+http://github.com/najoshi
+
+	</help>
+
+</tool>
--- a/sickle/LICENSE	Thu Sep 05 19:20:33 2013 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,19 +0,0 @@
-Permission is hereby granted, free of charge, to any person
-obtaining a copy of this software and associated documentation
-files (the "Software"), to deal in the Software without
-restriction, including without limitation the rights to use, copy,
-modify, merge, publish, distribute, sublicense, and/or sell copies
-of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be
-included in all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
--- a/sickle/README.md	Thu Sep 05 19:20:33 2013 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,107 +0,0 @@
-# sickle - A windowed adaptive trimming tool for FASTQ files using quality
-
-## About
-
-Most modern sequencing technologies produce reads that have
-deteriorating quality towards the 3'-end and some towards the 5'-end as well. Incorrectly called bases
-in both regions negatively impact assembles, mapping, and downstream
-bioinformatics analyses.
-
-Sickle is a tool that uses sliding windows along with quality and
-length thresholds to determine when quality is sufficiently low to
-trim the 3'-end of reads and also determines when the quality is
-sufficiently high enough to trim the 5'-end of reads.  It will also discard reads based upon the
-length threshold.  It takes the quality values and slides a window
-across them whose length is 0.1 times the length of the read.  If this
-length is less than 1, then the window is set to be equal to the
-length of the read.  Otherwise, the window slides along the quality
-values until the average quality in the window rises above the threshold, at 
-which point the algorithm determines where within the window the rise occurs
-and cuts the read and quality there for the 5'-end cut.  Then when the average quality 
-in the window drops below the threshold, the algorithm determines where in the window
-the drop occurs and cuts both the read and quality strings there for the 3'-end cut.
-However, if the length of the remaining sequence is less than the minimum length threshold,
-then the read is discarded entirely.  5'-end trimming can be disabled.
-
-Sickle also has an option to discard reads with any Ns in them.
-
-Sickle supports three types of quality values: Illumina, Solexa, 
-and Sanger. Note that the Solexa quality setting is an approximation
-(the actual conversion is a non-linear transformation). The end
-approximation is close. Illumina quality refers to qualities encoded
-with the CASAVA pipeline between versions 1.3 and 1.7.  Illumina quality
-using CASAVA >= 1.8 is Sanger encoded.
-
-Note that Sickle will remove the 2nd fastq record header (on the "+" line) and replace it
-with simply a "+". This is the default format for CASAVA >= 1.8.
-
-Sickle also supports gzipped file inputs. There is also a sickle.xml file
-included in the package that can be used to add sickle to your local [Galaxy](http://galaxy.psu.edu/) server.
-
-## Requirements 
-
-Sickle requires a C compiler; GCC or clang are recommended. Sickle
-relies on Heng Li's kseq.h, which is bundled with the source.
-
-Sickle also requires Zlib, which can be obtained at
-<http://www.zlib.net/>.
-
-## Building and Installing Sickle
-
-To build Sickle, enter:
-
-    make
-
-Then, copy or move "sickle" to a directory in your $PATH.
-
-## Usage
-
-Sickle has two modes to work with both paired-end and single-end
-reads: `sickle se` and `sickle pe`.
-
-Running sickle by itself will print the help:
-
-    sickle
-
-Running sickle with either the "se" or "pe" commands will give help
-specific to those commands:
-
-    sickle se
-    sickle pe
-
-### Sickle Single End (`sickle se`)
-
-`sickle se` takes an input fastq file and outputs a trimmed version of
-that file.  It also has options to change the length and quality
-thresholds for trimming, as well as disabling 5'-trimming and enabling removal
-of sequences with Ns.
-
-#### Examples
-
-    sickle se -f input_file.fastq -t illumina -o trimmed_output_file.fastq
-    sickle se -f input_file.fastq -t illumina -o trimmed_output_file.fastq -q 33 -l 40
-	sickle se -f input_file.fastq -t illumina -o trimmed_output_file.fastq -x -n
-
-### Sickle Paired End (`sickle pe`)
-
-`sickle pe` takes two paired-end files as input and outputs two
-trimmed paired-end files as well as a "singles" file.  The "singles"
-file contains reads that passed filter in one of the paired-end files
-but not the other.  You can also change the length and quality
-thresholds for trimming, as well as disable 5'-trimming and enable removal
-of sequences with Ns.
-
-#### Examples
-
-    sickle pe -f input_file1.fastq -r input_file2.fastq -t sanger \
-    -o trimmed_output_file1.fastq -p trimmed_output_file2.fastq \
-    -s trimmed_singles_file.fastq
-
-    sickle pe -f input_file1.fastq -r input_file2.fastq -t sanger \
-    -o trimmed_output_file1.fastq -p trimmed_output_file2.fastq \
-    -s trimmed_singles_file.fastq -q 12 -l 15
-
-	sickle pe -f input_file1.fastq -r input_file2.fastq -t sanger \
-	-o trimmed_output_file1.fastq -p trimmed_output_file2.fastq \
-	-s trimmed_singles_file.fastq -n
-
Binary file sickle/sickle has changed
--- a/sickle/sickle.xml	Thu Sep 05 19:20:33 2013 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,115 +0,0 @@
-<tool id="sickle" name="Sickle">
-	<description>Windowed Adaptive Trimming of FastQ data</description>
-
-	<command>
-		sickle $readtype.single_or_paired --quiet
-
-		#if str($readtype.single_or_paired) == "se":
-		-f $input_single -o $output_single
-
-		#if $input_single.ext == "fastq":
-		-t sanger
-		#else if $input_single.ext == "fastqsanger":
-		-t sanger
-		#else if $input_single.ext == "fastqillumina":
-		-t illumina
-		#else if $input_single.ext == "fastqsolexa":
-		-t solexa
-		#end if
-
-		#end if
-
-		#if str($readtype.single_or_paired) == "pe":
-		-f $input_paired1 -r $input_paired2 -o $output_paired1 -p $output_paired2 -s $output_paired_single
-
-                #if $input_paired1.ext == "fastq":
-                -t sanger
-                #else if $input_paired1.ext == "fastqsanger":
-                -t sanger
-                #else if $input_paired1.ext == "fastqillumina":
-                -t illumina
-                #else if $input_paired1.ext == "fastqsolexa":
-                -t solexa
-                #end if
-
-		#end if
-
-		#if str($qual_threshold) != "":
-		-q $qual_threshold
-		#end if
-
-		#if str($length_threshold) != "":
-		-l $length_threshold
-		#end if
-
-		#if $no_five_prime:
-		-x
-		#end if
-
-		#if $discard_n:
-		-n
-		#end if
-	</command>
-
-	<inputs>
-		<conditional name="readtype">
-			<param name="single_or_paired" type="select" optional="false" label="Single-End or Paired-End reads?" help="Note: Sickle will infer the quality type of the file from its datatype.  I.e., if the datatype is fastqsanger, then the quality type is sanger.  The default is fastqsanger.">
-				<option value="se" selected="true">Single-End</option>
-				<option value="pe">Paired-End</option>
-			</param>
-
-			<when value="se">
-				<param format="fastq, fastqsanger, fastqillumina, fastqsolexa" name="input_single" type="data" optional="false" label="Single-End FastQ Reads"/>
-			</when>
-
-			<when value="pe">
-				<param format="fastq, fastqsanger, fastqillumina, fastqsolexa" name="input_paired1" type="data" optional="false" label="Paired-End Forward Strand FastQ Reads"/>
-				<param format="fastq, fastqsanger, fastqillumina, fastqsolexa" name="input_paired2" type="data" optional="false" label="Paired-End Reverse Strand FastQ Reads"/>
-			</when>
-		</conditional>
-
-		<param name="qual_threshold" value="20" type="integer" optional="true" label="Quality Threshold">
-			<validator type="in_range" min="0" message="Minimum value is 0"/>
-		</param>
-
-		<param name="length_threshold" value="20" type="integer" optional="true" label="Length Threshold">
-			<validator type="in_range" min="0" message="Minimum value is 0"/>
-		</param>
-
-		<param name="no_five_prime" type="boolean" label="Don't do 5' trimming"/>
-		<param name="discard_n" type="boolean" label="Discard sequences with Ns"/>
-	</inputs>
-
-	<outputs>
-		<data format_source="input_single" name="output_single" label="Single-End output of ${tool.name} on ${on_string}">
-		<filter>(readtype['single_or_paired'] == 'se')</filter>
-		</data>
-
-		<data format_source="input_paired1" name="output_paired1" label="Paired-End forward strand output of ${tool.name} on ${on_string}">
-		<filter>(readtype['single_or_paired'] == 'pe')</filter>
-		</data>
-
-		<data format_source="input_paired2" name="output_paired2" label="Paired-End reverse strand output of ${tool.name} on ${on_string}">
-		<filter>(readtype['single_or_paired'] == 'pe')</filter>
-		</data>
-
-		<data format_source="input_paired1" name="output_paired_single" label="Singletons from Paired-End output of ${tool.name} on ${on_string}">
-		<filter>(readtype['single_or_paired'] == 'pe')</filter>
-		</data>
-	</outputs>
-
-	<help>
-Most modern sequencing technologies produce reads that have deteriorating quality towards the 3'-end and some towards the 5'-end as well. Incorrectly called bases in both regions negatively impact assembles, mapping, and downstream bioinformatics analyses.
-
-Sickle is a tool that uses sliding windows along with quality and length thresholds to determine when quality is sufficiently low to trim the 3'-end of reads and also determines when the quality is sufficiently high enough to trim the 5'-end of reads. It will also discard reads based upon the length threshold. It takes the quality values and slides a window across them whose length is 0.1 times the length of the read. If this length is less than 1, then the window is set to be equal to the length of the read. Otherwise, the window slides along the quality values until the average quality in the window rises above the threshold, at which point the algorithm determines where within the window the rise occurs and cuts the read and quality there for the 5'-end cut. Then when the average quality in the window drops below the threshold, the algorithm determines where in the window the drop occurs and cuts both the read and quality strings there for the 3'-end cut. However, if the length of the remaining sequence is less than the minimum length threshold, then the read is discarded entirely. 5'-end trimming can be disabled.
-
-Sickle also has an option to discard reads with any Ns in them.
-
-Sickle supports three types of quality values: Illumina, Solexa, and Sanger. Note that the Solexa quality setting is an approximation (the actual conversion is a non-linear transformation). The end approximation is close. Illumina quality refers to qualities encoded with the CASAVA pipeline between versions 1.3 and 1.7. Illumina quality using CASAVA >= 1.8 is Sanger encoded.  Sickle will get the quality type from the datatype of the file.
-
-Note that Sickle will remove the 2nd fastq record header (on the "+" line) and replace it with simply a "+". This is the default format for CASAVA >= 1.8.
-
-Sickle also supports gzipped file inputs.
-	</help>
-
-</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/kseq.h	Wed Jul 23 18:35:10 2014 -0400
@@ -0,0 +1,223 @@
+/* The MIT License
+
+   Copyright (c) 2008 Genome Research Ltd (GRL).
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   "Software"), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be
+   included in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+   ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+   SOFTWARE.
+*/
+
+/* Contact: Heng Li <lh3@sanger.ac.uk> */
+
+/* Last Modified: 12APR2009 */
+
+#ifndef AC_KSEQ_H
+#define AC_KSEQ_H
+
+#include <ctype.h>
+#include <string.h>
+#include <stdlib.h>
+
+#define KS_SEP_SPACE 0 /* isspace(): \t, \n, \v, \f, \r */
+#define KS_SEP_TAB   1 /* isspace() && !' ' */
+#define KS_SEP_MAX   1
+
+#define __KS_TYPE(type_t)                       \
+  typedef struct __kstream_t {                  \
+    char *buf; /* read buffer filled by the __read function */ \
+    int begin, end, is_eof; /* unread bytes are buf[begin..end); is_eof is set after a short read */ \
+    type_t f; /* underlying handle, passed as first argument to __read */ \
+  } kstream_t;
+
+#define ks_eof(ks) ((ks)->is_eof && (ks)->begin >= (ks)->end)
+#define ks_rewind(ks) ((ks)->is_eof = (ks)->begin = (ks)->end = 0)
+
+/* Defines ks_init() and ks_destroy() for a buffered stream over a handle
+   of type `type_t`, using a read buffer of `__bufsize` bytes. */
+#define __KS_BASIC(type_t, __bufsize)                                   \
+  static inline kstream_t *ks_init(type_t f)                            \
+  {                                                                     \
+    /* calloc zeroes begin/end/is_eof, i.e. an empty, not-yet-EOF stream */ \
+    kstream_t *stream = (kstream_t*)calloc(1, sizeof(kstream_t));       \
+    stream->buf = (char*)malloc(__bufsize);                             \
+    stream->f = f;                                                      \
+    return stream;                                                      \
+  }                                                                     \
+  static inline void ks_destroy(kstream_t *ks)                          \
+  {                                                                     \
+    if (!ks) return; /* destroying NULL is a no-op */                   \
+    free(ks->buf);                                                      \
+    free(ks);                                                           \
+  }
+
+/* Defines ks_getc(): return the next byte of the stream as a value in
+   0..255, or -1 at end of file or when the read function fails.
+   `__read(f, buf, n)` is expected to return the number of bytes read,
+   0 at end of file and a negative value on error (as gzread does). */
+#define __KS_GETC(__read, __bufsize)                                    \
+  static inline int ks_getc(kstream_t *ks)                              \
+  {                                                                     \
+    if (ks->is_eof && ks->begin >= ks->end) return -1;                  \
+    if (ks->begin >= ks->end) { /* buffer exhausted: refill it */       \
+      ks->begin = 0;                                                    \
+      ks->end = __read(ks->f, ks->buf, __bufsize);                      \
+      if (ks->end < __bufsize) ks->is_eof = 1; /* short read */         \
+      if (ks->end <= 0) { /* EOF or read error: buf holds nothing */    \
+        ks->end = 0;                                                    \
+        return -1;                                                      \
+      }                                                                 \
+    }                                                                   \
+    /* convert via unsigned char so that a byte >= 0x80 is never */     \
+    /* negative; a raw 0xFF would otherwise look like the -1 EOF code */ \
+    return (int)(unsigned char)ks->buf[ks->begin++];                    \
+  }
+
+#ifndef KSTRING_T
+#define KSTRING_T kstring_t
+typedef struct __kstring_t {
+	size_t l, m; /* l: bytes currently stored (excluding the NUL); m: bytes allocated for s */
+	char *s; /* heap buffer, grown with realloc() by the readers below */
+} kstring_t;
+#endif
+
+#ifndef kroundup32
+#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x)) /* round x up in place to the next power of two; the shifts cover 32 bits */
+#endif
+
+/* Defines ks_getuntil(): read bytes from `ks` into `str` up to (not
+   including) the next delimiter and NUL-terminate the result.
+
+   delimiter  KS_SEP_SPACE (any isspace() char), KS_SEP_TAB (any
+              isspace() char except ' '), or a literal character code
+              greater than KS_SEP_MAX.
+   dret       if non-NULL, receives the delimiter character that ended
+              the token, or 0 when the stream ended first.
+
+   Returns the token length, or -1 if the stream was already exhausted
+   on entry. */
+#define __KS_GETUNTIL(__read, __bufsize)                                \
+  static int ks_getuntil(kstream_t *ks, int delimiter, kstring_t *str, int *dret) \
+  {                                                                     \
+    if (dret) *dret = 0;                                                \
+    str->l = 0;                                                         \
+    if (ks->begin >= ks->end && ks->is_eof) return -1;                  \
+    for (;;) {                                                          \
+      int i;                                                            \
+      if (ks->begin >= ks->end) { /* buffer exhausted: refill it */     \
+        if (ks->is_eof) break;                                          \
+        ks->begin = 0;                                                  \
+        ks->end = __read(ks->f, ks->buf, __bufsize);                    \
+        if (ks->end < __bufsize) ks->is_eof = 1;                        \
+        if (ks->end <= 0) { /* EOF or read error: nothing to scan */    \
+          ks->end = 0;                                                  \
+          break;                                                        \
+        }                                                               \
+      }                                                                 \
+      /* find the first delimiter in the unread part of the buffer; */  \
+      /* isspace() needs an unsigned char value to be well defined */   \
+      if (delimiter > KS_SEP_MAX) {                                     \
+        for (i = ks->begin; i < ks->end; ++i)                           \
+          if (ks->buf[i] == delimiter) break;                           \
+      } else if (delimiter == KS_SEP_SPACE) {                           \
+        for (i = ks->begin; i < ks->end; ++i)                           \
+          if (isspace((unsigned char)ks->buf[i])) break;                \
+      } else if (delimiter == KS_SEP_TAB) {                             \
+        for (i = ks->begin; i < ks->end; ++i)                           \
+          if (isspace((unsigned char)ks->buf[i]) && ks->buf[i] != ' ') break; \
+      } else i = 0; /* never come to here! */                           \
+      /* make room for the new bytes plus the terminating NUL */        \
+      if (str->m - str->l < i - ks->begin + 1) {                        \
+        str->m = str->l + (i - ks->begin) + 1;                          \
+        kroundup32(str->m);                                             \
+        str->s = (char*)realloc(str->s, str->m);                        \
+      }                                                                 \
+      memcpy(str->s + str->l, ks->buf + ks->begin, i - ks->begin);      \
+      str->l = str->l + (i - ks->begin);                                \
+      ks->begin = i + 1; /* also skips the delimiter itself */          \
+      if (i < ks->end) { /* stopped on a delimiter, not on buffer end */ \
+        if (dret) *dret = ks->buf[i];                                   \
+        break;                                                          \
+      }                                                                 \
+    }                                                                   \
+    /* allocate only when there is no buffer yet; testing str->l == 0 */ \
+    /* here would drop (leak) an existing buffer on every empty token */ \
+    if (str->s == 0) {                                                  \
+      str->m = 1;                                                       \
+      str->s = (char*)calloc(1, 1);                                     \
+    }                                                                   \
+    str->s[str->l] = '\0';                                              \
+    return str->l;                                                      \
+  }
+
+#define KSTREAM_INIT(type_t, __read, __bufsize)                         \
+  __KS_TYPE(type_t) /* kstream_t struct */				\
+  __KS_BASIC(type_t, __bufsize) /* ks_init, ks_destroy */               \
+  __KS_GETC(__read, __bufsize) /* ks_getc */                            \
+  __KS_GETUNTIL(__read, __bufsize) /* ks_getuntil */
+
+/* Defines kseq_init(), kseq_rewind() and kseq_destroy() for records read
+   from a handle of type `type_t`. */
+#define __KSEQ_BASIC(type_t)                                            \
+  static inline kseq_t *kseq_init(type_t fd)                            \
+  {                                                                     \
+    /* calloc leaves all four kstring_t members empty (s == NULL) */    \
+    kseq_t *rec = (kseq_t*)calloc(1, sizeof(kseq_t));                   \
+    rec->f = ks_init(fd);                                               \
+    return rec;                                                         \
+  }                                                                     \
+  static inline void kseq_rewind(kseq_t *ks)                            \
+  {                                                                     \
+    kstream_t *stream = ks->f;                                          \
+    ks->last_char = 0;                                                  \
+    /* only resets the buffer bookkeeping; the handle is not touched */ \
+    stream->end = 0;                                                    \
+    stream->begin = 0;                                                  \
+    stream->is_eof = 0;                                                 \
+  }                                                                     \
+  static inline void kseq_destroy(kseq_t *ks)                           \
+  {                                                                     \
+    if (ks) {                                                           \
+      free(ks->name.s);                                                 \
+      free(ks->comment.s);                                              \
+      free(ks->seq.s);                                                  \
+      free(ks->qual.s);                                                 \
+      ks_destroy(ks->f);                                                \
+      free(ks);                                                         \
+    }                                                                   \
+  }
+
+/* Defines kseq_read(): read the next FASTA/FASTQ record from seq->f into
+   seq->name, seq->comment, seq->seq and (FASTQ only) seq->qual.
+
+   Return value:
+   >=0  length of the sequence (normal)
+   -1   end-of-file
+   -2   truncated quality string
+*/
+#define __KSEQ_READ                                                     \
+  static int kseq_read(kseq_t *seq)                                     \
+  {                                                                     \
+    int c;                                                              \
+    kstream_t *ks = seq->f;                                             \
+    if (seq->last_char == 0) { /* then jump to the next header line */  \
+      while ((c = ks_getc(ks)) != -1 && c != '>' && c != '@');          \
+      if (c == -1) return -1; /* end of file */                         \
+      seq->last_char = c;                                               \
+    } /* the first header char has been read */                         \
+    seq->comment.l = seq->seq.l = seq->qual.l = 0;                      \
+    if (ks_getuntil(ks, 0, &seq->name, &c) < 0) return -1;              \
+    if (c != '\n') ks_getuntil(ks, '\n', &seq->comment, 0);             \
+    while ((c = ks_getc(ks)) != -1 && c != '>' && c != '+' && c != '@') { \
+      if (isgraph(c)) { /* printable non-space character */             \
+        if (seq->seq.l + 1 >= seq->seq.m) { /* double the memory */     \
+          seq->seq.m = seq->seq.l + 2;                                  \
+          kroundup32(seq->seq.m); /* rounded to next closest 2^k */     \
+          seq->seq.s = (char*)realloc(seq->seq.s, seq->seq.m);          \
+        }                                                               \
+        seq->seq.s[seq->seq.l++] = (char)c;                             \
+      }                                                                 \
+    }                                                                   \
+    if (c == '>' || c == '@') seq->last_char = c; /* the first header char has been read */ \
+    /* A record with an empty sequence never enters the growth branch */ \
+    /* above, so seq.s may still be NULL here: make sure there is room */ \
+    /* for the terminator before writing it. */                         \
+    if (seq->seq.l + 1 >= seq->seq.m) {                                 \
+      seq->seq.m = seq->seq.l + 2;                                      \
+      kroundup32(seq->seq.m);                                           \
+      seq->seq.s = (char*)realloc(seq->seq.s, seq->seq.m);              \
+    }                                                                   \
+    seq->seq.s[seq->seq.l] = 0; /* null terminated string */            \
+    if (c != '+') return seq->seq.l; /* FASTA */                        \
+    if (seq->qual.m < seq->seq.m) { /* allocate enough memory */        \
+      seq->qual.m = seq->seq.m;                                         \
+      seq->qual.s = (char*)realloc(seq->qual.s, seq->qual.m);           \
+    }                                                                   \
+    while ((c = ks_getc(ks)) != -1 && c != '\n'); /* skip the rest of '+' line */ \
+    if (c == -1) return -2; /* we should not stop here */               \
+    while ((c = ks_getc(ks)) != -1 && seq->qual.l < seq->seq.l)         \
+      if (c >= 33 && c <= 127) seq->qual.s[seq->qual.l++] = (unsigned char)c; \
+    seq->qual.s[seq->qual.l] = 0; /* null terminated string */          \
+    seq->last_char = 0; /* we have not come to the next header line */  \
+    if (seq->seq.l != seq->qual.l) return -2; /* qual string is shorter than seq string */ \
+    return seq->seq.l;                                                  \
+  }
+
+#define __KSEQ_TYPE(type_t)						\
+  typedef struct {							\
+    kstring_t name, comment, seq, qual; /* header name, rest of header line, bases, qualities */ \
+    int last_char; /* '>' or '@' of the next record if already consumed, else 0 */ \
+    kstream_t *f; /* buffered input stream the record is read from */	\
+  } kseq_t;
+
+#define KSEQ_INIT(type_t, __read)                                       \
+  KSTREAM_INIT(type_t, __read, 4096)                                    \
+  __KSEQ_TYPE(type_t)							\
+  __KSEQ_BASIC(type_t)                                                  \
+  __KSEQ_READ
+
+#endif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/print_record.c	Wed Jul 23 18:35:10 2014 -0400
@@ -0,0 +1,42 @@
+#include <assert.h>
+#include <ctype.h>
+#include <stdlib.h>
+#include <zlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include "sickle.h"
+#include "kseq.h"
+
+
+/* Write one trimmed FASTQ record to fp: the header (name plus optional
+   comment), the bases in [five_prime_cut, three_prime_cut), a bare "+"
+   line, and the matching slice of the quality string. */
+void print_record (FILE *fp, kseq_t *fqr, cutsites *cs) {
+    const int trimmed_len = cs->three_prime_cut - cs->five_prime_cut;
+    const char *bases = fqr->seq.s + cs->five_prime_cut;
+    const char *quals = fqr->qual.s + cs->five_prime_cut;
+
+    fprintf(fp, "@%s", fqr->name.s);
+    if (fqr->comment.l == 0) {
+        fprintf(fp, "\n");
+    } else {
+        fprintf(fp, " %s\n", fqr->comment.s);
+    }
+
+    /* "%.*s" prints at most trimmed_len characters of each slice */
+    fprintf(fp, "%.*s\n", trimmed_len, bases);
+    fprintf(fp, "+\n");
+    fprintf(fp, "%.*s\n", trimmed_len, quals);
+}
+
+/* Write one trimmed FASTQ record to the gzip stream fp: the header (name
+   plus optional comment), the bases in [five_prime_cut, three_prime_cut),
+   a bare "+" line, and the matching slice of the quality string.
+
+   gzputs()/gzwrite() are used instead of gzprintf() because gzprintf()
+   can emit only a limited number of bytes per call (8191 with the default
+   buffer) and writes nothing when that limit is exceeded, which would
+   silently drop long reads.  A non-positive trimmed length produces empty
+   sequence and quality lines. */
+void print_record_gzip (gzFile fp, kseq_t *fqr, cutsites *cs) {
+    int len = cs->three_prime_cut - cs->five_prime_cut;
+
+    gzputc(fp, '@');
+    gzputs(fp, fqr->name.s);
+    if (fqr->comment.l) {
+        gzputc(fp, ' ');
+        gzputs(fp, fqr->comment.s);
+    }
+    gzputc(fp, '\n');
+
+    if (len > 0) gzwrite(fp, fqr->seq.s + cs->five_prime_cut, (unsigned) len);
+    gzputs(fp, "\n+\n");
+    if (len > 0) gzwrite(fp, fqr->qual.s + cs->five_prime_cut, (unsigned) len);
+    gzputc(fp, '\n');
+}
+
+/* Write a placeholder record to fp: the original header, a single "N"
+   base, and the minimum quality character of the given encoding. */
+void print_record_N (FILE *fp, kseq_t *fqr, int qualtype) {
+    const int min_qual_char = quality_constants[qualtype][Q_MIN];
+
+    fprintf(fp, "@%s", fqr->name.s);
+    if (fqr->comment.l == 0) {
+        fprintf(fp, "\n");
+    } else {
+        fprintf(fp, " %s\n", fqr->comment.s);
+    }
+    fprintf(fp, "N\n+\n%c\n", min_qual_char);
+}
+
+/* gzip counterpart of print_record_N: write the original header, a single
+   "N" base, and the minimum quality character of the given encoding. */
+void print_record_N_gzip (gzFile fp, kseq_t *fqr, int qualtype) {
+    const int min_qual_char = quality_constants[qualtype][Q_MIN];
+
+    gzprintf(fp, "@%s", fqr->name.s);
+    if (fqr->comment.l == 0) {
+        gzprintf(fp, "\n");
+    } else {
+        gzprintf(fp, " %s\n", fqr->comment.s);
+    }
+    gzprintf(fp, "N\n+\n%c\n", min_qual_char);
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/print_record.h	Wed Jul 23 18:35:10 2014 -0400
@@ -0,0 +1,13 @@
+#ifndef PRINT_RECORD_H
+#define PRINT_RECORD_H
+
+#include <stdio.h>
+#include <zlib.h>
+#include "sickle.h" /* declares kseq_t and cutsites used in the prototypes below */
+#include "kseq.h"
+
+/* Write the trimmed region of a record to a plain / gzip stream. */
+void print_record (FILE *fp, kseq_t *fqr, cutsites *cs);
+void print_record_gzip (gzFile fp, kseq_t *fqr, cutsites *cs);
+/* Write a single-"N" placeholder record to a plain / gzip stream. */
+void print_record_N (FILE *fp, kseq_t *fqr, int qualtype);
+void print_record_N_gzip (gzFile fp, kseq_t *fqr, int qualtype);
+
+#endif /* PRINT_RECORD_H */
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/sickle.c	Wed Jul 23 18:35:10 2014 -0400
@@ -0,0 +1,54 @@
+#include <assert.h>
+#include <ctype.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <zlib.h>
+#include <stdio.h>
+#include <getopt.h>
+#include <string.h>
+#include "sickle.h"
+
+/* Print the top-level usage text to stdout and terminate the process
+   with the given exit status. */
+void main_usage (int status) {
+	static const char usage_text[] =
+		"\nUsage: %s <command> [options]\n"
+		"\n"
+		"Command:\n"
+		"pe\tpaired-end sequence trimming\n"
+		"se\tsingle-end sequence trimming\n"
+		"\n"
+		"--help, display this help and exit\n"
+		"--version, output version information and exit\n\n";
+
+	fprintf (stdout, usage_text, PROGRAM_NAME);
+	exit (status);
+}
+
+/* Entry point: dispatch on the first argument ("pe", "se", "--help" or
+   "--version"); anything else prints the usage text and fails. */
+int main (int argc, char *argv[]) {
+	const char *command;
+
+	if (argc < 2) {
+		main_usage (EXIT_FAILURE);
+	}
+	command = argv[1];
+
+	if (strcmp (command, "--version") == 0) {
+		fprintf(stdout,
+			"%s version %0.2f\n"
+			"Copyright (c) 2011 The Regents of University of California, Davis Campus.\n"
+			"%s is free software and comes with ABSOLUTELY NO WARRANTY.\n"
+			"Distributed under the MIT License.\n\n"
+			"Written by %s\n",
+			PROGRAM_NAME, VERSION, PROGRAM_NAME, AUTHORS);
+		exit (EXIT_SUCCESS);
+	}
+
+	if (strcmp (command, "--help") == 0) {
+		main_usage (EXIT_SUCCESS);
+	}
+
+	if (strcmp (command, "pe") == 0) {
+		return paired_main (argc, argv);
+	}
+
+	if (strcmp (command, "se") == 0) {
+		return single_main (argc, argv);
+	}
+
+	/* unknown command; main_usage() exits and does not return */
+	main_usage (EXIT_FAILURE);
+	return 0;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/sickle.h	Wed Jul 23 18:35:10 2014 -0400
@@ -0,0 +1,101 @@
+#ifndef SICKLE_H
+#define SICKLE_H
+
+#include <limits.h>
+#include <zlib.h>
+#include "kseq.h"
+
+
+/* KSEQ_INIT() cannot be called here, because we only need the types
+   defined. Calling KSEQ_INIT() would also define functions, leading
+   to an unused function warning with GCC. So, the basic typedefs
+   kseq.h has are included here, and each file that reads needs:
+
+   __KS_GETC(gzread, BUFFER_SIZE)
+   __KS_GETUNTIL(gzread, BUFFER_SIZE)
+   __KSEQ_READ
+
+*/
+
+#define BUFFER_SIZE 4096
+__KS_TYPE(gzFile)
+__KS_BASIC(gzFile, BUFFER_SIZE)
+__KSEQ_TYPE(gzFile)
+__KSEQ_BASIC(gzFile)
+
+#ifndef PROGRAM_NAME
+#define PROGRAM_NAME "sickle"
+#endif
+
+#ifndef AUTHORS
+#define AUTHORS "Nikhil Joshi, UC Davis Bioinformatics Core\n"
+#endif
+
+#ifndef VERSION
+#define VERSION 0.0
+#endif
+
+/* Options drawn from GNU's coreutils/src/system.h */
+/* These options are defined so as to avoid conflicting with option
+values used by commands */
+enum {
+  GETOPT_HELP_CHAR = (CHAR_MIN - 2),
+  GETOPT_VERSION_CHAR = (CHAR_MIN - 3)
+};
+#define GETOPT_HELP_OPTION_DECL \
+"help", no_argument, NULL, GETOPT_HELP_CHAR
+#define GETOPT_VERSION_OPTION_DECL \
+"version", no_argument, NULL, GETOPT_VERSION_CHAR
+#define case_GETOPT_HELP_CHAR(Usage_call) \
+case GETOPT_HELP_CHAR: \
+Usage_call(EXIT_SUCCESS, NULL); \
+break;
+/* Expands to the `case` that handles --version inside a getopt switch:
+   prints the version banner and exits.  The version is formatted with
+   %0.2f, the same precision main() in sickle.c uses, so that
+   `sickle --version` and `sickle pe --version` print the same number. */
+#define case_GETOPT_VERSION_CHAR(Program_name, Version, Authors) \
+case GETOPT_VERSION_CHAR: \
+fprintf(stdout, "%s version %0.2f\nCopyright (c) 2011 The Regents " \
+"of University of California, Davis Campus.\n" \
+"%s is free software and comes with ABSOLUTELY NO WARRANTY.\n"\
+"Distributed under the MIT License.\n\nWritten by %s\n", \
+Program_name, Version, Program_name, Authors); \
+exit(EXIT_SUCCESS); \
+break;
+/* end code drawn from system.h */
+
+typedef enum {
+  PHRED,
+  SANGER,
+  SOLEXA,
+  ILLUMINA
+} quality_type; /* values double as row indices into typenames[] and quality_constants[] */
+
+static const char typenames[4][10] = { /* printable name per quality_type, used in error messages */
+	{"Phred"},
+	{"Sanger"},
+	{"Solexa"},
+	{"Illumina"}
+};
+
+#define Q_OFFSET 0 /* column: value subtracted from the raw quality character */
+#define Q_MIN 1 /* column: smallest accepted raw quality character code */
+#define Q_MAX 2 /* column: largest accepted raw quality character code */
+
+static const int quality_constants[4][3] = {
+  /* offset, min, max */
+  {0, 4, 60}, /* PHRED */
+  {33, 33, 126}, /* SANGER */
+  {64, 58, 112}, /* SOLEXA; this is an approx; the transform is non-linear */
+  {64, 64, 110} /* ILLUMINA */
+};
+
+typedef struct __cutsites_ { /* result of sliding_window(); both fields are -1 when the read is to be discarded */
+    int five_prime_cut; /* index of the first base to keep */
+	int three_prime_cut; /* index one past the last base to keep */
+} cutsites;
+
+
+/* Function Prototypes */
+int single_main (int argc, char *argv[]);
+int paired_main (int argc, char *argv[]);
+cutsites* sliding_window (kseq_t *fqrec, int qualtype, int length_threshold, int qual_threshold, int no_fiveprime, int trunc_n, int debug);
+
+#endif /*SICKLE_H*/
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/sliding.c	Wed Jul 23 18:35:10 2014 -0400
@@ -0,0 +1,137 @@
+#include <assert.h>
+#include <ctype.h>
+#include <stdlib.h>
+#include <zlib.h>
+#include <stdio.h>
+#include <getopt.h>
+#include "sickle.h"
+#include "kseq.h"
+
+/*
+   Convert one raw quality character into its numeric quality score for
+   the given encoding (a row of quality_constants in sickle.h).
+
+   qualchar  raw character taken from the quality string
+   qualtype  row index into quality_constants / typenames
+   fqrec     record the character belongs to (used for diagnostics only)
+   pos       0-based position of the character (reported 1-based)
+
+   Returns the raw code minus the encoding's offset.  If the raw code is
+   outside the encoding's [min, max] range, a diagnostic is written to
+   stderr and the process exits with status 1.
+
+   Note that SOLEXA (pre-1.3 pipeline) qualities are *approximated* as
+   linear, which is inaccurate for low-quality bases.
+*/
+int get_quality_num (char qualchar, int qualtype, kseq_t *fqrec, int pos) {
+  const int *limits = quality_constants[qualtype];
+  const char *encoding = typenames[qualtype];
+  const int raw = (int) qualchar;
+  const int out_of_range = (raw < limits[Q_MIN] || raw > limits[Q_MAX]);
+
+  if (out_of_range) {
+	fprintf (stderr, "ERROR: Quality value (%d) does not fall within correct range for %s encoding.\n", raw, encoding);
+	fprintf (stderr, "Range for %s encoding: %d-%d\n", encoding, limits[Q_MIN], limits[Q_MAX]);
+	fprintf (stderr, "FastQ record: %s\n", fqrec->name.s);
+	fprintf (stderr, "Quality string: %s\n", fqrec->qual.s);
+	fprintf (stderr, "Quality char: '%c'\n", qualchar);
+	fprintf (stderr, "Quality position: %d\n", pos+1);
+	exit(1);
+  }
+
+  return raw - limits[Q_OFFSET];
+}
+
+
+/* Allocate a cutsites result holding the given cut positions.  Exits with
+   an error message if memory cannot be allocated, so the result is never
+   NULL. */
+static cutsites* new_cutsites (int five_prime_cut, int three_prime_cut) {
+	cutsites *cs = (cutsites*) malloc (sizeof(cutsites));
+
+	if (cs == NULL) {
+		fprintf (stderr, "ERROR: Out of memory.\n");
+		exit (1);
+	}
+
+	cs->five_prime_cut = five_prime_cut;
+	cs->three_prime_cut = three_prime_cut;
+	return cs;
+}
+
+/*
+   Determine the 5' and 3' cut positions of a read with a sliding quality
+   window whose size is one tenth of the read length (or the whole read
+   when the read is shorter than 10 bases).
+
+   fqrec             record to examine; its quality string is expected to
+                     be as long as its sequence (NOTE(review): presumably
+                     guaranteed by the caller checking kseq_read's return
+                     value -- confirm)
+   qualtype          row index into quality_constants
+   length_threshold  minimum length a read must keep to be retained
+   qual_threshold    average window quality separating good from bad
+   no_fiveprime      non-zero disables 5' trimming
+   trunc_n           non-zero truncates the read at its first N/n
+   debug             non-zero prints trace information to stdout
+
+   Returns a newly malloc'ed cutsites; the kept region is
+   [five_prime_cut, three_prime_cut), and both fields are -1 when the
+   read should be discarded.
+*/
+cutsites* sliding_window (kseq_t *fqrec, int qualtype, int length_threshold, int qual_threshold, int no_fiveprime, int trunc_n, int debug) {
+
+	int window_size = (int) (0.1 * fqrec->seq.l);
+	int i,j;
+	int window_start=0;
+	int window_total=0;
+	int three_prime_cut = fqrec->seq.l;
+	int five_prime_cut = 0;
+	int found_five_prime = 0;
+	double window_avg;
+	char *npos;
+
+	/* discard empty reads (nothing to slide a window over) and reads */
+	/* shorter than the length threshold */
+	if (fqrec->seq.l == 0 || fqrec->seq.l < length_threshold) {
+		return new_cutsites (-1, -1);
+	}
+
+	/* if the seq length is less than 10bp, */
+	/* then make the window size the length of the seq */
+	if (window_size == 0) window_size = fqrec->seq.l;
+
+	/* quality total of the first window */
+	for (i=0; i<window_size; i++) {
+		window_total += get_quality_num (fqrec->qual.s[i], qualtype, fqrec, i);
+	}
+
+	for (i=0; i <= fqrec->qual.l - window_size; i++) {
+
+		window_avg = (double)window_total / (double)window_size;
+
+		if (debug) printf ("no_fiveprime: %d, found 5prime: %d, window_avg: %f\n", no_fiveprime, found_five_prime, window_avg);
+
+		/* Finding the 5' cutoff */
+		/* Find when the average quality in the window goes above the threshold starting from the 5' end */
+		if (no_fiveprime == 0 && found_five_prime == 0 && window_avg >= qual_threshold) {
+
+			if (debug) printf ("inside 5-prime cut\n");
+
+			/* at what point in the window does the quality go above the threshold? */
+			for (j=window_start; j<window_start+window_size; j++) {
+				if (get_quality_num (fqrec->qual.s[j], qualtype, fqrec, j) >= qual_threshold) {
+					five_prime_cut = j;
+					break;
+				}
+			}
+
+			if (debug) printf ("five_prime_cut: %d\n", five_prime_cut);
+
+			found_five_prime = 1;
+		}
+
+		/* Finding the 3' cutoff */
+		/* if the average quality in the window is less than the threshold */
+		/* or if the window is the last window in the read */
+		/* NOTE(review): the loop bound keeps window_start+window_size <= qual.l, */
+		/* so the "last window" test below never fires; kept as in the original. */
+		if ((window_avg < qual_threshold ||
+			window_start+window_size > fqrec->qual.l) && (found_five_prime == 1 || no_fiveprime)) {
+
+			/* at what point in the window does the quality dip below the threshold? */
+			for (j=window_start; j<window_start+window_size; j++) {
+				if (get_quality_num (fqrec->qual.s[j], qualtype, fqrec, j) < qual_threshold) {
+					three_prime_cut = j;
+					break;
+				}
+			}
+
+			break;
+		}
+
+		/* instead of sliding the window, subtract the first qual and add the next qual */
+		window_total -= get_quality_num (fqrec->qual.s[window_start], qualtype, fqrec, window_start);
+		if (window_start+window_size < fqrec->qual.l) {
+			window_total += get_quality_num (fqrec->qual.s[window_start+window_size], qualtype, fqrec, window_start+window_size);
+		}
+		window_start++;
+	}
+
+
+	/* If truncate N option is selected, and sequence has Ns, then cut at */
+	/* the first N or n, whichever comes first.  The N position may only */
+	/* shorten the read: it must not move the 3' cut back out past the */
+	/* point where quality trimming already placed it. */
+	if (trunc_n && (npos = strpbrk(fqrec->seq.s, "Nn")) != NULL) {
+		int n_index = (int) (npos - fqrec->seq.s);
+		if (n_index < three_prime_cut) three_prime_cut = n_index;
+	}
+
+	/* if cutting length is less than threshold then return -1 for both */
+	/* to indicate that the read should be discarded */
+	/* Also, if you never find a five prime cut site, then discard whole read */
+	if ((found_five_prime == 0 && !no_fiveprime) || (three_prime_cut - five_prime_cut < length_threshold)) {
+		three_prime_cut = -1;
+		five_prime_cut = -1;
+
+		if (debug) printf("%s\n", fqrec->name.s);
+	}
+
+	if (debug) printf ("\n\n");
+
+	return new_cutsites (five_prime_cut, three_prime_cut);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/trim_paired.c	Wed Jul 23 18:35:10 2014 -0400
@@ -0,0 +1,517 @@
+#include <assert.h>
+#include <ctype.h>
+#include <stdlib.h>
+#include <zlib.h>
+#include <stdio.h>
+#include <getopt.h>
+#include <unistd.h>
+#include "sickle.h"
+#include "kseq.h"
+#include "print_record.h"
+
+__KS_GETC(gzread, BUFFER_SIZE)
+__KS_GETUNTIL(gzread, BUFFER_SIZE)
+__KSEQ_READ
+
+int paired_qual_threshold = 20;
+int paired_length_threshold = 20;
+
+/* Long options accepted by `sickle pe`.
+
+   Options that carry a value are declared required_argument: with
+   optional_argument, getopt_long() only accepts the `--opt=value`
+   spelling and leaves optarg NULL for `--opt value`, which the option
+   handlers then pass to strlen()/atoi().  Pure flags are declared
+   no_argument, matching their short forms in the option string. */
+static struct option paired_long_options[] = {
+    {"qual-type", required_argument, 0, 't'},
+    {"pe-file1", required_argument, 0, 'f'},
+    {"pe-file2", required_argument, 0, 'r'},
+    {"pe-combo", required_argument, 0, 'c'},
+    {"output-pe1", required_argument, 0, 'o'},
+    {"output-pe2", required_argument, 0, 'p'},
+    {"output-single", required_argument, 0, 's'},
+    {"output-combo", required_argument, 0, 'm'},
+    {"qual-threshold", required_argument, 0, 'q'},
+    {"length-threshold", required_argument, 0, 'l'},
+    {"no-fiveprime", no_argument, 0, 'x'},
+    {"truncate-n", no_argument, 0, 'n'},
+    {"gzip-output", no_argument, 0, 'g'},
+    {"output-combo-all", required_argument, 0, 'M'},
+    {"quiet", no_argument, 0, 'z'},
+    {GETOPT_HELP_OPTION_DECL},
+    {GETOPT_VERSION_OPTION_DECL},
+    {NULL, 0, NULL, 0}
+};
+
+void paired_usage (int status, char *msg) {
+    /* Print the `sickle pe` usage text to stderr, followed by msg when it
+       is non-NULL, then terminate the process with exit(status). */
+    fprintf(stderr, "\nIf you have separate files for forward and reverse reads:\n");
+    fprintf(stderr, "Usage: %s pe [options] -f <paired-end forward fastq file> -r <paired-end reverse fastq file> -t <quality type> -o <trimmed PE forward file> -p <trimmed PE reverse file> -s <trimmed singles file>\n\n", PROGRAM_NAME);
+    fprintf(stderr, "If you have one file with interleaved forward and reverse reads:\n");
+    fprintf(stderr, "Usage: %s pe [options] -c <interleaved input file> -t <quality type> -m <interleaved trimmed paired-end output> -s <trimmed singles file>\n\n\
+If you have one file with interleaved reads as input and you want ONLY one interleaved file as output:\n\
+Usage: %s pe [options] -c <interleaved input file> -t <quality type> -M <interleaved trimmed output>\n\n", PROGRAM_NAME, PROGRAM_NAME);
+    fprintf(stderr, "Options:\n\
+Paired-end separated reads\n\
+--------------------------\n\
+-f, --pe-file1, Input paired-end forward fastq file (Input files must have same number of records)\n\
+-r, --pe-file2, Input paired-end reverse fastq file\n\
+-o, --output-pe1, Output trimmed forward fastq file\n\
+-p, --output-pe2, Output trimmed reverse fastq file. Must use -s option.\n\n\
+Paired-end interleaved reads\n\
+----------------------------\n");
+    fprintf(stderr,"-c, --pe-combo, Combined (interleaved) input paired-end fastq\n\
+-m, --output-combo, Output combined (interleaved) paired-end fastq file. Must use -s option.\n\
+-M, --output-combo-all, Output combined (interleaved) paired-end fastq file with any discarded read written to output file as a single N. Cannot be used with the -s option.\n\n\
+Global options\n\
+--------------\n\
+-t, --qual-type, Type of quality values (solexa (CASAVA < 1.3), illumina (CASAVA 1.3 to 1.7), sanger (which is CASAVA >= 1.8)) (required)\n");
+    fprintf(stderr, "-s, --output-single, Output trimmed singles fastq file\n\
+-q, --qual-threshold, Threshold for trimming based on average quality in a window. Default 20.\n\
+-l, --length-threshold, Threshold to keep a read based on length after trimming. Default 20.\n\
+-x, --no-fiveprime, Don't do five prime trimming.\n\
+-n, --truncate-n, Truncate sequences at position of first N.\n");
+
+
+    fprintf(stderr, "-g, --gzip-output, Output gzipped files.\n--quiet, do not output trimming info\n\
+--help, display this help and exit\n\
+--version, output version information and exit\n\n");
+
+    if (msg) fprintf(stderr, "%s\n\n", msg); /* optional caller-supplied error message */
+    exit(status);
+}
+
+
+int paired_main(int argc, char *argv[]) {
+
+    gzFile pe1 = NULL;          /* forward input file handle */
+    gzFile pe2 = NULL;          /* reverse input file handle */
+    gzFile pec = NULL;          /* combined input file handle */
+    kseq_t *fqrec1 = NULL;
+    kseq_t *fqrec2 = NULL;
+    int l1, l2;
+    FILE *outfile1 = NULL;      /* forward output file handle */
+    FILE *outfile2 = NULL;      /* reverse output file handle */
+    FILE *combo = NULL;         /* combined output file handle */
+    FILE *single = NULL;        /* single output file handle */
+    gzFile outfile1_gzip = NULL;
+    gzFile outfile2_gzip = NULL;
+    gzFile combo_gzip = NULL;
+    gzFile single_gzip = NULL;
+    int debug = 0;
+    int optc;
+    extern char *optarg;
+    int qualtype = -1;
+    cutsites *p1cut;
+    cutsites *p2cut;
+    char *outfn1 = NULL;        /* forward file out name */
+    char *outfn2 = NULL;        /* reverse file out name */
+    char *outfnc = NULL;        /* combined file out name */
+    char *sfn = NULL;           /* single/combined file out name */
+    char *infn1 = NULL;         /* forward input filename */
+    char *infn2 = NULL;         /* reverse input filename */
+    char *infnc = NULL;         /* combined input filename */
+    int kept_p = 0;
+    int discard_p = 0;
+    int kept_s1 = 0;
+    int kept_s2 = 0;
+    int discard_s1 = 0;
+    int discard_s2 = 0;
+    int quiet = 0;
+    int no_fiveprime = 0;
+    int trunc_n = 0;
+    int gzip_output = 0;
+    int combo_all=0;
+    int combo_s=0;
+
+    while (1) {
+        int option_index = 0;
+        optc = getopt_long(argc, argv, "df:r:c:t:o:p:m:M:s:q:l:xng", paired_long_options, &option_index);
+
+        if (optc == -1)
+            break;
+
+        switch (optc) {
+            if (paired_long_options[option_index].flag != 0)
+                break;
+
+        case 'f':
+            infn1 = (char *) malloc(strlen(optarg) + 1);
+            strcpy(infn1, optarg);
+            break;
+
+        case 'r':
+            infn2 = (char *) malloc(strlen(optarg) + 1);
+            strcpy(infn2, optarg);
+            break;
+
+        case 'c':
+            infnc = (char *) malloc(strlen(optarg) + 1);
+            strcpy(infnc, optarg);
+            break;
+
+        case 't':
+            if (!strcmp(optarg, "illumina")) qualtype = ILLUMINA;
+            else if (!strcmp(optarg, "solexa")) qualtype = SOLEXA;
+            else if (!strcmp(optarg, "sanger")) qualtype = SANGER;
+            else {
+                fprintf(stderr, "Error: Quality type '%s' is not a valid type.\n", optarg);
+                return EXIT_FAILURE;
+            }
+            break;
+
+        case 'o':
+            outfn1 = (char *) malloc(strlen(optarg) + 1);
+            strcpy(outfn1, optarg);
+            break;
+
+        case 'p':
+            outfn2 = (char *) malloc(strlen(optarg) + 1);
+            strcpy(outfn2, optarg);
+            break;
+
+        case 'm':
+            outfnc = (char *) malloc(strlen(optarg) + 1);
+            strcpy(outfnc, optarg);
+            combo_s = 1;
+            break;
+
+        case 'M':
+            outfnc = (char *) malloc(strlen(optarg) + 1);
+            strcpy(outfnc, optarg);
+            combo_all = 1;
+            break;
+
+        case 's':
+            sfn = (char *) malloc(strlen(optarg) + 1);
+            strcpy(sfn, optarg);
+            break;
+
+        case 'q':
+            paired_qual_threshold = atoi(optarg);
+            if (paired_qual_threshold < 0) {
+                fprintf(stderr, "Quality threshold must be >= 0\n");
+                return EXIT_FAILURE;
+            }
+            break;
+
+        case 'l':
+            paired_length_threshold = atoi(optarg);
+            if (paired_length_threshold < 0) {
+                fprintf(stderr, "Length threshold must be >= 0\n");
+                return EXIT_FAILURE;
+            }
+            break;
+
+        case 'x':
+            no_fiveprime = 1;
+            break;
+
+        case 'n':
+            trunc_n = 1;
+            break;
+
+        case 'g':
+            gzip_output = 1;
+            break;
+
+        case 'z':
+            quiet = 1;
+            break;
+
+        case 'd':
+            debug = 1;
+            break;
+
+        case_GETOPT_HELP_CHAR(paired_usage);
+        case_GETOPT_VERSION_CHAR(PROGRAM_NAME, VERSION, AUTHORS);
+
+        case '?':
+            paired_usage(EXIT_FAILURE, NULL);
+            break;
+
+        default:
+            paired_usage(EXIT_FAILURE, NULL);
+            break;
+        }
+    }
+
+    /* required: qualtype */
+    if (qualtype == -1) {
+        paired_usage(EXIT_FAILURE, "****Error: Quality type is required.");
+    }
+
+    /* make sure minimum input filenames are specified */
+    if (!infn1 && !infnc) {
+        paired_usage(EXIT_FAILURE, "****Error: Must have either -f OR -c argument.");
+    }
+
+    if (infnc) {      /* using combined input file */
+
+        if (infn1 || infn2 || outfn1 || outfn2) {
+            paired_usage(EXIT_FAILURE, "****Error: Cannot have -f, -r, -o, or -p options with -c.");
+        }
+
+        if ((combo_all && combo_s) || (!combo_all && !combo_s)) {
+            paired_usage(EXIT_FAILURE, "****Error: Must have only one of either -m or -M options with -c.");
+        }
+
+        if ((combo_s && !sfn) || (combo_all && sfn)) {
+            paired_usage(EXIT_FAILURE, "****Error: -m option must have -s option, and -M option cannot have -s option.");
+        }
+
+        /* check for duplicate file names */
+        if (!strcmp(infnc, outfnc) || (combo_s && (!strcmp(infnc, sfn) || !strcmp(outfnc, sfn)))) {
+            fprintf(stderr, "****Error: Duplicate filename between combo input, combo output, and/or single output file names.\n\n");
+            return EXIT_FAILURE;
+        }
+
+        /* get combined output file */
+        if (!gzip_output) {
+            combo = fopen(outfnc, "w");
+            if (!combo) {
+                fprintf(stderr, "****Error: Could not open combo output file '%s'.\n\n", outfnc);
+                return EXIT_FAILURE;
+            }
+        } else {
+            combo_gzip = gzopen(outfnc, "w");
+            if (!combo_gzip) {
+                fprintf(stderr, "****Error: Could not open combo output file '%s'.\n\n", outfnc);
+                return EXIT_FAILURE;
+            }
+        }
+
+        pec = gzopen(infnc, "r");
+        if (!pec) {
+            fprintf(stderr, "****Error: Could not open combined input file '%s'.\n\n", infnc);
+            return EXIT_FAILURE;
+        }
+
+    } else {     /* using forward and reverse input files */
+
+        if (infn1 && (!infn2 || !outfn1 || !outfn2 || !sfn)) {
+            paired_usage(EXIT_FAILURE, "****Error: Using the -f option means you must have the -r, -o, -p, and -s options.");
+        }
+
+        if (infn1 && (infnc || combo_all || combo_s)) {
+            paired_usage(EXIT_FAILURE, "****Error: The -f option cannot be used in combination with -c, -m, or -M.");
+        }
+
+        if (!strcmp(infn1, infn2) || !strcmp(infn1, outfn1) || !strcmp(infn1, outfn2) ||
+            !strcmp(infn1, sfn) || !strcmp(infn2, outfn1) || !strcmp(infn2, outfn2) || 
+            !strcmp(infn2, sfn) || !strcmp(outfn1, outfn2) || !strcmp(outfn1, sfn) || !strcmp(outfn2, sfn)) {
+
+            fprintf(stderr, "****Error: Duplicate input and/or output file names.\n\n");
+            return EXIT_FAILURE;
+        }
+
+        pe1 = gzopen(infn1, "r");
+        if (!pe1) {
+            fprintf(stderr, "****Error: Could not open input file '%s'.\n\n", infn1);
+            return EXIT_FAILURE;
+        }
+
+        pe2 = gzopen(infn2, "r");
+        if (!pe2) {
+            fprintf(stderr, "****Error: Could not open input file '%s'.\n\n", infn2);
+            return EXIT_FAILURE;
+        }
+
+        if (!gzip_output) {
+            outfile1 = fopen(outfn1, "w");
+            if (!outfile1) {
+                fprintf(stderr, "****Error: Could not open output file '%s'.\n\n", outfn1);
+                return EXIT_FAILURE;
+            }
+
+            outfile2 = fopen(outfn2, "w");
+            if (!outfile2) {
+                fprintf(stderr, "****Error: Could not open output file '%s'.\n\n", outfn2);
+                return EXIT_FAILURE;
+            }
+        } else {
+            outfile1_gzip = gzopen(outfn1, "w");
+            if (!outfile1_gzip) {
+                fprintf(stderr, "****Error: Could not open output file '%s'.\n\n", outfn1);
+                return EXIT_FAILURE;
+            }
+
+            outfile2_gzip = gzopen(outfn2, "w");
+            if (!outfile2_gzip) {
+                fprintf(stderr, "****Error: Could not open output file '%s'.\n\n", outfn2);
+                return EXIT_FAILURE;
+            }
+
+        }
+    }
+
+    /* get singles output file handle */
+    if (sfn && !combo_all) {
+        if (!gzip_output) {
+            single = fopen(sfn, "w");
+            if (!single) {
+                fprintf(stderr, "****Error: Could not open single output file '%s'.\n\n", sfn);
+                return EXIT_FAILURE;
+            }
+        } else {
+            single_gzip = gzopen(sfn, "w");
+            if (!single_gzip) {
+                fprintf(stderr, "****Error: Could not open single output file '%s'.\n\n", sfn);
+                return EXIT_FAILURE;
+            }
+        }
+    }
+
+    if (pec) {
+        fqrec1 = kseq_init(pec);
+        fqrec2 = (kseq_t *) malloc(sizeof(kseq_t));
+        fqrec2->f = fqrec1->f;
+    } else {
+        fqrec1 = kseq_init(pe1);
+        fqrec2 = kseq_init(pe2);
+    }
+
+    while ((l1 = kseq_read(fqrec1)) >= 0) {
+
+        l2 = kseq_read(fqrec2);
+        if (l2 < 0) {
+            fprintf(stderr, "Warning: PE file 2 is shorter than PE file 1. Disregarding rest of PE file 1.\n");
+            break;
+        }
+
+        p1cut = sliding_window(fqrec1, qualtype, paired_length_threshold, paired_qual_threshold, no_fiveprime, trunc_n, debug);
+        p2cut = sliding_window(fqrec2, qualtype, paired_length_threshold, paired_qual_threshold, no_fiveprime, trunc_n, debug);
+
+        if (debug) printf("p1cut: %d,%d\n", p1cut->five_prime_cut, p1cut->three_prime_cut);
+        if (debug) printf("p2cut: %d,%d\n", p2cut->five_prime_cut, p2cut->three_prime_cut);
+
+        /* The sequence and quality print statements below print out the sequence string starting from the 5' cut */
+        /* and then only print out to the 3' cut, however, we need to adjust the 3' cut */
+        /* by subtracting the 5' cut because the 3' cut was calculated on the original sequence */
+
+        /* if both sequences passed quality and length filters, then output both records */
+        if (p1cut->three_prime_cut >= 0 && p2cut->three_prime_cut >= 0) {
+            if (!gzip_output) {
+                if (pec) {
+                    print_record (combo, fqrec1, p1cut);
+                    print_record (combo, fqrec2, p2cut);
+                } else {
+                    print_record (outfile1, fqrec1, p1cut);
+                    print_record (outfile2, fqrec2, p2cut);
+                }
+            } else {
+                if (pec) {
+                    print_record_gzip (combo_gzip, fqrec1, p1cut);
+                    print_record_gzip (combo_gzip, fqrec2, p2cut);
+                } else {
+                    print_record_gzip (outfile1_gzip, fqrec1, p1cut);
+                    print_record_gzip (outfile2_gzip, fqrec2, p2cut);
+                }
+            }
+
+            kept_p += 2;
+        }
+
+        /* if only one sequence passed filter, then put its record in singles and discard the other */
+        /* or put an "N" record in if that option was chosen. */
+        else if (p1cut->three_prime_cut >= 0 && p2cut->three_prime_cut < 0) {
+            if (!gzip_output) {
+                if (combo_all) {
+                    print_record (combo, fqrec1, p1cut);
+                    print_record_N (combo, fqrec2, qualtype);
+                } else {
+                    print_record (single, fqrec1, p1cut);
+                }
+            } else {
+                if (combo_all) {
+                    print_record_gzip (combo_gzip, fqrec1, p1cut);
+                    print_record_N_gzip (combo_gzip, fqrec2, qualtype);
+                } else {
+                    print_record_gzip (single_gzip, fqrec1, p1cut);
+                }
+            }
+
+            kept_s1++;
+            discard_s2++;
+        }
+
+        else if (p1cut->three_prime_cut < 0 && p2cut->three_prime_cut >= 0) {
+            if (!gzip_output) {
+                if (combo_all) {
+                    print_record_N (combo, fqrec1, qualtype);
+                    print_record (combo, fqrec2, p2cut);
+                } else {
+                    print_record (single, fqrec2, p2cut);
+                }
+            } else {
+                if (combo_all) {
+                    print_record_N_gzip (combo_gzip, fqrec1, qualtype);
+                    print_record_gzip (combo_gzip, fqrec2, p2cut);
+                } else {
+                    print_record_gzip (single_gzip, fqrec2, p2cut);
+                }
+            }
+
+            kept_s2++;
+            discard_s1++;
+
+        } else {
+
+            /* If both records are to be discarded, but the -M option */
+            /* is being used, then output two "N" records */
+            if (combo_all) {
+                if (!gzip_output) {
+                    print_record_N (combo, fqrec1, qualtype);
+                    print_record_N (combo, fqrec2, qualtype);
+                } else {
+                    print_record_N_gzip (combo_gzip, fqrec1, qualtype);
+                    print_record_N_gzip (combo_gzip, fqrec2, qualtype);
+                }
+            }
+
+            discard_p += 2;
+        }
+
+        free(p1cut);
+        free(p2cut);
+    }             /* end of while ((l1 = kseq_read (fqrec1)) >= 0) */
+
+    if (l1 < 0) {
+        l2 = kseq_read(fqrec2);
+        if (l2 >= 0) {
+            fprintf(stderr, "Warning: PE file 1 is shorter than PE file 2. Disregarding rest of PE file 2.\n");
+        }
+    }
+
+    if (!quiet) {
+        fprintf(stdout, "\nFastQ paired records kept: %d (%d pairs)\n", kept_p, (kept_p / 2));
+        if (pec) fprintf(stdout, "FastQ single records kept: %d\n", (kept_s1 + kept_s2));
+        else fprintf(stdout, "FastQ single records kept: %d (from PE1: %d, from PE2: %d)\n", (kept_s1 + kept_s2), kept_s1, kept_s2);
+
+        fprintf(stdout, "FastQ paired records discarded: %d (%d pairs)\n", discard_p, (discard_p / 2));
+
+        if (pec) fprintf(stdout, "FastQ single records discarded: %d\n\n", (discard_s1 + discard_s2));
+        else fprintf(stdout, "FastQ single records discarded: %d (from PE1: %d, from PE2: %d)\n\n", (discard_s1 + discard_s2), discard_s1, discard_s2);
+    }
+
+    kseq_destroy(fqrec1);
+    if (pec) free(fqrec2);
+    else kseq_destroy(fqrec2);
+
+    if (sfn && !combo_all) {
+        if (!gzip_output) fclose(single);
+        else gzclose(single_gzip);
+    }
+
+    if (pec) {
+        gzclose(pec);
+        if (!gzip_output) fclose(combo);
+        else gzclose(combo_gzip);
+    } else {
+        gzclose(pe1);
+        gzclose(pe2);
+        if (!gzip_output) {
+            fclose(outfile1);
+            fclose(outfile2);
+        } else {
+            gzclose(outfile1_gzip);
+            gzclose(outfile2_gzip);
+        }
+    }
+
+    return EXIT_SUCCESS;
+}                               /* end of paired_main() */
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/trim_single.c	Wed Jul 23 18:35:10 2014 -0400
@@ -0,0 +1,226 @@
+#include <assert.h>
+#include <ctype.h>
+#include <stdlib.h>
+#include <zlib.h>
+#include <stdio.h>
+#include <getopt.h>
+#include "sickle.h"
+#include "kseq.h"
+#include "print_record.h"
+
+/* Expand the kseq.h template macros in this translation unit, passing gzread
+ * and BUFFER_SIZE as their arguments.
+ * NOTE(review): presumably these generate the gzip-backed reader helpers
+ * behind the kseq_read() calls in single_main() -- confirm against kseq.h. */
+__KS_GETC(gzread, BUFFER_SIZE)
+__KS_GETUNTIL(gzread, BUFFER_SIZE)
+__KSEQ_READ
+
+/* Default trimming thresholds for single-end mode; single_main() overwrites
+ * them when -q / -l are given.  Both defaults (20) match the usage text. */
+int single_qual_threshold = 20;
+int single_length_threshold = 20;
+
+/*
+ * Long-option table for the single-end ("se") sub-command.  Every entry maps
+ * to the short option character handled in single_main()'s switch.
+ *
+ * The argument kinds mirror the short-option string "df:t:o:q:l:zxng":
+ *   - options that carry a value use required_argument, so both
+ *     "--qual-threshold 20" and "--qual-threshold=20" populate optarg
+ *     (optional_argument would leave optarg NULL for the first form);
+ *   - pure flags use no_argument.
+ *
+ * "trunc-n" is the spelling shown in the usage text; "discard-n" is retained
+ * as an alias so existing command lines keep working.
+ */
+static struct option single_long_options[] = {
+    {"fastq-file", required_argument, 0, 'f'},
+    {"output-file", required_argument, 0, 'o'},
+    {"qual-type", required_argument, 0, 't'},
+    {"qual-threshold", required_argument, 0, 'q'},
+    {"length-threshold", required_argument, 0, 'l'},
+    {"no-fiveprime", no_argument, 0, 'x'},
+    {"trunc-n", no_argument, 0, 'n'},
+    {"discard-n", no_argument, 0, 'n'},
+    {"gzip-output", no_argument, 0, 'g'},
+    {"quiet", no_argument, 0, 'z'},
+    {GETOPT_HELP_OPTION_DECL},
+    {GETOPT_VERSION_OPTION_DECL},
+    {NULL, 0, NULL, 0}
+};
+
+/*
+ * Write the help text for the single-end ("se") sub-command to stderr.
+ * When msg is non-NULL it is printed after the help text, followed by a
+ * blank line.  The process is then terminated with exit(status), so this
+ * function never returns to its caller.
+ */
+void single_usage(int status, char *msg) {
+
+    /* Synopsis and required options; %s is filled with PROGRAM_NAME. */
+    fprintf(stderr,
+            "\nUsage: %s se [options] -f <fastq sequence file> -t <quality type> -o <trimmed fastq file>\n"
+            "\n"
+            "Options:\n"
+            "-f, --fastq-file, Input fastq file (required)\n"
+            "-t, --qual-type, Type of quality values (solexa (CASAVA < 1.3), illumina (CASAVA 1.3 to 1.7), sanger (which is CASAVA >= 1.8)) (required)\n"
+            "-o, --output-file, Output trimmed fastq file (required)\n",
+            PROGRAM_NAME);
+
+    /* Optional switches; fixed text with no conversions, so fputs suffices. */
+    fputs("-q, --qual-threshold, Threshold for trimming based on average quality in a window. Default 20.\n"
+          "-l, --length-threshold, Threshold to keep a read based on length after trimming. Default 20.\n"
+          "-x, --no-fiveprime, Don't do five prime trimming.\n"
+          "-n, --trunc-n, Truncate sequences at position of first N.\n"
+          "-g, --gzip-output, Output gzipped files.\n"
+          "--quiet, Don't print out any trimming information\n"
+          "--help, display this help and exit\n"
+          "--version, output version information and exit\n\n",
+          stderr);
+
+    if (msg != NULL) {
+        fprintf(stderr, "%s\n\n", msg);
+    }
+
+    exit(status);
+}
+
+/*
+ * Copy an option argument into newly allocated storage.
+ *
+ * previous: copy saved by an earlier occurrence of the same option (may be
+ *           NULL); it is freed so that repeating an option does not leak.
+ * arg:      NUL-terminated option argument to duplicate.
+ *
+ * Returns the new copy, or NULL after printing a message to stderr when the
+ * allocation fails.
+ */
+static char *single_save_arg(char *previous, const char *arg) {
+    char *copy;
+
+    free(previous);
+    copy = (char *) malloc(strlen(arg) + 1);
+    if (!copy) {
+        fprintf(stderr, "****Error: Could not allocate memory for a file name.\n\n");
+        return NULL;
+    }
+    strcpy(copy, arg);
+    return copy;
+}
+
+/*
+ * Entry point of the single-end ("se") sub-command.
+ *
+ * Parses the command line, opens the input FASTQ through zlib (gzopen) and
+ * the output either as a plain stdio stream or, with -g, as a gzip stream.
+ * Each record read with kseq_read() is passed to sliding_window(); records
+ * whose three_prime_cut is >= 0 are written out, the rest are counted as
+ * discarded.  Unless -z/--quiet is given, a kept/discarded summary is printed
+ * to stdout.
+ *
+ * argc, argv: argument vector as handed over by the caller.
+ *
+ * Returns EXIT_SUCCESS when every step, including the final close of the
+ * output file, succeeded; EXIT_FAILURE otherwise.  Usage errors and
+ * --help/--version terminate the process through single_usage() and the
+ * case_GETOPT_* macros instead of returning.
+ */
+int single_main(int argc, char *argv[]) {
+
+    gzFile se = NULL;
+    kseq_t *fqrec = NULL;
+    int l;
+    FILE *outfile = NULL;
+    gzFile outfile_gzip = NULL;
+    int debug = 0;
+    int optc;
+    int qualtype = -1;
+    cutsites *p1cut;
+    char *outfn = NULL;
+    char *infn = NULL;
+    int kept = 0;
+    int discard = 0;
+    int quiet = 0;
+    int no_fiveprime = 0;
+    int trunc_n = 0;
+    int gzip_output = 0;
+    int status = EXIT_FAILURE;  /* flipped to EXIT_SUCCESS once trimming completes */
+
+    /* The long-option index is not needed, so NULL is passed for it. */
+    while ((optc = getopt_long(argc, argv, "df:t:o:q:l:zxng", single_long_options, NULL)) != -1) {
+
+        switch (optc) {
+        case 'f':
+            infn = single_save_arg(infn, optarg);
+            if (!infn) goto cleanup;
+            break;
+
+        case 't':
+            if (!strcmp(optarg, "illumina"))
+                qualtype = ILLUMINA;
+            else if (!strcmp(optarg, "solexa"))
+                qualtype = SOLEXA;
+            else if (!strcmp(optarg, "sanger"))
+                qualtype = SANGER;
+            else {
+                fprintf(stderr, "Error: Quality type '%s' is not a valid type.\n", optarg);
+                goto cleanup;
+            }
+            break;
+
+        case 'o':
+            outfn = single_save_arg(outfn, optarg);
+            if (!outfn) goto cleanup;
+            break;
+
+        case 'q':
+            /* A long option declared with optional_argument yields a NULL */
+            /* optarg when the value is given as a separate word; never hand */
+            /* that to atoi(). */
+            if (!optarg) {
+                single_usage(EXIT_FAILURE, "****Error: Quality threshold requires a value (use -q <value> or --qual-threshold=<value>).");
+            }
+            single_qual_threshold = atoi(optarg);
+            if (single_qual_threshold < 0) {
+                fprintf(stderr, "Quality threshold must be >= 0\n");
+                goto cleanup;
+            }
+            break;
+
+        case 'l':
+            /* Same NULL guard as for -q. */
+            if (!optarg) {
+                single_usage(EXIT_FAILURE, "****Error: Length threshold requires a value (use -l <value> or --length-threshold=<value>).");
+            }
+            single_length_threshold = atoi(optarg);
+            if (single_length_threshold < 0) {
+                fprintf(stderr, "Length threshold must be >= 0\n");
+                goto cleanup;
+            }
+            break;
+
+        case 'x':
+            no_fiveprime = 1;
+            break;
+
+        case 'n':
+            trunc_n = 1;
+            break;
+
+        case 'g':
+            gzip_output = 1;
+            break;
+
+        case 'z':
+            quiet = 1;
+            break;
+
+        case 'd':
+            debug = 1;
+            break;
+
+        case_GETOPT_HELP_CHAR(single_usage)
+        case_GETOPT_VERSION_CHAR(PROGRAM_NAME, VERSION, AUTHORS);
+
+        case '?':
+            single_usage(EXIT_FAILURE, NULL);
+            break;
+
+        default:
+            single_usage(EXIT_FAILURE, NULL);
+            break;
+        }
+    }
+
+    /* single_usage() exits, so infn and outfn are non-NULL past this point. */
+    if (qualtype == -1 || !infn || !outfn) {
+        single_usage(EXIT_FAILURE, "****Error: Must have quality type, input file, and output file.");
+    }
+
+    if (!strcmp(infn, outfn)) {
+        fprintf(stderr, "****Error: Input file is same as output file.\n\n");
+        goto cleanup;
+    }
+
+    se = gzopen(infn, "r");
+    if (!se) {
+        fprintf(stderr, "****Error: Could not open input file '%s'.\n\n", infn);
+        goto cleanup;
+    }
+
+    if (!gzip_output) {
+        outfile = fopen(outfn, "w");
+        if (!outfile) {
+            fprintf(stderr, "****Error: Could not open output file '%s'.\n\n", outfn);
+            goto cleanup;
+        }
+    } else {
+        outfile_gzip = gzopen(outfn, "w");
+        if (!outfile_gzip) {
+            fprintf(stderr, "****Error: Could not open output file '%s'.\n\n", outfn);
+            goto cleanup;
+        }
+    }
+
+    fqrec = kseq_init(se);
+
+    while ((l = kseq_read(fqrec)) >= 0) {
+
+        p1cut = sliding_window(fqrec, qualtype, single_length_threshold, single_qual_threshold, no_fiveprime, trunc_n, debug);
+
+        if (debug) printf("P1cut: %d,%d\n", p1cut->five_prime_cut, p1cut->three_prime_cut);
+
+        /* if sequence quality and length pass filter then output record, else discard */
+        if (p1cut->three_prime_cut >= 0) {
+            if (!gzip_output) {
+                print_record (outfile, fqrec, p1cut);
+            } else {
+                print_record_gzip (outfile_gzip, fqrec, p1cut);
+            }
+
+            kept++;
+        }
+
+        else discard++;
+
+        free(p1cut);
+    }
+
+    if (!quiet) fprintf(stdout, "\nFastQ records kept: %d\nFastQ records discarded: %d\n\n", kept, discard);
+
+    status = EXIT_SUCCESS;
+
+cleanup:
+    /* Shared exit path: release whatever was acquired before the jump. */
+    if (fqrec) kseq_destroy(fqrec);
+    if (se) gzclose(se);
+
+    /* Closing flushes buffered output; a failure here (for example a full */
+    /* disk) means the output file is incomplete and must be reported. */
+    if (outfile && fclose(outfile) != 0) {
+        fprintf(stderr, "****Error: Could not finish writing output file '%s'.\n\n", outfn);
+        status = EXIT_FAILURE;
+    }
+    if (outfile_gzip && gzclose(outfile_gzip) != Z_OK) {
+        fprintf(stderr, "****Error: Could not finish writing output file '%s'.\n\n", outfn);
+        status = EXIT_FAILURE;
+    }
+
+    free(infn);
+    free(outfn);
+
+    return status;
+}