Repository 'fastx_barcode_splitter'
hg clone https://toolshed.g2.bx.psu.edu/repos/devteam/fastx_barcode_splitter

Changeset 4:015dc921d814 (2018-05-08)
Previous changeset 3:8abdedf55101 (2015-11-11) Next changeset 5:4bedca26c133 (2018-10-11)
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/fastx_toolkit/fastx_barcode_splitter commit bbb2e6b6769b03602a8ab97001f88fbec52080a1
modified:
fastx_barcode_splitter.xml
test-data/fastx_barcode_splitter1.fastq
test-data/fastx_barcode_splitter1.out
added:
fastx_barcode_splitter.pl
macros.xml
test-data/fastx_barcode_splitter1_BC1.out
test-data/fastx_barcode_splitter1_BC2.out
test-data/fastx_barcode_splitter1_BC3.out
test-data/fastx_barcode_splitter1_BC4.out
test-data/fastx_barcode_splitter1_unmatched.out
test-data/fastx_barcode_splitter_index.fastq
removed:
fastx_barcode_splitter_galaxy_wrapper.sh
tool_dependencies.xml
b
diff -r 8abdedf55101 -r 015dc921d814 fastx_barcode_splitter.pl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/fastx_barcode_splitter.pl Tue May 08 13:27:14 2018 -0400
[
b'@@ -0,0 +1,571 @@\n+#!/usr/bin/env perl\n+\n+#    FASTX-toolkit - FASTA/FASTQ preprocessing tools.\n+#    Copyright (C) 2009-2013  A. Gordon (assafgordon@gmail.com)\n+#\n+#   Lance Parsons (lparsons@princeton.edu)\n+#   3/21/2011 - Modified to accept separate index file for barcodes\n+#   4/6/2011 - Modified to cleanup bad barcode identifiers (esp. useful for Galaxy)\n+#   4/28/2016 - Modified summary output to remove file paths and add comment\n+#               character \'#\'\n+\n+#   This program is free software: you can redistribute it and/or modify\n+#   it under the terms of the GNU Affero General Public License as\n+#   published by the Free Software Foundation, either version 3 of the\n+#   License, or (at your option) any later version.\n+#\n+#   This program is distributed in the hope that it will be useful,\n+#   but WITHOUT ANY WARRANTY; without even the implied warranty of\n+#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n+#   GNU Affero General Public License for more details.\n+#\n+#    You should have received a copy of the GNU Affero General Public License\n+#    along with this program.  
If not, see <http://www.gnu.org/licenses/>.\n+\n+use strict;\n+use warnings;\n+use IO::Handle;\n+use Data::Dumper;\n+use Getopt::Long;\n+use Carp;\n+\n+##\n+## This program splits a FASTQ/FASTA file into several smaller files,\n+## Based on barcode matching.\n+##\n+## run with "--help" for usage information\n+##\n+## Assaf Gordon <assafgordon@gmail.com> , 11sep2008\n+\n+# Forward declarations\n+sub load_barcode_file ($);\n+sub parse_command_line ;\n+sub match_sequences ;\n+sub mismatch_count($$) ;\n+sub print_results;\n+sub open_and_detect_input_format;\n+sub open_index_and_detect_input_format($);\n+sub read_index_record;\n+sub read_record;\n+sub write_record($);\n+sub usage();\n+\n+# Global flags and arguments,\n+# Set by command line argumens\n+my $barcode_file ;\n+my $barcodes_at_eol = 0 ;\n+my $barcodes_at_bol = 0 ;\n+my $index_read_file ;\n+my $exact_match = 0 ;\n+my $allow_partial_overlap = 0;\n+my $allowed_mismatches = 1;\n+my $newfile_suffix = \'\';\n+my $newfile_prefix  ;\n+my $quiet = 0 ;\n+my $debug = 0 ;\n+my $fastq_format = 1;\n+my $index_fastq_format = 1;\n+my $read_id_check_strip_characters = 1;\n+\n+# Global variables\n+# Populated by \'create_output_files\'\n+my %filenames;\n+my %files;\n+my %counts = ( \'unmatched\' => 0 );\n+my $barcodes_length;\n+my @barcodes;\n+my $input_file_io;\n+\n+\n+# The Four lines per record in FASTQ format.\n+# (when using FASTA format, only the first two are used)\n+my $seq_name;\n+my $seq_bases;\n+my $seq_name2;\n+my $seq_qualities;\n+\n+# Values used for index read file\n+my $index_seq_name;\n+my $index_seq_bases;\n+my $index_seq_name2;\n+my $index_seq_qualities;\n+\n+#\n+# Start of Program\n+#\n+parse_command_line ;\n+\n+load_barcode_file ( $barcode_file ) ;\n+\n+open_and_detect_input_format;\n+\n+if (defined $index_read_file) {open_index_and_detect_input_format ( $index_read_file );}\n+\n+match_sequences ;\n+\n+print_results unless $quiet;\n+\n+#\n+# End of program\n+#\n+\n+sub parse_command_line {\n+    my 
$help;\n+\n+    usage() if (scalar @ARGV==0);\n+\n+    my $result = GetOptions ( "bcfile=s" => \\$barcode_file,\n+                  "eol"  => \\$barcodes_at_eol,\n+                  "bol"  => \\$barcodes_at_bol,\n+                  "idxfile=s"  => \\$index_read_file,\n+                  "idxidstrip=i" => \\$read_id_check_strip_characters,\n+                  "exact" => \\$exact_match,\n+                  "prefix=s" => \\$newfile_prefix,\n+                  "suffix=s" => \\$newfile_suffix,\n+                  "quiet" => \\$quiet,\n+                  "partial=i" => \\$allow_partial_overlap,\n+                  "debug" => \\$debug,\n+                  "mismatches=i" => \\$allowed_mismatches,\n+                  "help" => \\$help\n+    ) ;\n+\n+    usage() if ($help);\n+\n+    die "Error: barcode file not specified (use \'--bcfile [FILENAME]\')\\n" unless defined $barcode_file;\n+    die "Error: prefix path/filename not specified (use \'--prefix [PATH]\')\\n" unless defined $newfile_prefix;\n+\n+    if (! defined $in'..b' programmers\n+      would call index 0.)\n+--eol    - Try to match barcodes at the END of sequences.\n+      (What biologists would call the 3\' end, and programmers\n+      would call the end of the string.)\n+--idxfile FILE  - Read barcodes from separate index file (fasta or fastq)\n+      NOTE: one of --bol, --eol, --idxfile must be specified,\n+           but not more than one.\n+--idxidstrip N  - When using index file, strip this number of characters\n+      from the end of the sequence id before matching.\n+      Automatically detects CASAVA 1.8 format and strips at a\n+      space in the id, use 0 to disable this.\n+      (Default is 1).\n+--mismatches N  - Max. number of mismatches allowed. default is 1.\n+--exact    - Same as \'--mismatches 0\'. If both --exact and --mismatches\n+      are specified, \'--exact\' takes precedence.\n+--partial N  - Allow partial overlap of barcodes. 
(see explanation below.)\n+      (Default is not partial matching)\n+--quiet    - Don\'t print counts and summary at the end of the run.\n+      (Default is to print.)\n+--debug    - Print lots of useless debug information to STDERR.\n+--help    - This helpful help screen.\n+\n+Example (Assuming \'s_2_100.txt\' is a FASTQ file, \'mybarcodes.txt\' is\n+the barcodes file):\n+\n+  \\$ cat s_2_100.txt | $0 --bcfile mybarcodes.txt --bol --mismatches 2 \\\\\n+  --prefix /tmp/bla_ --suffix ".txt"\n+\n+Barcode file format\n+-------------------\n+Barcode files are simple text files. Each line should contain an identifier\n+(descriptive name for the barcode), and the barcode itself (A/C/G/T),\n+separated by a TAB character. Example:\n+\n+    #This line is a comment (starts with a \'number\' sign)\n+    BC1 GATCT\n+    BC2 ATCGT\n+    BC3 GTGAT\n+    BC4 TGTCT\n+\n+For each barcode, a new FASTQ file will be created (with the barcode\'s\n+identifier as part of the file name). Sequences matching the barcode\n+will be stored in the appropriate file.\n+\n+Running the above example (assuming "mybarcodes.txt" contains the above\n+barcodes), will create the following files:\n+  /tmp/bla_BC1.txt\n+  /tmp/bla_BC2.txt\n+  /tmp/bla_BC3.txt\n+  /tmp/bla_BC4.txt\n+  /tmp/bla_unmatched.txt\n+The \'unmatched\' file will contain all sequences that didn\'t match any barcode.\n+\n+Barcode matching\n+----------------\n+\n+** Without partial matching:\n+\n+Count mismatches between the FASTA/Q sequences and the barcodes.\n+The barcode which matched with the lowest mismatches count (providing the\n+count is smaller than or equal to \'--mismatches N\') \'gets\' the sequences.\n+\n+Example (using the above barcodes):\n+Input Sequence:\n+GATTTACTATGTAAAGATAGAAGGAATAAGGTGAAG\n+\n+Matching with \'--bol --mismatches 1\':\n+GATTTACTATGTAAAGATAGAAGGAATAAGGTGAAG\n+GATCT (1 mismatch, BC1)\n+ATCGT (4 mismatches, BC2)\n+GTGAT (3 mismatches, BC3)\n+TGTCT (3 mismatches, BC4)\n+\n+This sequence will be classified as 
\'BC1\' (it has the lowest mismatch count).\n+If \'--exact\' or \'--mismatches 0\' were specified, this sequence would be\n+classified as \'unmatched\' (because, although BC1 had the lowest mismatch count,\n+it is above the maximum allowed mismatches).\n+\n+Matching with \'--eol\' (end of line) does the same, but from the other side\n+of the sequence.\n+\n+** With partial matching (very similar to indels):\n+\n+Same as above, with the following addition: barcodes are also checked for\n+partial overlap (number of allowed non-overlapping bases is \'--partial N\').\n+\n+Example:\n+Input sequence is ATTTACTATGTAAAGATAGAAGGAATAAGGTGAAG\n+(Same as above, but note the missing \'G\' at the beginning.)\n+\n+Matching (without partial overlapping) against BC1 yields 4 mismatches:\n+ATTTACTATGTAAAGATAGAAGGAATAAGGTGAAG\n+GATCT (4 mismatches)\n+\n+Partial overlapping would also try the following match:\n+-ATTTACTATGTAAAGATAGAAGGAATAAGGTGAAG\n+GATCT (1 mismatch)\n+\n+Note: scoring counts a missing base as a mismatch, so the final\n+mismatch count is 2 (1 \'real\' mismatch, 1 \'missing base\' mismatch).\n+If running with \'--mismatches 2\' (meaning allowing up to 2 mismatches) - this\n+sequence will be classified as BC1.\n+\n+EOF\n+\n+exit 1;\n+}\n'
b
diff -r 8abdedf55101 -r 015dc921d814 fastx_barcode_splitter.xml
--- a/fastx_barcode_splitter.xml Wed Nov 11 12:38:37 2015 -0500
+++ b/fastx_barcode_splitter.xml Tue May 08 13:27:14 2018 -0400
[
@@ -1,38 +1,94 @@
-<tool id="cshl_fastx_barcode_splitter" version="1.0.0" name="Barcode Splitter">
+<tool id="cshl_fastx_barcode_splitter" version="1.0.1" name="Barcode Splitter">
     <description></description>
-    <requirements>
-        <requirement type="package" version="0.0.13">fastx_toolkit</requirement>
-    </requirements>
-    <command interpreter="bash">fastx_barcode_splitter_galaxy_wrapper.sh '$BARCODE' '$input' "$input.name" "$output.files_path" --mismatches $mismatches --partial $partial $EOL > '$output' </command>
-
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <command detect_errors="aggressive"><![CDATA[
+mkdir split &&
+@CATS@ '$__tool_directory__/fastx_barcode_splitter.pl' --bcfile '$BARCODE'
+--prefix 'split/'
+--suffix '.$input.extension'
+--mismatches $mismatches
+--partial $partial
+#if $refBarcodeLocation.barcodeLocation == "idxfile":
+    --idxfile '$refBarcodeLocation.idxfile'
+    --idxidstrip $refBarcodeLocation.idxidstrip
+#else:
+    $refBarcodeLocation.EOL
+#end if
+> '$summary'
+    ]]></command>
     <inputs>
-        <param format="txt" name="BARCODE" type="data" label="Barcodes to use" />
-        <param format="fasta,fastqsanger,fastqsolexa,fastqillumina" name="input" type="data" label="Library to split" />
+        <param name="BARCODE" type="data" format="txt" label="Barcodes to use" />
+        <param name="input" type="data" format="fasta,fastq,fastqsanger,fastqsolexa,fastqillumina" label="Library to split" />
 
-        <param name="EOL" type="select" label="Barcodes found at">
-            <option value="--bol">Start of sequence (5' end)</option>
-            <option value="--eol">End of sequence (3' end)</option>
-        </param>
+        <conditional name="refBarcodeLocation">
+            <param name="barcodeLocation" type="select" label="Barcodes found at">
+                <option value="bol">Start of sequence (5' end)</option>
+                <option value="eol">End of sequence (3' end)</option>
+                <option value="idxfile">Separate index file</option>
+            </param>
+            <when value="bol">
+                <param name="EOL" type="hidden" value="--bol" />
+            </when>
+            <when value="eol">
+                <param name="EOL" type="hidden" value="--eol" />
+            </when>
+            <when value="idxfile">
+                <param argument="--idxidstrip" type="integer" value="1" label="Characters to strip from the end of the sequence id before matching" />
+                <param argument="--idxfile" type="data" format="fasta,fastq,fastqsanger" label="Select index read file" />
+            </when>
+        </conditional>
+        <param argument="--mismatches" type="integer" value="0" label="Number of allowed mismatches" />
+        <param argument="--partial" type="integer" value="0" label="Number of allowed barcodes nucleotide deletions" />
+    </inputs>
 
-        <param name="mismatches" type="integer" value="2" label="Number of allowed mismatches" />
+    <outputs>
+        <data name="summary" format="tabular" label="${tool.name} on ${on_string}: Summary" />
+        <collection name="split_output" type="list" format_source="input" label="${tool.name} on ${on_string}">
+            <discover_datasets pattern="__designation_and_ext__" directory="split" visible="false" />
+        </collection>
+    </outputs>
 
-        <param name="partial" type="integer" value="0" label="Number of allowed barcodes nucleotide deletions" />
-    </inputs>
-    <outputs>
-        <data format="html" name="output" />
-    </outputs>
     <tests>
         <test>
             <!-- Split a FASTQ file -->
             <param name="BARCODE" value="fastx_barcode_splitter1.txt" />
             <param name="input" value="fastx_barcode_splitter1.fastq" ftype="fastqsolexa" />
-            <param name="EOL" value="Start of sequence (5' end)" />
+            <param name="barcodeLocation" value="bol" />
             <param name="mismatches" value="2" />
             <param name="partial" value="0" />
-            <output name="output" file="fastx_barcode_splitter1.out" />
+            <output name="summary" file="fastx_barcode_splitter1.out" />
+            <output_collection name="split_output" type="list">
+                <element name="BC1" ftype="fastqsolexa" file="fastx_barcode_splitter1_BC1.out" />
+                <element name="BC2" ftype="fastqsolexa" file="fastx_barcode_splitter1_BC2.out" />
+                <element name="BC3" ftype="fastqsolexa" file="fastx_barcode_splitter1_BC3.out" />
+                <element name="BC4" ftype="fastqsolexa" file="fastx_barcode_splitter1_BC4.out" />
+                <element name="unmatched" ftype="fastqsolexa" file="fastx_barcode_splitter1_unmatched.out" />
+            </output_collection>
+        </test>
+
+        <test>
+            <!-- Split a FASTQ file, using separate index read -->
+            <param name="BARCODE" value="fastx_barcode_splitter1.txt" />
+            <param name="input" value="fastx_barcode_splitter1.fastq" ftype="fastqsolexa" />
+            <param name="idxfile" value="fastx_barcode_splitter_index.fastq" ftype="fastqsolexa" />
+            <param name="barcodeLocation" value="idxfile" />
+            <param name="mismatches" value="2" />
+            <param name="partial" value="0" />
+            <output name="summary" file="fastx_barcode_splitter1.out" />
+            <output_collection name="split_output" type="list">
+                <element name="BC1" ftype="fastqsolexa" file="fastx_barcode_splitter1_BC1.out" />
+                <element name="BC2" ftype="fastqsolexa" file="fastx_barcode_splitter1_BC2.out" />
+                <element name="BC3" ftype="fastqsolexa" file="fastx_barcode_splitter1_BC3.out" />
+                <element name="BC4" ftype="fastqsolexa" file="fastx_barcode_splitter1_BC4.out" />
+                <element name="unmatched" ftype="fastqsolexa" file="fastx_barcode_splitter1_unmatched.out" />
+            </output_collection>
         </test>
     </tests>
-    <help>
+
+    <help><![CDATA[
 **What it does**
 
 This tool splits a Solexa library (FASTQ file) or a regular FASTA file into several files, using barcodes as the split criteria.
@@ -62,12 +118,12 @@
 
 .. image:: barcode_splitter_output_example.png
 
-
 ------
 
 This tool is based on `FASTX-toolkit`__ by Assaf Gordon.
 
  .. __: http://hannonlab.cshl.edu/fastx_toolkit/
-    </help>
+    ]]></help>
+    <expand macro="citations" />
 <!-- FASTX-barcode-splitter is part of the FASTX-toolkit, by A.Gordon (gordon@cshl.edu) -->
 </tool>
b
diff -r 8abdedf55101 -r 015dc921d814 fastx_barcode_splitter_galaxy_wrapper.sh
--- a/fastx_barcode_splitter_galaxy_wrapper.sh Wed Nov 11 12:38:37 2015 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,80 +0,0 @@
-#!/bin/bash
-
-#    FASTX-toolkit - FASTA/FASTQ preprocessing tools.
-#    Copyright (C) 2009  A. Gordon (gordon@cshl.edu)
-#
-#   This program is free software: you can redistribute it and/or modify
-#   it under the terms of the GNU Affero General Public License as
-#   published by the Free Software Foundation, either version 3 of the
-#   License, or (at your option) any later version.
-#
-#   This program is distributed in the hope that it will be useful,
-#   but WITHOUT ANY WARRANTY; without even the implied warranty of
-#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-#   GNU Affero General Public License for more details.
-#
-#    You should have received a copy of the GNU Affero General Public License
-#    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-
-#
-#This is a shell script wrapper for 'fastx_barcode_splitter.pl'
-#
-# 1. Output files are saved at the dataset's files_path directory.
-#    
-# 2. 'fastx_barcode_splitter.pl' outputs a textual table.
-#    This script turns it into pretty HTML with working URL
-#    (so lazy users can just click on the URLs and get their files)
-
-BARCODE_FILE="$1"
-FASTQ_FILE="$2"
-LIBNAME="$3"
-OUTPUT_PATH="$4"
-shift 4
-# The rest of the parameters are passed to the split program
-
-if [ "$OUTPUT_PATH" == "" ]; then
- echo "Usage: $0 [BARCODE FILE] [FASTQ FILE] [LIBRARY_NAME] [OUTPUT_PATH]" >&2
- exit 1
-fi
-
-#Sanitize library name, make sure we can create a file with this name
-LIBNAME=${LIBNAME//\.gz/}
-LIBNAME=${LIBNAME//\.txt/}
-LIBNAME=${LIBNAME//[^[:alnum:]]/_}
-
-if [ ! -r "$FASTQ_FILE" ]; then
- echo "Error: Input file ($FASTQ_FILE) not found!" >&2
- exit 1
-fi
-if [ ! -r "$BARCODE_FILE" ]; then
- echo "Error: barcode file ($BARCODE_FILE) not found!" >&2
- exit 1
-fi
-mkdir -p "$OUTPUT_PATH"
-if [ ! -d "$OUTPUT_PATH" ]; then
- echo "Error: failed to create output path '$OUTPUT_PATH'" >&2
- exit 1
-fi
-
-PUBLICURL=""
-BASEPATH="$OUTPUT_PATH/"
-#PREFIX="$BASEPATH"`date "+%Y-%m-%d_%H%M__"`"${LIBNAME}__"
-PREFIX="$BASEPATH""${LIBNAME}__"
-SUFFIX=".txt"
-
-RESULTS=`zcat -f < "$FASTQ_FILE" | fastx_barcode_splitter.pl --bcfile "$BARCODE_FILE" --prefix "$PREFIX" --suffix "$SUFFIX" "$@"`
-if [ $? != 0 ]; then
- echo "error"
-fi
-
-#
-# Convert the textual tab-separated table into simple HTML table,
-# with the local path replaces with a valid URL
-echo "<html><body><table border=1>"
-echo "$RESULTS" | sed -r "s|$BASEPATH(.*)|<a href=\"\\1\">\\1</a>|" | sed '
-i<tr><td>
-s|\t|</td><td>|g
-a<\/td><\/tr>
-'
-echo "<p>"
-echo "</table></body></html>"
b
diff -r 8abdedf55101 -r 015dc921d814 macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Tue May 08 13:27:14 2018 -0400
[
@@ -0,0 +1,54 @@
+<?xml version="1.0"?>
+<macros>
+    <token name="@CATS@">
+        #if $input.is_of_type('fasta.gz', 'fastqsanger.gz', 'fastqsolexa.gz', 'fastqillumina.gz'):
+            zcat -f '$input' |
+        #elif $input.is_of_type('fastqsanger.bz2', 'fastqsolexa.bz2', 'fastqillumina.bz2'):
+            bzcat -f '$input' |
+        #else:
+            cat '$input' |
+        #end if
+    </token>
+    <token name="@FQQUAL@">
+        <![CDATA[
+            #if $input.is_of_type('fastqsanger', 'fastqsanger.gz', 'fastqsanger.bz2'):
+                -Q 33
+            #elif $input.is_of_type('fastqsolexa', 'fastqsolexa.gz', 'fastqsolexa.bz2', 'fastqillumina', 'fastqillumina.gz', 'fastqillumina.bz2'):
+                -Q 64
+            #end if
+        ]]>
+    </token>
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="@VERSION@">fastx_toolkit</requirement>
+            <yield />
+        </requirements>
+    </xml>
+    <token name="@VERSION@">0.0.14</token>
+    <token name="@SANGER@">fastqsanger,fastqsanger.gz,fastqsanger.bz2</token>
+    <token name="@SOLEXA@">fastqsolexa,fastqsolexa.gz,fastqsolexa.bz2</token>
+    <token name="@ILLUMINA@">fastqillumina,fastqillumina.gz,fastqillumina.bz2</token>
+    <token name="@FASTQS@">@SANGER@,@SOLEXA@,@ILLUMINA@</token>
+    <token name="@FASTAS@">fasta,fasta.gz</token>
+    <xml name="citations">
+        <citations>
+            <citation type="bibtex">
+                @UNPUBLISHED{agordon,
+                    author = "Assaf Gordon",
+                    title = "FASTQ/A short-reads pre-processing tools",
+                    year = "2010",
+                    note = "http://hannonlab.cshl.edu/fastx_toolkit/",
+                    url = "http://hannonlab.cshl.edu/fastx_toolkit/"}
+            </citation>
+        </citations>
+    </xml>
+    <xml name="fasta_input">
+        <param name="input" type="data" format="@FASTAS@" label="Input FASTA file" />
+    </xml>
+    <xml name="fastq_input">
+        <param name="input" type="data" format="@FASTQS@" label="Input FASTQ file" />
+    </xml>
+    <xml name="fastx_input">
+        <param name="input" type="data" format="@FASTAS@,@FASTQS@" label="Input file in FASTA or FASTQ format" />
+    </xml>
+</macros>
b
diff -r 8abdedf55101 -r 015dc921d814 test-data/fastx_barcode_splitter1.fastq
--- a/test-data/fastx_barcode_splitter1.fastq Wed Nov 11 12:38:37 2015 -0500
+++ b/test-data/fastx_barcode_splitter1.fastq Tue May 08 13:27:14 2018 -0400
b
@@ -158,11 +158,11 @@
 TAGTTGAGTATACACAT
 +CSHL_3_FC042AGLLWW:1:2:7:203
 aab^V^aU]`aa^aZaa
-@CSHL_3_FC042AGLLWW:1:2:7:203
+@CSHL_3_FC042AGLLWW:1:2:7:203/1
 TAGTTTCTCTATGTACA
-+CSHL_3_FC042AGLLWW:1:2:7:203
++CSHL_3_FC042AGLLWW:1:2:7:203/1
 aab^V^aU]`aa^aZaa
-@CSHL_3_FC042AGLLWW:1:2:7:203
+@HWI-X00000:2:000000000-XXXXX:1:1117:19643:1028 1:N:0:
 TGTCTGAGTATACACAT
-+CSHL_3_FC042AGLLWW:1:2:7:203
-aab^V^aU]`aa^aZaa
\ No newline at end of file
++HWI-X00000:2:000000000-XXXXX:1:1117:19643:1028 1:N:0:
+aab^V^aU]`aa^aZaa
b
diff -r 8abdedf55101 -r 015dc921d814 test-data/fastx_barcode_splitter1.out
--- a/test-data/fastx_barcode_splitter1.out Wed Nov 11 12:38:37 2015 -0500
+++ b/test-data/fastx_barcode_splitter1.out Tue May 08 13:27:14 2018 -0400
b
@@ -1,24 +1,7 @@
-<html><body><table border=1>
-<tr><td>
-Barcode</td><td>Count</td><td>Location
-</td></tr>
-<tr><td>
-BC1</td><td>11</td><td><a href="fastx_barcode_splitter1_fastq__BC1.txt">fastx_barcode_splitter1_fastq__BC1.txt</a>
-</td></tr>
-<tr><td>
-BC2</td><td>12</td><td><a href="fastx_barcode_splitter1_fastq__BC2.txt">fastx_barcode_splitter1_fastq__BC2.txt</a>
-</td></tr>
-<tr><td>
-BC3</td><td>9</td><td><a href="fastx_barcode_splitter1_fastq__BC3.txt">fastx_barcode_splitter1_fastq__BC3.txt</a>
-</td></tr>
-<tr><td>
-BC4</td><td>1</td><td><a href="fastx_barcode_splitter1_fastq__BC4.txt">fastx_barcode_splitter1_fastq__BC4.txt</a>
-</td></tr>
-<tr><td>
-unmatched</td><td>9</td><td><a href="fastx_barcode_splitter1_fastq__unmatched.txt">fastx_barcode_splitter1_fastq__unmatched.txt</a>
-</td></tr>
-<tr><td>
-total</td><td>42
-</td></tr>
-<p>
-</table></body></html>
+# Barcode Count
+BC1 11
+BC2 12
+BC3 9
+BC4 1
+unmatched 9
+total 42
b
diff -r 8abdedf55101 -r 015dc921d814 test-data/fastx_barcode_splitter1_BC1.out
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/fastx_barcode_splitter1_BC1.out Tue May 08 13:27:14 2018 -0400
b
@@ -0,0 +1,44 @@
+@CSHL_3_FC042AGLLWW:1:2:7:203
+GATCTAGTAGTAGTAGA
++CSHL_3_FC042AGLLWW:1:2:7:203
+aab^V^aU]`aa^aZaa
+@CSHL_3_FC042AGLLWW:1:2:7:203
+GATCTAGTAGTAGTAGA
++CSHL_3_FC042AGLLWW:1:2:7:203
+aab^V^aU]`aa^aZaa
+@CSHL_3_FC042AGLLWW:1:2:7:203
+GATCTAGTAGTAGTAGA
++CSHL_3_FC042AGLLWW:1:2:7:203
+aab^V^aU]`aa^aZaa
+@CSHL_3_FC042AGLLWW:1:2:7:203
+GATCTAGTAGTAGTAGA
++CSHL_3_FC042AGLLWW:1:2:7:203
+aab^V^aU]`aa^aZaa
+@CSHL_3_FC042AGLLWW:1:2:7:203
+GATCTAGTAGTAGTAGA
++CSHL_3_FC042AGLLWW:1:2:7:203
+aab^V^aU]`aa^aZaa
+@CSHL_3_FC042AGLLWW:1:2:7:203
+GGTCTAGTAGTAGTAGA
++CSHL_3_FC042AGLLWW:1:2:7:203
+aab^V^aU]`aa^aZaa
+@CSHL_3_FC042AGLLWW:1:2:7:203
+GGTCTTCTCTATGTACA
++CSHL_3_FC042AGLLWW:1:2:7:203
+aab^V^aU]`aa^aZaa
+@CSHL_3_FC042AGLLWW:1:2:7:203
+GGTCTGAGTATACACAT
++CSHL_3_FC042AGLLWW:1:2:7:203
+aab^V^aU]`aa^aZaa
+@CSHL_3_FC042AGLLWW:1:2:7:203
+GGTATTCTCTATGTACA
++CSHL_3_FC042AGLLWW:1:2:7:203
+aab^V^aU]`aa^aZaa
+@CSHL_3_FC042AGLLWW:1:2:7:203
+GGTATTCTCTATGTACA
++CSHL_3_FC042AGLLWW:1:2:7:203
+aab^V^aU]`aa^aZaa
+@CSHL_3_FC042AGLLWW:1:2:7:203
+GGTATTCTCTATGTACA
++CSHL_3_FC042AGLLWW:1:2:7:203
+aab^V^aU]`aa^aZaa
b
diff -r 8abdedf55101 -r 015dc921d814 test-data/fastx_barcode_splitter1_BC2.out
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/fastx_barcode_splitter1_BC2.out Tue May 08 13:27:14 2018 -0400
b
@@ -0,0 +1,48 @@
+@CSHL_3_FC042AGLLWW:1:2:7:203
+ATCGTTCTCTATGTACA
++CSHL_3_FC042AGLLWW:1:2:7:203
+aab^V^aU]`aa^aZaa
+@CSHL_3_FC042AGLLWW:1:2:7:203
+ATCGTGAGTATACACAT
++CSHL_3_FC042AGLLWW:1:2:7:203
+aab^V^aU]`aa^aZaa
+@CSHL_3_FC042AGLLWW:1:2:7:203
+ATCGTTCTCTATGTACA
++CSHL_3_FC042AGLLWW:1:2:7:203
+aab^V^aU]`aa^aZaa
+@CSHL_3_FC042AGLLWW:1:2:7:203
+ATCGTGAGTATACACAT
++CSHL_3_FC042AGLLWW:1:2:7:203
+aab^V^aU]`aa^aZaa
+@CSHL_3_FC042AGLLWW:1:2:7:203
+ATCTTTCTCTATGTACA
++CSHL_3_FC042AGLLWW:1:2:7:203
+aab^V^aU]`aa^aZaa
+@CSHL_3_FC042AGLLWW:1:2:7:203
+ATCTTGAGTATACACAT
++CSHL_3_FC042AGLLWW:1:2:7:203
+aab^V^aU]`aa^aZaa
+@CSHL_3_FC042AGLLWW:1:2:7:203
+ATCTTGAGTATACACAT
++CSHL_3_FC042AGLLWW:1:2:7:203
+aab^V^aU]`aa^aZaa
+@CSHL_3_FC042AGLLWW:1:2:7:203
+ATCTTTCTCTATGTACA
++CSHL_3_FC042AGLLWW:1:2:7:203
+aab^V^aU]`aa^aZaa
+@CSHL_3_FC042AGLLWW:1:2:7:203
+ATCTCGAGTATACACAT
++CSHL_3_FC042AGLLWW:1:2:7:203
+aab^V^aU]`aa^aZaa
+@CSHL_3_FC042AGLLWW:1:2:7:203
+ATCTCGAGTATACACAT
++CSHL_3_FC042AGLLWW:1:2:7:203
+aab^V^aU]`aa^aZaa
+@CSHL_3_FC042AGLLWW:1:2:7:203
+ATCTCTCTCTATGTACA
++CSHL_3_FC042AGLLWW:1:2:7:203
+aab^V^aU]`aa^aZaa
+@CSHL_3_FC042AGLLWW:1:2:7:203
+ATCTCGAGTATACACAT
++CSHL_3_FC042AGLLWW:1:2:7:203
+aab^V^aU]`aa^aZaa
b
diff -r 8abdedf55101 -r 015dc921d814 test-data/fastx_barcode_splitter1_BC3.out
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/fastx_barcode_splitter1_BC3.out Tue May 08 13:27:14 2018 -0400
b
@@ -0,0 +1,36 @@
+@CSHL_3_FC042AGLLWW:1:2:7:203
+GGAATGAGTATACACAT
++CSHL_3_FC042AGLLWW:1:2:7:203
+aab^V^aU]`aa^aZaa
+@CSHL_3_FC042AGLLWW:1:2:7:203
+GGAATTCTCTATGTACA
++CSHL_3_FC042AGLLWW:1:2:7:203
+aab^V^aU]`aa^aZaa
+@CSHL_3_FC042AGLLWW:1:2:7:203
+GGAATGAGTATACACAT
++CSHL_3_FC042AGLLWW:1:2:7:203
+aab^V^aU]`aa^aZaa
+@CSHL_3_FC042AGLLWW:1:2:7:203
+GGAATTCTCTATGTACA
++CSHL_3_FC042AGLLWW:1:2:7:203
+aab^V^aU]`aa^aZaa
+@CSHL_3_FC042AGLLWW:1:2:7:203
+GGAATGAGTATACACAT
++CSHL_3_FC042AGLLWW:1:2:7:203
+aab^V^aU]`aa^aZaa
+@CSHL_3_FC042AGLLWW:1:2:7:203
+GGAATTCTCTATGTACA
++CSHL_3_FC042AGLLWW:1:2:7:203
+aab^V^aU]`aa^aZaa
+@CSHL_3_FC042AGLLWW:1:2:7:203
+GGAATGAGTATACACAT
++CSHL_3_FC042AGLLWW:1:2:7:203
+aab^V^aU]`aa^aZaa
+@CSHL_3_FC042AGLLWW:1:2:7:203
+GGAATTCTCTATGTACA
++CSHL_3_FC042AGLLWW:1:2:7:203
+aab^V^aU]`aa^aZaa
+@CSHL_3_FC042AGLLWW:1:2:7:203
+GGAATGAGTATACACAT
++CSHL_3_FC042AGLLWW:1:2:7:203
+aab^V^aU]`aa^aZaa
b
diff -r 8abdedf55101 -r 015dc921d814 test-data/fastx_barcode_splitter1_BC4.out
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/fastx_barcode_splitter1_BC4.out Tue May 08 13:27:14 2018 -0400
b
@@ -0,0 +1,4 @@
+@HWI-X00000:2:000000000-XXXXX:1:1117:19643:1028 1:N:0:
+TGTCTGAGTATACACAT
++HWI-X00000:2:000000000-XXXXX:1:1117:19643:1028 1:N:0:
+aab^V^aU]`aa^aZaa
b
diff -r 8abdedf55101 -r 015dc921d814 test-data/fastx_barcode_splitter1_unmatched.out
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/fastx_barcode_splitter1_unmatched.out Tue May 08 13:27:14 2018 -0400
b
@@ -0,0 +1,36 @@
+@CSHL_3_FC042AGLLWW:1:2:7:203
+GGTACGAGTATACACAT
++CSHL_3_FC042AGLLWW:1:2:7:203
+aab^V^aU]`aa^aZaa
+@CSHL_3_FC042AGLLWW:1:2:7:203
+GGTACTCTCTATGTACA
++CSHL_3_FC042AGLLWW:1:2:7:203
+aab^V^aU]`aa^aZaa
+@CSHL_3_FC042AGLLWW:1:2:7:203
+GGTACGAGTATACACAT
++CSHL_3_FC042AGLLWW:1:2:7:203
+aab^V^aU]`aa^aZaa
+@CSHL_3_FC042AGLLWW:1:2:7:203
+TAGTTGAGTATACACAT
++CSHL_3_FC042AGLLWW:1:2:7:203
+aab^V^aU]`aa^aZaa
+@CSHL_3_FC042AGLLWW:1:2:7:203
+TAGTTGAGTATACACAT
++CSHL_3_FC042AGLLWW:1:2:7:203
+aab^V^aU]`aa^aZaa
+@CSHL_3_FC042AGLLWW:1:2:7:203
+TAGTTTCTCTATGTACA
++CSHL_3_FC042AGLLWW:1:2:7:203
+aab^V^aU]`aa^aZaa
+@CSHL_3_FC042AGLLWW:1:2:7:203
+TAGTTTCTCTATGTACA
++CSHL_3_FC042AGLLWW:1:2:7:203
+aab^V^aU]`aa^aZaa
+@CSHL_3_FC042AGLLWW:1:2:7:203
+TAGTTGAGTATACACAT
++CSHL_3_FC042AGLLWW:1:2:7:203
+aab^V^aU]`aa^aZaa
+@CSHL_3_FC042AGLLWW:1:2:7:203/1
+TAGTTTCTCTATGTACA
++CSHL_3_FC042AGLLWW:1:2:7:203/1
+aab^V^aU]`aa^aZaa
b
diff -r 8abdedf55101 -r 015dc921d814 test-data/fastx_barcode_splitter_index.fastq
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/fastx_barcode_splitter_index.fastq Tue May 08 13:27:14 2018 -0400
b
@@ -0,0 +1,168 @@
+@CSHL_3_FC042AGLLWW:1:2:7:203
+GATCT
++CSHL_3_FC042AGLLWW:1:2:7:203
+aab^V
+@CSHL_3_FC042AGLLWW:1:2:7:203
+GATCT
++CSHL_3_FC042AGLLWW:1:2:7:203
+aab^V
+@CSHL_3_FC042AGLLWW:1:2:7:203
+GATCT
++CSHL_3_FC042AGLLWW:1:2:7:203
+aab^V
+@CSHL_3_FC042AGLLWW:1:2:7:203
+GATCT
++CSHL_3_FC042AGLLWW:1:2:7:203
+aab^V
+@CSHL_3_FC042AGLLWW:1:2:7:203
+GATCT
++CSHL_3_FC042AGLLWW:1:2:7:203
+aab^V
+@CSHL_3_FC042AGLLWW:1:2:7:203
+GGTCT
++CSHL_3_FC042AGLLWW:1:2:7:203
+aab^V
+@CSHL_3_FC042AGLLWW:1:2:7:203
+GGTCT
++CSHL_3_FC042AGLLWW:1:2:7:203
+aab^V
+@CSHL_3_FC042AGLLWW:1:2:7:203
+GGTCT
++CSHL_3_FC042AGLLWW:1:2:7:203
+aab^V
+@CSHL_3_FC042AGLLWW:1:2:7:203
+GGTAT
++CSHL_3_FC042AGLLWW:1:2:7:203
+aab^V
+@CSHL_3_FC042AGLLWW:1:2:7:203
+GGTAT
++CSHL_3_FC042AGLLWW:1:2:7:203
+aab^V
+@CSHL_3_FC042AGLLWW:1:2:7:203
+GGTAT
++CSHL_3_FC042AGLLWW:1:2:7:203
+aab^V
+@CSHL_3_FC042AGLLWW:1:2:7:203
+GGTAC
++CSHL_3_FC042AGLLWW:1:2:7:203
+aab^V
+@CSHL_3_FC042AGLLWW:1:2:7:203
+GGTAC
++CSHL_3_FC042AGLLWW:1:2:7:203
+aab^V
+@CSHL_3_FC042AGLLWW:1:2:7:203
+GGTAC
++CSHL_3_FC042AGLLWW:1:2:7:203
+aab^V
+@CSHL_3_FC042AGLLWW:1:2:7:203
+ATCGT
++CSHL_3_FC042AGLLWW:1:2:7:203
+aab^V
+@CSHL_3_FC042AGLLWW:1:2:7:203
+ATCGT
++CSHL_3_FC042AGLLWW:1:2:7:203
+aab^V
+@CSHL_3_FC042AGLLWW:1:2:7:203
+ATCGT
++CSHL_3_FC042AGLLWW:1:2:7:203
+aab^V
+@CSHL_3_FC042AGLLWW:1:2:7:203
+ATCGT
++CSHL_3_FC042AGLLWW:1:2:7:203
+aab^V
+@CSHL_3_FC042AGLLWW:1:2:7:203
+ATCTT
++CSHL_3_FC042AGLLWW:1:2:7:203
+aab^V
+@CSHL_3_FC042AGLLWW:1:2:7:203
+ATCTT
++CSHL_3_FC042AGLLWW:1:2:7:203
+aab^V
+@CSHL_3_FC042AGLLWW:1:2:7:203
+ATCTT
++CSHL_3_FC042AGLLWW:1:2:7:203
+aab^V
+@CSHL_3_FC042AGLLWW:1:2:7:203
+ATCTT
++CSHL_3_FC042AGLLWW:1:2:7:203
+aab^V
+@CSHL_3_FC042AGLLWW:1:2:7:203
+ATCTC
++CSHL_3_FC042AGLLWW:1:2:7:203
+aab^V
+@CSHL_3_FC042AGLLWW:1:2:7:203
+ATCTC
++CSHL_3_FC042AGLLWW:1:2:7:203
+aab^V
+@CSHL_3_FC042AGLLWW:1:2:7:203
+ATCTC
++CSHL_3_FC042AGLLWW:1:2:7:203
+aab^V
+@CSHL_3_FC042AGLLWW:1:2:7:203
+ATCTC
++CSHL_3_FC042AGLLWW:1:2:7:203
+aab^V
+@CSHL_3_FC042AGLLWW:1:2:7:203
+GGAAT
++CSHL_3_FC042AGLLWW:1:2:7:203
+aab^V
+@CSHL_3_FC042AGLLWW:1:2:7:203
+GGAAT
++CSHL_3_FC042AGLLWW:1:2:7:203
+aab^V
+@CSHL_3_FC042AGLLWW:1:2:7:203
+GGAAT
++CSHL_3_FC042AGLLWW:1:2:7:203
+aab^V
+@CSHL_3_FC042AGLLWW:1:2:7:203
+GGAAT
++CSHL_3_FC042AGLLWW:1:2:7:203
+aab^V
+@CSHL_3_FC042AGLLWW:1:2:7:203
+GGAAT
++CSHL_3_FC042AGLLWW:1:2:7:203
+aab^V
+@CSHL_3_FC042AGLLWW:1:2:7:203
+GGAAT
++CSHL_3_FC042AGLLWW:1:2:7:203
+aab^V
+@CSHL_3_FC042AGLLWW:1:2:7:203
+GGAAT
++CSHL_3_FC042AGLLWW:1:2:7:203
+aab^V
+@CSHL_3_FC042AGLLWW:1:2:7:203
+GGAAT
++CSHL_3_FC042AGLLWW:1:2:7:203
+aab^V
+@CSHL_3_FC042AGLLWW:1:2:7:203
+GGAAT
++CSHL_3_FC042AGLLWW:1:2:7:203
+aab^V
+@CSHL_3_FC042AGLLWW:1:2:7:203
+TAGTT
++CSHL_3_FC042AGLLWW:1:2:7:203
+aab^V
+@CSHL_3_FC042AGLLWW:1:2:7:203
+TAGTT
++CSHL_3_FC042AGLLWW:1:2:7:203
+aab^V
+@CSHL_3_FC042AGLLWW:1:2:7:203
+TAGTT
++CSHL_3_FC042AGLLWW:1:2:7:203
+aab^V
+@CSHL_3_FC042AGLLWW:1:2:7:203
+TAGTT
++CSHL_3_FC042AGLLWW:1:2:7:203
+aab^V
+@CSHL_3_FC042AGLLWW:1:2:7:203
+TAGTT
++CSHL_3_FC042AGLLWW:1:2:7:203
+aab^V
+@CSHL_3_FC042AGLLWW:1:2:7:203/2
+TAGTT
++CSHL_3_FC042AGLLWW:1:2:7:203/2
+aab^V
+@HWI-X00000:2:000000000-XXXXX:1:1117:19643:1028 3:N:0:
+TGTCT
++HWI-X00000:2:000000000-XXXXX:1:1117:19643:1028 3:N:0:
+aab^V
b
diff -r 8abdedf55101 -r 015dc921d814 tool_dependencies.xml
--- a/tool_dependencies.xml Wed Nov 11 12:38:37 2015 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,6 +0,0 @@
-<?xml version="1.0"?>
-<tool_dependency>
-    <package name="fastx_toolkit" version="0.0.13">
-        <repository changeset_revision="ec66ae4c269b" name="package_fastx_toolkit_0_0_13" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" />
-    </package>
-</tool_dependency>