Mercurial > repos > devteam > fastx_barcode_splitter
changeset 4:015dc921d814 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/fastx_toolkit/fastx_barcode_splitter commit bbb2e6b6769b03602a8ab97001f88fbec52080a1
author | iuc |
---|---|
date | Tue, 08 May 2018 13:27:14 -0400 |
parents | 8abdedf55101 |
children | 4bedca26c133 |
files | fastx_barcode_splitter.pl fastx_barcode_splitter.xml fastx_barcode_splitter_galaxy_wrapper.sh macros.xml test-data/fastx_barcode_splitter1.fastq test-data/fastx_barcode_splitter1.out test-data/fastx_barcode_splitter1_BC1.out test-data/fastx_barcode_splitter1_BC2.out test-data/fastx_barcode_splitter1_BC3.out test-data/fastx_barcode_splitter1_BC4.out test-data/fastx_barcode_splitter1_unmatched.out test-data/fastx_barcode_splitter_index.fastq tool_dependencies.xml |
diffstat | 13 files changed, 1052 insertions(+), 138 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fastx_barcode_splitter.pl Tue May 08 13:27:14 2018 -0400 @@ -0,0 +1,571 @@ +#!/usr/bin/env perl + +# FASTX-toolkit - FASTA/FASTQ preprocessing tools. +# Copyright (C) 2009-2013 A. Gordon (assafgordon@gmail.com) +# +# Lance Parsons (lparsons@princeton.edu) +# 3/21/2011 - Modified to accept separate index file for barcodes +# 4/6/2011 - Modified to cleanup bad barcode identifiers (esp. useful for Galaxy) +# 4/28/2016 - Modified summary output to remove file paths and add comment +# character '#' + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +use strict; +use warnings; +use IO::Handle; +use Data::Dumper; +use Getopt::Long; +use Carp; + +## +## This program splits a FASTQ/FASTA file into several smaller files, +## Based on barcode matching. 
+## +## run with "--help" for usage information +## +## Assaf Gordon <assafgordon@gmail.com> , 11sep2008 + +# Forward declarations +sub load_barcode_file ($); +sub parse_command_line ; +sub match_sequences ; +sub mismatch_count($$) ; +sub print_results; +sub open_and_detect_input_format; +sub open_index_and_detect_input_format($); +sub read_index_record; +sub read_record; +sub write_record($); +sub usage(); + +# Global flags and arguments, +# Set by command line arguments +my $barcode_file ; +my $barcodes_at_eol = 0 ; +my $barcodes_at_bol = 0 ; +my $index_read_file ; +my $exact_match = 0 ; +my $allow_partial_overlap = 0; +my $allowed_mismatches = 1; +my $newfile_suffix = ''; +my $newfile_prefix ; +my $quiet = 0 ; +my $debug = 0 ; +my $fastq_format = 1; +my $index_fastq_format = 1; +my $read_id_check_strip_characters = 1; + +# Global variables +# Populated by 'create_output_files' +my %filenames; +my %files; +my %counts = ( 'unmatched' => 0 ); +my $barcodes_length; +my @barcodes; +my $input_file_io; + + +# The Four lines per record in FASTQ format. 
+# (when using FASTA format, only the first two are used) +my $seq_name; +my $seq_bases; +my $seq_name2; +my $seq_qualities; + +# Values used for index read file +my $index_seq_name; +my $index_seq_bases; +my $index_seq_name2; +my $index_seq_qualities; + +# +# Start of Program +# +parse_command_line ; + +load_barcode_file ( $barcode_file ) ; + +open_and_detect_input_format; + +if (defined $index_read_file) {open_index_and_detect_input_format ( $index_read_file );} + +match_sequences ; + +print_results unless $quiet; + +# +# End of program +# + +sub parse_command_line { + my $help; + + usage() if (scalar @ARGV==0); + + my $result = GetOptions ( "bcfile=s" => \$barcode_file, + "eol" => \$barcodes_at_eol, + "bol" => \$barcodes_at_bol, + "idxfile=s" => \$index_read_file, + "idxidstrip=i" => \$read_id_check_strip_characters, + "exact" => \$exact_match, + "prefix=s" => \$newfile_prefix, + "suffix=s" => \$newfile_suffix, + "quiet" => \$quiet, + "partial=i" => \$allow_partial_overlap, + "debug" => \$debug, + "mismatches=i" => \$allowed_mismatches, + "help" => \$help + ) ; + + usage() if ($help); + + die "Error: barcode file not specified (use '--bcfile [FILENAME]')\n" unless defined $barcode_file; + die "Error: prefix path/filename not specified (use '--prefix [PATH]')\n" unless defined $newfile_prefix; + + if (! 
defined $index_read_file) { + if ($barcodes_at_bol == $barcodes_at_eol) { + die "Error: can't specify both --eol & --bol\n" if $barcodes_at_eol; + die "Error: must specify either --eol or --bol or --idxfile\n" ; + } + } + elsif ($barcodes_at_bol || $barcodes_at_eol) { + die "Error: Must specify only one of --idxfile, --eol, or --bol"; + } + + die "Error: invalid for value partial matches (valid values are 0 or greater)\n" if $allow_partial_overlap<0; + + $allowed_mismatches = 0 if $exact_match; + + die "Error: invalid value for mismatches (valid values are 0 or more)\n" if ($allowed_mismatches<0); + + die "Error: partial overlap value ($allow_partial_overlap) bigger than " . + "max. allowed mismatches ($allowed_mismatches)\n" if ($allow_partial_overlap > $allowed_mismatches); + + + exit unless $result; +} + + + +# +# Read the barcode file +# +sub load_barcode_file ($) { + my $filename = shift or croak "Missing barcode file name"; + + open BCFILE,"<$filename" or die "Error: failed to open barcode file ($filename)\n"; + while (<BCFILE>) { + next if m/^#/; + chomp; + my ($ident, $barcode) = split('\t') ; + + $barcode = uc($barcode); + + # Sanity checks on the barcodes + die "Error: bad data at barcode file ($filename) line $.\n" unless defined $barcode; + die "Error: bad barcode value ($barcode) at barcode file ($filename) line $.\n" + unless $barcode =~ m/^[AGCT]+$/; + + # Cleanup Identifiers (only allow alphanumeric, replace others with dash '-') + $ident =~ s/[^A-Za-z0-9]/-/g; + die "Error: bad identifier value ($ident) at barcode file ($filename) line $. (must be alphanumeric)\n" + unless $ident =~ m/^\w+$/; + + die "Error: badcode($ident, $barcode) is shorter or equal to maximum number of " . + "mismatches ($allowed_mismatches). This makes no sense. Specify fewer mismatches.\n" + if length($barcode)<=$allowed_mismatches; + + $barcodes_length = length($barcode) unless defined $barcodes_length; + die "Error: found barcodes in different lengths. 
+ this feature is not supported yet.\n" + unless $barcodes_length == length($barcode); + + push @barcodes, [$ident, $barcode]; + + if ($allow_partial_overlap>0) { + foreach my $i (1 .. $allow_partial_overlap) { + substr $barcode, ($barcodes_at_bol)?0:-1, 1, ''; + push @barcodes, [$ident, $barcode]; + } + } + } + close BCFILE; + + if ($debug) { + print STDERR "barcode\tsequence\n"; + foreach my $barcoderef (@barcodes) { + my ($ident, $seq) = @{$barcoderef}; + print STDERR $ident,"\t", $seq ,"\n"; + } + } +} + +# Create one output file for each barcode. +# (Also create a file for the dummy 'unmatched' barcode) +sub create_output_files { + my %barcodes = map { $_->[0] => 1 } @barcodes; #generate a uniq list of barcode identifiers; + $barcodes{'unmatched'} = 1 ; + + foreach my $ident (keys %barcodes) { + my $new_filename = $newfile_prefix . $ident . $newfile_suffix; + $filenames{$ident} = $new_filename; + open my $file, ">$new_filename" or die "Error: failed to create output file ($new_filename)\n"; + $files{$ident} = $file ; + } +} + +sub match_sequences { + + my %barcodes = map { $_->[0] => 1 } @barcodes; #generate a uniq list of barcode identifiers; + $barcodes{'unmatched'} = 1 ; + + #reset counters + foreach my $ident ( keys %barcodes ) { + $counts{$ident} = 0; + } + + create_output_files; + + # Read the FASTQ file + # split according to barcodes + while ( read_record ) { + chomp $seq_name; + chomp $seq_bases; + if (defined $index_read_file) { + read_index_record() or die "Error: Unable to read index sequence for sequence name ($seq_name), check to make sure the file lengths match.\n"; + chomp $index_seq_name; + chomp $index_seq_bases; + + # Assert that the read ids match + my $seq_name_match = &strip_read_id($seq_name); + my $index_seq_name_match = &strip_read_id($index_seq_name); + if ($seq_name_match ne $index_seq_name_match) { + die "Error: Index sequence name ($index_seq_name) does not match sequence name ($seq_name)\n"; + } + + } + + print STDERR "sequence 
$seq_bases: \n" if $debug; + + my $best_barcode_mismatches_count = $barcodes_length; + my $best_barcode_ident = undef; + + #Try all barcodes, find the one with the lowest mismatch count + foreach my $barcoderef (@barcodes) { + my ($ident, $barcode) = @{$barcoderef}; + + # Get DNA fragment (in the length of the barcodes) + # The barcode will be tested only against this fragment + # (no point in testing the barcode against the whole sequence) + my $sequence_fragment; + if ($barcodes_at_bol) { + $sequence_fragment = substr $seq_bases, 0, $barcodes_length; + } elsif ($barcodes_at_eol) { + $sequence_fragment = substr $seq_bases, - $barcodes_length; + } else { + $sequence_fragment = substr $index_seq_bases, 0, $barcodes_length; + } + + my $mm = mismatch_count($sequence_fragment, $barcode) ; + + # if this is a partial match, add the non-overlap as a mismatch + # (partial barcodes are shorter than the length of the original barcodes) + $mm += ($barcodes_length - length($barcode)); + + if ( $mm < $best_barcode_mismatches_count ) { + $best_barcode_mismatches_count = $mm ; + $best_barcode_ident = $ident ; + } + } + + $best_barcode_ident = 'unmatched' + if ( (!defined $best_barcode_ident) || $best_barcode_mismatches_count>$allowed_mismatches) ; + + print STDERR "sequence $seq_bases matched barcode: $best_barcode_ident\n" if $debug; + + $counts{$best_barcode_ident}++; + + #get the file associated with the matched barcode. + #(note: there's also a file associated with 'unmatched' barcode) + my $file = $files{$best_barcode_ident}; + + write_record($file); + } +} + +# Strip end of readids when matching to avoid mismatch between read 1, 2, 3, etc. 
+sub strip_read_id { + my $read_id = shift; + my $stripped_read_id = $read_id; + if ($read_id_check_strip_characters) { + if ($read_id =~ /@([^:]+):([0-9]+):([^:]+):([0-9]+):([0-9]+):([0-9]+):([0-9]+) ([0-9]+):([YN]):([0-9]+):([ACGT]+){0,1}/) { # CASAVA 1.8+ + my @parts = split(/ /,$read_id); + $stripped_read_id = $parts[0]; + } else { # CASAVA 1.7 and earlier + $stripped_read_id = substr($read_id, 0, length($read_id)-$read_id_check_strip_characters); + } + } + return $stripped_read_id; +} + +#Quickly calculate hamming distance between two strings +# +#NOTE: Strings must be same length. +# returns number of different characters. +#see http://www.perlmonks.org/?node_id=500235 +sub mismatch_count($$) { length( $_[ 0 ] ) - ( ( $_[ 0 ] ^ $_[ 1 ] ) =~ tr[\0][\0] ) } + +sub print_results +{ + print "# Barcode\tCount\n"; + my $total = 0 ; + foreach my $ident (sort keys %counts) { + print $ident, "\t", $counts{$ident},"\n"; + $total += $counts{$ident}; + } + print "total\t",$total,"\n"; +} + +sub read_record +{ + $seq_name = $input_file_io->getline(); + + return undef unless defined $seq_name; # End of file? 
+ + $seq_bases = $input_file_io->getline(); + die "Error: bad input file, expecting line with sequences\n" unless defined $seq_bases; + + # If using FASTQ format, read two more lines + if ($fastq_format) { + $seq_name2 = $input_file_io->getline(); + die "Error: bad input file, expecting line with sequence name2\n" unless defined $seq_name2; + + $seq_qualities = $input_file_io->getline(); + die "Error: bad input file, expecting line with quality scores\n" unless defined $seq_qualities; + } + return 1; +} + +sub write_record($) +{ + my $file = shift; + + croak "Bad file handle" unless defined $file; + + print $file $seq_name,"\n"; + print $file $seq_bases,"\n"; + + #if using FASTQ format, write two more lines + if ($fastq_format) { + print $file $seq_name2; + print $file $seq_qualities; + } +} + +sub open_and_detect_input_format +{ + $input_file_io = new IO::Handle; + die "Failed to open STDIN " unless $input_file_io->fdopen(fileno(STDIN),"r"); + + # Get the first character, and push it back + my $first_char = $input_file_io->getc(); + $input_file_io->ungetc(ord $first_char); + + if ($first_char eq '>') { + # FASTA format + $fastq_format = 0 ; + print STDERR "Detected FASTA format\n" if $debug; + } elsif ($first_char eq '@') { + # FASTQ format + $fastq_format = 1; + print STDERR "Detected FASTQ format\n" if $debug; + } else { + die "Error: unknown file format. 
First character = '$first_char' (expecting > or \@)\n"; + } +} + +sub open_index_and_detect_input_format($) { + my $filename = shift or croak "Missing index read file name"; + + open IDXFILE,"<$filename" or die "Error: failed to open index read file ($filename)\n"; + + # Get the first line, and reset file pointer + my $first_line = <IDXFILE>; + my $first_char = substr($first_line, 0, 1); + seek(IDXFILE, 0, 0); + + if ($first_char eq '>') { + # FASTA format + $index_fastq_format = 0 ; + print STDERR "Detected FASTA format for index file\n" if $debug; + } elsif ($first_char eq '@') { + # FASTQ format + $index_fastq_format = 1; + print STDERR "Detected FASTQ format for index file\n" if $debug; + } else { + die "Error: unknown index file format. First character = '$first_char' (expecting > or \@)\n"; + } +} + +sub read_index_record +{ + $index_seq_name = <IDXFILE>; + + return undef unless defined $index_seq_name; # End of file? + + $index_seq_bases = <IDXFILE>; + die "Error: bad input file, expecting line with sequences\n" unless defined $index_seq_bases; + + # If using FASTQ format, read two more lines + if ($index_fastq_format) { + $index_seq_name2 = <IDXFILE>; + die "Error: bad input file, expecting line with sequence name2\n" unless defined $index_seq_name2; + + $index_seq_qualities = <IDXFILE>; + die "Error: bad input file, expecting line with quality scores\n" unless defined $index_seq_qualities; + } + return 1; +} + +sub usage() +{ + print<<EOF; +Barcode Splitter, by Assaf Gordon (gordon\@cshl.edu), 11sep2008 + +This program reads FASTA/FASTQ file and splits it into several smaller files, +Based on barcode matching. +FASTA/FASTQ data is read from STDIN (format is auto-detected.) +Output files will be writen to disk. +Summary will be printed to STDOUT. 
+ +usage: $0 --bcfile FILE --prefix PREFIX [--suffix SUFFIX] [--bol|--eol|--idxfile] + [--mismatches N] [--exact] [--partial N] [--idxidstrip N] + [--help] [--quiet] [--debug] + +Arguments: + +--bcfile FILE - Barcodes file name. (see explanation below.) +--prefix PREFIX - File prefix. will be added to the output files. Can be used + to specify output directories. +--suffix SUFFIX - File suffix (optional). Can be used to specify file + extensions. +--bol - Try to match barcodes at the BEGINNING of sequences. + (What biologists would call the 5' end, and programmers + would call index 0.) +--eol - Try to match barcodes at the END of sequences. + (What biologists would call the 3' end, and programmers + would call the end of the string.) +--idxfile FILE - Read barcodes from separate index file (fasta or fastq) + NOTE: one of --bol, --eol, --idxfile must be specified, + but not more than one. +--idxidstrip N - When using index file, strip this number of characters + from the end of the sequence id before matching. + Automatically detects CASAVA 1.8 format and strips at a + space in the id, use 0 to disable this. + (Default is 1). +--mismatches N - Max. number of mismatches allowed. default is 1. +--exact - Same as '--mismatches 0'. If both --exact and --mismatches + are specified, '--exact' takes precedence. +--partial N - Allow partial overlap of barcodes. (see explanation below.) + (Default is not partial matching) +--quiet - Don't print counts and summary at the end of the run. + (Default is to print.) +--debug - Print lots of useless debug information to STDERR. +--help - This helpful help screen. + +Example (Assuming 's_2_100.txt' is a FASTQ file, 'mybarcodes.txt' is +the barcodes file): + + \$ cat s_2_100.txt | $0 --bcfile mybarcodes.txt --bol --mismatches 2 \\ + --prefix /tmp/bla_ --suffix ".txt" + +Barcode file format +------------------- +Barcode files are simple text files. 
Each line should contain an identifier +(descriptive name for the barcode), and the barcode itself (A/C/G/T), +separated by a TAB character. Example: + + #This line is a comment (starts with a 'number' sign) + BC1 GATCT + BC2 ATCGT + BC3 GTGAT + BC4 TGTCT + +For each barcode, a new FASTQ file will be created (with the barcode's +identifier as part of the file name). Sequences matching the barcode +will be stored in the appropriate file. + +Running the above example (assuming "mybarcodes.txt" contains the above +barcodes), will create the following files: + /tmp/bla_BC1.txt + /tmp/bla_BC2.txt + /tmp/bla_BC3.txt + /tmp/bla_BC4.txt + /tmp/bla_unmatched.txt +The 'unmatched' file will contain all sequences that didn't match any barcode. + +Barcode matching +---------------- + +** Without partial matching: + +Count mismatches between the FASTA/Q sequences and the barcodes. +The barcode which matched with the lowest mismatches count (providing the +count is small or equal to '--mismatches N') 'gets' the sequences. + +Example (using the above barcodes): +Input Sequence: +GATTTACTATGTAAAGATAGAAGGAATAAGGTGAAG + +Matching with '--bol --mismatches 1': +GATTTACTATGTAAAGATAGAAGGAATAAGGTGAAG +GATCT (1 mismatch, BC1) +ATCGT (4 mismatches, BC2) +GTGAT (3 mismatches, BC3) +TGTCT (3 mismatches, BC4) + +This sequence will be classified as 'BC1' (it has the lowest mismatch count). +If '--exact' or '--mismatches 0' were specified, this sequence would be +classified as 'unmatched' (because, although BC1 had the lowest mismatch count, +it is above the maximum allowed mismatches). + +Matching with '--eol' (end of line) does the same, but from the other side +of the sequence. + +** With partial matching (very similar to indels): + +Same as above, with the following addition: barcodes are also checked for +partial overlap (number of allowed non-overlapping bases is '--partial N'). 
+ +Example: +Input sequence is ATTTACTATGTAAAGATAGAAGGAATAAGGTGAAG +(Same as above, but note the missing 'G' at the beginning.) + +Matching (without partial overlapping) against BC1 yields 4 mismatches: +ATTTACTATGTAAAGATAGAAGGAATAAGGTGAAG +GATCT (4 mismatches) + +Partial overlapping would also try the following match: +-ATTTACTATGTAAAGATAGAAGGAATAAGGTGAAG +GATCT (1 mismatch) + +Note: scoring counts a missing base as a mismatch, so the final +mismatch count is 2 (1 'real' mismatch, 1 'missing base' mismatch). +If running with '--mismatches 2' (meaning allowing upto 2 mismatches) - this +seqeunce will be classified as BC1. + +EOF + +exit 1; +}
--- a/fastx_barcode_splitter.xml Wed Nov 11 12:38:37 2015 -0500 +++ b/fastx_barcode_splitter.xml Tue May 08 13:27:14 2018 -0400 @@ -1,38 +1,94 @@ -<tool id="cshl_fastx_barcode_splitter" version="1.0.0" name="Barcode Splitter"> +<tool id="cshl_fastx_barcode_splitter" version="1.0.1" name="Barcode Splitter"> <description></description> - <requirements> - <requirement type="package" version="0.0.13">fastx_toolkit</requirement> - </requirements> - <command interpreter="bash">fastx_barcode_splitter_galaxy_wrapper.sh '$BARCODE' '$input' "$input.name" "$output.files_path" --mismatches $mismatches --partial $partial $EOL > '$output' </command> - + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements" /> + <command detect_errors="aggressive"><![CDATA[ +mkdir split && +@CATS@ '$__tool_directory__/fastx_barcode_splitter.pl' --bcfile '$BARCODE' +--prefix 'split/' +--suffix '.$input.extension' +--mismatches $mismatches +--partial $partial +#if $refBarcodeLocation.barcodeLocation == "idxfile": + --idxfile '$refBarcodeLocation.idxfile' + --idxidstrip $refBarcodeLocation.idxidstrip +#else: + $refBarcodeLocation.EOL +#end if +> '$summary' + ]]></command> <inputs> - <param format="txt" name="BARCODE" type="data" label="Barcodes to use" /> - <param format="fasta,fastqsanger,fastqsolexa,fastqillumina" name="input" type="data" label="Library to split" /> + <param name="BARCODE" type="data" format="txt" label="Barcodes to use" /> + <param name="input" type="data" format="fasta,fastq,fastqsanger,fastqsolexa,fastqillumina" label="Library to split" /> - <param name="EOL" type="select" label="Barcodes found at"> - <option value="--bol">Start of sequence (5' end)</option> - <option value="--eol">End of sequence (3' end)</option> - </param> + <conditional name="refBarcodeLocation"> + <param name="barcodeLocation" type="select" label="Barcodes found at"> + <option value="bol">Start of sequence (5' end)</option> + <option value="eol">End of sequence (3' end)</option> 
+ <option value="idxfile">Separate index file</option> + </param> + <when value="bol"> + <param name="EOL" type="hidden" value="--bol" /> + </when> + <when value="eol"> + <param name="EOL" type="hidden" value="--eol" /> + </when> + <when value="idxfile"> + <param argument="--idxidstrip" type="integer" value="1" label="Characters to strip from the end of the sequence id before matching" /> + <param argument="--idxfile" type="data" format="fasta,fastq,fastqsanger" label="Select index read file" /> + </when> + </conditional> + <param argument="--mismatches" type="integer" value="0" label="Number of allowed mismatches" /> + <param argument="--partial" type="integer" value="0" label="Number of allowed barcodes nucleotide deletions" /> + </inputs> - <param name="mismatches" type="integer" value="2" label="Number of allowed mismatches" /> + <outputs> + <data name="summary" format="tabular" label="${tool.name} on ${on_string}: Summary" /> + <collection name="split_output" type="list" format_source="input" label="${tool.name} on ${on_string}"> + <discover_datasets pattern="__designation_and_ext__" directory="split" visible="false" /> + </collection> + </outputs> - <param name="partial" type="integer" value="0" label="Number of allowed barcodes nucleotide deletions" /> - </inputs> - <outputs> - <data format="html" name="output" /> - </outputs> <tests> <test> <!-- Split a FASTQ file --> <param name="BARCODE" value="fastx_barcode_splitter1.txt" /> <param name="input" value="fastx_barcode_splitter1.fastq" ftype="fastqsolexa" /> - <param name="EOL" value="Start of sequence (5' end)" /> + <param name="barcodeLocation" value="bol" /> <param name="mismatches" value="2" /> <param name="partial" value="0" /> - <output name="output" file="fastx_barcode_splitter1.out" /> + <output name="summary" file="fastx_barcode_splitter1.out" /> + <output_collection name="split_output" type="list"> + <element name="BC1" ftype="fastqsolexa" file="fastx_barcode_splitter1_BC1.out" /> + <element 
name="BC2" ftype="fastqsolexa" file="fastx_barcode_splitter1_BC2.out" /> + <element name="BC3" ftype="fastqsolexa" file="fastx_barcode_splitter1_BC3.out" /> + <element name="BC4" ftype="fastqsolexa" file="fastx_barcode_splitter1_BC4.out" /> + <element name="unmatched" ftype="fastqsolexa" file="fastx_barcode_splitter1_unmatched.out" /> + </output_collection> + </test> + + <test> + <!-- Split a FASTQ file, using separate index read --> + <param name="BARCODE" value="fastx_barcode_splitter1.txt" /> + <param name="input" value="fastx_barcode_splitter1.fastq" ftype="fastqsolexa" /> + <param name="idxfile" value="fastx_barcode_splitter_index.fastq" ftype="fastqsolexa" /> + <param name="barcodeLocation" value="idxfile" /> + <param name="mismatches" value="2" /> + <param name="partial" value="0" /> + <output name="summary" file="fastx_barcode_splitter1.out" /> + <output_collection name="split_output" type="list"> + <element name="BC1" ftype="fastqsolexa" file="fastx_barcode_splitter1_BC1.out" /> + <element name="BC2" ftype="fastqsolexa" file="fastx_barcode_splitter1_BC2.out" /> + <element name="BC3" ftype="fastqsolexa" file="fastx_barcode_splitter1_BC3.out" /> + <element name="BC4" ftype="fastqsolexa" file="fastx_barcode_splitter1_BC4.out" /> + <element name="unmatched" ftype="fastqsolexa" file="fastx_barcode_splitter1_unmatched.out" /> + </output_collection> </test> </tests> - <help> + + <help><![CDATA[ **What it does** This tool splits a Solexa library (FASTQ file) or a regular FASTA file into several files, using barcodes as the split criteria. @@ -62,12 +118,12 @@ .. image:: barcode_splitter_output_example.png - ------ This tool is based on `FASTX-toolkit`__ by Assaf Gordon. .. __: http://hannonlab.cshl.edu/fastx_toolkit/ - </help> + ]]></help> + <expand macro="citations" /> <!-- FASTX-barcode-splitter is part of the FASTX-toolkit, by A.Gordon (gordon@cshl.edu) --> </tool>
--- a/fastx_barcode_splitter_galaxy_wrapper.sh Wed Nov 11 12:38:37 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,80 +0,0 @@ -#!/bin/bash - -# FASTX-toolkit - FASTA/FASTQ preprocessing tools. -# Copyright (C) 2009 A. Gordon (gordon@cshl.edu) -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as -# published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see <http://www.gnu.org/licenses/>. - -# -#This is a shell script wrapper for 'fastx_barcode_splitter.pl' -# -# 1. Output files are saved at the dataset's files_path directory. -# -# 2. 'fastx_barcode_splitter.pl' outputs a textual table. -# This script turns it into pretty HTML with working URL -# (so lazy users can just click on the URLs and get their files) - -BARCODE_FILE="$1" -FASTQ_FILE="$2" -LIBNAME="$3" -OUTPUT_PATH="$4" -shift 4 -# The rest of the parameters are passed to the split program - -if [ "$OUTPUT_PATH" == "" ]; then - echo "Usage: $0 [BARCODE FILE] [FASTQ FILE] [LIBRARY_NAME] [OUTPUT_PATH]" >&2 - exit 1 -fi - -#Sanitize library name, make sure we can create a file with this name -LIBNAME=${LIBNAME//\.gz/} -LIBNAME=${LIBNAME//\.txt/} -LIBNAME=${LIBNAME//[^[:alnum:]]/_} - -if [ ! -r "$FASTQ_FILE" ]; then - echo "Error: Input file ($FASTQ_FILE) not found!" >&2 - exit 1 -fi -if [ ! -r "$BARCODE_FILE" ]; then - echo "Error: barcode file ($BARCODE_FILE) not found!" >&2 - exit 1 -fi -mkdir -p "$OUTPUT_PATH" -if [ ! 
-d "$OUTPUT_PATH" ]; then - echo "Error: failed to create output path '$OUTPUT_PATH'" >&2 - exit 1 -fi - -PUBLICURL="" -BASEPATH="$OUTPUT_PATH/" -#PREFIX="$BASEPATH"`date "+%Y-%m-%d_%H%M__"`"${LIBNAME}__" -PREFIX="$BASEPATH""${LIBNAME}__" -SUFFIX=".txt" - -RESULTS=`zcat -f < "$FASTQ_FILE" | fastx_barcode_splitter.pl --bcfile "$BARCODE_FILE" --prefix "$PREFIX" --suffix "$SUFFIX" "$@"` -if [ $? != 0 ]; then - echo "error" -fi - -# -# Convert the textual tab-separated table into simple HTML table, -# with the local path replaces with a valid URL -echo "<html><body><table border=1>" -echo "$RESULTS" | sed -r "s|$BASEPATH(.*)|<a href=\"\\1\">\\1</a>|" | sed ' -i<tr><td> -s|\t|</td><td>|g -a<\/td><\/tr> -' -echo "<p>" -echo "</table></body></html>"
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros.xml Tue May 08 13:27:14 2018 -0400 @@ -0,0 +1,54 @@ +<?xml version="1.0"?> +<macros> + <token name="@CATS@"> + #if $input.is_of_type('fasta.gz', 'fastqsanger.gz', 'fastqsolexa.gz', 'fastqillumina.gz'): + zcat -f '$input' | + #elif $input.is_of_type('fastqsanger.bz2', 'fastqsolexa.bz2', 'fastqillumina.bz2'): + bzcat -f '$input' | + #else: + cat '$input' | + #end if + </token> + <token name="@FQQUAL@"> + <![CDATA[ + #if $input.is_of_type('fastqsanger', 'fastqsanger.gz', 'fastqsanger.bz2'): + -Q 33 + #elif $input.is_of_type('fastqsolexa', 'fastqsolexa.gz', 'fastqsolexa.bz2', 'fastqillumina', 'fastqillumina.gz', 'fastqillumina.bz2'): + -Q 64 + #end if + ]]> + </token> + <xml name="requirements"> + <requirements> + <requirement type="package" version="@VERSION@">fastx_toolkit</requirement> + <yield /> + </requirements> + </xml> + <token name="@VERSION@">0.0.14</token> + <token name="@SANGER@">fastqsanger,fastqsanger.gz,fastqsanger.bz2</token> + <token name="@SOLEXA@">fastqsolexa,fastqsolexa.gz,fastqsolexa.bz2</token> + <token name="@ILLUMINA@">fastqillumina,fastqillumina.gz,fastqillumina.bz2</token> + <token name="@FASTQS@">@SANGER@,@SOLEXA@,@ILLUMINA@</token> + <token name="@FASTAS@">fasta,fasta.gz</token> + <xml name="citations"> + <citations> + <citation type="bibtex"> + @UNPUBLISHED{agordon, + author = "Assaf Gordon", + title = "FASTQ/A short-reads pre-processing tools", + year = "2010", + note = "http://hannonlab.cshl.edu/fastx_toolkit/", + url = "http://hannonlab.cshl.edu/fastx_toolkit/"} + </citation> + </citations> + </xml> + <xml name="fasta_input"> + <param name="input" type="data" format="@FASTAS@" label="Input FASTA file" /> + </xml> + <xml name="fastq_input"> + <param name="input" type="data" format="@FASTQS@" label="Input FASTQ file" /> + </xml> + <xml name="fastx_input"> + <param name="input" type="data" format="@FASTAS@,@FASTQS@" label="Input file in FASTA or FASTQ format" /> + </xml> +</macros>
--- a/test-data/fastx_barcode_splitter1.fastq Wed Nov 11 12:38:37 2015 -0500 +++ b/test-data/fastx_barcode_splitter1.fastq Tue May 08 13:27:14 2018 -0400 @@ -158,11 +158,11 @@ TAGTTGAGTATACACAT +CSHL_3_FC042AGLLWW:1:2:7:203 aab^V^aU]`aa^aZaa -@CSHL_3_FC042AGLLWW:1:2:7:203 +@CSHL_3_FC042AGLLWW:1:2:7:203/1 TAGTTTCTCTATGTACA -+CSHL_3_FC042AGLLWW:1:2:7:203 ++CSHL_3_FC042AGLLWW:1:2:7:203/1 aab^V^aU]`aa^aZaa -@CSHL_3_FC042AGLLWW:1:2:7:203 +@HWI-X00000:2:000000000-XXXXX:1:1117:19643:1028 1:N:0: TGTCTGAGTATACACAT -+CSHL_3_FC042AGLLWW:1:2:7:203 -aab^V^aU]`aa^aZaa \ No newline at end of file ++HWI-X00000:2:000000000-XXXXX:1:1117:19643:1028 1:N:0: +aab^V^aU]`aa^aZaa
--- a/test-data/fastx_barcode_splitter1.out Wed Nov 11 12:38:37 2015 -0500 +++ b/test-data/fastx_barcode_splitter1.out Tue May 08 13:27:14 2018 -0400 @@ -1,24 +1,7 @@ -<html><body><table border=1> -<tr><td> -Barcode</td><td>Count</td><td>Location -</td></tr> -<tr><td> -BC1</td><td>11</td><td><a href="fastx_barcode_splitter1_fastq__BC1.txt">fastx_barcode_splitter1_fastq__BC1.txt</a> -</td></tr> -<tr><td> -BC2</td><td>12</td><td><a href="fastx_barcode_splitter1_fastq__BC2.txt">fastx_barcode_splitter1_fastq__BC2.txt</a> -</td></tr> -<tr><td> -BC3</td><td>9</td><td><a href="fastx_barcode_splitter1_fastq__BC3.txt">fastx_barcode_splitter1_fastq__BC3.txt</a> -</td></tr> -<tr><td> -BC4</td><td>1</td><td><a href="fastx_barcode_splitter1_fastq__BC4.txt">fastx_barcode_splitter1_fastq__BC4.txt</a> -</td></tr> -<tr><td> -unmatched</td><td>9</td><td><a href="fastx_barcode_splitter1_fastq__unmatched.txt">fastx_barcode_splitter1_fastq__unmatched.txt</a> -</td></tr> -<tr><td> -total</td><td>42 -</td></tr> -<p> -</table></body></html> +# Barcode Count +BC1 11 +BC2 12 +BC3 9 +BC4 1 +unmatched 9 +total 42
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/fastx_barcode_splitter1_BC1.out Tue May 08 13:27:14 2018 -0400 @@ -0,0 +1,44 @@ +@CSHL_3_FC042AGLLWW:1:2:7:203 +GATCTAGTAGTAGTAGA ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +GATCTAGTAGTAGTAGA ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +GATCTAGTAGTAGTAGA ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +GATCTAGTAGTAGTAGA ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +GATCTAGTAGTAGTAGA ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +GGTCTAGTAGTAGTAGA ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +GGTCTTCTCTATGTACA ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +GGTCTGAGTATACACAT ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +GGTATTCTCTATGTACA ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +GGTATTCTCTATGTACA ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +GGTATTCTCTATGTACA ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/fastx_barcode_splitter1_BC2.out Tue May 08 13:27:14 2018 -0400 @@ -0,0 +1,48 @@ +@CSHL_3_FC042AGLLWW:1:2:7:203 +ATCGTTCTCTATGTACA ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +ATCGTGAGTATACACAT ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +ATCGTTCTCTATGTACA ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +ATCGTGAGTATACACAT ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +ATCTTTCTCTATGTACA ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +ATCTTGAGTATACACAT ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +ATCTTGAGTATACACAT ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +ATCTTTCTCTATGTACA ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +ATCTCGAGTATACACAT ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +ATCTCGAGTATACACAT ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +ATCTCTCTCTATGTACA ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +ATCTCGAGTATACACAT ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/fastx_barcode_splitter1_BC3.out Tue May 08 13:27:14 2018 -0400 @@ -0,0 +1,36 @@ +@CSHL_3_FC042AGLLWW:1:2:7:203 +GGAATGAGTATACACAT ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +GGAATTCTCTATGTACA ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +GGAATGAGTATACACAT ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +GGAATTCTCTATGTACA ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +GGAATGAGTATACACAT ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +GGAATTCTCTATGTACA ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +GGAATGAGTATACACAT ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +GGAATTCTCTATGTACA ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +GGAATGAGTATACACAT ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/fastx_barcode_splitter1_BC4.out Tue May 08 13:27:14 2018 -0400 @@ -0,0 +1,4 @@ +@HWI-X00000:2:000000000-XXXXX:1:1117:19643:1028 1:N:0: +TGTCTGAGTATACACAT ++HWI-X00000:2:000000000-XXXXX:1:1117:19643:1028 1:N:0: +aab^V^aU]`aa^aZaa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/fastx_barcode_splitter1_unmatched.out Tue May 08 13:27:14 2018 -0400 @@ -0,0 +1,36 @@ +@CSHL_3_FC042AGLLWW:1:2:7:203 +GGTACGAGTATACACAT ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +GGTACTCTCTATGTACA ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +GGTACGAGTATACACAT ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +TAGTTGAGTATACACAT ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +TAGTTGAGTATACACAT ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +TAGTTTCTCTATGTACA ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +TAGTTTCTCTATGTACA ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +TAGTTGAGTATACACAT ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203/1 +TAGTTTCTCTATGTACA ++CSHL_3_FC042AGLLWW:1:2:7:203/1 +aab^V^aU]`aa^aZaa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/fastx_barcode_splitter_index.fastq Tue May 08 13:27:14 2018 -0400 @@ -0,0 +1,168 @@ +@CSHL_3_FC042AGLLWW:1:2:7:203 +GATCT ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V +@CSHL_3_FC042AGLLWW:1:2:7:203 +GATCT ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V +@CSHL_3_FC042AGLLWW:1:2:7:203 +GATCT ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V +@CSHL_3_FC042AGLLWW:1:2:7:203 +GATCT ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V +@CSHL_3_FC042AGLLWW:1:2:7:203 +GATCT ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V +@CSHL_3_FC042AGLLWW:1:2:7:203 +GGTCT ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V +@CSHL_3_FC042AGLLWW:1:2:7:203 +GGTCT ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V +@CSHL_3_FC042AGLLWW:1:2:7:203 +GGTCT ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V +@CSHL_3_FC042AGLLWW:1:2:7:203 +GGTAT ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V +@CSHL_3_FC042AGLLWW:1:2:7:203 +GGTAT ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V +@CSHL_3_FC042AGLLWW:1:2:7:203 +GGTAT ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V +@CSHL_3_FC042AGLLWW:1:2:7:203 +GGTAC ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V +@CSHL_3_FC042AGLLWW:1:2:7:203 +GGTAC ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V +@CSHL_3_FC042AGLLWW:1:2:7:203 +GGTAC ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V +@CSHL_3_FC042AGLLWW:1:2:7:203 +ATCGT ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V +@CSHL_3_FC042AGLLWW:1:2:7:203 +ATCGT ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V +@CSHL_3_FC042AGLLWW:1:2:7:203 +ATCGT ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V +@CSHL_3_FC042AGLLWW:1:2:7:203 +ATCGT ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V +@CSHL_3_FC042AGLLWW:1:2:7:203 +ATCTT ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V +@CSHL_3_FC042AGLLWW:1:2:7:203 +ATCTT ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V +@CSHL_3_FC042AGLLWW:1:2:7:203 +ATCTT ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V +@CSHL_3_FC042AGLLWW:1:2:7:203 +ATCTT ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V +@CSHL_3_FC042AGLLWW:1:2:7:203 +ATCTC ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V +@CSHL_3_FC042AGLLWW:1:2:7:203 +ATCTC ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V +@CSHL_3_FC042AGLLWW:1:2:7:203 
+ATCTC ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V +@CSHL_3_FC042AGLLWW:1:2:7:203 +ATCTC ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V +@CSHL_3_FC042AGLLWW:1:2:7:203 +GGAAT ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V +@CSHL_3_FC042AGLLWW:1:2:7:203 +GGAAT ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V +@CSHL_3_FC042AGLLWW:1:2:7:203 +GGAAT ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V +@CSHL_3_FC042AGLLWW:1:2:7:203 +GGAAT ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V +@CSHL_3_FC042AGLLWW:1:2:7:203 +GGAAT ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V +@CSHL_3_FC042AGLLWW:1:2:7:203 +GGAAT ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V +@CSHL_3_FC042AGLLWW:1:2:7:203 +GGAAT ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V +@CSHL_3_FC042AGLLWW:1:2:7:203 +GGAAT ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V +@CSHL_3_FC042AGLLWW:1:2:7:203 +GGAAT ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V +@CSHL_3_FC042AGLLWW:1:2:7:203 +TAGTT ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V +@CSHL_3_FC042AGLLWW:1:2:7:203 +TAGTT ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V +@CSHL_3_FC042AGLLWW:1:2:7:203 +TAGTT ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V +@CSHL_3_FC042AGLLWW:1:2:7:203 +TAGTT ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V +@CSHL_3_FC042AGLLWW:1:2:7:203 +TAGTT ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V +@CSHL_3_FC042AGLLWW:1:2:7:203/2 +TAGTT ++CSHL_3_FC042AGLLWW:1:2:7:203/2 +aab^V +@HWI-X00000:2:000000000-XXXXX:1:1117:19643:1028 3:N:0: +TGTCT ++HWI-X00000:2:000000000-XXXXX:1:1117:19643:1028 3:N:0: +aab^V
--- a/tool_dependencies.xml Wed Nov 11 12:38:37 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,6 +0,0 @@ -<?xml version="1.0"?> -<tool_dependency> - <package name="fastx_toolkit" version="0.0.13"> - <repository changeset_revision="ec66ae4c269b" name="package_fastx_toolkit_0_0_13" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" /> - </package> -</tool_dependency>