Mercurial > repos > devteam > fastx_barcode_splitter
changeset 0:a12850d0559b
Uploaded tool tarball.
author | devteam |
---|---|
date | Wed, 25 Sep 2013 11:06:38 -0400 |
parents | |
children | c5300ff2aa1e |
files | fastx_barcode_splitter.xml fastx_barcode_splitter_galaxy_wrapper.sh test-data/fastx_barcode_splitter1.fastq test-data/fastx_barcode_splitter1.out test-data/fastx_barcode_splitter1.txt tool_dependencies.xml |
diffstat | 6 files changed, 360 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
<!--
    fastx_barcode_splitter.xml

    Galaxy tool definition for the FASTX-toolkit barcode splitter.
    The tool runs fastx_barcode_splitter_galaxy_wrapper.sh, which writes one
    file per barcode into the output dataset's extra-files directory and
    prints an HTML summary table that becomes the output dataset.
-->
<tool id="cshl_fastx_barcode_splitter" version="1.0.0" name="Barcode Splitter">
    <description></description>
    <requirements>
        <requirement type="package" version="0.0.13">fastx_toolkit</requirement>
    </requirements>
    <!--
        Wrapper arguments, in order: barcode file, library file, library
        display name, extra-files directory of the output dataset. Everything
        after those four is forwarded unchanged to the splitter program.
        Dataset paths are quoted so a path with unusual characters is still
        passed as a single argument.
    -->
    <command interpreter="bash">fastx_barcode_splitter_galaxy_wrapper.sh "$BARCODE" "$input" "$input.name" "$output.files_path" --mismatches $mismatches --partial $partial $EOL > "$output"</command>

    <inputs>
        <!-- Tab-separated text file: barcode identifier, then barcode sequence. -->
        <param format="txt" name="BARCODE" type="data" label="Barcodes to use" />
        <param format="fasta,fastqsanger,fastqsolexa,fastqillumina" name="input" type="data" label="Library to split" />

        <!-- The selected option value is passed verbatim as a splitter flag. -->
        <param name="EOL" type="select" label="Barcodes found at">
            <option value="--bol">Start of sequence (5' end)</option>
            <option value="--eol">End of sequence (3' end)</option>
        </param>

        <param name="mismatches" type="integer" size="3" value="2" label="Number of allowed mismatches" />

        <param name="partial" type="integer" size="3" value="0" label="Number of allowed barcodes nucleotide deletions" />
    </inputs>

    <tests>
        <test>
            <!-- Split a FASTQ file -->
            <param name="BARCODE" value="fastx_barcode_splitter1.txt" />
            <param name="input" value="fastx_barcode_splitter1.fastq" ftype="fastqsolexa" />
            <!-- Select options are addressed by value, not by display text. -->
            <param name="EOL" value="--bol" />
            <param name="mismatches" value="2" />
            <param name="partial" value="0" />
            <output name="output" file="fastx_barcode_splitter1.out" />
        </test>
    </tests>

    <outputs>
        <!-- HTML summary table; the split files live in this dataset's extra-files directory. -->
        <data format="html" name="output" />
    </outputs>
<help>

**What it does**

This tool splits a Solexa library (FASTQ file) or a regular FASTA file into several files, using barcodes as the split criteria.

--------

**Barcode file Format**

Barcode files are simple text files.
Each line should contain an identifier (descriptive name for the barcode), and the barcode itself (A/C/G/T), separated by a TAB character.
Example::

    #This line is a comment (starts with a 'number' sign)
    BC1 GATCT
    BC2 ATCGT
    BC3 GTGAT
    BC4 TGTCT

For each barcode, a new FASTQ file will be created (with the barcode's identifier as part of the file name).
Sequences matching the barcode will be stored in the appropriate file.

One additional FASTQ file will be created (the 'unmatched' file), where sequences not matching any barcode will be stored.

The output of this tool is an HTML file, displaying the split counts and the file locations.

**Output Example**

.. image:: ${static_path}/fastx_icons/barcode_splitter_output_example.png


------

This tool is based on `FASTX-toolkit`__ by Assaf Gordon.

 .. __: http://hannonlab.cshl.edu/fastx_toolkit/

</help>
<!-- FASTX-barcode-splitter is part of the FASTX-toolkit, by A.Gordon (gordon@cshl.edu) -->
</tool>
#!/bin/bash

# fastx_barcode_splitter_galaxy_wrapper.sh
#
#    FASTX-toolkit - FASTA/FASTQ preprocessing tools.
#    Copyright (C) 2009  A. Gordon (gordon@cshl.edu)
#
#   This program is free software: you can redistribute it and/or modify
#   it under the terms of the GNU Affero General Public License as
#   published by the Free Software Foundation, either version 3 of the
#   License, or (at your option) any later version.
#
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU Affero General Public License for more details.
#
#    You should have received a copy of the GNU Affero General Public License
#    along with this program.  If not, see <http://www.gnu.org/licenses/>.

#
# This is a shell script wrapper for 'fastx_barcode_splitter.pl'
#
# 1. Output files are saved at the dataset's files_path directory.
#
# 2. 'fastx_barcode_splitter.pl' outputs a textual table.
#    This script turns it into pretty HTML with working URL
#    (so lazy users can just click on the URLs and get their files)
#
# Arguments:
#   $1  barcode file (must be readable)
#   $2  FASTA/FASTQ library to split (must be readable; read through 'zcat -f')
#   $3  library name, used (after sanitizing) as part of the output file names
#   $4  output directory (created if missing)
#   $5+ passed unchanged to fastx_barcode_splitter.pl
#
# Output:
#   stdout  the HTML summary table
#   stderr  error messages
#
# Exit status:
#   0 on success; 1 on a usage error, an unreadable input, an output
#   directory that cannot be created, or a failure of the splitter.

BARCODE_FILE="$1"
FASTQ_FILE="$2"
LIBNAME="$3"
OUTPUT_PATH="$4"

if [ -z "$OUTPUT_PATH" ]; then
	echo "Usage: $0 [BARCODE FILE] [FASTQ FILE] [LIBRARY_NAME] [OUTPUT_PATH]" >&2
	exit 1
fi

# The rest of the parameters are passed to the split program
shift 4

# Sanitize library name, make sure we can create a file with this name:
# drop every ".gz" and ".txt" occurrence, then turn each remaining
# non-alphanumeric character into an underscore.
LIBNAME=${LIBNAME//\.gz/}
LIBNAME=${LIBNAME//\.txt/}
LIBNAME=${LIBNAME//[^[:alnum:]]/_}

if [ ! -r "$FASTQ_FILE" ]; then
	echo "Error: Input file ($FASTQ_FILE) not found!" >&2
	exit 1
fi
if [ ! -r "$BARCODE_FILE" ]; then
	echo "Error: barcode file ($BARCODE_FILE) not found!" >&2
	exit 1
fi
mkdir -p "$OUTPUT_PATH"
if [ ! -d "$OUTPUT_PATH" ]; then
	echo "Error: failed to create output path '$OUTPUT_PATH'" >&2
	exit 1
fi

BASEPATH="$OUTPUT_PATH/"
PREFIX="${BASEPATH}${LIBNAME}__"
SUFFIX=".txt"

# Run the splitter and capture its tab-separated summary table.
# The status tested here is that of the last command in the pipeline
# (the splitter). On failure, report on stderr and stop with a non-zero
# status instead of writing the word "error" into the HTML output.
if ! RESULTS=$(zcat -f "$FASTQ_FILE" | fastx_barcode_splitter.pl --bcfile "$BARCODE_FILE" --prefix "$PREFIX" --suffix "$SUFFIX" "$@"); then
	echo "Error: fastx_barcode_splitter.pl failed while splitting '$FASTQ_FILE'" >&2
	exit 1
fi

#
# Convert the textual tab-separated table into simple HTML table,
# with the local path replaced by a relative link.
#
# The output directory is matched as a literal string (not as a regular
# expression), so characters such as '.', '+', '(' or '|' in the path
# cannot change or break the substitution.
CELL_BREAK='</td><td>'

echo "<html><body><table border=1>"
while IFS= read -r row; do
	if [[ "$row" == *"$BASEPATH"* ]]; then
		# Text before the first occurrence of the output directory is kept;
		# everything after it becomes both the link target and the link text.
		leading=${row%%"$BASEPATH"*}
		relpath=${row#*"$BASEPATH"}
		row="${leading}<a href=\"${relpath}\">${relpath}</a>"
	fi
	# One table row per input line; every TAB separates two cells.
	printf '<tr><td>\n%s\n</td></tr>\n' "${row//$'\t'/$CELL_BREAK}"
done <<< "$RESULTS"
# The "<p>" line is kept as-is: the expected test output contains it.
echo "<p>"
echo "</table></body></html>"
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/fastx_barcode_splitter1.fastq Wed Sep 25 11:06:38 2013 -0400 @@ -0,0 +1,168 @@ +@CSHL_3_FC042AGLLWW:1:2:7:203 +GATCTAGTAGTAGTAGA ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +GATCTAGTAGTAGTAGA ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +GATCTAGTAGTAGTAGA ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +GATCTAGTAGTAGTAGA ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +GATCTAGTAGTAGTAGA ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +GGTCTAGTAGTAGTAGA ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +GGTCTTCTCTATGTACA ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +GGTCTGAGTATACACAT ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +GGTATTCTCTATGTACA ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +GGTATTCTCTATGTACA ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +GGTATTCTCTATGTACA ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +GGTACGAGTATACACAT ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +GGTACTCTCTATGTACA ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +GGTACGAGTATACACAT ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +ATCGTTCTCTATGTACA ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +ATCGTGAGTATACACAT ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +ATCGTTCTCTATGTACA ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +ATCGTGAGTATACACAT ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +ATCTTTCTCTATGTACA 
++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +ATCTTGAGTATACACAT ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +ATCTTGAGTATACACAT ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +ATCTTTCTCTATGTACA ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +ATCTCGAGTATACACAT ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +ATCTCGAGTATACACAT ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +ATCTCTCTCTATGTACA ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +ATCTCGAGTATACACAT ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +GGAATGAGTATACACAT ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +GGAATTCTCTATGTACA ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +GGAATGAGTATACACAT ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +GGAATTCTCTATGTACA ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +GGAATGAGTATACACAT ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +GGAATTCTCTATGTACA ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +GGAATGAGTATACACAT ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +GGAATTCTCTATGTACA ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +GGAATGAGTATACACAT ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +TAGTTGAGTATACACAT ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +TAGTTGAGTATACACAT ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +TAGTTTCTCTATGTACA ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +TAGTTTCTCTATGTACA 
++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +TAGTTGAGTATACACAT ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +TAGTTTCTCTATGTACA ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +TGTCTGAGTATACACAT ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/fastx_barcode_splitter1.out Wed Sep 25 11:06:38 2013 -0400 @@ -0,0 +1,24 @@ +<html><body><table border=1> +<tr><td> +Barcode</td><td>Count</td><td>Location +</td></tr> +<tr><td> +BC1</td><td>11</td><td><a href="fastx_barcode_splitter1_fastq__BC1.txt">fastx_barcode_splitter1_fastq__BC1.txt</a> +</td></tr> +<tr><td> +BC2</td><td>12</td><td><a href="fastx_barcode_splitter1_fastq__BC2.txt">fastx_barcode_splitter1_fastq__BC2.txt</a> +</td></tr> +<tr><td> +BC3</td><td>9</td><td><a href="fastx_barcode_splitter1_fastq__BC3.txt">fastx_barcode_splitter1_fastq__BC3.txt</a> +</td></tr> +<tr><td> +BC4</td><td>1</td><td><a href="fastx_barcode_splitter1_fastq__BC4.txt">fastx_barcode_splitter1_fastq__BC4.txt</a> +</td></tr> +<tr><td> +unmatched</td><td>9</td><td><a href="fastx_barcode_splitter1_fastq__unmatched.txt">fastx_barcode_splitter1_fastq__unmatched.txt</a> +</td></tr> +<tr><td> +total</td><td>42 +</td></tr> +<p> +</table></body></html>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/fastx_barcode_splitter1.txt Wed Sep 25 11:06:38 2013 -0400 @@ -0,0 +1,4 @@ +BC1 GATCT +BC2 ATCGT +BC3 GTGAT +BC4 TGTCT \ No newline at end of file
<?xml version="1.0"?>
<!--
    tool_dependencies.xml

    Declares the fastx_toolkit 0.0.13 package and the tool shed repository
    (name, owner and changeset revision) that provides it.
-->
<tool_dependency>
    <package name="fastx_toolkit" version="0.0.13">
        <repository
            changeset_revision="ec66ae4c269b"
            name="package_fastx_toolkit_0_0_13"
            owner="devteam"
            toolshed="http://toolshed.g2.bx.psu.edu" />
    </package>
</tool_dependency>