view fastx_barcode_splitter.xml @ 5:4bedca26c133 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/fastx_toolkit/fastx_barcode_splitter commit b2735803f9719ff955a49a5bde54a211d6d48bb5
author iuc
date Thu, 11 Oct 2018 09:35:51 -0400
parents 015dc921d814
children 0b7ec11b3c7c
line wrap: on
line source

<tool id="cshl_fastx_barcode_splitter" version="1.0.1" name="Barcode Splitter">
    <description></description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <!--
        Command template (Cheetah).
        Creates the "split" working directory, runs the bundled
        fastx_barcode_splitter.pl with the barcode file, and redirects the
        script's standard output to the summary dataset. The CATS token is
        defined in macros.xml (not visible here); presumably it streams the
        input library, decompressing it if needed, into the script - confirm
        against macros.xml.
        Split files are written under split/ with a suffix naming the base
        datatype, so the output collection can take each dataset's format from
        the file extension.
    -->
    <command detect_errors="aggressive"><![CDATA[
mkdir split &&
@CATS@ '$__tool_directory__/fastx_barcode_splitter.pl' --bcfile '$BARCODE'
--prefix 'split/'
--suffix
## Map compressed variants onto their uncompressed base datatype: the split
## files are expected to be plain text, so the suffix must not end in gz/bz2.
#if $input.is_of_type('fastqsanger', 'fastqsanger.gz', 'fastqsanger.bz2'):
    '.fastqsanger'
#elif $input.is_of_type('fastqsolexa', 'fastqsolexa.gz', 'fastqsolexa.bz2'):
    '.fastqsolexa'
#elif $input.is_of_type('fastqillumina', 'fastqillumina.gz', 'fastqillumina.bz2'):
    '.fastqillumina'
#elif $input.is_of_type('fasta', 'fasta.gz'):
    '.fasta'
## Generic fastq must come after the specific variants above, which it would
## otherwise shadow.
#elif $input.is_of_type('fastq', 'fastq.gz', 'fastq.bz2'):
    '.fastq'
#else:
    '.$input.extension'
#end if
--mismatches $mismatches
--partial $partial
#if $refBarcodeLocation.barcodeLocation == "idxfile":
    --idxfile '$refBarcodeLocation.idxfile'
    --idxidstrip $refBarcodeLocation.idxidstrip
#else:
    ## The hidden EOL parameter carries the literal flag (bol or eol variant).
    $refBarcodeLocation.EOL
#end if
> '$summary'
    ]]></command>
    <inputs>
        <!-- Barcode definitions; handed to the splitter script as its barcode file. -->
        <param name="BARCODE" type="data" format="txt" label="Barcodes to use" />
        <!-- Library to split; most accepted datatypes come from tokens defined in macros.xml. -->
        <param name="input" type="data" format="@FASTAS@,@FASTQS@,fastq" label="Library to split" />
        <!-- Where the barcode is read from: either end of each read, or a separate index-read file. -->
        <conditional name="refBarcodeLocation">
            <param name="barcodeLocation" type="select" label="Barcodes found at">
                <option value="bol">Start of sequence (5' end)</option>
                <option value="eol">End of sequence (3' end)</option>
                <option value="idxfile">Separate index file</option>
            </param>
            <!-- For bol/eol the hidden EOL parameter holds the literal flag inserted into the command line. -->
            <when value="bol">
                <param name="EOL" type="hidden" value="--bol" />
            </when>
            <when value="eol">
                <param name="EOL" type="hidden" value="--eol" />
            </when>
            <when value="idxfile">
                <!-- A character count, so negative values are rejected. -->
                <param argument="--idxidstrip" type="integer" min="0" value="1" label="Characters to strip from the end of the sequence id before matching" />
                <param argument="--idxfile" type="data" format="fasta,fastq,fastqsanger" label="Select index read file" />
            </when>
        </conditional>
        <!-- Both tolerances are counts, so negative values are rejected. -->
        <param argument="--mismatches" type="integer" min="0" value="0" label="Number of allowed mismatches" />
        <param argument="--partial" type="integer" min="0" value="0" label="Number of allowed barcodes nucleotide deletions" />
    </inputs>

    <outputs>
        <!-- Tabular report: the command redirects the splitter's standard output into this dataset. -->
        <data
            name="summary"
            format="tabular"
            label="${tool.name} on ${on_string}: Summary" />
        <!--
            One list element per file found in the split/ directory. The built-in
            designation-and-extension pattern takes the element identifier from the
            file name and the datatype from its extension.
        -->
        <collection
            name="split_output"
            type="list"
            label="${tool.name} on ${on_string}">
            <discover_datasets
                pattern="__designation_and_ext__"
                directory="split"
                visible="false" />
        </collection>
    </outputs>

    <tests>
        <test>
            <!-- Split an uncompressed FASTQ file; barcodes at the start of each read -->
            <param name="BARCODE" value="fastx_barcode_splitter1.txt" />
            <param name="input" value="fastx_barcode_splitter1.fastq" ftype="fastqsolexa" />
            <conditional name="refBarcodeLocation">
                <param name="barcodeLocation" value="bol" />
            </conditional>
            <param name="mismatches" value="2" />
            <param name="partial" value="0" />
            <output name="summary" file="fastx_barcode_splitter1.out" />
            <output_collection name="split_output" type="list">
                <element name="BC1" ftype="fastqsolexa" file="fastx_barcode_splitter1_BC1.out" />
                <element name="BC2" ftype="fastqsolexa" file="fastx_barcode_splitter1_BC2.out" />
                <element name="BC3" ftype="fastqsolexa" file="fastx_barcode_splitter1_BC3.out" />
                <element name="BC4" ftype="fastqsolexa" file="fastx_barcode_splitter1_BC4.out" />
                <element name="unmatched" ftype="fastqsolexa" file="fastx_barcode_splitter1_unmatched.out" />
            </output_collection>
        </test>

        <test>
            <!-- Split a gzipped FASTQ file; the split outputs are expected as plain fastqsolexa -->
            <param name="BARCODE" value="fastx_barcode_splitter1.txt" />
            <param name="input" value="fastx_barcode_splitter1.fastq.gz" ftype="fastqsolexa.gz" />
            <conditional name="refBarcodeLocation">
                <param name="barcodeLocation" value="bol" />
            </conditional>
            <param name="mismatches" value="2" />
            <param name="partial" value="0" />
            <output name="summary" file="fastx_barcode_splitter1.out" />
            <output_collection name="split_output" type="list">
                <element name="BC1" ftype="fastqsolexa" file="fastx_barcode_splitter1_BC1.out" />
                <element name="BC2" ftype="fastqsolexa" file="fastx_barcode_splitter1_BC2.out" />
                <element name="BC3" ftype="fastqsolexa" file="fastx_barcode_splitter1_BC3.out" />
                <element name="BC4" ftype="fastqsolexa" file="fastx_barcode_splitter1_BC4.out" />
                <element name="unmatched" ftype="fastqsolexa" file="fastx_barcode_splitter1_unmatched.out" />
            </output_collection>
        </test>

        <test>
            <!-- Split a FASTQ file using barcodes from a separate index read file -->
            <param name="BARCODE" value="fastx_barcode_splitter1.txt" />
            <param name="input" value="fastx_barcode_splitter1.fastq" ftype="fastqsolexa" />
            <conditional name="refBarcodeLocation">
                <param name="barcodeLocation" value="idxfile" />
                <param name="idxfile" value="fastx_barcode_splitter_index.fastq" ftype="fastqsolexa" />
            </conditional>
            <param name="mismatches" value="2" />
            <param name="partial" value="0" />
            <output name="summary" file="fastx_barcode_splitter1.out" />
            <output_collection name="split_output" type="list">
                <element name="BC1" ftype="fastqsolexa" file="fastx_barcode_splitter1_BC1.out" />
                <element name="BC2" ftype="fastqsolexa" file="fastx_barcode_splitter1_BC2.out" />
                <element name="BC3" ftype="fastqsolexa" file="fastx_barcode_splitter1_BC3.out" />
                <element name="BC4" ftype="fastqsolexa" file="fastx_barcode_splitter1_BC4.out" />
                <element name="unmatched" ftype="fastqsolexa" file="fastx_barcode_splitter1_unmatched.out" />
            </output_collection>
        </test>
    </tests>

    <help><![CDATA[
**What it does**

This tool splits a Solexa library (FASTQ file) or a regular FASTA file into several files, using barcodes as the split criteria.

--------

**Barcode file Format**

Barcode files are simple text files.
Each line should contain an identifier (descriptive name for the barcode), and the barcode itself (A/C/G/T), separated by a TAB character.
Example::

    #This line is a comment (starts with a 'number' sign)
    BC1    GATCT
    BC2    ATCGT
    BC3    GTGAT
    BC4    TGTCT

For each barcode, a new file in the format of the input library (FASTQ or FASTA) will be created, named after the barcode's identifier.
Sequences matching the barcode will be stored in the appropriate file.

One additional file will be created (the 'unmatched' file), where sequences not matching any barcode will be stored.

This tool produces two outputs: a tabular summary of the split counts, and a dataset collection (list) holding one dataset per barcode plus the 'unmatched' dataset.

**Output Example**

.. image:: barcode_splitter_output_example.png

------

This tool is based on `FASTX-toolkit`__ by Assaf Gordon.

 .. __: http://hannonlab.cshl.edu/fastx_toolkit/
    ]]></help>
    <expand macro="citations" />
<!-- FASTX-barcode-splitter is part of the FASTX-toolkit, by A.Gordon (gordon@cshl.edu) -->
</tool>