Mercurial > repos > lparsons > fastq_join

<tool id="fastq_join" name="fastq-join" version="1.1.2-806.1">
    <!-- Short summary of what the tool does. -->
    <description> - Joins two paired-end reads on the overlapping ends</description>

    <!-- External software this wrapper depends on: the ea-utils package, pinned to 1.1.2-806. -->
    <requirements>
        <requirement type="package"
                     version="1.1.2-806">ea-utils</requirement>
    </requirements>

    <!-- Exit-code handling: any exit code of 1 or higher is reported as a fatal error. -->
    <stdio>
        <exit_code range="1:"
                   level="fatal"
                   description="Unknown error occurred"/>
    </stdio>

    <!-- Cheetah template that builds the fastq-join command line.
         The two mate files are picked first (separate datasets or the forward/reverse members
         of a paired collection) and then passed to a single fastq-join invocation.
         The three -o options are given in the order unmatched1, unmatched2, joined. -->
    <command><![CDATA[
        #if str( $input_type.input_type_selector ) == 'paired':
            #set $mate_one = $input_type.read1
            #set $mate_two = $input_type.read2
        #else:
            #set $mate_one = $input_type.input_collection.forward
            #set $mate_two = $input_type.input_collection.reverse
        #end if
        fastq-join
            -v "${splitChar}"
            -p "${pctMaxDiff}"
            -m "${minOverlap}"
            #if $stitchLengthReport:
                -r "${outputStitchLengthReport}"
            #end if
            "${mate_one}"
            "${mate_two}"
            -o "${outputUnmatched1}"
            -o "${outputUnmatched2}"
            -o "${outputJoined}"
    ]]></command>

    <inputs>
        <!-- The paired reads are supplied either as two separate datasets or as a single
             paired dataset collection; the command template branches on input_type_selector. -->
        <conditional name="input_type">
            <param name="input_type_selector" type="select" label="Dataset type">
                <option value="paired">Paired-end</option>
                <option value="paired_collection">Paired-end Dataset Collection</option>
            </param>
            <when value="paired">
                <param format="fastqsanger" name="read1" type="data" label="Read 1 Fastq" help="Nucleotide-space: Must have PHRED-scaled quality values with offset 33 (fastqsanger)." />
                <param format="fastqsanger" name="read2" type="data" label="Read 2 Fastq" help="Nucleotide-space: Must have PHRED-scaled quality values with offset 33 (fastqsanger)." />
            </when>
            <when value="paired_collection">
                <param name="input_collection" format="fastqsanger"
                    type="data_collection" collection_type="paired"
                    label="FASTQ Paired Dataset" help="Nucleotide-space: Must have PHRED-scaled quality values with offset 33 (fastqsanger)." />
            </when>
        </conditional>
        <!-- Passed to fastq-join as -v. -->
        <param name="splitChar" type="text" value=" " label="Split read ids on this character" help="Default is space ' ' for Illumina reads" />
        <!-- Passed to fastq-join as -p; restricted to the range 0 to 100. -->
        <param name="pctMaxDiff" type="float" value="8" min="0" max="100" label="Maximum percentage difference between matching segments" />
        <!-- Passed to fastq-join as -m; must be at least 1. -->
        <param name="minOverlap" type="integer" value="6" min="1" label="Minimum length of matching segments" />
        <!-- When enabled, the command adds -r and the stitch length report output is created.
             Boolean defaults are expressed with "checked"; the default remains off. -->
        <param name="stitchLengthReport" type="boolean" checked="false" label="Output verbose stitch length report" />
    </inputs>

    <!-- Datasets produced by a run: the joined reads, the two sets of reads that could not
         be joined, and an optional stitch length report. -->
    <outputs>
        <data name="outputJoined"
              format="fastqsanger"
              label="${tool.name} on ${on_string} (joined)"/>
        <data name="outputUnmatched1"
              format="fastqsanger"
              label="${tool.name} on ${on_string} (unmatched1)"/>
        <data name="outputUnmatched2"
              format="fastqsanger"
              label="${tool.name} on ${on_string} (unmatched2)"/>
        <!-- Kept only when the stitchLengthReport input evaluates to true. -->
        <data name="outputStitchLengthReport"
              format="tabular"
              label="${tool.name} on ${on_string} (stitch length report)">
            <filter>stitchLengthReport</filter>
        </data>
    </outputs>

    <!-- Functional test: joins two separate fastqsanger files with the default settings and
         compares the joined and both unmatched outputs against stored reference files.
         NOTE(review): read2 uses test_read3.fastq rather than a test_read2.fastq; confirm
         that this is the intended mate file. -->
    <tests>
        <test>
            <param name="read1"
                   value="test_read1.fastq"
                   ftype="fastqsanger"/>
            <param name="read2"
                   value="test_read3.fastq"
                   ftype="fastqsanger"/>
            <output name="outputJoined"
                    file="testout.join.fastq"/>
            <output name="outputUnmatched1"
                    file="testout.un1.fastq"/>
            <output name="outputUnmatched2"
                    file="testout.un2.fastq"/>
        </test>
    </tests>

    <help><![CDATA[
Overview
--------
fastq-join joins two paired-end reads on the overlapping ends.

Split read ids character: Verifies that the read ids in the two files match up to this character. Use ' ' (a space) for Illumina reads.

Maximum difference is the maximum allowed percentage of bases that differ in the matching region.

Minimum overlap is the minimum number of bases that must overlap (with no more than the maximum difference) for reads to be joined.

Verbose stitch length report is a report that shows, for each joined pair of reads, how large the overlapping section was.

This tool uses sqr(distance)/len as its measure of anchored alignment quality. It is a good measure of anchored alignment quality, akin to squared deviation for means. This tool uses the fastq-join program, which is part of the ea-utils suite. See http://code.google.com/p/ea-utils/wiki/FastqJoin for details.
]]>
    </help>

    <!-- Publication to cite for this tool: Aronesty (2013), "Comparison of Sequencing Utility
         Programs", The Open Bioinformatics Journal 7(1), given as a BibTeX entry. -->
    <citations>
        <citation type="bibtex">
            @article{aronesty_comparison_2013,
                title = {Comparison of {Sequencing} {Utility} {Programs}},
                volume = {7},
                issn = {18750362},
                url = {http://benthamopen.com/ABSTRACT/TOBIOIJ-7-1},
                doi = {10.2174/1875036201307010001},
                language = {en},
                number = {1},
                urldate = {2015-07-10},
                journal = {The Open Bioinformatics Journal},
                author = {Aronesty, Erik},
                month = jan,
                year = {2013},
                pages = {1--8}
            }
        </citation>
    </citations>

</tool>
author	lparsons
date	Wed, 16 Dec 2015 16:01:08 -0500
parents	b007d9642911
children