view fastq-join.xml @ 3:8ec3dfde378b draft default tip

planemo upload for repository https://github.com/lparsons/galaxy_tools/tree/master/tools/fastq_join commit a7f8461032de196092033c9ceae9f4407a469c44
author lparsons
date Wed, 16 Dec 2015 16:01:08 -0500
parents b007d9642911
children
line wrap: on
line source

<tool id="fastq_join" name="fastq-join" version="1.1.2-806.1">
    <!-- Galaxy tool wrapper around the fastq-join program. The version string extends the
         ea-utils package version declared in the requirements section (1.1.2-806) with a
         ".1" suffix; presumably that suffix is the wrapper revision (confirm with maintainer). -->
    <description> - Joins two paired-end reads on the overlapping ends</description>

    <!-- External dependency: the ea-utils package, pinned to release 1.1.2-806. -->
    <requirements>
        <requirement version="1.1.2-806" type="package">ea-utils</requirement>
    </requirements>

    <!-- Error handling: every exit code of 1 or above is reported as a fatal error. -->
    <stdio>
        <exit_code
            level="fatal"
            range="1:"
            description="Unknown error occurred" />
    </stdio>

    <!-- Command line template for fastq-join.
         Options: -v read id split character, -p maximum percentage difference,
         -m minimum overlap, -r stitch length report (only when requested).
         The two positional arguments are the read files, taken either from the two
         individual datasets or from the forward/reverse members of the paired collection.
         The three -o arguments are passed in the order unmatched1, unmatched2, joined.
         Every interpolated value is single-quoted so that the shell passes it through
         literally (no variable, backtick or backslash processing). -->
    <command><![CDATA[
        fastq-join
        -v '${splitChar}'
        -p '${pctMaxDiff}'
        -m '${minOverlap}'
        #if $stitchLengthReport:
            -r '${outputStitchLengthReport}'
        #end if
        #if str( $input_type.input_type_selector ) == 'paired':
            '${input_type.read1}'
            '${input_type.read2}'
        #else:
            '${input_type.input_collection.forward}'
            '${input_type.input_collection.reverse}'
        #end if
        -o '${outputUnmatched1}'
        -o '${outputUnmatched2}'
        -o '${outputJoined}'
    ]]></command>

    <!-- User-facing parameters. The reads are supplied either as two separate datasets
         or as a single paired dataset collection, chosen through the input_type conditional. -->
    <inputs>
        <conditional name="input_type">
            <param name="input_type_selector" type="select" label="Dataset type">
                <option value="paired">Paired-end</option>
                <option value="paired_collection">Paired-end Dataset Collection</option>
            </param>
            <!-- Two individual fastqsanger datasets. -->
            <when value="paired">
                <param name="read1" type="data" format="fastqsanger" label="Read 1 Fastq" help="Nucleotide-space: Must have PHRED-scaled quality values with offset 33 (fastqsanger)." />
                <param name="read2" type="data" format="fastqsanger" label="Read 2 Fastq" help="Nucleotide-space: Must have PHRED-scaled quality values with offset 33 (fastqsanger)." />
            </when>
            <!-- One paired collection; the command template reads its forward and reverse members. -->
            <when value="paired_collection">
                <param name="input_collection" type="data_collection" collection_type="paired"
                    format="fastqsanger"
                    label="FASTQ Paired Dataset" help="Nucleotide-space: Must have PHRED-scaled quality values with offset 33 (fastqsanger)." />
            </when>
        </conditional>
        <!-- Passed to fastq-join as -v. -->
        <param name="splitChar" type="text" value=" " label="Split read ids on this character" help="Default is space ' ' for Illumina reads" />
        <!-- Passed to fastq-join as -p. -->
        <param name="pctMaxDiff" type="float" value="8" min="0" max="100" label="Maximum percentage difference between matching segments" />
        <!-- Passed to fastq-join as -m. -->
        <param name="minOverlap" type="integer" value="6" min="1" label="Minimum length of matching segments" />
        <!-- When enabled, adds -r to the command line and produces the stitch length report output.
             Boolean defaults are declared with "checked" rather than "value". -->
        <param name="stitchLengthReport" type="boolean" checked="false" label="Output verbose stitch length report" />
    </inputs>

    <!-- The joined and the two unmatched FASTQ datasets are always declared;
         the tabular stitch length report is conditional. -->
    <outputs>
        <data name="outputJoined" format="fastqsanger" label="${tool.name} on ${on_string} (joined)" />
        <data name="outputUnmatched1" format="fastqsanger" label="${tool.name} on ${on_string} (unmatched1)" />
        <data name="outputUnmatched2" format="fastqsanger" label="${tool.name} on ${on_string} (unmatched2)" />
        <data name="outputStitchLengthReport" format="tabular" label="${tool.name} on ${on_string} (stitch length report)">
            <!-- Kept only when the stitchLengthReport input is enabled. -->
            <filter>stitchLengthReport</filter>
        </data>
    </outputs>

    <!-- Single functional test: joins two individual FASTQ datasets with all other
         parameters left at their defaults and compares the three FASTQ outputs
         against the stored expected files. -->
    <tests>
        <test>
            <param name="read1" ftype="fastqsanger" value="test_read1.fastq" />
            <param name="read2" ftype="fastqsanger" value="test_read3.fastq" />
            <output name="outputJoined" file="testout.join.fastq" />
            <output name="outputUnmatched1" file="testout.un1.fastq" />
            <output name="outputUnmatched2" file="testout.un2.fastq" />
        </test>
    </tests>

    <help><![CDATA[
Overview
--------
fastq-join joins two paired-end reads on the overlapping ends.

Split read ids character: Verifies that the read ids in the two files match up to this character. Use ' ' (space) for Illumina reads.

Maximum difference is the maximum allowed percentage of bases that differ in the matching region.

Minimum overlap is the minimum number of bases that must overlap (with no more than the maximum difference) for reads to be joined.

Verbose stitch length report is a report for each joined pair of reads showing how large the overlapping section was.

This tool uses sqr(distance)/len as its anchored alignment quality score. It is a good measure of anchored alignment quality, akin to squared-deviation for means.  This tool uses the fastq-join program that is part of the ea-utils suite. See http://code.google.com/p/ea-utils/wiki/FastqJoin for details.
]]>
    </help>

    <citations>
        <!-- BibTeX entry for the article to cite when using this tool
             (Aronesty, 2013, The Open Bioinformatics Journal). -->
        <citation type="bibtex">
            @article{aronesty_comparison_2013,
                title = {Comparison of {Sequencing} {Utility} {Programs}},
                volume = {7},
                issn = {18750362},
                url = {http://benthamopen.com/ABSTRACT/TOBIOIJ-7-1},
                doi = {10.2174/1875036201307010001},
                language = {en},
                number = {1},
                urldate = {2015-07-10},
                journal = {The Open Bioinformatics Journal},
                author = {Aronesty, Erik},
                month = jan,
                year = {2013},
                pages = {1--8}
            }
        </citation>
    </citations>

</tool>