view split_libraries_fastq.xml @ 6:c2ffcfff57f6 draft default tip

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/qiime/ commit c845cb240f57663cf1e2240c5c506ea0b294872c"
author iuc
date Thu, 05 Dec 2019 07:54:57 -0500
parents 1327fee2bf93
children
line wrap: on
line source

<tool id="qiime_split_libraries_fastq" name="Split fastq libraries" version="@WRAPPER_VERSION@.0" profile="@PROFILE@">
    <!-- One-line summary of the tool. -->
    <description>to perform demultiplexing of Fastq sequence data (split_libraries_fastq)</description>
    <!-- Import shared definitions from macros.xml. Presumably this is where the macros
         expanded below and the @...@ tokens used in this file are defined (TODO confirm). -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <!-- Expand the macro named "requirements". -->
    <expand macro="requirements"/>
    <!-- Command used to report the version of the wrapped script. -->
    <version_command>split_libraries_fastq.py --version</version_command>
    <!--
        Command line template (Cheetah) for split_libraries_fastq.py.

        Multi-valued dataset inputs are turned into comma-separated path lists.
        Optional inputs are only added to the command line when they hold a real
        value. Boolean and select parameters expand directly to their flag text
        (or to nothing). Results are written to the "split_libraries" directory,
        from which the outputs section collects its files.
    -->
    <command detect_errors="aggressive"><![CDATA[
@MPLBACKEND@
split_libraries_fastq.py
    ## Required reads: one comma-separated list of dataset paths.
    #set $seq_files = ','.join(map(str, $sequence_read_fps))
    --sequence_read_fps '$seq_files'

    -o split_libraries

    ## Optional mapping files.
    ## NOTE(review): an unset optional multi-dataset input has been observed to
    ## render as the string 'None'; an empty list renders as ''. Both are treated
    ## as "not provided" so that an empty option value is never passed.
    #set $mapping_files = ','.join(map(str, $mapping_fps))
    #if $mapping_files not in ('', 'None')
        --mapping_fps '$mapping_files'
    #end if

    ## Optional barcode read files (same "not provided" handling as above).
    #set $barcode_files = ','.join(map(str, $barcode_read_fps))
    #if $barcode_files not in ('', 'None')
        --barcode_read_fps '$barcode_files'
    #end if

    $store_qual_scores
    ## Skip the option when the text field is empty or unset; an unset optional
    ## text value may render as 'None' (assumption, depends on the Galaxy version).
    #if str($sample_ids) not in ('', 'None')
        --sample_ids '$sample_ids'
    #end if
    $store_demultiplexed_fastq
    $retain_unassigned_reads

    --max_bad_run_length '$max_bad_run_length'
    --min_per_read_length_fraction '$min_per_read_length_fraction'
    --sequence_max_n '$sequence_max_n'
    --start_seq_id '$start_seq_id'
    $rev_comp_barcode
    $rev_comp_mapping_barcodes
    $rev_comp
    --phred_quality_threshold '$phred_quality_threshold'
    ## For "custom_length" the numeric barcode length is passed as the barcode type.
    #if str( $barcode.barcode_type ) != "custom_length"
        --barcode_type '$barcode.barcode_type'
    #else
        --barcode_type '$barcode.barcode_length'
    #end if
    --max_barcode_errors '$max_barcode_errors'
    ## Expands to "--phred_offset 33", "--phred_offset 64" or nothing.
    $phred_offset
    ]]></command>
    <inputs>
        <!-- Datasets: reads are required, mapping and barcode files are optional. -->
        <param argument="--sequence_read_fps"
               type="data"
               format="fastq,fastqsanger,fastqsolexa"
               multiple="true"
               label="Input fastq files"/>
        <param argument="--mapping_fps"
               type="data"
               format="txt,tabular,tsv,csv"
               multiple="true"
               optional="true"
               label="Metadata mapping files"/>
        <param argument="--barcode_read_fps"
               type="data"
               format="fastq,fastqsanger,fastqsolexa"
               multiple="true"
               optional="true"
               label="Barcode read files"/>

        <!-- Output selection and sample labelling. -->
        <param argument="--store_qual_scores"
               type="boolean"
               truevalue="--store_qual_scores"
               falsevalue=""
               checked="false"
               label="Store quality strings in files?"/>
        <param argument="--sample_ids"
               type="text"
               optional="true"
               label="Comma-separated list of samples ids to be applied to all sequences"
               help="It must be one per input file path (used when data is not multiplexed)"/>
        <param argument="--store_demultiplexed_fastq"
               type="boolean"
               truevalue="--store_demultiplexed_fastq"
               falsevalue=""
               checked="false"
               label="Write demultiplexed fastq files?"/>
        <param argument="--retain_unassigned_reads"
               type="boolean"
               truevalue="--retain_unassigned_reads"
               falsevalue=""
               checked="false"
               label="Retain sequences which don’t map to a barcode in the mapping file?"
               help="Sample ID will be 'Unassigned'"/>

        <!-- Quality filtering and sequence numbering. -->
        <param argument="--max_bad_run_length"
               type="integer"
               value="3"
               label="Maximum number of consecutive low quality base calls allowed before truncating a read"/>
        <param argument="--min_per_read_length_fraction"
               type="float"
               value="0.75"
               label="Minimum number of consecutive high quality base calls to include a read (per single end read) as a fraction of the input read length"/>
        <param argument="--sequence_max_n"
               type="integer"
               value="0"
               label="Maximum number of N characters allowed in a sequence to retain it"
               help="This is applied after quality trimming, and is total over combined paired end reads if applicable"/>
        <param argument="--start_seq_id"
               type="integer"
               value="0"
               label="Start seq_ids as ascending integers beginning with start_seq_id"/>

        <!-- Reverse-complement switches. -->
        <param argument="--rev_comp_barcode"
               type="boolean"
               truevalue="--rev_comp_barcode"
               falsevalue=""
               checked="false"
               label="Reverse complement barcode reads before lookup?"/>
        <param argument="--rev_comp_mapping_barcodes"
               type="boolean"
               truevalue="--rev_comp_mapping_barcodes"
               falsevalue=""
               checked="false"
               label="Reverse complement barcode in mapping before lookup?"
               help="It is useful if barcodes in mapping file are reverse complements of golay codes"/>
        <param argument="--rev_comp"
               type="boolean"
               truevalue="--rev_comp"
               falsevalue=""
               checked="false"
               label="Reverse complement sequence before writing to output file?"/>

        <param argument="--phred_quality_threshold"
               type="integer"
               value="3"
               label="Maximum unacceptable Phred quality score"
               help="E.g., for Q20 and better, 19 must be specified"/>

        <!-- Barcode handling: only "custom_length" exposes an extra field (the length). -->
        <conditional name="barcode">
            <param argument="--barcode_type"
                   type="select"
                   label="Type of barcode">
                <option value="hamming_8">hamming_8</option>
                <option value="golay_12" selected="true">golay_12</option>
                <option value="variable_length">variable_length (disable any barcode correction)</option>
                <option value="custom_length">Custom length</option>
                <option value="not-barcoded">Data not barcoded</option>
            </param>
            <when value="hamming_8"/>
            <when value="golay_12"/>
            <when value="variable_length"/>
            <when value="custom_length">
                <param name="barcode_length"
                       type="integer"
                       value="4"
                       label="Barcode length"/>
            </when>
            <when value="not-barcoded"/>
        </conditional>
        <param argument="--max_barcode_errors"
               type="float"
               value="1.5"
               label="Maximum number of errors in barcode"/>

        <!-- Each option value is the literal command line text; the empty value adds nothing. -->
        <param argument="--phred_offset"
               type="select"
               label="Ascii offset to use when decoding phred scores">
            <option value="--phred_offset 33">33</option>
            <option value="--phred_offset 64">64</option>
            <option value="" selected="true">Automatically determined</option>
        </param>
    </inputs>
    <outputs>
        <!-- Outputs that are always declared; each is collected from the split_libraries directory. -->
        <data name="log"
              format="txt"
              from_work_dir="split_libraries/split_library_log.txt"
              label="${tool.name} on ${on_string}: log"/>
        <data name="histograms"
              format="tabular"
              from_work_dir="split_libraries/histograms.txt"
              label="${tool.name} on ${on_string}: histograms"/>
        <data name="seqs"
              format="fasta"
              from_work_dir="split_libraries/seqs.fna"
              label="${tool.name} on ${on_string}: sequences"/>

        <!-- Outputs declared only when the matching boolean input is enabled. -->
        <data name="seqs_qual"
              format="qual"
              from_work_dir="split_libraries/seqs.qual"
              label="${tool.name} on ${on_string}: sequence qualities">
            <filter>store_qual_scores is True</filter>
        </data>
        <data name="seqs_fastq"
              format="fastq"
              from_work_dir="split_libraries/seqs.fastq"
              label="${tool.name} on ${on_string}: demultiplexed sequences (fastq)">
            <filter>store_demultiplexed_fastq is True</filter>
        </data>
    </outputs>
    <tests>
        <!-- Test 1: one lane, golay_12 barcodes; checks log, sequences and histograms. -->
        <test>
            <!-- input datasets -->
            <param name="sequence_read_fps" value="split_libraries_fastq/lane1_read1.fastq.gz"/>
            <param name="mapping_fps" value="split_libraries_fastq/map.txt"/>
            <param name="barcode_read_fps" value="split_libraries_fastq/lane1_barcode.fastq.gz"/>

            <!-- options -->
            <param name="store_qual_scores" value=""/>
            <param name="store_demultiplexed_fastq" value=""/>
            <param name="retain_unassigned_reads" value=""/>
            <param name="max_bad_run_length" value="3"/>
            <param name="min_per_read_length_fraction" value="0.75"/>
            <param name="sequence_max_n" value="0"/>
            <param name="start_seq_id" value="0"/>
            <param name="rev_comp_barcode" value=""/>
            <param name="rev_comp_mapping_barcodes" value="--rev_comp_mapping_barcodes"/>
            <param name="rev_comp" value=""/>
            <param name="phred_quality_threshold" value="19"/>
            <conditional name="barcode">
                <param name="barcode_type" value="golay_12"/>
            </conditional>
            <param name="max_barcode_errors" value="1.5"/>
            <param name="phred_offset" value=""/>

            <!-- expected outputs -->
            <output name="log">
                <assert_contents>
                    <has_text text="Median sequence length: 151.00"/>
                    <has_text text="s41"/>
                    <has_text text="s122"/>
                </assert_contents>
            </output>
            <output name="seqs" ftype="fasta">
                <assert_contents>
                    <has_text text="s161_0 M00176:17:000000000-A0CNA:1:1:16738:1773 1:N:0:0 orig_bc=CTCGCTTCACTT new_bc=CTCGCTTCACTT bc_diffs=0"/>
                    <has_text text="s26_84 M00176:17:000000000-A0CNA:1:1:18075:1844 1:N:0:0 orig_bc=AGGGTTCCAGTT new_bc=AGGGTTCCAGTT bc_diffs=0"/>
                </assert_contents>
            </output>
            <output name="histograms">
                <assert_contents>
                    <has_text text="Length"/>
                    <has_text text="127.0"/>
                    <has_text text="157.0"/>
                </assert_contents>
            </output>
        </test>
        <!-- Test 2: one lane, golay_12 barcodes, with quality and demultiplexed fastq outputs enabled. -->
        <test>
            <!-- input datasets -->
            <param name="sequence_read_fps" value="split_libraries_fastq/lane1_read1.fastq.gz"/>
            <param name="mapping_fps" value="split_libraries_fastq/map.txt"/>
            <param name="barcode_read_fps" value="split_libraries_fastq/lane1_barcode.fastq.gz"/>

            <!-- options -->
            <param name="store_qual_scores" value="--store_qual_scores"/>
            <param name="store_demultiplexed_fastq" value="--store_demultiplexed_fastq"/>
            <param name="retain_unassigned_reads" value=""/>
            <param name="max_bad_run_length" value="3"/>
            <param name="min_per_read_length_fraction" value="0.75"/>
            <param name="sequence_max_n" value="0"/>
            <param name="start_seq_id" value="0"/>
            <param name="rev_comp_barcode" value=""/>
            <param name="rev_comp_mapping_barcodes" value="--rev_comp_mapping_barcodes"/>
            <param name="rev_comp" value=""/>
            <param name="phred_quality_threshold" value="19"/>
            <conditional name="barcode">
                <param name="barcode_type" value="golay_12"/>
            </conditional>
            <param name="max_barcode_errors" value="1.5"/>
            <param name="phred_offset" value=""/>

            <!-- expected outputs -->
            <output name="log">
                <assert_contents>
                    <has_text text="Median sequence length: 151.00"/>
                    <has_text text="s41"/>
                    <has_text text="s122"/>
                </assert_contents>
            </output>
            <output name="seqs" ftype="fasta">
                <assert_contents>
                    <has_text text="s161_0 M00176:17:000000000-A0CNA:1:1:16738:1773 1:N:0:0 orig_bc=CTCGCTTCACTT new_bc=CTCGCTTCACTT bc_diffs=0"/>
                    <has_text text="s107_6 M00176:17:000000000-A0CNA:1:1:15276:1779 1:N:0:0 orig_bc=ATCTCCTCTCCA new_bc=ATCTCCTCTCCA bc_diffs=0"/>
                </assert_contents>
            </output>
            <output name="seqs_fastq" ftype="fastq">
                <assert_contents>
                    <has_text text="@s161_0 M00176:17:000000000-A0CNA:1:1:16738:1773 1:N:0:0 orig_bc=CTCGCTTCACTT new_bc=CTCGCTTCACTT bc_diffs=0"/>
                    <has_text text="@s107_6 M00176:17:000000000-A0CNA:1:1:15276:1779 1:N:0:0 orig_bc=ATCTCCTCTCCA new_bc=ATCTCCTCTCCA bc_diffs=0"/>
                </assert_contents>
            </output>
            <output name="seqs_qual" ftype="qual">
                <assert_contents>
                    <has_text text="s161_0 M00176:17:000000000-A0CNA:1:1:16738:1773 1:N:0:0 orig_bc=CTCGCTTCACTT new_bc=CTCGCTTCACTT bc_diffs=0"/>
                    <has_text text="s107_6 M00176:17:000000000-A0CNA:1:1:15276:1779 1:N:0:0 orig_bc=ATCTCCTCTCCA new_bc=ATCTCCTCTCCA bc_diffs=0"/>
                </assert_contents>
            </output>
            <output name="histograms">
                <assert_contents>
                    <has_text text="Length"/>
                    <has_text text="127.0"/>
                    <has_text text="157.0"/>
                </assert_contents>
            </output>
        </test>
        <!-- Test 3: two lanes (two read files, two mapping entries, two barcode files), golay_12 barcodes. -->
        <test>
            <!-- input datasets -->
            <param name="sequence_read_fps" value="split_libraries_fastq/lane1_read1.fastq.gz,split_libraries_fastq/lane2_read1.fastq.gz"/>
            <param name="mapping_fps" value="split_libraries_fastq/map.txt,split_libraries_fastq/map.txt"/>
            <param name="barcode_read_fps" value="split_libraries_fastq/lane1_barcode.fastq.gz,split_libraries_fastq/lane2_barcode.fastq.gz"/>

            <!-- options -->
            <param name="store_qual_scores" value=""/>
            <param name="store_demultiplexed_fastq" value=""/>
            <param name="retain_unassigned_reads" value=""/>
            <param name="max_bad_run_length" value="3"/>
            <param name="min_per_read_length_fraction" value="0.75"/>
            <param name="sequence_max_n" value="0"/>
            <param name="start_seq_id" value="0"/>
            <param name="rev_comp_barcode" value=""/>
            <param name="rev_comp_mapping_barcodes" value="--rev_comp_mapping_barcodes"/>
            <param name="rev_comp" value=""/>
            <param name="phred_quality_threshold" value="19"/>
            <conditional name="barcode">
                <param name="barcode_type" value="golay_12"/>
            </conditional>
            <param name="max_barcode_errors" value="1.5"/>
            <param name="phred_offset" value=""/>

            <!-- expected outputs -->
            <output name="log">
                <assert_contents>
                    <has_text text="Median sequence length: 151.00"/>
                    <has_text text="s41"/>
                    <has_text text="s122"/>
                </assert_contents>
            </output>
            <output name="seqs" ftype="fasta">
                <assert_contents>
                    <has_text text="s161_0 M00176:17:000000000-A0CNA:1:1:16738:1773 1:N:0:0 orig_bc=CTCGCTTCACTT new_bc=CTCGCTTCACTT bc_diffs=0"/>
                    <has_text text="s26_84 M00176:17:000000000-A0CNA:1:1:18075:1844 1:N:0:0 orig_bc=AGGGTTCCAGTT new_bc=AGGGTTCCAGTT bc_diffs=0"/>
                </assert_contents>
            </output>
            <output name="histograms">
                <assert_contents>
                    <has_text text="Length"/>
                    <has_text text="127.0"/>
                    <has_text text="157.0"/>
                </assert_contents>
            </output>
        </test>
        <!-- Test 4: data not barcoded; one read file labelled with one explicit sample id. -->
        <test>
            <!-- input dataset and sample label -->
            <param name="sequence_read_fps" value="split_libraries_fastq/lane1_read1.fastq.gz"/>
            <param name="store_qual_scores" value=""/>
            <param name="sample_ids" value="my.sample.1"/>

            <!-- options -->
            <param name="store_demultiplexed_fastq" value=""/>
            <param name="retain_unassigned_reads" value=""/>
            <param name="max_bad_run_length" value="3"/>
            <param name="min_per_read_length_fraction" value="0.75"/>
            <param name="sequence_max_n" value="0"/>
            <param name="start_seq_id" value="0"/>
            <param name="rev_comp_barcode" value=""/>
            <param name="rev_comp_mapping_barcodes" value="--rev_comp_mapping_barcodes"/>
            <param name="rev_comp" value=""/>
            <param name="phred_quality_threshold" value="19"/>
            <conditional name="barcode">
                <param name="barcode_type" value="not-barcoded"/>
            </conditional>
            <param name="phred_offset" value=""/>

            <!-- expected outputs -->
            <output name="log">
                <assert_contents>
                    <has_text text="Median sequence length: 151.00"/>
                    <has_text text="my.sample.1"/>
                    <has_text text="Total number seqs written"/>
                </assert_contents>
            </output>
            <output name="seqs" ftype="fasta">
                <assert_contents>
                    <has_text text="my.sample.1_0 M00176:17:000000000-A0CNA:1:1:15487:1773 1:N:0:0 orig_bc=AAAAAAAAAAAA new_bc=AAAAAAAAAAAA bc_diffs=0"/>
                    <has_text text="my.sample.1_12 M00176:17:000000000-A0CNA:1:1:14889:1778 1:N:0:0 orig_bc=AAAAAAAAAAAA new_bc=AAAAAAAAAAAA bc_diffs=0"/>
                </assert_contents>
            </output>
            <output name="histograms">
                <assert_contents>
                    <has_text text="Length"/>
                    <has_text text="124.0"/>
                    <has_text text="154.0"/>
                </assert_contents>
            </output>
        </test>
        <!-- Test 5: data not barcoded; two read files labelled with two explicit sample ids. -->
        <test>
            <!-- input datasets and sample labels -->
            <param name="sequence_read_fps" value="split_libraries_fastq/lane1_read1.fastq.gz,split_libraries_fastq/lane2_read1.fastq.gz"/>
            <param name="store_qual_scores" value=""/>
            <param name="sample_ids" value="my.sample.1,my.sample.2"/>

            <!-- options -->
            <param name="store_demultiplexed_fastq" value=""/>
            <param name="retain_unassigned_reads" value=""/>
            <param name="max_bad_run_length" value="3"/>
            <param name="min_per_read_length_fraction" value="0.75"/>
            <param name="sequence_max_n" value="0"/>
            <param name="start_seq_id" value="0"/>
            <param name="rev_comp_barcode" value=""/>
            <param name="rev_comp_mapping_barcodes" value="--rev_comp_mapping_barcodes"/>
            <param name="rev_comp" value=""/>
            <param name="phred_quality_threshold" value="19"/>
            <conditional name="barcode">
                <param name="barcode_type" value="not-barcoded"/>
            </conditional>
            <param name="phred_offset" value=""/>

            <!-- expected outputs -->
            <output name="log">
                <assert_contents>
                    <has_text text="Median sequence length: 151.00"/>
                    <has_text text="my.sample.1"/>
                    <has_text text="Total number seqs written"/>
                </assert_contents>
            </output>
            <output name="seqs" ftype="fasta">
                <assert_contents>
                    <has_text text="my.sample.1_0 M00176:17:000000000-A0CNA:1:1:15487:1773 1:N:0:0 orig_bc=AAAAAAAAAAAA new_bc=AAAAAAAAAAAA bc_diffs=0"/>
                    <has_text text="my.sample.1_12 M00176:17:000000000-A0CNA:1:1:14889:1778 1:N:0:0 orig_bc=AAAAAAAAAAAA new_bc=AAAAAAAAAAAA bc_diffs=0"/>
                </assert_contents>
            </output>
            <output name="histograms">
                <assert_contents>
                    <has_text text="Length"/>
                    <has_text text="124.0"/>
                    <has_text text="154.0"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
**What it does**

This tool performs demultiplexing of Fastq sequence data where barcodes and sequences are contained in two separate fastq files (common on Illumina runs).

More information about this tool is available in the
`QIIME documentation <http://qiime.org/scripts/split_libraries_fastq.html>`_.
    ]]></help>
    <!-- Citation entries come from the macro named "citations"; presumably it is
         defined in the imported macros.xml (TODO confirm). -->
    <citations>
        <expand macro="citations"/>
    </citations>
</tool>