view stacks_procrad.xml @ 7:8cff84759bf6 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit feda4e2ea70c013fcddd1dbdeab73158fe9c86a4
author iuc
date Mon, 23 May 2022 17:57:30 +0000
parents b4bab607eb05
children
line wrap: on
line source

<tool id="stacks2_procrad" name="Stacks2: process radtags" profile="@PROFILE@" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@">
    <description>the Stacks demultiplexing script</description>
    <macros>
        <import>macros.xml</import>
        <import>macros_process.xml</import>
    </macros>
    <expand macro="requirements"/>
    <expand macro="version_cmd"/>
    <command detect_errors="aggressive"><![CDATA[
## Cheetah helper definitions come from the imported macros (token on the next line).
@FASTQ_INPUT_FUNCTIONS@

## Run the barcode file check script first, but only when a barcode encoding is selected.
## The parameter is cast with str() so the comparison is a plain string comparison,
## in line with the enzyme conditions further down.
#if str($input_type.barcode_encoding) != ''
    python '$__tool_directory__'/check_bcfile.py '$input_type.barcode' &&
#end if
mkdir stacks_inputs stacks_outputs &&

## When the log output is requested, pre-link the Galaxy dataset under the log file
## name inside the output directory so whatever is written there lands in the dataset.
#if $output_log
    ln -s '$output_log' stacks_outputs/process_radtags.stacks_inputs.log &&
#end if

## Build the shell commands that link the input reads; the helper returns the
## command text together with the detected input type.
#set ($link_command, $inputype) = $fastq_input_nonbatch( $input_type.fqinputs, $input_type.input_type_select, "_R%d_0" )
$link_command

process_radtags

@PROCESS_IOOPTIONS@
@PROCESS_FILTER@
@COMMON_ADVANCED@
@RESCUE_BARCODE@
@PROCESS_ADAPTER@

## -E not implemented in Galaxy defaults to phred33

## Restriction enzyme options
## (left unquoted on purpose: the tests assert the literal text "-e ecoRI")
#if str($options_enzyme.enzyme) != '':
    -e $options_enzyme.enzyme
#end if
#if str( $options_enzyme.options_enzyme_selector ) == "2" and str($options_enzyme.enzyme2)!='':
    --renz_2 $options_enzyme.enzyme2
#end if

## advanced options not shared between shortreads and radtags
## (boolean params: each expands to its flag when checked, to nothing otherwise)
$options_advanced.bestrad
$options_advanced.disable_rad_check

## Output options
## --merge not implemented in Galaxy

@PROCESS_FASTQ_POSTPROC@
    ]]></command>

    <inputs>
        <!-- Read inputs: either a multi-select of single-end FASTQ datasets or a
             list:paired collection. Both branches reuse the parameter name
             "fqinputs" so the command template can address them uniformly, and
             both add the barcode parameters through the barcode_selector macro. -->
        <conditional name="input_type">
            <param name="input_type_select" type="select" label="Single-end or paired-end reads">
                <option value="single" selected="True">Single-end files</option>
                <option value="paired">Paired-end files</option>
            </param>
            <when value="single">
                <param name="fqinputs" argument="-f" type="data" format="fastqsanger,fastqsanger.gz" multiple="true" label="Single-end reads"/>
                <expand macro="barcode_selector">
                    <expand macro="barcode_encoding_single" type="Barcode"/>
                </expand>
            </when>
            <when value="paired">
                <param name="fqinputs" type="data_collection" collection_type="list:paired" label="Paired-end reads" format="fastqsanger,fastqsanger.gz"/>
                <expand macro="barcode_selector">
                    <expand macro="barcode_encoding_pair" type="Barcode"/>
                </expand>
            </when>
        </conditional>
        <!-- Restriction enzyme(s): "enzyme" exists in both branches, "enzyme2" only
             when two enzymes are selected. The option lists come from the
             "enzymes" macro. -->
        <conditional name="options_enzyme">
            <param name="options_enzyme_selector" type="select" label="Number of enzymes">
                <option value="1">One</option>
                <option value="2">Two</option>
            </param>
            <when value="1">
                <param name="enzyme" type="select" label="Enzyme" argument="-e" help="provide the restriction enzyme used">
                    <expand macro="enzymes"/>
                </param>
            </when>
            <when value="2">
                <param name="enzyme" type="select" label="Enzyme" argument="-e" help="provide the restriction enzyme used">
                    <expand macro="enzymes"/>
                </param>
                <param name="enzyme2" type="select" label="Second enzyme" argument="--renz_2" help="provide the second restriction enzyme used">
                    <expand macro="enzymes"/>
                </param>
            </when>
        </conditional>

        <!-- Advanced options: shared macros plus the two flags that are specific to
             this tool (bestrad, disable_rad_check). The boolean truevalue strings
             are inserted verbatim into the command line. -->
        <section name="options_advanced" title="advanced options" expanded="False">
            <expand macro="common_advanced"/>
            <param argument="--bestrad" type="boolean" checked="false" truevalue="--bestrad" falsevalue="" label="Library was generated using BestRAD, check for restriction enzyme on either read and potentially transpose reads"/>
            <param argument="--disable_rad_check" type="boolean" checked="false" truevalue="--disable_rad_check" falsevalue="" label="Disable checking if the RAD site is intact"/>
            <expand macro="rescue_barcode"/>
            <expand macro="process_adapter"/>
        </section>

        <!-- Filtering, output type and log switch are defined in the shared macros. -->
        <expand macro="process_filter"/>
        <expand macro="process_output_types"/>
        <expand macro="in_log"/>
    </inputs>

    <outputs>
        <!-- Log output, defined by the out_log macro. -->
        <expand macro="out_log"/>
        <!-- Read collections. Exactly one of the four collections below passes its
             filters for a given run; the choice depends on two inputs:
               * input_type_select: "single" gives a flat list, "paired" gives list:paired
               * barcode_encoding:  empty gives "processed*", non-empty gives "demultiplexed*"
             All four discover their elements in the stacks_outputs directory. The
             single-end pattern captures the element name as a whole; the paired
             pattern splits at the last dot into identifier_0 (list element) and
             identifier_1 (member of the pair). Any file extension handling is done
             inside discover_faqgz_output_macro and is not visible here.
             These collections are passed as the body of the process_outputs macro,
             which presumably contributes further outputs of its own (the tests
             reference "remaining", "discarded" and "discarded_paired"). -->
        <expand macro="process_outputs">
            <collection name="processed" type="list" label="${tool.name} on ${on_string} Processed reads">
                <filter>input_type['input_type_select'] == "single"</filter>
                <filter>input_type['barcode_encoding'] == ""</filter>
                <expand macro="discover_faqgz_output_macro" pattern="(?P&lt;name&gt;.+)" dir="stacks_outputs"/>
            </collection>
            <collection name="processed_paired" type="list:paired" label="${tool.name} on ${on_string} Processed reads">
                <filter>input_type['input_type_select'] == "paired"</filter>
                <filter>input_type['barcode_encoding'] == ""</filter>
                <expand macro="discover_faqgz_output_macro" pattern="(?P&lt;identifier_0&gt;.+)\.(?P&lt;identifier_1&gt;[^.]+)" dir="stacks_outputs"/>
            </collection>
            <collection name="demultiplexed" type="list" label="${tool.name} on ${on_string} Demultiplexed reads">
                <filter>input_type['input_type_select'] == "single"</filter>
                <filter>input_type['barcode_encoding'] != ""</filter>
                <expand macro="discover_faqgz_output_macro" pattern="(?P&lt;name&gt;.+)" dir="stacks_outputs"/>
            </collection>
            <collection name="demultiplexed_paired" type="list:paired" label="${tool.name} on ${on_string} Demultiplexed reads">
                <filter>input_type['input_type_select'] == "paired"</filter>
                <filter>input_type['barcode_encoding'] != ""</filter>
                <expand macro="discover_faqgz_output_macro" pattern="(?P&lt;identifier_0&gt;.+)\.(?P&lt;identifier_1&gt;[^.]+)" dir="stacks_outputs"/>
            </collection>
        </expand>
    </outputs>
    <tests>
        <!-- 01: four uncompressed single-end FASTQ files, inline_null barcode
             encoding, one enzyme (ecoRI). No filter or capture parameters are set.
             Expects the log (compared with up to 4 differing lines) and a
             40-element "demultiplexed" list, i.e. 2 outputs in total. -->
        <test expect_num_outputs="2">
            <param name="input_type|input_type_select" value="single"/>
            <param name="input_type|fqinputs" ftype="fastqsanger" value="procrad/R1_01.fq,procrad/R1_02.fq,procrad/R1_03.fq,procrad/R1_04.fq"/>
            <param name="input_type|barcode_encoding" value="--inline_null"/>
            <param name="input_type|barcode" value="procrad/barcodes"/>
            <param name="options_enzyme|options_enzyme_selector" value="1"/>
            <param name="options_enzyme|enzyme" value="ecoRI"/>
            <param name="add_log" value="yes"/>
            <output name="output_log" file="procrad/process_radtags.out" lines_diff="4"/>
            <output_collection name="demultiplexed" count="40">
                <element name="PopA_01" file="demultiplexed/PopA_01.fq" ftype="fastqsanger" compare="contains"/>
            </output_collection>
        </test>
        <!-- 02: two gzipped single-end inputs (the R2 file is deliberately reused as
             an additional single-end file). Filtering is switched on with the -q
             and -c flags, window 0.1 and score 11; discarded reads are captured
             (-D) and the output type is gzfastq.
             Expects 3 outputs: log, "demultiplexed" (40 elements) and "discarded"
             (one element per input file). The generated command line is checked
             for each of the flags set above. -->
        <test expect_num_outputs="3">
            <param name="input_type|input_type_select" value="single"/>
            <param name="input_type|fqinputs" ftype="fastqsanger.gz" value="procrad/R1.fq.gzip,procrad/R2.fq.gzip"/>
            <param name="input_type|barcode_encoding" value="--inline_null"/>
            <param name="input_type|barcode" value="procrad/barcodes"/>
            <param name="options_enzyme|options_enzyme_selector" value="1"/>
            <param name="options_enzyme|enzyme" value="ecoRI"/>
            <param name="filter_cond|filter_select" value="yes"/>
            <param name="filter_cond|discard" value="-q"/>
            <param name="filter_cond|sliding" value="0.1"/>
            <param name="filter_cond|score" value="11"/>
            <param name="filter_cond|remove" value="-c"/>
            <param name="capture" value="-D"/>
            <param name="outype" value="gzfastq"/>
            <param name="add_log" value="yes"/>
            <output name="output_log" file="procrad/process_radtags2.out" lines_diff="4"/>
            <assert_command>
                <has_text text="-q"/>
                <has_text text="-w 0.1"/>
                <has_text text="-s 11"/>
                <has_text text="-c"/>
                <has_text text="-D"/>
            </assert_command>
            <output_collection name="demultiplexed" count="40">
                <element name="PopA_01" ftype="fastqsanger.gz"  md5="c7250f50138cbca747b85223aaae9565"/>
            </output_collection>
            <!-- Discarded reads are checked by approximate size and by one expected read header. -->
            <output_collection name="discarded" count="2">
                <element name="R1">
                    <assert_contents>
                        <has_size value="1101516" delta="200"/>
                        <has_text text="@lane1_fakedata0_11 1:N:0:"/>
                    </assert_contents>
                </element>
                <element name="R2">
                    <assert_contents>
                        <has_size value="1613724" delta="200"/>
                        <has_text text="@lane1_fakedata0_0 1:N:0:"/>
                    </assert_contents>
                </element>
            </output_collection>
        </test>
        <!-- 03: one paired-end sample given as a list:paired collection; quality
             filtering is off but a length limit of 50 is set, discarded reads are
             captured (-D) and the output type is gzfasta.
             Expects 4 outputs: log, "demultiplexed_paired", "remaining" and
             "discarded_paired". Expected files are referenced with the "file"
             attribute throughout, matching the other tests in this file. -->
        <test expect_num_outputs="4">
            <param name="input_type|input_type_select" value="paired"/>
            <param name="input_type|fqinputs">
                <collection type="list:paired">
                    <element name="reads">
                        <collection type="paired">
                            <element name="forward" value="procrad/R1.fq.gzip" ftype="fastqsanger"/>
                            <element name="reverse" value="procrad/R2.fq.gzip"  ftype="fastqsanger"/>
                        </collection>
                    </element>
                </collection>
            </param>
            <param name="input_type|barcode" value="procrad/barcodes"/>
            <param name="options_enzyme|options_enzyme_selector" value="1"/>
            <param name="options_enzyme|enzyme" value="ecoRI"/>
            <param name="filter_cond|filter_select" value="no"/>
            <param name="filter_cond|len_limit" value="50"/>
            <param name="capture" value="-D"/>
            <param name="outype" value="gzfasta"/>
            <param name="add_log" value="yes"/>
            <output name="output_log" file="procrad/process_radtags_paired.out" lines_diff="4"/>
            <assert_command>
                <has_text text="--len_limit 50"/>
            </assert_command>
            <output_collection name="demultiplexed_paired" type="list:paired" count="40">
                <element name="PopA_01">
                    <element name="forward" file="demultiplexed/PopA_01.1.fa.gz" ftype="fasta.gz"/>
                    <element name="reverse" file="demultiplexed/PopA_01.2.fa.gz" ftype="fasta.gz"/>
                </element>
            </output_collection>
            <output_collection name="remaining" type="list:paired" count="40">
                <element name="PopA_01">
                    <element name="forward" file="demultiplexed/PopA_01.rem.1.fa.gz" ftype="fasta.gz"/>
                    <element name="reverse" file="demultiplexed/PopA_01.rem.2.fa.gz" ftype="fasta.gz"/>
                </element>
            </output_collection>
            <output_collection name="discarded_paired" type="list:paired" count="1">
                <element name="reads">
                    <element name="forward">
                        <assert_contents>
                            <has_size value="609382" delta="200"/>
                            <has_text text=">lane1_fakedata0_11 1:N:0:"/>
                        </assert_contents>
                    </element>
                    <element name="reverse" file="procrad/R2.fa.discards" ftype="fasta"/>
                </element>
            </output_collection>
        </test>
        <!-- 04: gzipped paired-end input with two enzymes and every advanced option
             set (truncate, barcode rescue with both distances, bestrad,
             retain_header, disable_rad_check, both adapters and the adapter
             mismatch count); output type is fasta. The main purpose is to verify,
             via assert_command, that each option reaches the command line.
             NOTE(review): neither add_log nor capture is set in this test, and only
             two output collections are checked below. Confirm against the macros
             that expect_num_outputs="3" is the intended count. -->
        <test expect_num_outputs="3">
            <param name="input_type|input_type_select" value="paired"/>
            <param name="input_type|fqinputs">
                <collection type="list:paired">
                    <element name="reads">
                        <collection type="paired">
                            <element name="forward" value="procrad/R1.fq.gzip" ftype="fastqsanger.gz"/>
                            <element name="reverse" value="procrad/R2.fq.gzip" ftype="fastqsanger.gz"/>
                        </collection>
                    </element>
                </collection>
            </param>
            <param name="input_type|barcode" value="procrad/barcodes"/>
            <param name="options_enzyme|options_enzyme_selector" value="2"/>
            <param name="options_enzyme|enzyme" value="ecoRI"/>
            <param name="options_enzyme|enzyme2" value="ecoRI"/>
            <param name="options_advanced|truncate" value="70"/>
            <param name="options_advanced|rescue_cond|rescue" value="-r"/>
            <param name="options_advanced|rescue_cond|barcode_dist_1" value="2"/>
            <param name="options_advanced|rescue_cond|barcode_dist_2" value="2"/>
            <param name="options_advanced|bestrad" value="--bestrad"/>
            <param name="options_advanced|retain_header" value="--retain_header"/>
            <param name="options_advanced|disable_rad_check" value="--disable_rad_check"/>
            <param name="options_advanced|adapter_1" value="AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"/>
            <param name="options_advanced|adapter_2" value="TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT"/>
            <param name="options_advanced|adapter_mm" value="2"/>
            <param name="outype" value="fasta"/>
            <assert_command>
                <has_text text="-e ecoRI"/>
                <has_text text="--renz_2 ecoRI"/>
                <has_text text="-t 70"/>
                <has_text text="-r"/>
                <has_text text="--bestrad"/>
                <has_text text="--retain_header"/>
                <has_text text="--disable_rad_check"/>
                <has_text text="--barcode_dist_1 2"/>
                <has_text text="--barcode_dist_2 2"/>
                <has_text text="--adapter_1 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"/>
                <has_text text="--adapter_2 TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT"/>
                <has_text text="--adapter_mm 2"/>
            </assert_command>
            <output_collection name="demultiplexed_paired" type="list:paired" count="40">
                <element name="PopA_01">
                    <element name="forward" file="demultiplexed/PopA_01.1.fa" ftype="fasta"/>
                    <element name="reverse" file="demultiplexed/PopA_01.2.fa" ftype="fasta"/>
                </element>
            </output_collection>
            <output_collection name="remaining" type="list:paired" count="40">
                <element name="PopA_01">
                    <element name="forward" file="demultiplexed/PopA_01.rem.1.fa" ftype="fasta"/>
                    <element name="reverse" file="demultiplexed/PopA_01.rem.2.fa" ftype="fasta"/>
                </element>
            </output_collection>
        </test>
        <!-- 05: invalid barcode file, first variant (fixture "barcodes-duplicate",
             presumably containing a duplicated entry; confirm against the test
             data). Same inputs as test 01 otherwise; the job is expected to fail. -->
        <test expect_failure="true">
            <param name="input_type|input_type_select" value="single"/>
            <param name="input_type|fqinputs" ftype="fastqsanger" value="procrad/R1_01.fq,procrad/R1_02.fq,procrad/R1_03.fq,procrad/R1_04.fq"/>
            <param name="input_type|barcode_encoding" value="--inline_null"/>
            <param name="input_type|barcode" value="procrad/barcodes-duplicate"/>
            <param name="options_enzyme|options_enzyme_selector" value="1"/>
            <param name="options_enzyme|enzyme" value="ecoRI"/>
            <param name="add_log" value="yes"/>
        </test>
        <!-- 06: invalid barcode file, second variant (fixture "barcodes-duplicate2";
             how it differs from the fixture of the previous test is defined by the
             test data, not by this file). The job is expected to fail. -->
        <test expect_failure="true">
            <param name="input_type|input_type_select" value="single"/>
            <param name="input_type|fqinputs" ftype="fastqsanger" value="procrad/R1_01.fq,procrad/R1_02.fq,procrad/R1_03.fq,procrad/R1_04.fq"/>
            <param name="input_type|barcode_encoding" value="--inline_null"/>
            <param name="input_type|barcode" value="procrad/barcodes-duplicate2"/>
            <param name="options_enzyme|options_enzyme_selector" value="1"/>
            <param name="options_enzyme|enzyme" value="ecoRI"/>
            <param name="add_log" value="yes"/>
        </test>
        <!-- 07: single-end run without a barcode file (barcode_encoding is empty),
             so the reads go to the "processed" collection instead of
             "demultiplexed". Discarded reads are captured (-D).
             Expects 3 outputs: log, "processed" and "discarded". The log is checked
             for two headings and a line count; the processed element is only
             checked for its size (20 bytes). -->
        <test expect_num_outputs="3">
            <param name="input_type|input_type_select" value="single"/>
            <param name="input_type|fqinputs" ftype="fastqsanger.gz" value="procrad/R1.fq.gzip"/>
            <param name="input_type|barcode_encoding" value=""/>
            <param name="options_enzyme|options_enzyme_selector" value="1"/>
            <param name="options_enzyme|enzyme" value="ecoRI"/>
            <param name="add_log" value="yes"/>
            <param name="capture" value="-D"/>
            <output name="output_log">
                <assert_contents>
                    <has_text text="Barcode Not Found"/>
                    <has_text text="Retained Reads"/>
                    <has_n_lines n="10"/>
                </assert_contents>
            </output>
            <output_collection name="processed" count="1">
                <element name="R1">
                    <assert_contents>
                        <has_size value="20"/>
                    </assert_contents>
                </element>
            </output_collection>
            <output_collection name="discarded" count="1">
                <element name="R1">
                    <assert_contents>
                        <has_size value="1613724" delta="200"/>
                    </assert_contents>
                </element>
            </output_collection>
        </test>
        <!-- 08: paired-end run without a barcode file (barcode_encoding is empty),
             gzfasta output, so the reads go to "processed_paired".
             Expects 3 outputs: log, "processed_paired" and "remaining". Both
             collections are expected to hold two list elements (reads_R1_0 and
             reads_R2_0); their members are only checked for size. -->
        <test expect_num_outputs="3">
            <param name="input_type|input_type_select" value="paired"/>
            <param name="input_type|fqinputs">
                <collection type="list:paired">
                    <element name="reads">
                        <collection type="paired">
                            <element name="forward" value="procrad/R1.fq.gzip" ftype="fastqsanger.gz"/>
                            <element name="reverse" value="procrad/R2.fq.gzip"  ftype="fastqsanger.gz"/>
                        </collection>
                    </element>
                </collection>
            </param>
            <param name="input_type|barcode_encoding" value=""/>
            <param name="options_enzyme|options_enzyme_selector" value="1"/>
            <param name="options_enzyme|enzyme" value="ecoRI"/>
            <param name="outype" value="gzfasta"/>
            <param name="add_log" value="yes"/>
            <output name="output_log" file="procrad/process_radtags_paired_no_barcode.out" lines_diff="4"/>
            <output_collection name="processed_paired" type="list:paired" count="2">
                <element name="reads_R1_0">
                    <element name="forward">
                        <assert_contents>
                            <has_size value="20"/>
                        </assert_contents>
                    </element>
                </element>
                <element name="reads_R2_0">
                    <element name="reverse">
                        <assert_contents>
                            <has_size value="20"/>
                        </assert_contents>
                    </element>
                </element>
            </output_collection>
            <output_collection name="remaining" type="list:paired" count="2">
                <element name="reads_R1_0">
                    <element name="forward">
                        <assert_contents>
                            <has_size value="20"/>
                        </assert_contents>
                    </element>
                </element>
                <element name="reads_R2_0">
                    <element name="reverse">
                        <assert_contents>
                            <has_size value="17607" delta="100"/>
                        </assert_contents>
                    </element>
                </element>
            </output_collection>
        </test>
    </tests>
    <help>
<![CDATA[
.. class:: infomark

**What it does**

This program examines raw reads from an Illumina sequencing run. First, it checks that the barcode and the RAD cutsite are intact and demultiplexes the data. If there are errors in the barcode or the RAD site within a certain allowance, process_radtags can correct them. Second, it slides a window down the length of the read and checks the average quality score within the window. If the score drops below 90% probability of being correct (a raw phred score of 10), the read is discarded. This allows for some sequencing errors while eliminating reads where the sequence is degrading as it is being sequenced. By default the sliding window is 15% of the length of the read, but the threshold and window size can be adjusted.

The process_radtags program can:

- handle data that is barcoded, either inline or using an index, or unbarcoded.
- use combinatorial barcodes.
- check and correct for a restriction enzyme cutsite for single or double-digested data.
- filter adapter sequence while allowing for sequencing error in the adapter pattern.
- process individual files or whole directories of files.
- directly read gzipped data
- filter reads based on Illumina's Chastity filter

**Help**

Input files:

- A set of one or more FASTQ files (either selected manually, a dataset list, or a paired dataset list)

- Barcode File

The barcode file is a very simple format:

======= ===========
Barcode Sample name
======= ===========
ATGGGG  PopA_01
GGGTAA  PopA_02
AGGAAA  PopA_03
TTTAAG  PopA_04
GGTGTG  PopA_05
TGATGT  PopA_06
======= ===========

Combinatorial barcodes are specified, one per column, separated by a tab:

======== ======== ===========
Barcode1 Barcode2 Sample name
======== ======== ===========
CGATA    ACGTA    PopA_01
CGGCG    CGTA     PopA_02
GAAGC    CGTA     PopA_03
GAGAT    CGTA     PopA_04
CGATA    AGCA     PopA_05
CGGCG    AGCA     PopA_06
======== ======== ===========

The sample name column can be omitted. Then the Barcodes are used for naming the output files.

@STACKS_INFOS@
]]>
    </help>
    <expand macro="citation"/>
</tool>