<!-- Mercurial > repos > iuc > fastp -->

<tool id="fastp" name="fastp" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="23.1">
    <!-- One-line summary of the tool. -->
    <description>fast all-in-one preprocessing for FASTQ files</description>
    <!-- Shared macros are imported from macros.xml; the @TOOL_VERSION@ and @VERSION_SUFFIX@
         tokens and the macros expanded in this file are presumably defined there (not visible here). -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="biotools" />
    <!-- The fastp package requirement uses the same @TOOL_VERSION@ token as the tool version attribute. -->
    <requirements>
        <requirement type="package" version="@TOOL_VERSION@">fastp</requirement>
    </requirements>
    <!-- Command whose output reports the fastp version. -->
    <version_command>fastp -v</version_command>
    <command detect_errors="exit_code"><![CDATA[
#import re

## Link input files
##
## The inputs are symlinked into the working directory under sanitized names.
## The first link name is also used for the fastp report title further down.

## Extension appended to the link names and to the temporary output names.
## It is switched to the gzipped variant below when the (forward) input is of
## type fastqsanger.gz.
#set ext = '.fastqsanger'

#if $single_paired.single_paired_selector == 'paired_collection'
    ## Only the forward element is inspected when choosing the extension.
    #if $single_paired.paired_input.forward.is_of_type('fastqsanger.gz')
        #set ext = '.fastqsanger.gz'
    #end if
    #set $in1 = $single_paired.paired_input.forward
    #set $in2 = $single_paired.paired_input.reverse
    ## Link names are built from the collection name: every character that is not
    ## a word character, a hyphen or whitespace is replaced by an underscore, then
    ## the extension is appended. The reverse read name gets an extra _R2 suffix.
    #set $in1_name = re.sub('[^\w\-\s]', '_', str($single_paired.paired_input.name)) + $ext
    #set $in2_name = re.sub('[^\w\-\s]', '_', str("%s_%s" % ($single_paired.paired_input.name, "R2"))) + $ext
    ln -sf '$in1' '$in1_name' &&
    ln -sf '$in2' '$in2_name' &&
#else
    ## NOTE(review): in1 is referenced by its bare name here; it is presumably
    ## declared by the "in" macro inside the single_paired conditional. Confirm
    ## against macros.xml.
    #if $in1.is_of_type('fastqsanger.gz')
        #set ext = '.fastqsanger.gz'
    #end if

    ## Single-end link name: sanitized element identifier plus the extension.
    #set $in1_name = re.sub('[^\w\-\s]', '_', str($in1.element_identifier)) + $ext
    ln -sf '$in1' '$in1_name' &&
#end if

## Run fastp
##
## The lines from here up to the first 'mv' step near the end of the template
## make up a single fastp invocation.

fastp

## Thread count is taken from the GALAXY_SLOTS environment variable and falls
## back to 1 when that variable is unset or empty.
--thread \${GALAXY_SLOTS:-1}
--report_title 'fastp report for $in1_name'

-i '$in1_name'

## Merge reads
##
## Paired input: pass the second read file, then choose the output arguments
## depending on whether merging is enabled. Single-end input only writes the
## temporary file named first plus the extension.

#if str($single_paired.single_paired_selector).startswith('paired')
    -I '$in2_name'
    #if $single_paired.merge_reads.merge
        $single_paired.merge_reads.merge
        --merged_out merged${ext}
        ## With include_unmerged set, only its flag is passed; otherwise separate
        ## unmerged and unpaired files are requested for both reads.
        #if $single_paired.merge_reads.include_unmerged
            $single_paired.merge_reads.include_unmerged
        #else
            --out1 'unmerged_R1${ext}'
            --out2 'unmerged_R2${ext}'
            --unpaired1 'unpaired_R1${ext}'
            --unpaired2 'unpaired_R2${ext}'
        #end if
    #else
        -o first${ext}
        -O second${ext}
    #end if
#else
    -o first${ext}
#end if

## Adapter Trimming Options
##
## The disable flag is rendered as-is; the adapter sequences are only passed
## when the corresponding field is non-empty.

$single_paired.adapter_trimming_options.disable_adapter_trimming

#if str($single_paired.adapter_trimming_options.adapter_sequence1):
    --adapter_sequence '$single_paired.adapter_trimming_options.adapter_sequence1'
#end if

## The read 2 adapter and the paired-end adapter detection flag are only
## referenced for paired input.
#if str($single_paired.single_paired_selector).startswith('paired'):
    #if str($single_paired.adapter_trimming_options.adapter_sequence2):
        --adapter_sequence_r2 '$single_paired.adapter_trimming_options.adapter_sequence2'
    #end if
    $single_paired.adapter_trimming_options.detect_adapter_for_pe
#end if


## Global Trimming Options
##
## Each value is passed only when it has been filled in.

#if str($single_paired.global_trimming_options.trim_front1):
    -f $single_paired.global_trimming_options.trim_front1
#end if

#if str($single_paired.global_trimming_options.trim_tail1):
    -t $single_paired.global_trimming_options.trim_tail1
#end if

## Read 2 trimming values are only referenced for paired input.
#if str($single_paired.single_paired_selector).startswith('paired'):
    #if str($single_paired.global_trimming_options.trim_front2):
        -F $single_paired.global_trimming_options.trim_front2
    #end if
    #if str($single_paired.global_trimming_options.trim_tail2):
        -T $single_paired.global_trimming_options.trim_tail2
    #end if
#end if


## Overrepresented sequence analysis
##
## The boolean renders as -p or as an empty string; the sampling value is
## passed only when it has been filled in.

$overrepresented_sequence_analysis.overrepresentation_analysis

#if str($overrepresented_sequence_analysis.overrepresentation_sampling):
    -P $overrepresented_sequence_analysis.overrepresentation_sampling
#end if


## Filter Options
##
## In each group below the boolean switch is rendered as-is and every optional
## integer is passed only when it has been filled in.

## Quality filtering options

$filter_options.quality_filtering_options.disable_quality_filtering

#if str($filter_options.quality_filtering_options.qualified_quality_phred):
    -q $filter_options.quality_filtering_options.qualified_quality_phred
#end if
#if str($filter_options.quality_filtering_options.unqualified_percent_limit):
    -u $filter_options.quality_filtering_options.unqualified_percent_limit
#end if
#if str($filter_options.quality_filtering_options.n_base_limit):
    -n $filter_options.quality_filtering_options.n_base_limit
#end if


## Length filtering options

$filter_options.length_filtering_options.disable_length_filtering

#if str($filter_options.length_filtering_options.length_required):
    -l $filter_options.length_filtering_options.length_required
#end if

#if str($filter_options.length_filtering_options.length_limit):
    --length_limit $filter_options.length_filtering_options.length_limit
#end if

## Low complexity filtering options

$filter_options.low_complexity_filter.enable_low_complexity_filter

#if str($filter_options.low_complexity_filter.complexity_threshold):
    -Y $filter_options.low_complexity_filter.complexity_threshold
#end if


## Duplicate analysis / deduplication
##
## The selector value is either empty (analysis enabled) or the flag that turns
## duplication evaluation off. The dedup parameter only exists in the enabled
## branch of the conditional, so it is referenced only when the selector is empty.

$duplicated_reads.handling_options.eval_dups
#if not str($duplicated_reads.handling_options.eval_dups):
    $duplicated_reads.handling_options.dedup
#end if


## Read Modification Options

## PolyG tail trimming, useful for NextSeq/NovaSeq data
##
## The selector has three values: empty (automatic), -g (force) and -G (disable).
## The minimum length parameter only exists in the automatic and forced
## branches of the conditional, so it is referenced only for those two values.

#if $read_mod_options.polyg_tail_trimming.trimming_select in ['', '-g']:
    #if str($read_mod_options.polyg_tail_trimming.poly_g_min_len):
        --poly_g_min_len $read_mod_options.polyg_tail_trimming.poly_g_min_len
    #end if
#end if

## The selected mode is always rendered. Emitting it only inside the branch
## above would drop -G, and choosing to disable polyG trimming would then
## have no effect on the fastp call.
$read_mod_options.polyg_tail_trimming.trimming_select

## PolyX tail trimming
##
## Disabled unless -x is selected; the minimum length is passed only when it
## has been filled in.

#if $read_mod_options.polyx_tail_trimming.polyx_trimming_select == '-x':
    $read_mod_options.polyx_tail_trimming.polyx_trimming_select
    #if str($read_mod_options.polyx_tail_trimming.poly_x_min_len):
        --poly_x_min_len $read_mod_options.polyx_tail_trimming.poly_x_min_len
    #end if
#end if

## UMI processing
##
## Nothing is passed unless the UMI switch is enabled. When it is, the switch
## itself is rendered and each of location, length and prefix is passed only
## when it has been filled in.

#if $read_mod_options.umi_processing.umi:
    $read_mod_options.umi_processing.umi
    #if str($read_mod_options.umi_processing.umi_loc):
        --umi_loc '$read_mod_options.umi_processing.umi_loc'
    #end if
    #if str($read_mod_options.umi_processing.umi_len):
        --umi_len $read_mod_options.umi_processing.umi_len
    #end if
    #if str($read_mod_options.umi_processing.umi_prefix):
        --umi_prefix '$read_mod_options.umi_processing.umi_prefix'
    #end if
#end if

## Per read cutting by quality options
##
## Each of the three cutting modes is passed only when its selector is set to
## the corresponding flag. The window size and mean quality inputs are declared
## optional, so each one is guarded: an emptied field must not leave a flag
## without a value on the command line (fastp then uses its own default).

#if str($read_mod_options.cutting_by_quality_options.cut_front_select.cut_front) == '--cut_front'
    $read_mod_options.cutting_by_quality_options.cut_front_select.cut_front
    #if str($read_mod_options.cutting_by_quality_options.cut_front_select.cut_front_window_size):
        --cut_front_window_size $read_mod_options.cutting_by_quality_options.cut_front_select.cut_front_window_size
    #end if
    #if str($read_mod_options.cutting_by_quality_options.cut_front_select.cut_front_mean_quality):
        --cut_front_mean_quality $read_mod_options.cutting_by_quality_options.cut_front_select.cut_front_mean_quality
    #end if
#end if

#if str($read_mod_options.cutting_by_quality_options.cut_tail_select.cut_tail) == '--cut_tail'
    $read_mod_options.cutting_by_quality_options.cut_tail_select.cut_tail
    #if str($read_mod_options.cutting_by_quality_options.cut_tail_select.cut_tail_window_size):
        --cut_tail_window_size $read_mod_options.cutting_by_quality_options.cut_tail_select.cut_tail_window_size
    #end if
    #if str($read_mod_options.cutting_by_quality_options.cut_tail_select.cut_tail_mean_quality):
        --cut_tail_mean_quality $read_mod_options.cutting_by_quality_options.cut_tail_select.cut_tail_mean_quality
    #end if
#end if

#if str($read_mod_options.cutting_by_quality_options.cut_right_select.cut_right) == '--cut_right'
    $read_mod_options.cutting_by_quality_options.cut_right_select.cut_right
    #if str($read_mod_options.cutting_by_quality_options.cut_right_select.cut_right_window_size):
        --cut_right_window_size $read_mod_options.cutting_by_quality_options.cut_right_select.cut_right_window_size
    #end if
    #if str($read_mod_options.cutting_by_quality_options.cut_right_select.cut_right_mean_quality):
        --cut_right_mean_quality $read_mod_options.cutting_by_quality_options.cut_right_select.cut_right_mean_quality
    #end if
#end if

## Base correction by overlap analysis options
##
## Last argument of the fastp call: the boolean renders as -c or as an empty string.

$read_mod_options.base_correction_options.correction

## Move the temporary files written by fastp onto the Galaxy outputs. The
## branches mirror the output arguments chosen earlier: merged (optionally with
## separate unmerged/unpaired pairs), plain paired, or single-end.
#if str($single_paired.single_paired_selector) == 'paired_collection'
    #if $single_paired.merge_reads.merge
        && mv merged${ext} '$merged_reads'
        #if not $single_paired.merge_reads.include_unmerged
            && mv 'unmerged_R1${ext}' '$unmerged_out_coll.forward'
            && mv 'unmerged_R2${ext}' '$unmerged_out_coll.reverse'
            && mv 'unpaired_R1${ext}' '$unpaired_out_coll.forward'
            && mv 'unpaired_R2${ext}' '$unpaired_out_coll.reverse'
        #end if
    #else
        && mv first${ext} '$output_paired_coll.forward'
        && mv second${ext} '$output_paired_coll.reverse'
    #end if
#else
    && mv first${ext} '${out1}'
#end if

]]></command>
    <inputs>
        <!-- Chooses between a single-end dataset and a paired collection. Both branches expand
             adapter and global trimming macros; their contents come from macros.xml (not visible here). -->
        <conditional name="single_paired">
            <param name="single_paired_selector" type="select" label="Single-end or paired reads">
                <option value="single" selected="true">Single-end</option>
                <option value="paired_collection">Paired Collection</option>
            </param>
            <when value="single">
                <expand macro="in" />
                <expand macro="adapter_trimming_options" />
                <expand macro="global_trimming_options" />
            </when>
            <when value="paired_collection">
                <param name="paired_input" type="data_collection" format="fastqsanger,fastqsanger.gz" label="Select paired collection(s)" collection_type="paired"/>
                <expand macro="merge_reads" />
                <!-- The paired branch additionally passes the read 2 adapter sequence and the
                     paired-end adapter detection macros into the adapter trimming macro. -->
                <expand macro="adapter_trimming_options">
                    <expand macro="adapter_sequence" read_number="2"/>
                    <expand macro="detect_adapter_for_pe" />
                </expand>
                <expand macro="global_trimming_options_paired" />
            </when>
        </conditional>

        <!-- Overrepresented sequence analysis: an on/off switch (off by default) and an optional sampling rate. -->
        <section name="overrepresented_sequence_analysis" title="Overrepresented Sequence Analysis" expanded="False">
            <param name="overrepresentation_analysis" argument="-p" type="boolean" truevalue="-p" falsevalue="" checked="false" label="Enable overrepresented analysis" help="Enable overrepresented sequence analysis."/>
            <!-- NOTE(review): the help text states a 1~10000 range, but no min/max is declared on the parameter. -->
            <param name="overrepresentation_sampling" argument="-P" type="integer" optional="true" label="Overrepresentation sampling" help="One in (--overrepresentation_sampling) reads will be computed for overrepresentation analysis (1~10000), smaller is slower, default is 20."/>
        </section>

        <!-- Filter Options -->
        <!-- Three sub-sections (quality, length, low complexity). Each has one boolean switch whose
             true value is the fastp flag, plus optional integers that may be left empty. -->
        <section name="filter_options" title="Filter Options">
            <section name="quality_filtering_options" title="Quality filtering options" expanded="True">
                <param name="disable_quality_filtering" argument="-Q" type="boolean" truevalue="-Q" falsevalue="" checked="false" label="Disable quality filtering" help="Quality filtering is enabled by default. If this option is specified, quality filtering is disabled."/>
                <param name="qualified_quality_phred" argument="-q" type="integer" optional="true" label="Qualified quality phred" help="The quality value that a base is qualified. Default 15 means phred quality >=Q15 is qualified."/>
                <param name="unqualified_percent_limit" argument="-u" type="integer" optional="true" label="Unqualified percent limit" help="How many percents of bases are allowed to be unqualified (0~100). Default 40 means 40%."/>
                <param name="n_base_limit" argument="-n" type="integer" optional="true" label="N base limit" help="If one read's number of N base is >n_base_limit, then this read/pair is discarded. Default is 5."/>
            </section>

            <section name="length_filtering_options" title="Length filtering options" expanded="True">
                <param name="disable_length_filtering" argument="-L" type="boolean" truevalue="-L" falsevalue="" checked="false" label="Disable length filtering" help="Length filtering is enabled by default. If this option is specified, length filtering is disabled."/>
                <param name="length_required" argument="-l" type="integer" optional="true" label="Length required" help="Reads shorter than this value will be discarded. Default is 15."/>
                <!-- No explicit name here; the command template refers to this parameter as length_limit. -->
                <param argument="--length_limit" type="integer" optional="true" label="Maximum length" help="Reads longer than this value will be discarded. Default is 0 and means no limitation."/>
            </section>

            <section name="low_complexity_filter" title="Low complexity filtering options" expanded="True">
                <param name="enable_low_complexity_filter" argument="-y" type="boolean" truevalue="-y" falsevalue="" checked="false" label="Enable low complexity filter" help="The complexity is defined as the percentage of base that is different from its next base, default is No"/>
                <param name="complexity_threshold" argument="-Y" type="integer" optional="true" label="Complexity threshold" help="Threshold for low complexity filter (0~100). Default is 30, which means 30% complexity is required."/>
            </section>
        </section>
        <!-- Duplication analysis. The selector value is the text placed on the command line: empty
             when the analysis is enabled, the disabling flag otherwise (selected by default).
             The dedup switch is only offered in the enabled branch. -->
        <section name="duplicated_reads" title="Duplicated Reads Options">
            <conditional name="handling_options">
                <param name="eval_dups" type="select" label="Enable duplicated reads analysis" help="If enabled, calculate and report read duplication statistics. Enabling this is also a prerequisite for optional deduplication of reads. Duplicate detection relies exclusively on exact identity between read sequences (both for SE and PE data). It also increases tool memory requirements and running time moderately. NOTE: the default (no duplication analysis) is different from the command-line tool.">
                    <option value="">Enable</option>
                    <option value="--dont_eval_duplication" selected="true">Disable (--dont_eval_duplication)</option>
                </param>
                <when value="--dont_eval_duplication" />
                <when value="">
                    <param argument="--dedup" type="boolean" truevalue="--dedup" falsevalue="" label="Drop duplicate reads/pairs"/>
                </when>
            </conditional>
        </section>
        <!-- Read Modification Options -->
        <!-- Groups polyG/polyX tail trimming, UMI processing, per read cutting by quality and base
             correction. Selector option values are the literal fastp flags placed on the command line. -->
        <section name="read_mod_options" title="Read Modification Options">
            <!-- Three modes: automatic (empty value), forced and disabled. The minimum length
                 parameter is only offered for the automatic and forced modes. -->
            <conditional name="polyg_tail_trimming">
                <param name="trimming_select" type="select" label="PolyG tail trimming" help="This feature is enabled for NextSeq/NovaSeq data by default. NextSeq/NovaSeq data is detected by the machine ID in the FASTQ records.">
                    <option value="" selected="true">Automatic trimming for Illumina NextSeq/NovaSeq data</option>
                    <option value="-g">Force polyG tail trimming</option>
                    <option value="-G">Disable polyG tail trimming</option>
                </param>
                <when value="-g">
                    <expand macro="poly_g_min_len" />
                </when>
                <when value="">
                    <expand macro="poly_g_min_len" />
                </when>
                <when value="-G" />
            </conditional>

            <conditional name="polyx_tail_trimming">
                <param name="polyx_trimming_select" type="select" label="PolyX tail trimming" help="Similar to polyG tail trimming. When polyG tail trimming and polyX tail trimming are both enabled, fastp will perform polyG trimming first, then perform polyX trimming. Disabled by default.">
                    <option value="" selected="true">Disable polyX trimming</option>
                    <option value="-x">Enable polyX tail trimming</option>
                </param>
                <when value="-x">
                    <param argument="--poly_x_min_len" type="integer" optional="true" label="PolyX minimum length"
                        help="The minimum length to detect polyX in the read tail. 10 by default."/>
                </when>
                <when value="" />
            </conditional>

            <!-- The command template only passes location, length and prefix when the UMI switch is enabled. -->
            <section name="umi_processing" title="UMI processing" expanded="True">
                <param name="umi" argument="-U" type="boolean" truevalue="-U" falsevalue="" checked="false" label="Enable unique molecular identifier" help="Enable unique molecular identifier (UMI) preprocessing."/>
                <param argument="--umi_loc" type="text" optional="true" label="UMI location" help="Specify the location of UMI, can be one of index1/index2/read1/read2/per_index/per_read. Default is none."/>
                <param argument="--umi_len" type="integer" optional="true" label="UMI length" help="If the UMI is in read1/read2, its length should be provided."/>
                <param argument="--umi_prefix" type="text" optional="true" label="UMI prefix" help="If specified, an underline will be used to connect prefix and UMI (i.e. prefix=UMI, UMI=AATTCG, final=UMI_AATTCG). No prefix by default."/>
            </section>

            <!-- Each cutting mode is a Yes/No select whose Yes value is the fastp flag. These are
                 select parameters, so the boolean-only attributes (truevalue, falsevalue, checked)
                 are not used; the default is given by the selected option. -->
            <section name="cutting_by_quality_options" title="Per read cutting by quality options" expanded="True">
                <conditional name="cut_front_select">
                    <param argument="--cut_front" type="select" label="Cut by quality in front (5')" help="Enable per read cutting by quality in front (5'). (WARNING: this will interfere with deduplication of both PE/SE data if performed with downstream tools.)">
                        <option value="--cut_front">Yes</option>
                        <option value="" selected="true">No</option>
                    </param>
                    <when value="--cut_front">
                        <param argument="--cut_front_window_size" type="integer" optional="true" value="4" min="1" max="1000" label="Cutting window size for cut front" help="The size of the sliding window for sliding window trimming."/>
                        <param argument="--cut_front_mean_quality" type="integer" optional="true" value="20" min="1" max="30" label="Cutting mean quality for cut front" help="The bases in the sliding window with mean quality below cutting_quality will be cut."/>
                    </when>
                    <when value="" />
                </conditional>
                <conditional name="cut_tail_select">
                    <param argument="--cut_tail" type="select" label="Cut by quality in tail (3')" help="Enable per read cutting by quality in tail (3'). (WARNING: this will interfere with deduplication of SE data if performed with downstream tools.)">
                        <option value="--cut_tail">Yes</option>
                        <option value="" selected="true">No</option>
                    </param>
                    <when value="--cut_tail">
                        <param argument="--cut_tail_window_size" type="integer" optional="true" value="4" min="1" max="1000" label="Cutting window size for cut tail" help="The size of the sliding window for sliding window trimming."/>
                        <param argument="--cut_tail_mean_quality" type="integer" optional="true" value="20" min="1" max="30" label="Cutting mean quality for cut tail" help="The bases in the sliding window with mean quality below cutting_quality will be cut."/>
                    </when>
                    <when value="" />
                </conditional>
                <!-- The label is distinct from the cut tail selector above so the two modes can be told apart in the form. -->
                <conditional name="cut_right_select">
                    <param argument="--cut_right" type="select" label="Cut right by quality (sliding window from front to tail)" help="Move a sliding window from front to tail, if meet one window with mean quality &lt; threshold, drop the bases in the window and the right part, and then stop. (WARNING: this will interfere with deduplication of SE data if performed with downstream tools.)">
                        <option value="--cut_right">Yes</option>
                        <option value="" selected="true">No</option>
                    </param>
                    <when value="--cut_right">
                        <param argument="--cut_right_window_size" type="integer" optional="true" value="4" min="1" max="1000" label="Cutting window size for cut right" help="The size of the sliding window for sliding window trimming."/>
                        <param argument="--cut_right_mean_quality" type="integer" optional="true" value="20" min="1" max="30" label="Cutting mean quality for cut right" help="The bases in the sliding window with mean quality below cutting_quality will be cut."/>
                    </when>
                    <when value="" />
                </conditional>
            </section>

            <section name="base_correction_options" title="Base correction by overlap analysis options" expanded="True">
                <param name="correction" argument="-c" type="boolean" truevalue="-c" falsevalue="" checked="false" label="Enable base correction" help="Enable base correction in overlapped regions (only for PE data), default is disabled."/>
            </section>
        </section>

        <!-- Switches for the two report outputs; both are on by default. The output filters test
             these values, the command template does not reference them. -->
        <section name="output_options" title="Output Options" expanded="False">
            <param name="report_html" type="boolean" truevalue="True" falsevalue="False" checked="True" label="Output HTML report" help="fastp provides a QC report for the data Before and After filtering within a single HTML page, which enables comparison of the quality statistics changed by the preprocessing step directly"/>
            <param name="report_json" type="boolean" truevalue="True" falsevalue="False" checked="True" label="Output JSON report" help="The JSON report contains all the data visualized in the HTML report. The format of the JSON report is manually optimized to be easily readable by humans and is compatible with MultiQC"/>
        </section>
    </inputs>

    <!-- Which outputs are created depends on the input mode, the merge settings and the report
         switches. Read outputs take their format from the corresponding input. -->
    <outputs>
        <!-- Single-end reads. -->
        <data name="out1" format_source="single_paired|in1" label="${tool.name} on ${on_string}: Read 1 output">
            <filter>single_paired['single_paired_selector'] == "single"</filter>
        </data>
        <!-- Paired reads when merging is not enabled. -->
        <collection name="output_paired_coll" type="paired" format_source="single_paired|paired_input['forward']" label="${tool.name} on ${on_string}: Paired-end output">
            <filter>single_paired['single_paired_selector'] == "paired_collection" and not single_paired['merge_reads']['merge']</filter>
        </collection>

        <!-- Reports are collected from the working directory (fastp.html / fastp.json). -->
        <data name="report_html" format="html" from_work_dir="fastp.html" label="${tool.name} on ${on_string}: HTML report">
            <filter>output_options['report_html'] is True</filter>
        </data>
        <data name="report_json" format="json" from_work_dir="fastp.json" label="${tool.name} on ${on_string}: JSON report">
            <filter>output_options['report_json'] is True</filter>
        </data>
        <!-- Merged reads when merging is enabled. -->
        <data name="merged_reads" format_source="single_paired|paired_input['forward']" label="${tool.name} on ${on_string}: Merged reads">
            <filter>single_paired['single_paired_selector'] == "paired_collection" and single_paired['merge_reads']['merge']</filter>
        </data>
        <!-- The two collections below only exist when merging is enabled and include_unmerged is not set;
             the command template fills them from the unmerged_R1/R2 and unpaired_R1/R2 files respectively. -->
        <collection name="unmerged_out_coll" type="paired" format_source="single_paired|paired_input['forward']" label="${tool.name} on ${on_string}: Unmerged filtered">
            <filter>single_paired['single_paired_selector'] == "paired_collection" and single_paired['merge_reads']['merge'] and not single_paired['merge_reads']['include_unmerged']</filter>
        </collection>
        <collection name="unpaired_out_coll" type="paired" format_source="single_paired|paired_input['forward']" label="${tool.name} on ${on_string}: Unmerged unfiltered">
            <filter>single_paired['single_paired_selector'] == "paired_collection" and single_paired['merge_reads']['merge'] and not single_paired['merge_reads']['include_unmerged']</filter>
        </collection>
    </outputs>

    <tests>
        <!-- 1. Ensure default output works -->
        <test expect_num_outputs="3">
            <conditional name="single_paired">
                <param name="single_paired_selector" value="single"/>
                <param name="in1" ftype="fastqsanger" value="R1.fq"/>
            </conditional>
            <output name="out1" ftype="fastqsanger" file="out1.fq"/>
            <output name="report_html">
                <assert_contents>
                    <has_text text="fastp report"/>
                    <not_has_text text="duplication rate:"/>
                </assert_contents>
            </output>
            <output name="report_json">
                <assert_contents>
                    <has_text text="fastp report"/>
                    <not_has_text text="&quot;duplication&quot;:"/>
                </assert_contents>
            </output>
        </test>
        <!-- 2. Ensure paired collection works -->
        <test expect_num_outputs="4">
            <conditional name="single_paired">
                <param name="single_paired_selector" value="paired_collection"/>
                <param name="paired_input">
                    <collection type="paired">
                        <element name="forward" value="bwa-mem-fastq1.fq" ftype="fastqsanger" />
                        <element name="reverse" value="bwa-mem-fastq2.fq" ftype="fastqsanger" />
                    </collection>
                </param>
            </conditional>
            <section name="output_options">
                <param name="report_json" value="False" />
            </section>
            <output name="report_html">
                <assert_contents>
                    <has_text text="fastp report"/>
                    <not_has_text text="duplication rate:"/>
                </assert_contents>
            </output>
            <output_collection name="output_paired_coll" type="paired">
                <element name="forward" value="out_bwa1.fq" ftype="fastqsanger"/>
                <element name="reverse" value="out_bwa2.fq" ftype="fastqsanger"/>
            </output_collection>
        </test>
        <!-- 3. Ensure custom adapter works -->
        <test expect_num_outputs="2">
            <conditional name="single_paired">
                <param name="single_paired_selector" value="single"/>
                <param name="in1" ftype="fastqsanger" value="R1.fq"/>
                <section name="adapter_trimming_options">
                    <param name="adapter_sequence1" value="ATCG"/>
                </section>
            </conditional>
            <section name="output_options">
                <param name="report_json" value="False" />
            </section>
            <output name="out1" ftype="fastqsanger" file="out_a.fq"/>
        </test>
        <!-- 4. Ensure UMI processing works -->
        <test expect_num_outputs="2">
            <conditional name="single_paired">
                <param name="in1" ftype="fastqsanger" value="R1.fq"/>
                <param name="single_paired_selector" value="single"/>
            </conditional>
            <section name="read_mod_options">
                <section name="umi_processing">
                    <param name="umi" value="true"/>
                    <param name="umi_loc" value="read1"/>
                    <param name="umi_len" value="8"/>
                </section>
            </section>
            <section name="output_options">
                <param name="report_json" value="False" />
            </section>
            <output name="out1" ftype="fastqsanger" file="out2.fq"/>
        </test>
        <!-- 5. Ensure UMI processing with different lengths works -->
        <test expect_num_outputs="2">
            <conditional name="single_paired">
                <param name="in1" ftype="fastqsanger" value="R1.fq"/>
                <param name="single_paired_selector" value="single"/>
            </conditional>
            <section name="read_mod_options">
                <section name="umi_processing">
                    <param name="umi" value="true"/>
                    <param name="umi_loc" value="read1"/>
                    <param name="umi_len" value="12"/>
                </section>
            </section>
            <section name="output_options">
                <param name="report_json" value="False" />
            </section>
            <output name="out1" ftype="fastqsanger" file="out3.fq"/>
        </test>
        <!-- 6. Ensure paired-end UMI processing of Read 1 works -->
        <test expect_num_outputs="4">
            <conditional name="single_paired">
                <param name="single_paired_selector" value="paired_collection"/>
                <param name="paired_input">
                    <collection type="paired">
                        <element name="forward" value="bwa-mem-fastq1.fq" ftype="fastqsanger" />
                        <element name="reverse" value="bwa-mem-fastq2.fq" ftype="fastqsanger" />
                    </collection>
                </param>
            </conditional>
            <section name="read_mod_options">
                <section name="umi_processing">
                    <param name="umi" value="true"/>
                    <param name="umi_loc" value="read1"/>
                    <param name="umi_len" value="8"/>
                </section>
            </section>
            <section name="output_options">
                <param name="report_json" value="False" />
            </section>
            <output_collection name="output_paired_coll" type="paired">
                <element name="forward" value="out_bwa_umi_read1_1.fq" ftype="fastqsanger"/>
                <element name="reverse" value="out_bwa_umi_read1_2.fq" ftype="fastqsanger"/>
            </output_collection>
        </test>
        <!-- 7. Ensure paired-end UMI processing of Read 2 works -->
        <test expect_num_outputs="5">
            <conditional name="single_paired">
                <param name="single_paired_selector" value="paired_collection"/>
                <param name="paired_input">
                    <collection type="paired">
                        <element name="forward" value="bwa-mem-fastq1.fq" ftype="fastqsanger" />
                        <element name="reverse" value="bwa-mem-fastq2.fq" ftype="fastqsanger" />
                    </collection>
                </param>
            </conditional>
            <section name="read_mod_options">
                <section name="umi_processing">
                    <param name="umi" value="true"/>
                    <param name="umi_loc" value="read2"/>
                    <param name="umi_len" value="8"/>
                </section>
            </section>
            <output_collection name="output_paired_coll" type="paired">
                <element name="forward" value="out_bwa_umi_read2_1.fq" ftype="fastqsanger"/>
                <element name="reverse" value="out_bwa_umi_read2_2.fq" ftype="fastqsanger"/>
            </output_collection>
            <output name="report_json">
                <assert_contents>
                    <has_text text="fastp report"/>
                </assert_contents>
            </output>
        </test>
        <!-- 8. Ensure enabling duplicate analysis works -->
        <test expect_num_outputs="3">
            <conditional name="single_paired">
                <param name="single_paired_selector" value="single"/>
                <param name="in1" ftype="fastqsanger" value="R1.fq"/>
            </conditional>
            <section name="duplicated_reads">
                <conditional name="handling_options">
                    <param name="eval_dups" value=""/>
                </conditional>
            </section>
            <output name="out1" ftype="fastqsanger" file="out1.fq"/>
            <output name="report_html">
                <assert_contents>
                    <has_text text="fastp report"/>
                    <has_text text="duplication rate:"/>
                </assert_contents>
            </output>
            <output name="report_json">
                <assert_contents>
                    <has_text text="fastp report"/>
                    <has_text text="&quot;duplication&quot;:"/>
                </assert_contents>
            </output>
        </test>
        <!-- 9. Ensure polyG trimming works:
             gzipped single-end input with trimming_select set to -g and
             poly_g_min_len set to 10; the reads output is compared with
             out1.fq.gz after decompression -->
        <test expect_num_outputs="3">
            <conditional name="single_paired">
                <param name="single_paired_selector" value="single"/>
                <param name="in1" ftype="fastqsanger.gz" value="R1.fq.gz"/>
            </conditional>
            <section name="read_mod_options">
                <conditional name="polyg_tail_trimming">
                    <param name="trimming_select" value="-g"/>
                    <param name="poly_g_min_len" value="10"/>
                </conditional>
            </section>
            <output name="out1" ftype="fastqsanger.gz" decompress="true" file="out1.fq.gz"/>
            <output name="report_json">
                <assert_contents>
                    <has_text text="fastp report"/>
                </assert_contents>
            </output>
        </test>
        <!-- 10. Ensure polyX trimming works:
             gzipped single-end input with the polyG selector set to -G and the
             polyX selector set to -x with poly_x_min_len 10; the reads output is
             compared with out1.fq.gz after decompression -->
        <test expect_num_outputs="3">
            <conditional name="single_paired">
                <param name="single_paired_selector" value="single"/>
                <param name="in1" ftype="fastqsanger.gz" value="R1.fq.gz"/>
            </conditional>
            <section name="read_mod_options">
                <conditional name="polyg_tail_trimming">
                    <param name="trimming_select" value="-G"/>
                </conditional>
                <conditional name="polyx_tail_trimming">
                    <param name="polyx_trimming_select" value="-x"/>
                    <param name="poly_x_min_len" value="10"/>
                </conditional>
            </section>
            <output name="out1" ftype="fastqsanger.gz" decompress="true" file="out1.fq.gz"/>
            <output name="report_json">
                <assert_contents>
                    <has_text text="fastp report"/>
                </assert_contents>
            </output>
        </test>
        <!-- 11. Test a paired collection whose forward and reverse fastqsanger files
             differ in length (the reverse element is bwa-mem-fastq2_too_long.fq) -->
        <!-- Adapted for the updated version of fastp, which now ignores unmatched reads and continues filtering as expected -->
        <!-- The JSON report is disabled, so the HTML report and the paired output
             collection are the only outputs checked -->
        <test expect_num_outputs="4">
            <conditional name="single_paired">
                <param name="single_paired_selector" value="paired_collection"/>
                <param name="paired_input">
                    <collection type="paired">
                        <element name="forward" value="bwa-mem-fastq1.fq" ftype="fastqsanger" />
                        <element name="reverse" value="bwa-mem-fastq2_too_long.fq" ftype="fastqsanger" />
                    </collection>
                </param>
            </conditional>
            <section name="output_options">
                <param name="report_json" value="False"/>
            </section>
            <output name="report_html">
                <assert_contents>
                    <has_text text="fastp report"/>
                </assert_contents>
            </output>
            <output_collection name="output_paired_coll" type="paired">
                <element name="forward" value="out_filtered_1.fq" ftype="fastqsanger"/>
                <element name="reverse" value="out_filtered_2.fq" ftype="fastqsanger"/>
            </output_collection>
        </test>
        <!-- 12. Test merge reads in combination with a paired collection (uncompressed fastqsanger input):
             both reports are disabled; checks the merged reads, the unmerged
             collection and the unpaired collection, whose reverse element must be empty -->
        <test expect_num_outputs="7">
            <conditional name="single_paired">
                <param name="single_paired_selector" value="paired_collection"/>
                <param name="paired_input">
                    <collection type="paired">
                        <element name="forward" value="bwa-mem-fastq1.fq" ftype="fastqsanger" />
                        <element name="reverse" value="bwa-mem-fastq2.fq" ftype="fastqsanger" />
                    </collection>
                </param>
                <conditional name="merge_reads">
                    <param name="merge" value="--merge" />
                </conditional>
            </conditional>
            <section name="output_options">
                <param name="report_json" value="False" />
                <param name="report_html" value="False" />
            </section>
            <output name="merged_reads" ftype="fastqsanger" file="bwa-mem-merged-reads.fastqsanger" />
            <output_collection name="unmerged_out_coll" type="paired">
                <element name="forward" value="bwa-mem-unmerged-filtered-reads1.fastqsanger" ftype="fastqsanger"/>
                <element name="reverse" value="bwa-mem-unmerged-filtered-reads2.fastqsanger" ftype="fastqsanger"/>
            </output_collection>
            <output_collection name="unpaired_out_coll" type="paired">
                <element name="forward" value="bwa-mem-unmerged-unfiltered-reads1.fastqsanger" ftype="fastqsanger"/>
                <element name="reverse" ftype="fastqsanger">
                    <assert_contents>
                        <has_size size="0" />
                    </assert_contents>
                </element>
            </output_collection>
        </test>
        <!-- 13. Test merge and include_unmerged in combination with paired collection (uncompressed fastqsanger input):
             the HTML report is disabled, so only merged_reads and the JSON report are expected -->
        <test expect_num_outputs="2">
            <conditional name="single_paired">
                <param name="single_paired_selector" value="paired_collection"/>
                <param name="paired_input">
                    <collection type="paired">
                        <element name="forward" value="bwa-mem-fastq1.fq" ftype="fastqsanger" />
                        <element name="reverse" value="bwa-mem-fastq2.fq" ftype="fastqsanger" />
                    </collection>
                </param>
                <conditional name="merge_reads">
                    <param name="merge" value="--merge" />
                    <param name="include_unmerged" value="true" />
                </conditional>
            </conditional>
            <section name="output_options">
                <param name="report_html" value="False" />
            </section>
            <output name="merged_reads" ftype="fastqsanger" file="bwa-mem-merged-read-include-unmerged.fastqsanger" />
            <output name="report_json">
                <assert_contents>
                    <has_text text="fastp report"/>
                </assert_contents>
            </output>
        </test>
        <!-- 14. Test merge reads in combination with a paired collection (gzip-compressed fastqsanger.gz input):
             same scenario as test 12 with compressed data; both reports are disabled
             and every file-based comparison is done after decompression -->
        <test expect_num_outputs="7">
            <conditional name="single_paired">
                <param name="single_paired_selector" value="paired_collection"/>
                <param name="paired_input">
                    <collection type="paired">
                        <element name="forward" value="bwa-mem-fastq1.fq.gz" ftype="fastqsanger.gz" />
                        <element name="reverse" value="bwa-mem-fastq2.fq.gz" ftype="fastqsanger.gz" />
                    </collection>
                </param>
                <conditional name="merge_reads">
                    <param name="merge" value="--merge" />
                </conditional>
            </conditional>
            <section name="output_options">
                <param name="report_html" value="False" />
                <param name="report_json" value="False" />
            </section>
            <output name="merged_reads" ftype="fastqsanger.gz" decompress="true" file="bwa-mem-merged-reads.fastqsanger.gz" />
            <output_collection name="unmerged_out_coll" type="paired">
                <element name="forward" value="bwa-mem-unmerged-filtered-reads1.fastqsanger.gz" decompress="true" ftype="fastqsanger.gz"/>
                <element name="reverse" value="bwa-mem-unmerged-filtered-reads2.fastqsanger.gz" decompress="true" ftype="fastqsanger.gz"/>
            </output_collection>
            <output_collection name="unpaired_out_coll" type="paired">
                <element name="forward" value="bwa-mem-unmerged-unfiltered-reads1.fastqsanger.gz" decompress="true" ftype="fastqsanger.gz"/>
                <element name="reverse" ftype="fastqsanger.gz">
                    <assert_contents>
                        <has_size size="0" />
                    </assert_contents>
                </element>
            </output_collection>
        </test>
        <!-- 15. Test merge and include_unmerged in combination with paired collection (gzip-compressed fastqsanger.gz input):
             same scenario as test 13 with compressed data; the HTML report is disabled,
             so only merged_reads (compared after decompression) and the JSON report are expected -->
        <test expect_num_outputs="2">
            <conditional name="single_paired">
                <param name="single_paired_selector" value="paired_collection"/>
                <param name="paired_input">
                    <collection type="paired">
                        <element name="forward" value="bwa-mem-fastq1.fq.gz" ftype="fastqsanger.gz" />
                        <element name="reverse" value="bwa-mem-fastq2.fq.gz" ftype="fastqsanger.gz" />
                    </collection>
                </param>
                <conditional name="merge_reads">
                    <param name="merge" value="--merge" />
                    <param name="include_unmerged" value="true" />
                </conditional>
            </conditional>
            <section name="output_options">
                <param name="report_html" value="False" />
            </section>
            <output name="merged_reads" ftype="fastqsanger.gz" decompress="true" file="bwa-mem-merged-read-include-unmerged.fastqsanger.gz" />
            <output name="report_json">
                <assert_contents>
                    <has_text text="fastp report"/>
                </assert_contents>
            </output>
        </test>
        <!-- 16. Test paired collection in combination with compressed input:
             no merging is requested and the JSON report is disabled; checks the HTML
             report and the paired output collection (compared after decompression) -->
        <test expect_num_outputs="4">
            <conditional name="single_paired">
                <param name="single_paired_selector" value="paired_collection"/>
                <param name="paired_input">
                    <collection type="paired">
                        <element name="forward" value="bwa-mem-fastq-paired-collection/input_forward.fastqsanger.gz" ftype="fastqsanger.gz" />
                        <element name="reverse" value="bwa-mem-fastq-paired-collection/input_reverse.fastqsanger.gz" ftype="fastqsanger.gz" />
                    </collection>
                </param>
            </conditional>
            <section name="output_options">
                <param name="report_json" value="False" />
            </section>
            <output name="report_html">
                <assert_contents>
                    <has_text text="fastp report"/>
                </assert_contents>
            </output>
            <output_collection name="output_paired_coll" type="paired">
                <element name="forward" value="bwa-mem-fastq-paired-collection/output_forward.fastqsanger.gz" decompress="true" ftype="fastqsanger.gz"/>
                <element name="reverse" value="bwa-mem-fastq-paired-collection/output_reverse.fastqsanger.gz" decompress="true" ftype="fastqsanger.gz"/>
            </output_collection>
        </test>
        <!-- 17. Ensure quality cutting works:
             cut_front, cut_tail and cut_right are each selected with a distinct window
             size and mean quality (2/3, 4/5 and 6/7); the JSON report must contain every
             one of these options together with its value -->
        <test expect_num_outputs="3">
            <conditional name="single_paired">
                <param name="single_paired_selector" value="single"/>
                <param name="in1" ftype="fastqsanger.gz" value="R1.fq.gz"/>
            </conditional>
            <section name="read_mod_options">
                <section name="cutting_by_quality_options">
                    <conditional name="cut_front_select">
                        <param name="cut_front" value="--cut_front"/>
                        <param name="cut_front_window_size" value="2"/>
                        <param name="cut_front_mean_quality" value="3"/>
                    </conditional>
                    <conditional name="cut_tail_select">
                        <param name="cut_tail" value="--cut_tail"/>
                        <param name="cut_tail_window_size" value="4"/>
                        <param name="cut_tail_mean_quality" value="5"/>
                    </conditional>
                    <conditional name="cut_right_select">
                        <param name="cut_right" value="--cut_right"/>
                        <param name="cut_right_window_size" value="6"/>
                        <param name="cut_right_mean_quality" value="7"/>
                    </conditional>
                </section>
            </section>
            <output name="out1" ftype="fastqsanger.gz" decompress="true" file="quality_cutting_output.fq.gz"/>
            <output name="report_json">
                <assert_contents>
                    <has_text text="--cut_front"/>
                    <has_text text="--cut_tail"/>
                    <has_text text="--cut_right"/>
                    <has_text text="--cut_front_window_size 2"/>
                    <has_text text="--cut_front_mean_quality 3"/>
                    <has_text text="--cut_tail_window_size 4"/>
                    <has_text text="--cut_tail_mean_quality 5"/>
                    <has_text text="--cut_right_window_size 6"/>
                    <has_text text="--cut_right_mean_quality 7"/>
                </assert_contents>
            </output>
        </test>
        <!-- 18. Ensure deduplication works:
             single-end input R1_with_dup.fq with dedup set to true must yield out1.fq;
             the HTML report is disabled and the JSON report must contain a
             "duplication" key -->
        <test expect_num_outputs="2">
            <conditional name="single_paired">
                <param name="single_paired_selector" value="single"/>
                <param name="in1" ftype="fastqsanger" value="R1_with_dup.fq"/>
            </conditional>
            <section name="duplicated_reads">
                <conditional name="handling_options">
                    <param name="eval_dups" value=""/>
                    <param name="dedup" value="true"/>
                </conditional>
            </section>
            <section name="output_options">
                <param name="report_html" value="false"/>
            </section>
            <output name="out1" ftype="fastqsanger" file="out1.fq"/>
            <output name="report_json">
                <assert_contents>
                    <has_text text="fastp report"/>
                    <has_text text="&quot;duplication&quot;:"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
.. class:: infomark

**What it does**

fastp_ is a tool designed to provide fast all-in-one preprocessing for FASTQ files. It is developed in C++ and supports multithreading to
achieve high performance.

*Features*

1. Filter out bad (too low quality, too short, or too many N...) and/or duplicate reads

2. Cut low-quality bases from the 5' and 3' ends of each read by evaluating the mean quality in a sliding window (like Trimmomatic but faster)

3. Trim all reads in front and tail

4. Cut adapters. Adapter sequences can be automatically detected, which means you don't have to input the adapter sequences to trim them.

5. Correct mismatched base pairs in overlapped regions of paired-end reads when one base has high quality while the other has ultra-low quality

6. Trim polyG in 3' ends, which is commonly seen in NovaSeq/NextSeq data. Trim polyX in 3' ends to remove unwanted polyX tailing (e.g. polyA tailing for mRNA-Seq data)

7. Preprocess unique molecular identifier (UMI) enabled data, shift UMI to sequence name

8. Report JSON format result for further interpreting

9. Visualize quality control and filtering results on a single HTML page (like FASTQC but faster and more informative)

10. Split the output to multiple files (0001.R1.gz, 0002.R1.gz...) to support parallel processing. Two modes can be used, limiting the total split file number, or limiting the lines of each split file (*Not enabled in this Galaxy tool*)

11. Support long reads (data from PacBio / Nanopore devices)

-----

**Inputs**

Single-end or paired-end fastqsanger files (optionally gzip-compressed)

-----

**Outputs**

    * Processed reads
    * Merged reads
    * Unmerged filtered reads, reads that cannot be merged successfully, but both pass all the filters.
    * Unmerged unfiltered reads, reads that cannot be merged because only one read of the pair passes the filters (e.g. **forward** passes but **reverse** doesn't).

Optionally, under **Output Options** you can choose to output

    * HTML report (default is Yes)
    * JSON report (compatible with MultiQC)

.. _fastp: https://github.com/OpenGene/fastp

]]></help>
    <citations>
        <citation type="doi">10.1101/274100</citation>
    </citations>
</tool>
author	iuc
date	Sun, 19 Oct 2025 07:27:04 +0000
parents	1c183b0a6cfd
children