view fastplong.xml @ 5:e047516c73de draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fastplong commit 959b0a681d482a3882d3a4deb1c5baa9bf0d6ab9
author iuc
date Tue, 09 Sep 2025 09:32:31 +0000
parents 0a4456785b0a
children
line wrap: on
line source

<tool id="fastplong" name="Fastplong" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="24.2" license="MIT">
    <description>Filter and trim long reads</description>
    <!-- Authorship metadata for this wrapper. -->
    <creator>
        <organization name="Masaryk University" url="https://www.muni.cz/"/>
        <person givenName="Hana" familyName="Resovska" email="499924@mail.muni.cz"/>
    </creator>
    <!-- Version tokens: both are substituted into the tool's version attribute;
         @TOOL_VERSION@ additionally pins the package requirement below. -->
    <macros>
        <token name="@TOOL_VERSION@">0.4.1</token>
        <token name="@VERSION_SUFFIX@">0</token>
    </macros>
    <requirements>
        <requirement type="package" version="@TOOL_VERSION@">fastplong</requirement>
    </requirements>
    <command detect_errors="exit_code">
 <![CDATA[
 #import re

 ## Symlink the input under a sanitized name that carries a FASTQ extension;
 ## the same extension is reused for the output file name below.

 #set ext = '.fastq'

 #if $in.is_of_type('fastq.gz')
    #set ext = '.fastq.gz'
 #end if

 #set $in_name = re.sub('[^\w\-\s]', '_', str($in.element_identifier)) + $ext
 ln -sf '$in' '$in_name' &&

 ## Run fastplong
 fastplong

 --thread \${GALAXY_SLOTS:-1}
 --report_title 'fastplong report for $in_name'
 --in '$in_name'
 --out long${ext}

 ## Optional numeric fields are tested with str() throughout this template so
 ## that an explicit 0 is still forwarded and only an empty field is skipped.
 #if str($reads_to_process):
    --reads_to_process '$reads_to_process'
 #end if

 ## Adapter Trimming Options

 #if $adapter_trimming_options.adapter_trim.disable_adapter_trimming == 'yes'
    --disable_adapter_trimming
 #else

   #if str($adapter_trimming_options.adapter_trim.start_adapter):
     --start_adapter '$adapter_trimming_options.adapter_trim.start_adapter'
   #end if

   #if str($adapter_trimming_options.adapter_trim.end_adapter):
     --end_adapter '$adapter_trimming_options.adapter_trim.end_adapter'
   #end if

   ## The FASTA parameter lives inside the adapter_trim conditional, so both the
   ## guard and the argument must use the full conditional path.
   #if $adapter_trimming_options.adapter_trim.adapter_fasta:
     --adapter_fasta '$adapter_trimming_options.adapter_trim.adapter_fasta'
   #end if

   ## distance_threshold is declared optional; skip the flag when the field is
   ## empty, otherwise the next flag would be consumed as its value.
   #if str($adapter_trimming_options.adapter_trim.distance_threshold):
     --distance_threshold $adapter_trimming_options.adapter_trim.distance_threshold
   #end if

   ## trimming_extension is not optional, so it always has a value.
   --trimming_extension $adapter_trimming_options.adapter_trim.trimming_extension
 #end if

 ## Global Trimming Options

 #if str($global_trimming_options.trim_front):
     --trim_front $global_trimming_options.trim_front
 #end if

 #if str($global_trimming_options.trim_tail):
     --trim_tail $global_trimming_options.trim_tail
 #end if


 ## Filter Options

 ## Quality filtering options

 #if $filter_options.quality_filtering_options.qual_filt.disable_quality_filtering == 'yes'
   --disable_quality_filtering
 #else

   #if str($filter_options.quality_filtering_options.qual_filt.qualified_quality_phred):
     --qualified_quality_phred $filter_options.quality_filtering_options.qual_filt.qualified_quality_phred
   #end if
   #if str($filter_options.quality_filtering_options.qual_filt.unqualified_percent_limit):
     --unqualified_percent_limit $filter_options.quality_filtering_options.qual_filt.unqualified_percent_limit
   #end if
   #if str($filter_options.quality_filtering_options.qual_filt.n_base_limit):
     --n_base_limit $filter_options.quality_filtering_options.qual_filt.n_base_limit
   #end if
   #if str($filter_options.quality_filtering_options.qual_filt.mean_qual):
     --mean_qual $filter_options.quality_filtering_options.qual_filt.mean_qual
   #end if
 #end if

 ## Length filtering options

 #if $filter_options.length_filtering_options.len_filt.disable_length_filtering == 'yes'
   --disable_length_filtering
 #else
   #if str($filter_options.length_filtering_options.len_filt.length_required):
     --length_required $filter_options.length_filtering_options.len_filt.length_required
   #end if

   #if str($filter_options.length_filtering_options.len_filt.length_limit):
     --length_limit $filter_options.length_filtering_options.len_filt.length_limit
   #end if
 #end if

 ## Low complexity filtering options

 #if $filter_options.low_complexity_filter.compl_filt.low_complexity_filter == 'yes'
   --low_complexity_filter

   #if str($filter_options.low_complexity_filter.compl_filt.complexity_threshold):
     --complexity_threshold $filter_options.low_complexity_filter.compl_filt.complexity_threshold
   #end if
 #end if

 ## Read Modification Options

 ### PolyX tail trimming

 #if $read_mod_options.polyx_tail_trimming.polyx.trim_poly_x == 'yes'
    --trim_poly_x
    #if str($read_mod_options.polyx_tail_trimming.polyx.poly_x_min_len):
       --poly_x_min_len $read_mod_options.polyx_tail_trimming.polyx.poly_x_min_len
    #end if
 #end if

 ### Per read cutting by quality options
 ## 'both' enables the front and the tail branch; 'none' enables neither.

 #set $qual_trim = $read_mod_options.cutting_by_quality_options.qual_trim
 #set $trim_side = str($qual_trim.trim_side)

 #if $trim_side in ('front', 'both')
    --cut_front

    #if str($qual_trim.cut_front_window_size):
        --cut_front_window_size $qual_trim.cut_front_window_size
    #end if
    #if str($qual_trim.cut_front_mean_quality):
        --cut_front_mean_quality $qual_trim.cut_front_mean_quality
    #end if
 #end if

 #if $trim_side in ('tail', 'both')
    --cut_tail

    #if str($qual_trim.cut_tail_window_size):
        --cut_tail_window_size $qual_trim.cut_tail_window_size
    #end if
    #if str($qual_trim.cut_tail_mean_quality):
        --cut_tail_mean_quality $qual_trim.cut_tail_mean_quality
    #end if
 #end if

 ## The boolean's truevalue is the literal flag, so it is emitted verbatim
 ## followed by the fixed file name that the failed_out output collects.
 #if str($output_options.failed_out):
    $output_options.failed_out failed_reads.fastq
 #end if

 &&
 mv long${ext} '${out}'

  ]]>
    </command>
    <inputs>
        <!-- Required FASTQ input (plain or gzip-compressed) and an optional cap on the number of reads. -->
        <param argument="--in" type="data" format="fastq,fastq.gz" label="Input Long Reads" help="Input FASTQ file with long reads"/>
        <param argument="--reads_to_process" type="integer" optional="true" label="Number of reads to process" help="Limit the number of reads to process. Useful for quick QC or creating subsets."/>

        <!-- Adapter trimming: the detailed options are only offered while trimming
             is left enabled (selector value "no"). -->
        <section name="adapter_trimming_options" title="Adapter Trimming Options" expanded="False">
            <conditional name="adapter_trim">
                <param type="select" argument="--disable_adapter_trimming" label="Disable adapter trimming" help="Adapter trimming is enabled by default. If this option is specified, adapter trimming is disabled.">
                    <option value="no" selected="true">NO</option>
                    <option value="yes">YES</option>
                </param>
                <when value="no">
                    <!-- Sanitizer whitelist: only A, T, C and G are accepted; invalid_char is empty. -->
                    <param argument="--start_adapter" type="text" optional="true" label="Adapter sequence for read start" help="The adapter sequence (uppercase only) at read start (5'). Leave blank to auto-detect.">
                        <sanitizer invalid_char="" >
                            <valid initial="none">
                                <add value="A"/>
                                <add value="T"/>
                                <add value="C"/>
                                <add value="G"/>
                            </valid>
                        </sanitizer>
                    </param>
                    <!-- Same A/T/C/G whitelist as the start adapter. -->
                    <param argument="--end_adapter" type="text" optional="true" label="Adapter sequence for read end" help="The adapter sequence (uppercase only) at read end (3'). Leave blank to auto-detect.">
                        <sanitizer invalid_char="">
                            <valid initial="none">
                                <add value="A"/>
                                <add value="T"/>
                                <add value="C"/>
                                <add value="G"/>
                            </valid>
                        </sanitizer>
                    </param>
                    <param argument="--adapter_fasta" type="data" format="fasta" optional="true" label="FASTA file with adapter sequences" help="Specify a FASTA file to trim the reads by all the sequences in this FASTA file."/>
                    <!-- NOTE(review): distance_threshold is optional and may be empty; the command template must not emit the flag without a value. -->
                    <param argument="--distance_threshold" type="float" optional="true" value="0.25" min="0.0" max="1.0" label="Adapter Distance Threshold" help="Threshold of sequence-adapter-distance/adapter-length (0.0 ~ 1.0), Higher values increase adapter detection sensitivity. ([0.25])" />
                    <param argument="--trimming_extension" type="integer" value="10" min="0" max="50" label="Adapter trimming extension" help="When an adapter is detected, extend the trimming to make cleaner trimming, default 10 means trimming 10 bases more [10]."/>
                </when>
                <when value="yes"/>
            </conditional>
        </section>
 
        <!-- Fixed-length trimming applied to every read; both fields are optional and are omitted from the command line when left empty. -->
        <section name="global_trimming_options" title="Global trimming options" expanded="False">
            <param argument="--trim_front" type="integer" optional="true" label="Trim front (5') of read" help="Trim this many bases from the start (5') of each read. Default: 0."/>
            <param argument="--trim_tail" type="integer" optional="true" label="Trim tail (3') of read" help="Trim this many bases from the end (3') of each read. Default: 0."/>
        </section>

        <!-- Filter Options -->
        <section name="filter_options" title="Filter Options">
            <!-- Quality filter: on unless explicitly disabled; thresholds are only shown while it is on. -->
            <section name="quality_filtering_options" title="Quality filtering options" expanded="True">
                <conditional name="qual_filt"> 
                    <param argument="--disable_quality_filtering" type="select" label="Disable quality filtering" help="Quality filtering is enabled by default. If this option is specified, quality filtering is disabled.">
                        <option value="no" selected="true">NO</option>
                        <option value="yes">YES</option>
                    </param>
                    <when value="no">
                        <param argument="--qualified_quality_phred" type="integer" optional="true" min="0" max="60" value="15" label="Qualified quality phred" help="The quality value that a base is qualified. Default 15 means phred quality >=Q15 is qualified."/>
                        <param argument="--unqualified_percent_limit" type="integer" optional="true" min="0" max="100" value="40" label="Unqualified percent limit" help="How many percent of bases are allowed to be unqualified (0~100). Default 40 means 40%."/>
                        <param argument="--n_base_limit" type="integer" optional="true" value="5" label="N base limit" help="If a read's number of N base is &gt;n_base_limit, then this read is discarded. Default is 5."/>
                        <param argument="--mean_qual" type="integer" optional="true" value="0" min="0" max="60" label="Mean read quality" help="If a read's mean_qual quality score &lt;  mean_qual, then this read is discarded. Default 0 means no requirement."/>
                    </when>
                    <when value="yes"/>
                </conditional>
            </section>

            <!-- Length filter: same on-by-default pattern as the quality filter. -->
            <section name="length_filtering_options" title="Length filtering options" expanded="True">
                <conditional name="len_filt">
                    <param argument="--disable_length_filtering" type="select" label="Disable length filtering" help="Length filtering is enabled by default. If this option is specified, length filtering is disabled.">
                        <option value="no" selected="true">NO</option>
                        <option value="yes">YES</option>
                    </param>
                    <when value="no">
                        <param argument="--length_required" type="integer" optional="true" value="15" min="1" label="Length required" help="Reads shorter than this value will be discarded. Default is 15."/>
                        <param argument="--length_limit" type="integer" optional="true" value="0" min="0" label="Maximum length" help="Reads longer than this value will be discarded. Default is 0 and means no limitation."/>
                    </when>
                    <when value="yes"/>
                </conditional>
            </section>

            <!-- Low complexity filter: opt-in (note the inverted polarity compared with the two filters above). -->
            <section name="low_complexity_filter" title="Low complexity filtering options" expanded="True">
                <conditional name="compl_filt">
                    <param argument="--low_complexity_filter" type="select" label="Enable low complexity filter" help="The complexity is defined as the percentage of bases that is different from its next base. Default is No">
                        <option value="no" selected="true">NO</option>
                        <option value="yes">YES</option>
                    </param>
                    <when value="yes">
                        <param argument="--complexity_threshold" type="integer" optional="true" min="0" max="100" value="30" label="Complexity threshold (%)" help="Threshold for low complexity filter (0~100). Default is 30, which means 30% complexity is required."/>
                    </when>
                    <when value="no"/>
                </conditional>
            </section>
        </section>

        <!-- Read Modification Options -->
        <section name="read_mod_options" title="Read Modification Options">
            <!-- PolyX trimming is opt-in; the minimum length is only offered once it is enabled. -->
            <section name="polyx_tail_trimming" title="Polyx trimming on tail" expanded="False">
                <conditional name="polyx">
                    <param argument="--trim_poly_x" type="select" label="PolyX tail trimming" help="Enable polyX trimming on 3' ends.">
                        <option value="no" selected="true">NO</option>
                        <option value="yes">YES</option>
                    </param>
                    <when value="yes">
                        <param argument="--poly_x_min_len" type="integer" optional="true" value="10" label="PolyX minimum length"
                        help="The minimum length to detect polyX in the read tail. 10 by default."/>
                    </when>
                    <when value="no"/>
                </conditional>
            </section>

            <!-- Sliding-window quality cutting. The same front/tail parameters are
                 repeated in each branch, so their bounds must stay identical across
                 branches: window size 1-1000, mean quality 1-30.
                 NOTE(review): the upper bound of 30 is the value used by the other
                 three declarations; confirm against the limit fastplong enforces. -->
            <section name="cutting_by_quality_options" title="Per read cutting by quality options" expanded="True">
                <conditional name="qual_trim">
                    <param name="trim_side" type="select" label="Activate trimming by quality" help="Enable per read trimming by quality on both sides/ front (5') only or tail (3') only, move a sliding window from over the read starting from the specified side, drop the bases in the window if its mean quality &lt; threshold until window mean quality is above threshold">
                        <option value="none" selected="true">No quality trimming</option>
                        <option value="both">quality trimming from both sides</option>
                        <option value="front">quality trimming from front (5') only</option>
                        <option value="tail">quality trimming from tail (3') only</option>
                    </param>
                    <when value="none"/>
                    <when value="both">
                        <param argument="--cut_front_window_size" type="integer" optional="true" value="4" min="1" max="1000" label="Front window size" help="Override window size for cut_front only. Default: cut_window_size"/>
                        <param argument="--cut_front_mean_quality" type="integer" optional="true" value="20" min="1" max="30" label="Front mean quality" help="Override quality threshold for cut_front only. Default: cut_mean_quality"/>
                        <param argument="--cut_tail_window_size" type="integer" optional="true" value="4" min="1" max="1000" label="Tail window size" help="Override window size for cut_tail only. Default: cut_window_size"/>
                        <param argument="--cut_tail_mean_quality" type="integer" optional="true" value="20" min="1" max="30" label="Tail mean quality" help="Override quality threshold for cut_tail only. Default: cut_mean_quality"/>
                    </when>
                    <when value="front">
                        <param argument="--cut_front_window_size" type="integer" optional="true" value="4" min="1" max="1000" label="Front window size" help="Override window size for cut_front only. Default: cut_window_size"/>
                        <param argument="--cut_front_mean_quality" type="integer" optional="true" value="20" min="1" max="30" label="Front mean quality" help="Override quality threshold for cut_front only. Default: cut_mean_quality"/>
                    </when>
                    <when value="tail">
                        <param argument="--cut_tail_window_size" type="integer" optional="true" value="4" min="1" max="1000" label="Tail window size" help="Override window size for cut_tail only. Default: cut_window_size"/>
                        <param argument="--cut_tail_mean_quality" type="integer" optional="true" value="20" min="1" max="30" label="Tail mean quality" help="Override quality threshold for cut_tail only. Default: cut_mean_quality"/>
                    </when>
                </conditional>
            </section>
        </section>

        <!-- report_html and report_json are not passed to the command line; they only
             drive the output filters. failed_out's truevalue is the literal flag that
             the command template emits. -->
        <section name="output_options" title="Output Options" expanded="False">
            <param name="report_html" type="boolean" truevalue="True" falsevalue="False" checked="True" label="Output HTML report" help="Fastplong provides a QC report for the data before and after filtering within a single HTML page, which enables comparison of the quality statistics changed by the preprocessing step directly"/>
            <param name="report_json" type="boolean" truevalue="True" falsevalue="False" checked="False" label="Output JSON report" help="The JSON report contains all the data visualized in the HTML report. The format of the JSON report is manually optimized to be easily readable by humans and is compatible with MultiQC"/>
            <param argument="--failed_out" type="boolean" truevalue="--failed_out" falsevalue="" checked="false" label="Save failed reads?" help="Enable this to write failed reads to a separate FASTQ file."/>
        </section>
    </inputs>

    <outputs>
        <!-- Filtered reads: written by the command's final mv; inherits the input's datatype. -->
        <data name="out" format_source="in" label="${tool.name} on ${on_string}: Reads FASTQ output"/>
        <!-- The reports are collected from the working directory only when the matching checkbox is set. -->
        <data name="report_html" format="html" from_work_dir="fastplong.html" label="${tool.name} on ${on_string}: HTML report">
            <filter>output_options['report_html'] is True</filter>
        </data>
        <data name="report_json" format="json" from_work_dir="fastplong.json" label="${tool.name} on ${on_string}: JSON report">
            <filter>output_options['report_json'] is True</filter>
        </data>
        <!-- File name must match the one the command template passes after the failed_out flag. -->
        <data name="failed_out" format="fastq" from_work_dir="failed_reads.fastq" label="${tool.name} on ${on_string}: Failed Reads FASTQ">
            <filter>output_options['failed_out'] is True</filter>
        </data>
    </outputs>

    <tests>
        <!-- Defaults: reads output plus the HTML report; checks report content only. -->
        <test expect_num_outputs="2">
            <param name="in" value="input.fastq"/>
            <output name="report_html">
                <assert_contents>
                    <has_text text="html"/>
                    <has_text text="fastplong report"/>
                </assert_contents>
            </output>
        </test>
        
        <!-- Quality cutting from both sides with the JSON report enabled; the report is
             searched for the cutting flags and their default values. -->
        <test expect_num_outputs="3">
            <param name="in" value="input.fastq"/>
            <section name="read_mod_options">
                <section name="cutting_by_quality_options">
                    <conditional name="qual_trim">
                        <param name="trim_side" value="both"/>
                    </conditional>
                </section>
            </section>
            <section name="output_options">
                <param name="report_json" value="true"/>
            </section>
            <output name="out" file="quality_cutting_output.fastq"/>
            <output name="report_json">
				<assert_contents>
					<has_text text='summary": {'/>
					<has_text text='fastplong_version": "@TOOL_VERSION@'/>
					<has_text text="--cut_front"/>
					<has_text text="--cut_tail"/>
					<has_text text="--cut_front_window_size 4"/>
					<has_text text="--cut_front_mean_quality 20"/>
					<has_text text="--cut_tail_window_size 4"/>
					<has_text text="--cut_tail_mean_quality 20"/>
				</assert_contents>
            </output>
        </test>
        <!-- Defaults again, this time comparing the filtered reads against a reference file. -->
        <test expect_num_outputs="2">
            <param name="in" value="input.fastq"/>
            <output name="out" file="output.fastq"/>
        </test>
        <!-- Limit processing to the first 3 reads. -->
        <test expect_num_outputs="2">
            <param name="in" value="input.fastq"/>
            <param name="reads_to_process" value="3"/>
            <output name="out" file="output_reads_to_process.fastq"/>
        </test>
        <!-- Failed reads requested: adds the failed_out dataset as a third output. -->
        <!-- NOTE(review): no test exercises adapter_fasta or an emptied distance_threshold. -->
        <test expect_num_outputs="3">
            <param name="in" value="input.fastq"/>
            <section name="output_options">
                <param name="failed_out" value="true"/>
            </section>
            <output name="failed_out" value="output_failed_out.fastq"/>
        </test>
    </tests>
    <help><![CDATA[
.. class:: infomark

**What it does**

fastplong_ is a tool designed to provide fast all-in-one preprocessing for long reads FASTQ files. This tool is developed in C++ with multithreading supported to
afford high performance.

*Features*

1. Filter out bad reads (too low quality, too short, or too many N...)

2. Cut low-quality bases per read at its 5' and 3' ends by evaluating the mean quality from a sliding window (like Trimmomatic but faster)

3. Trim all reads in front and tail

4. Cut adapters. Adapter sequences can be automatically detected, which means you don't have to input the adapter sequences to trim them.

5. Trim polyX in 3' ends to remove unwanted polyX tailing (i.e. polyA tailing for mRNA-Seq data)

6. Report JSON format result for further interpreting

7. Visualize quality control and filtering results on a single HTML page (like FASTQC but faster and more informative)


-----

**Inputs**

Single-end FASTQ or FASTQ.GZ long reads

-----

**Outputs**

    * Processed reads

Optionally, under **Output Options** you can choose to output

    * HTML report (default is Yes)
    * JSON report (compatible with MultiQC)
    * Failed reads (default is No)

**More Information**

- **Official Repository**: `GitHub - OpenGene/fastplong <https://github.com/OpenGene/fastplong>`__

.. _fastplong: https://github.com/OpenGene/fastplong

]]></help>
    <!-- DOI references cited for this tool. -->
    <citations>
        <citation type="doi">10.1002/imt2.107</citation>
        <citation type="doi">10.1093/bioinformatics/bty560</citation>
    </citations>
</tool>