diff mgnify_seqprep.xml @ 0:76ea9d4604bc draft default tip

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/mgnify_seqprep commit fd696b8f2ce44287b6ad19fe52277cfdbd7e94fb
author bgruening
date Tue, 14 May 2024 09:49:32 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mgnify_seqprep.xml	Tue May 14 09:49:32 2024 +0000
@@ -0,0 +1,347 @@
+<tool id="mgnify_seqprep" name="Merging paired-end Illumina reads (SeqPrep, modified for use with MGnify pipelines)" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="22.05">
+    <description>Merge and Trim Adapter Sequences from Paired-End Illumina Reads</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="biotools"/>
+    <expand macro="requirements"/>
+    <expand macro="creators"/>
+    <command detect_errors="exit_code"><![CDATA[
+    ## Assemble the SeqPrep command line.
+    ##
+    ## Conventions used below:
+    ##   * Optional numeric parameters are tested with str(...) != '' instead of
+    ##     plain truthiness, so that an explicit 0 / 0.0 entered by the user is
+    ##     still forwarded (a bare "#if $param" is false for 0 and would make
+    ##     SeqPrep silently fall back to its own default).
+    ##   * Boolean parameters already carry the flag in their truevalue and an
+    ##     empty falsevalue, so they are emitted directly without an argument.
+    SeqPrep
+        -f '${input1}'
+        -r '${input2}'
+        -1 '${output1}'
+        -2 '${output2}'
+
+        ## The merged-reads file is only requested when merging is enabled;
+        ## the "merged" output dataset is filtered on the same parameter.
+        #if $merge_reads
+            -s '${merged}'
+        #end if
+
+        ## General Arguments ##
+        #if $general_options.first_read_discarded
+            -3 '${general_options.first_read_discarded}'
+        #end if
+        #if $general_options.second_read_discarded
+            -4 '${general_options.second_read_discarded}'
+        #end if
+        ## expands to "-6" when checked, to nothing otherwise
+        $general_options.phred64
+        #if str($general_options.quality_cutoff) != ''
+            -q '${general_options.quality_cutoff}'
+        #end if
+        #if str($general_options.min_length) != ''
+            -L '${general_options.min_length}'
+        #end if
+
+        ## Additional Adapter/Primer Trimming Arguments ##
+        #if $trimming_options.adapter_a
+            -A '${trimming_options.adapter_a}'
+        #end if
+        #if $trimming_options.adapter_b
+            -B '${trimming_options.adapter_b}'
+        #end if
+        #if str($trimming_options.adapter_overlap) != ''
+            -O '${trimming_options.adapter_overlap}'
+        #end if
+        #if str($trimming_options.max_mismatch_fraction) != ''
+            -M '${trimming_options.max_mismatch_fraction}'
+        #end if
+        #if str($trimming_options.min_match_fraction) != ''
+            -N '${trimming_options.min_match_fraction}'
+        #end if
+        #if str($trimming_options.adapter_bandwidth) != ''
+            -b '${trimming_options.adapter_bandwidth}'
+        #end if
+        #if str($trimming_options.gap_open) != ''
+            -Q '${trimming_options.gap_open}'
+        #end if
+        #if str($trimming_options.gap_extend) != ''
+            -t '${trimming_options.gap_extend}'
+        #end if
+        #if str($trimming_options.gap_end) != ''
+            -e '${trimming_options.gap_end}'
+        #end if
+        #if str($trimming_options.local_alignment_score) != ''
+            -Z '${trimming_options.local_alignment_score}'
+        #end if
+        #if str($trimming_options.read_alignment_bandwidth) != ''
+            -w '${trimming_options.read_alignment_bandwidth}'
+        #end if
+        #if str($trimming_options.read_alignment_gap_open) != ''
+            -W '${trimming_options.read_alignment_gap_open}'
+        #end if
+        #if str($trimming_options.read_alignment_gap_extend) != ''
+            -p '${trimming_options.read_alignment_gap_extend}'
+        #end if
+        #if str($trimming_options.read_alignment_gap_end) != ''
+            -P '${trimming_options.read_alignment_gap_end}'
+        #end if
+        #if str($trimming_options.read_alignment_max_gap_fraction) != ''
+            -X '${trimming_options.read_alignment_max_gap_fraction}'
+        #end if
+
+        ## Additional Arguments for Merging ##
+        #if $merging_options.maximum_quality_score
+            -y '${merging_options.maximum_quality_score}'
+        #end if
+        ## expands to "-g" when checked, to nothing otherwise
+        $merging_options.print_overhang
+        #if str($merging_options.min_base_pair_overlap) != ''
+            -o '${merging_options.min_base_pair_overlap}'
+        #end if
+        #if str($merging_options.max_mismatch_fraction) != ''
+            -m '${merging_options.max_mismatch_fraction}'
+        #end if
+        #if str($merging_options.min_match_fraction) != ''
+            -n '${merging_options.min_match_fraction}'
+        #end if
+    ]]></command>
+    <inputs>
+        <!-- Paired-end read files. fastq.gz is declared alongside fastq because the
+             test suite (test #6) feeds gzip-compressed FASTQ files to the tool. -->
+        <param name="input1" type="data" format="fastq,fastq.gz" label="First Read Input" help="Select the FASTQ file containing the first set of paired-end reads." />
+        <param name="input2" type="data" format="fastq,fastq.gz" label="Second Read Input" help="Select the FASTQ file containing the second set of paired-end reads." />
+        <!-- Controls both the -s option in the command and the presence of the "merged" output dataset. -->
+        <param name="merge_reads" type="boolean" truevalue="true" falsevalue="false" checked="true" label="Merge Reads" help="Enable this to merge overlapping reads from the provided paired-end FASTQ files." />
+
+        <!-- Section for General Arguments -->
+        <section name="general_options" title="General Arguments (Optional)" expanded="false">
+            <!-- NOTE(review): -3/-4 take free-text file names, and no <data> output in this
+                 tool collects the files they name; presumably they stay in the job
+                 working directory. Confirm whether they should become real outputs. -->
+            <param name="first_read_discarded" argument="-3" type="text" optional="true" label="First Read Discarded FASTQ Filename" help="first read discarded fastq filename" />
+            <param name="second_read_discarded" argument="-4" type="text" optional="true" label="Second Read Discarded FASTQ Filename" help="second read discarded fastq filename" />
+            <param name="phred64" argument="-6" type="boolean" truevalue="-6" falsevalue="" checked="false" label="Input Sequence is in Phred+64 Format" help="Input sequence is in phred+64 rather than phred+33 format, the output will still be phred+33"/>
+            <param name="quality_cutoff" argument="-q" type="integer" optional="true" value="13" label="Quality Score Cutoff" help="Quality score cutoff for mismatches to be counted in overlap" />
+            <param name="min_length" argument="-L" type="integer" optional="true" value="30" label="Minimum Length of Reads" help="Minimum length of a trimmed or merged read to print it" />
+        </section>
+
+        <!-- Section for Additional Adapter/Primer Trimming Arguments -->
+        <section name="trimming_options" title="Additional Adapter/Primer Trimming Arguments" expanded="false">
+            <param name="adapter_a" argument="-A" label="Adapter Sequence A" type="text" optional="true" value="AGATCGGAAGAGCGGTTCAG" help="Forward read primer/adapter sequence to trim as it would appear at the end of a read" />
+            <param name="adapter_b" argument="-B" label="Adapter Sequence B" type="text" optional="true" value="AGATCGGAAGAGCGTCGTGT" help="Reverse read primer/adapter sequence to trim as it would appear at the end of a read" />
+            <param name="adapter_overlap" argument="-O" label="Minimum Overall Base Pair Overlap with Adapter" type="integer" value="10" optional="true" help="minimum overall base pair overlap with adapter sequence to trim" />
+            <param name="max_mismatch_fraction" argument="-M" label="Maximum Fraction of Good Quality Mismatching Bases" type="float" value="0.02" optional="true" help="maximum fraction of good quality mismatching bases for primer/adapter overlap" />
+            <param name="min_match_fraction" argument="-N" label="Minimum Fraction of Matching Bases" type="float" value="0.87" optional="true" help="minimum fraction of matching bases for primer/adapter overlap" />
+            <param name="adapter_bandwidth" argument="-b" label="Adapter Alignment Band-width" type="integer" value="50" optional="true" />
+            <param name="gap_open" argument="-Q" label="Adapter Alignment Gap-Open" type="integer" value="8" optional="true" />
+            <param name="gap_extend" argument="-t" label="Adapter Alignment Gap-Extension" type="integer" value="2" optional="true" />
+            <param name="gap_end" argument="-e" label="Adapter Alignment Gap-End" type="integer" value="2" optional="true" />
+            <param name="local_alignment_score" argument="-Z" label="Minimum Local Alignment Score Cutoff" type="integer" value="26" optional="true" help="Adapter alignment minimum local alignment score cutoff [roughly (2*num_hits) - (num_gaps*gap_open) - (num_gaps*gap_close) - (gap_len*gap_extend) - (2*num_mismatches)]" />
+            <param name="read_alignment_bandwidth" argument="-w" label="Read Alignment Band-width" type="integer" value="50" optional="true" />
+            <param name="read_alignment_gap_open" argument="-W" label="Read Alignment Gap-Open" type="integer" value="26" optional="true" />
+            <param name="read_alignment_gap_extend" argument="-p" label="Read Alignment Gap-Extension" type="integer" value="9" optional="true" />
+            <!-- The "maximum fraction gap cutoff" help text used to sit on -P; it describes -X and now lives there. -->
+            <param name="read_alignment_gap_end" argument="-P" label="Read Alignment Gap-End" type="integer" value="5" optional="true" />
+            <param name="read_alignment_max_gap_fraction" argument="-X" label="Read Alignment Maximum Fraction Gap Cutoff" type="float" value="0.125" optional="true" help="Read alignment maximum fraction gap cutoff" />
+        </section>
+
+        <!-- Section for Optional Arguments for Merging: -->
+        <section name="merging_options" title="Optional Arguments for Merging" expanded="false">
+            <!-- The value is a single quality character such as ']'. Galaxy's default text
+                 sanitizer would rewrite brackets, '@', '#', ... into placeholder strings, so
+                 letters, digits and punctuation are allowed explicitly. The single quote is
+                 excluded because the command template wraps the value in single quotes. -->
+            <param name="maximum_quality_score" argument="-y" label="Maximum Quality Score in Output" type="text" optional="true" help="Maximum quality score in output (phred 33), default = ']'">
+                <sanitizer>
+                    <valid initial="string.letters,string.digits,string.punctuation">
+                        <remove value="&apos;"/>
+                    </valid>
+                </sanitizer>
+            </param>
+            <param name="print_overhang" argument="-g" type="boolean" truevalue="-g" falsevalue="" checked="false" label="Print Overhang When Adapters Are Present and Stripped" help="Use this if reads are different lengths"/>
+            <param name="min_base_pair_overlap" argument="-o" type="integer" optional="true" value="15" label="Minimum Overall Base Pair Overlap" help="Minimum overall base pair overlap to merge two reads"/>
+            <param name="max_mismatch_fraction" argument="-m" type="float" optional="true" value="0.02" label="Maximum Fraction of Good Quality Mismatching Bases" help="Maximum fraction of good quality mismatching bases to overlap reads"/>
+            <param name="min_match_fraction" argument="-n" type="float" optional="true" value="0.9" label="Minimum Fraction of Matching Bases" help="Minimum fraction of matching bases to overlap reads"/>
+        </section>
+    </inputs>
+    <outputs>
+        <!-- output1/output2 are always passed to SeqPrep (-1/-2), so they are
+             unconditional. The previous <filter>output_all</filter> referenced a
+             parameter that is not defined in <inputs>; the tests already expect both
+             datasets even when merging is disabled, so the filter is dropped. -->
+        <data format="fastq.gz" name="output1" label="${tool.name} on ${on_string}: First Read Output"/>
+        <data format="fastq.gz" name="output2" label="${tool.name} on ${on_string}: Second Read Output"/>
+        <!-- Only created when "Merge Reads" is enabled, mirroring the -s option in the command. -->
+        <data format="fastq.gz" name="merged" label="${tool.name} on ${on_string}: Merged Reads">
+            <filter>merge_reads</filter>
+        </data>
+    </outputs>
+    <tests>
+        <!-- Test default inputs #1 -->
+        <!-- Merging enabled, so all three datasets (output1, output2, merged) are expected.
+             Every value set below equals the default declared for that parameter in <inputs>. -->
+        <test expect_num_outputs="3">
+            <param name="input1" value="input1.fq" />
+            <param name="input2" value="input2.fq" />
+            <param name="merge_reads" value="true" />
+
+            <!-- Section for General Arguments -->
+            <section name="general_options" >
+                <param name="quality_cutoff" value="13" />
+                <param name="min_length" value="30" />
+            </section>
+
+            <!-- Section for Additional Adapter/Primer Trimming Arguments -->
+            <section name="trimming_options">
+                <param name="adapter_a" value="AGATCGGAAGAGCGGTTCAG" />
+                <param name="adapter_b" value="AGATCGGAAGAGCGTCGTGT" />
+                <param name="adapter_overlap" value="10" /> 
+                <param name="max_mismatch_fraction" value="0.02" />
+                <param name="min_match_fraction" value="0.87" />
+                <param name="adapter_bandwidth" value="50" />
+                <param name="gap_open" value="8" />
+                <param name="gap_extend" value="2" />
+                <param name="gap_end" value="2" />
+                <param name="local_alignment_score" value="26" />
+                <param name="read_alignment_bandwidth" value="50" />
+                <param name="read_alignment_gap_open" value="26" />
+                <param name="read_alignment_gap_extend" value="9" />
+                <param name="read_alignment_gap_end" value="5" />
+                <param name="read_alignment_max_gap_fraction" value="0.125" />
+            </section>
+            <!-- Expected files are compared against the three produced datasets. -->
+            <output name="output1" file="output1.fq.gz" />
+            <output name="output2" file="output2.fq.gz" />
+            <output name="merged" file="merged_output.fq.gz" />
+        </test>
+    
+        <!-- Without Merging, Two Outputs #2 -->
+        <!-- Same inputs and parameter values as test #1, but merge_reads is false:
+             the "merged" dataset is filtered out, leaving two outputs. -->
+        <test expect_num_outputs="2">
+            <param name="input1" value="input1.fq" />
+            <param name="input2" value="input2.fq" />
+            <param name="merge_reads" value="false" />
+
+            <!-- Section for General Arguments -->
+            <section name="general_options" >
+                <param name="quality_cutoff" value="13" />
+                <param name="min_length" value="30" />
+            </section>  
+
+            <!-- Section for Additional Adapter/Primer Trimming Arguments -->
+            <section name="trimming_options">
+                <param name="adapter_a" value="AGATCGGAAGAGCGGTTCAG" />
+                <param name="adapter_b" value="AGATCGGAAGAGCGTCGTGT" />
+                <param name="adapter_overlap" value="10" /> 
+                <param name="max_mismatch_fraction" value="0.02" />
+                <param name="min_match_fraction" value="0.87" />
+                <param name="adapter_bandwidth" value="50" />
+                <param name="gap_open" value="8" />
+                <param name="gap_extend" value="2" />
+                <param name="gap_end" value="2" />
+                <param name="local_alignment_score" value="26" />
+                <param name="read_alignment_bandwidth" value="50" />
+                <param name="read_alignment_gap_open" value="26" />
+                <param name="read_alignment_gap_extend" value="9" />
+                <param name="read_alignment_gap_end" value="5" />
+                <param name="read_alignment_max_gap_fraction" value="0.125" />
+            </section>
+            <!-- Only the two per-read outputs are checked; no merged file is expected. -->
+            <output name="output1" file="outputNoMerge1.fq.gz" />
+            <output name="output2" file="outputNoMerge2.fq.gz" />
+        </test>
+        <!-- Test with Empty Input Files #3 -->
+        <!-- Edge case: both inputs are the empty1.fq / empty2.fq fixtures. Merging is on,
+             so three outputs are still expected. Parameter values equal the declared defaults. -->
+        <test expect_num_outputs="3">
+            <param name="input1" value="empty1.fq" />
+            <param name="input2" value="empty2.fq" />
+            <param name="merge_reads" value="true" />
+
+            <!-- Section for General Arguments -->
+            <section name="general_options" >
+                <param name="quality_cutoff" value="13" />
+                <param name="min_length" value="30" />
+            </section>  
+
+            <!-- Section for Additional Adapter/Primer Trimming Arguments -->
+            <section name="trimming_options">
+                <param name="adapter_a" value="AGATCGGAAGAGCGGTTCAG" />
+                <param name="adapter_b" value="AGATCGGAAGAGCGTCGTGT" />
+                <param name="adapter_overlap" value="10" /> 
+                <param name="max_mismatch_fraction" value="0.02" />
+                <param name="min_match_fraction" value="0.87" />
+                <param name="adapter_bandwidth" value="50" />
+                <param name="gap_open" value="8" />
+                <param name="gap_extend" value="2" />
+                <param name="gap_end" value="2" />
+                <param name="local_alignment_score" value="26" />
+                <param name="read_alignment_bandwidth" value="50" />
+                <param name="read_alignment_gap_open" value="26" />
+                <param name="read_alignment_gap_extend" value="9" />
+                <param name="read_alignment_gap_end" value="5" />
+                <param name="read_alignment_max_gap_fraction" value="0.125" />
+            </section>
+            <!-- Expected files for the empty-input run. -->
+            <output name="output1" file="empty_output1.fq.gz" />
+            <output name="output2" file="empty_output2.fq.gz" />
+            <output name="merged" file="empty_merged_output.fq.gz" />
+        </test>
+
+        <!-- Advanced Functional Tests -->
+        <!-- General Arguments Test #4 -->
+        <!-- Non-default quality cutoff (-q 15) and minimum length (-L 25) without merging.
+             The two parameters are nested in their general_options section, as in the
+             other tests, instead of relying on bare-name lookup. -->
+        <test expect_num_outputs="2">
+            <param name="input1" value="input1.fq" />
+            <param name="input2" value="input2.fq" />
+            <param name="merge_reads" value="false" />
+
+            <!-- Section for General Arguments -->
+            <section name="general_options">
+                <param name="quality_cutoff" value="15" />
+                <param name="min_length" value="25" />
+            </section>
+            <output name="output1" file="output1_general_args.fq.gz" />
+            <output name="output2" file="output2_general_args.fq.gz" />
+        </test>
+
+        <!-- Adapter/Primer Trimming Arguments Test #5 -->
+        <!-- Overrides the adapter sequences and the adapter-alignment settings with
+             non-default values; the read-alignment parameters are left unset here.
+             Merging is disabled, so two outputs are expected. -->
+        <test expect_num_outputs="2">
+            <param name="input1" value="input1.fq" />
+            <param name="input2" value="input2.fq" />
+            <param name="merge_reads" value="false" />
+            <section name="trimming_options">
+                <param name="adapter_a" value="ACTGACTG" />
+                <param name="adapter_b" value="GTGACTGA" />
+                <param name="adapter_overlap" value="12" />
+                <param name="max_mismatch_fraction" value="0.03" />
+                <param name="min_match_fraction" value="0.85" />
+                <param name="adapter_bandwidth" value="55" />
+                <param name="gap_open" value="10" />
+                <param name="gap_extend" value="3" />
+                <param name="gap_end" value="3" />
+                <param name="local_alignment_score" value="28" />
+            </section>
+            <output name="output1" file="output1_adapter_trim.fq.gz" />
+            <output name="output2" file="output2_adapter_trim.fq.gz" />
+        </test>
+        <!-- Test with gzipped input files #6 -->
+        <!-- Inputs are the *.fastq.gz fixtures (gzip-compressed, judging by the file names);
+             no optional parameter is set, and merging is enabled, so three outputs are expected. -->
+        <test expect_num_outputs="3">
+            <param name="input1" value="input1.fastq.gz" />
+            <param name="input2" value="input2.fastq.gz" />
+            <param name="merge_reads" value="true" />
+            <output name="output1" file="output1_from_gzipped.fq.gz" />
+            <output name="output2" file="output2_from_gzipped.fq.gz" />
+            <output name="merged" file="merged_output_from_gzipped.fq.gz" />
+        </test>
+    </tests>
+    <help><![CDATA[
+.. class:: warningmark
+
+**Caution**
+-----------
+::
+
+    This is a modified version of the SeqPrep 1.2 release, made for use with the MGnify pipeline.
+
+    Difference in `utils.h`:
+
+::
+    
+    -#define MAX_SEQ_LEN (256)
+    +#define MAX_SEQ_LEN (1024)
+
+**SeqPrep**
+-----------
+::
+
+    SeqPrep is a versatile tool designed for merging overlapping paired-end Illumina reads into a single, longer read.
+    Additionally, it can trim adapter sequences from reads, making it a useful tool for preprocessing Illumina sequencing data.
+
+**Usage**
+=========
+::
+
+    To utilize SeqPrep, start by selecting your input FASTQ files: one for the first set of reads and another for the second set.
+    SeqPrep provides several options to customize your data processing:
+
+    - Adapter Sequences: You can provide specific sequences for adapter trimming if they are known. SeqPrep will remove these sequences from the reads.
+    - Quality Score Cutoff: Set a threshold for the quality score. Only mismatches at bases with a quality at or above this threshold are counted when evaluating the overlap.
+    - Minimum Read Length: Define the minimum length for reads to be retained after trimming. Reads shorter than this length will be discarded.
+
+    If the merging feature is enabled, SeqPrep will combine overlapping reads into longer sequences, thereby enhancing the data quality for downstream analysis.
+
+**Outputs**
+===========
+
+SeqPrep generates outputs in gzipped FASTQ format.
+
+See more details on `SeqPrep GitHub repository <https://github.com/jstjohn/SeqPrep>`_.
+
+    ]]></help>
+    <expand macro="citations"/>
+</tool>