Mercurial > repos > bgruening > mgnify_seqprep
diff mgnify_seqprep.xml @ 0:76ea9d4604bc draft default tip
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/mgnify_seqprep commit fd696b8f2ce44287b6ad19fe52277cfdbd7e94fb
author | bgruening |
---|---|
date | Tue, 14 May 2024 09:49:32 +0000 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mgnify_seqprep.xml Tue May 14 09:49:32 2024 +0000 @@ -0,0 +1,347 @@ +<tool id="mgnify_seqprep" name="Merging paired-end Illumina reads (SeqPrep, modified for use with MGnify piplines)" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="22.05"> + <description>Merge and Trim Adapter Sequences from Paired-End Illumina Reads</description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="biotools"/> + <expand macro="requirements"/> + <expand macro="creators"/> + <command detect_errors="exit_code"><![CDATA[ + SeqPrep + -f '${input1}' + -r '${input2}' + -1 '${output1}' + -2 '${output2}' + + #if $merge_reads + -s '${merged}' + #end if + + ## General Arguments ## + #if $general_options.first_read_discarded + -3 '${general_options.first_read_discarded}' + #end if + #if $general_options.second_read_discarded + -4 '${general_options.second_read_discarded}' + #end if + #if $general_options.phred64 + -6 '${general_options.phred64}' + #end if + #if $general_options.quality_cutoff + -q '${general_options.quality_cutoff}' + #end if + #if $general_options.min_length + -L '${general_options.min_length}' + #end if + + ## Additional Adapter/Primer Trimming Arguments ## + #if $trimming_options.adapter_a + -A '${trimming_options.adapter_a}' + #end if + #if $trimming_options.adapter_b + -B '${trimming_options.adapter_b}' + #end if + #if $trimming_options.adapter_overlap + -O '${trimming_options.adapter_overlap}' + #end if + #if $trimming_options.max_mismatch_fraction + -M '${trimming_options.max_mismatch_fraction}' + #end if + #if $trimming_options.min_match_fraction + -N '${trimming_options.min_match_fraction}' + #end if + #if $trimming_options.adapter_bandwidth + -b '${trimming_options.adapter_bandwidth}' + #end if + #if $trimming_options.gap_open + -Q '${trimming_options.gap_open}' + #end if + #if $trimming_options.gap_extend + -t '${trimming_options.gap_extend}' + #end if + #if $trimming_options.gap_end + -e '${trimming_options.gap_end}' + #end if + #if $trimming_options.local_alignment_score + -Z '${trimming_options.local_alignment_score}' + #end if + #if $trimming_options.read_alignment_bandwidth + -w '${trimming_options.read_alignment_bandwidth}' + #end if + #if $trimming_options.read_alignment_gap_open + -W '${trimming_options.read_alignment_gap_open}' + #end if + #if $trimming_options.read_alignment_gap_extend + -p '${trimming_options.read_alignment_gap_extend}' + #end if + #if $trimming_options.read_alignment_gap_end + -P '${trimming_options.read_alignment_gap_end}' + #end if + #if $trimming_options.read_alignment_max_gap_fraction + -X '${trimming_options.read_alignment_max_gap_fraction}' + #end if + + ## Additional Arguments for Merging ## + #if $merging_options.maximum_quality_score + -y '${merging_options.maximum_quality_score}' + #end if + #if $merging_options.print_overhang + -g '${merging_options.print_overhang}' + #end if + #if $merging_options.min_base_pair_overlap + -o '${merging_options.min_base_pair_overlap}' + #end if + #if $merging_options.max_mismatch_fraction + -m '${merging_options.max_mismatch_fraction}' + #end if + #if $merging_options.min_match_fraction + -n '${merging_options.min_match_fraction}' + #end if + ]]></command> + <inputs> + <param name="input1" type="data" format="fastq" label="First Read Input" help="Select the FASTQ file containing the first set of paired-end reads." /> + <param name="input2" type="data" format="fastq" label="Second Read Input" help="Select the FASTQ file containing the second set of paired-end reads." /> + <param name="merge_reads" type="boolean" truevalue="true" falsevalue="false" checked="true" label="Merge Reads" help="Enable this to merge overlapping reads from the provided paired-end FASTQ files." /> + + <!-- Section for General Arguments --> + <section name="general_options" title="General Arguments (Optional)" expanded="false"> + <param name="first_read_discarded" argument="-3" type="text" optional="true" label="First Read Discarded FASTQ Filename" help="first read discarded fastq filename" /> + <param name="second_read_discarded" argument="-4" type="text" optional="true" label="Second Read Discarded FASTQ Filename" help="second read discarded fastq filename" /> + <param name="phred64" argument="-6" type="boolean" truevalue="-6" falsevalue="" checked="false" label="Input Sequence is in Phred+64 Format" help="Input sequence is in phred+64 rather than phred+33 format, the output will still be phred+33"/> + <param name="quality_cutoff" argument="-q" type="integer" optional="true" value="13" label="Quality Score Cutoff" help="Quality score cutoff for mismatches to be counted in overlap" /> + <param name="min_length" argument="-L" type="integer" optional="true" value="30" label="Minimum Length of Reads" help="Minimum length of a trimmed or merged read to print it" /> + </section> + + <!-- Section for Additional Adapter/Primer Trimming Arguments --> + <section name="trimming_options" title="Additional Adapter/Primer Trimming Arguments" expanded="false"> + <param name="adapter_a" argument="-A" label="Adapter Sequence A" type="text" optional="true" value="AGATCGGAAGAGCGGTTCAG" help="Forward read primer/adapter sequence to trim as it would appear at the end of a read" /> + <param name="adapter_b" argument="-B" label="Adapter Sequence B" type="text" optional="true" value="AGATCGGAAGAGCGTCGTGT" help="Reverse read primer/adapter sequence to trim as it would appear at the end of a read" /> + <param name="adapter_overlap" argument="-O" label="Minimum Overall Base Pair Overlap with Adapter" type="integer" value="10" optional="true" help="minimum overall base pair overlap with adapter sequence to trim" /> + <param name="max_mismatch_fraction" argument="-M" label="Maximum Fraction of Good Quality Mismatching Bases" type="float" value="0.02" optional="true" help="maximum fraction of good quality mismatching bases for primer/adapter overlap" /> + <param name="min_match_fraction" argument="-N" label="Minimum Fraction of Matching Bases" type="float" value="0.87" optional="true" help="minimum fraction of matching bases for primer/adapter overlap" /> + <param name="adapter_bandwidth" argument="-b" label="Adapter Alignment Band-width" type="integer" value="50" optional="true" /> + <param name="gap_open" argument="-Q" label="Adapter Alignment Gap-Open" type="integer" value="8" optional="true" /> + <param name="gap_extend" argument="-t" label="Adapter Alignment Gap-Extension" type="integer" value="2" optional="true" /> + <param name="gap_end" argument="-e" label="Adapter Alignment Gap-End" type="integer" value="2" optional="true" /> + <param name="local_alignment_score" argument="-Z" label="Minimum Local Alignment Score Cutoff" type="integer" value="26" optional="true" help="Adapter alignment minimum local alignment score cutoff [roughly (2*num_hits) - (num_gaps*gap_open) - (num_gaps*gap_close) - (gap_len*gap_extend) - (2*num_mismatches)]" /> + <param name="read_alignment_bandwidth" argument="-w" label="Read Alignment Band-width" type="integer" value="50" optional="true" /> + <param name="read_alignment_gap_open" argument="-W" label="Read Alignment Gap-Open" type="integer" value="26" optional="true" /> + <param name="read_alignment_gap_extend" argument="-p" label="Read Alignment Gap-Extension" type="integer" value="9" optional="true" /> + <param name="read_alignment_gap_end" argument="-P" label="Read Alignment Gap-End" type="integer" value="5" optional="true" help="read alignment maximum fraction gap cutoff" /> + <param name="read_alignment_max_gap_fraction" argument="-X" label="Read Alignment Maximum Fraction Gap Cutoff" type="float" value="0.125" optional="true" /> + </section> + + <!-- Section for Optional Arguments for Merging: --> + <section name="merging_options" title="Optional Arguments for Merging" expanded="false"> + <param name="maximum_quality_score" argument="-y" label="Maximum Quality Score in Output" type="text" optional="true" help="Maximum quality score in output (phred 33), default = ']'"/> + <param name="print_overhang" argument="-g" type="boolean" truevalue="-g" falsevalue="" checked="false" label="Print Overhang When Adapters Are Present and Stripped" help="Use this if reads are different lengths"/> + <param name="min_base_pair_overlap" argument="-o" type="integer" optional="true" value="15" label="Minimum Overall Base Pair Overlap" help="Minimum overall base pair overlap to merge two reads"/> + <param name="max_mismatch_fraction" argument="-m" type="float" optional="true" value="0.02" label="Maximum Fraction of Good Quality Mismatching Bases" help="Maximum fraction of good quality mismatching bases to overlap reads"/> + <param name="min_match_fraction" argument="-n" type="float" optional="true" value="0.9" label="Minimum Fraction of Matching Bases" help="Minimum fraction of matching bases to overlap reads"/> + </section> + </inputs> + <outputs> + <data format="fastq.gz" name="output1" label="${tool.name} on ${on_string}: First Read Output"> + <filter>output_all</filter> + </data> + <data format="fastq.gz" name="output2" label="${tool.name} on ${on_string}: Second Read Output"> + <filter>output_all</filter> + </data> + <data format="fastq.gz" name="merged" label="${tool.name} on ${on_string}: Merged Reads"> + <filter>merge_reads</filter> + </data> + </outputs> + <tests> + <!-- Test default inputs #1 --> + <test expect_num_outputs="3"> + <param name="input1" value="input1.fq" /> + <param name="input2" value="input2.fq" /> + <param name="merge_reads" value="true" /> + + <!-- Section for General Arguments --> + <section name="general_options" > + <param name="quality_cutoff" value="13" /> + <param name="min_length" value="30" /> + </section> + + <!-- Section for Additional Adapter/Primer Trimming Arguments --> + <section name="trimming_options"> + <param name="adapter_a" value="AGATCGGAAGAGCGGTTCAG" /> + <param name="adapter_b" value="AGATCGGAAGAGCGTCGTGT" /> + <param name="adapter_overlap" value="10" /> + <param name="max_mismatch_fraction" value="0.02" /> + <param name="min_match_fraction" value="0.87" /> + <param name="adapter_bandwidth" value="50" /> + <param name="gap_open" value="8" /> + <param name="gap_extend" value="2" /> + <param name="gap_end" value="2" /> + <param name="local_alignment_score" value="26" /> + <param name="read_alignment_bandwidth" value="50" /> + <param name="read_alignment_gap_open" value="26" /> + <param name="read_alignment_gap_extend" value="9" /> + <param name="read_alignment_gap_end" value="5" /> + <param name="read_alignment_max_gap_fraction" value="0.125" /> + </section> + <output name="output1" file="output1.fq.gz" /> + <output name="output2" file="output2.fq.gz" /> + <output name="merged" file="merged_output.fq.gz" /> + </test> + + <!-- Without Merging, Two Outputs #2 --> + <test expect_num_outputs="2"> + <param name="input1" value="input1.fq" /> + <param name="input2" value="input2.fq" /> + <param name="merge_reads" value="false" /> + + <!-- Section for General Arguments --> + <section name="general_options" > + <param name="quality_cutoff" value="13" /> + <param name="min_length" value="30" /> + </section> + + <!-- Section for Additional Adapter/Primer Trimming Arguments --> + <section name="trimming_options"> + <param name="adapter_a" value="AGATCGGAAGAGCGGTTCAG" /> + <param name="adapter_b" value="AGATCGGAAGAGCGTCGTGT" /> + <param name="adapter_overlap" value="10" /> + <param name="max_mismatch_fraction" value="0.02" /> + <param name="min_match_fraction" value="0.87" /> + <param name="adapter_bandwidth" value="50" /> + <param name="gap_open" value="8" /> + <param name="gap_extend" value="2" /> + <param name="gap_end" value="2" /> + <param name="local_alignment_score" value="26" /> + <param name="read_alignment_bandwidth" value="50" /> + <param name="read_alignment_gap_open" value="26" /> + <param name="read_alignment_gap_extend" value="9" /> + <param name="read_alignment_gap_end" value="5" /> + <param name="read_alignment_max_gap_fraction" value="0.125" /> + </section> + <output name="output1" file="outputNoMerge1.fq.gz" /> + <output name="output2" file="outputNoMerge2.fq.gz" /> + </test> + <!-- Test with Empty Input Files #3 --> + <test expect_num_outputs="3"> + <param name="input1" value="empty1.fq" /> + <param name="input2" value="empty2.fq" /> + <param name="merge_reads" value="true" /> + + <!-- Section for General Arguments --> + <section name="general_options" > + <param name="quality_cutoff" value="13" /> + <param name="min_length" value="30" /> + </section> + + <!-- Section for Additional Adapter/Primer Trimming Arguments --> + <section name="trimming_options"> + <param name="adapter_a" value="AGATCGGAAGAGCGGTTCAG" /> + <param name="adapter_b" value="AGATCGGAAGAGCGTCGTGT" /> + <param name="adapter_overlap" value="10" /> + <param name="max_mismatch_fraction" value="0.02" /> + <param name="min_match_fraction" value="0.87" /> + <param name="adapter_bandwidth" value="50" /> + <param name="gap_open" value="8" /> + <param name="gap_extend" value="2" /> + <param name="gap_end" value="2" /> + <param name="local_alignment_score" value="26" /> + <param name="read_alignment_bandwidth" value="50" /> + <param name="read_alignment_gap_open" value="26" /> + <param name="read_alignment_gap_extend" value="9" /> + <param name="read_alignment_gap_end" value="5" /> + <param name="read_alignment_max_gap_fraction" value="0.125" /> + </section> + <output name="output1" file="empty_output1.fq.gz" /> + <output name="output2" file="empty_output2.fq.gz" /> + <output name="merged" file="empty_merged_output.fq.gz" /> + </test> + + <!-- Advanced Functional Tests --> + <!-- General Arguments Test #4 --> + <test expect_num_outputs="2"> + <param name="input1" value="input1.fq" /> + <param name="input2" value="input2.fq" /> + <param name="merge_reads" value="false" /> + + <param name="quality_cutoff" value="15" /> + <param name="min_length" value="25" /> + <output name="output1" file="output1_general_args.fq.gz" /> + <output name="output2" file="output2_general_args.fq.gz" /> + </test> + + <!-- Adapter/Primer Trimming Arguements Test #5 --> + <test expect_num_outputs="2"> + <param name="input1" value="input1.fq" /> + <param name="input2" value="input2.fq" /> + <param name="merge_reads" value="false" /> + <section name="trimming_options"> + <param name="adapter_a" value="ACTGACTG" /> + <param name="adapter_b" value="GTGACTGA" /> + <param name="adapter_overlap" value="12" /> + <param name="max_mismatch_fraction" value="0.03" /> + <param name="min_match_fraction" value="0.85" /> + <param name="adapter_bandwidth" value="55" /> + <param name="gap_open" value="10" /> + <param name="gap_extend" value="3" /> + <param name="gap_end" value="3" /> + <param name="local_alignment_score" value="28" /> + </section> + <output name="output1" file="output1_adapter_trim.fq.gz" /> + <output name="output2" file="output2_adapter_trim.fq.gz" /> + </test> + <!-- Test with gzipped input files #6 --> + <test expect_num_outputs="3"> + <param name="input1" value="input1.fastq.gz" /> + <param name="input2" value="input2.fastq.gz" /> + <param name="merge_reads" value="true" /> + <output name="output1" file="output1_from_gzipped.fq.gz" /> + <output name="output2" file="output2_from_gzipped.fq.gz" /> + <output name="merged" file="merged_output_from_gzipped.fq.gz" /> + </test> + </tests> + <help><![CDATA[ +.. class:: warningmark + +**Caution** +----------- +:: + + This is a modified version of the 1.2 release. Made for use with the MGnify pipeline. + + Difference in `utils.h`: + +:: + + -#define MAX_SEQ_LEN (256) + +#define MAX_SEQ_LEN (1024) + +**SeqPrep** +----------- +:: + + SeqPrep is a versatile tool designed for merging overlapping paired-end Illumina reads into a single, longer read. + Additionally, it offers the functionality to trim adapter sequences from reads, making it a needful tool for preprocessing Illumina sequencing data. + +**Usage** +========= +:: + + To utilize SeqPrep, start by selecting your input FASTQ files: one for the first set of reads and another for the second set. + SeqPrep provides several options to customize your data processing: + + - Adapter Sequences: You can provide specific sequences for adapter trimming if they are known. SeqPrep will remove these sequences from the reads. + - Quality Score Cutoff: Set a threshold for the quality score. Reads with quality scores below this threshold can be discarded or trimmed. + - Minimum Read Length: Define the minimum length for reads to be retained after trimming. Reads shorter than this length will be discarded. + + If the merging feature is enabled, SeqPrep will combine overlapping reads into longer sequences, thereby enhancing the data quality for downstream analysis. + +**Outputs** +=========== +:: + + SeqPrep generates outputs in gzipped FASTQ format. + + See more details on `SeqPrep GitHub repository <https://github.com/jstjohn/SeqPrep>`_. + + ]]></help> + <expand macro="citations"/> +</tool>