changeset 1:6ee24ca51829 draft default tip

"planemo upload for repository https://github.com/LUMC/fastq-filter/tree/develop/galaxy commit a4a3ab70c61a5ea14719002eb72a34a02b5d89e3"
author rhpvorderman
date Wed, 08 Jun 2022 07:49:43 +0000
parents 5f0d949db99e
children
files Dockerfile fast_fastq_filter.xml
diffstat 2 files changed, 120 insertions(+), 34 deletions(-) [+]
line wrap: on
line diff
--- a/Dockerfile	Tue Dec 28 14:17:40 2021 +0000
+++ b/Dockerfile	Wed Jun 08 07:49:43 2022 +0000
@@ -1,4 +1,4 @@
 FROM python:3.10-slim-bullseye
 
 ENV PYTHONDONTWRITEBYTECODE=true
-RUN pip install --no-cache-dir fastq-filter
+RUN pip install --no-cache-dir fastq-filter==0.3.0
--- a/fast_fastq_filter.xml	Tue Dec 28 14:17:40 2021 +0000
+++ b/fast_fastq_filter.xml	Wed Jun 08 07:49:43 2022 +0000
@@ -1,49 +1,135 @@
-<tool id="fast_fastq_filter" name="fastq-filter" version="0.1.0" python_template_version="3.5" profile="16.04">
+<tool id="fast_fastq_filter" name="fastq-filter" version="0.3.0" python_template_version="3.5" profile="16.04">
     <description>filter FASTQ reads fast</description>
     <requirements>
-        <requirement type="package" version="0.1.0">fastq-filter</requirement>
+        <requirement type="package" version="0.3.0">fastq-filter</requirement>
         <!-- TODO: Remove this once biocontainer is published -->
-        <container type="docker">quay.io/rhpvorderman/fastq-filter:0.1.0</container>
+        <container type="docker">quay.io/rhpvorderman/fastq-filter:0.3.0</container>
     </requirements>
     <command detect_errors="exit_code"><![CDATA[
-        #set all_filters = [str(filter['filter']) + ":" + str(filter['filter_threshold']) for filter in $filters]
-        fastq-filter -o '$output1'
-        #echo "'" + "|".join($all_filters) + "'"
-        '$input1'
+        set -e;
+        fastq-filter
+        #if str($filters.minimum_length.enabled) == "true"
+        --min-length $filters.minimum_length.threshold
+        #end if
+        #if str($filters.maximum_length.enabled) == "true"
+        --max-length $filters.maximum_length.threshold
+        #end if
+        #if str($filters.average_error_rate.enabled) == "true"
+        --average-error-rate $filters.average_error_rate.threshold
+        #end if
+        #if str($filters.mean_quality.enabled) == "true"
+        --mean-quality $filters.mean_quality.threshold
+        #end if
+        #if str($filters.median_quality.enabled) == "true"
+        --median-quality $filters.median_quality.threshold
+        #end if
+        --verbose
+        -o '${output1}.gz'
+        #if str($library.type) == "paired":
+        -o '${output2}.gz'
+        #end if
+
+        '$library.input_1'
+        #if str($library.type) == "paired":
+        '$library.input_2'
+        #end if
+        ;
+
+        mv '${output1}.gz' '$output1';
+        #if str($library.type) == "paired":
+        mv '${output2}.gz' '$output2';
+        #end if
     ]]></command>
     <inputs>
-        <param type="data" name="input1" label="Input FASTQ file" format="fastqsanger,fastqsanger.gz" />
-        <repeat name="filters" title="Filter" min="1">
-            <param name="filter" type="select" label="Filter on">
-              <option value="min_length">minimum length</option>
-              <option value="max_length">maximum length</option>
-              <option value="mean_quality" selected="true">mean quality</option>
-              <option value="median_quality">median quality</option>
+        <conditional name="library">
+            <param name="type" type="select" label="Single-end or Paired-end reads?">
+                <option value="single">Single-end</option>
+                <option value="paired">Paired-end</option>
             </param>
-            <param name="filter_threshold" type="integer" label="Filter threshold" value="20"/>
-        </repeat>
+            <when value="single">
+                <param type="data" name="input_1" label="Input FASTQ file" format="fastqsanger,fastqsanger.gz" />
+            </when>
+            <when value="paired">
+                <param type="data" name="input_1" label="Input FASTQ file #1" format="fastqsanger,fastqsanger.gz" />
+                <param type="data" name="input_2" label="Input FASTQ file #2" format="fastqsanger,fastqsanger.gz" />
+            </when>
+        </conditional>
+        <section name="filters" title="Filters" expanded="true">
+            <conditional name="minimum_length">
+                <param name="enabled" type="boolean" label="Minimum length"
+                       help="The minimum length for a read."/>
+                <when value="true">
+                    <param name="threshold" type="integer" label="Threshold"
+                           value="20" min="1"/>
+                </when>
+                <when value="false"/>
+            </conditional>
+            <conditional name="maximum_length">
+                <param name="enabled" type="boolean" label="Maximum length"
+                       help="The maximum length for a read."/>
+                <when value="true">
+                    <param name="threshold" type="integer" label="Threshold"
+                           value="1000" min="1"/>
+                </when>
+                <when value="false"/>
+            </conditional>
+            <conditional name="average_error_rate">
+                <param name="enabled" type="boolean" label="Average error rate"
+                       help="The minimum average per base error rate."/>
+                <when value="true">
+                    <param name="threshold" type="float" label="Threshold"
+                           value="0.001" min="0"/>
+                </when>
+                <when value="false"/>
+            </conditional>
+            <conditional name="mean_quality">
+                <param name="enabled" type="boolean" label="Mean quality">
+                    <help>
+                        Average quality. Same as the 'Average error rate' option but
+                        specified with a phred score. I.e. a mean quality of 30 is
+                        equivalent to an average error rate of 0.001.
+                    </help>
+                </param>
+                <when value="true">
+                    <param name="threshold" type="integer" label="Threshold"
+                           value="30" min="0"/>
+                </when>
+                <when value="false"/>
+            </conditional>
+            <conditional name="median_quality">
+                <param name="enabled" type="boolean" label="Median quality">
+                    <help>
+                        DEPRECATED: The minimum median phred score. This is not as
+                        informative as the average error rate. It is also slower to
+                        calculate. This filter is only included for backwards
+                        compatibility reasons.
+                    </help>
+                </param>
+                <when value="true">
+                    <param name="threshold" type="integer" label="Threshold"
+                           value="30" min="0"/>
+                </when>
+                <when value="false"/>
+            </conditional>
+        </section>
     </inputs>
     <outputs>
-        <!--Fastqsanger format for now. For conditionally applying fastqsanger.gz the tool needs
-         to be updated. An option is using format auto_detect, so we do not have to conditionally set
-         fastqsanger or fastqsanger.gz-->
-        <data name="output1" format="fastqsanger" />
-        <!--When the tool is updated for paired input, the optional paired output can probably be
-        found in the cutadapt wrapper -->
+        <data name="output1" format="fastqsanger.gz" />
+        <data name="output2" format="fastqsanger.gz">
+            <filter>library['type'] == 'paired'</filter>
+        </data>
     </outputs>
-    <tests>
-        <test>
-            <param name="input1" value="input.fastq.gz"/>
-            <output name="output1" file="output.fastq.gz"/>
-        </test>
-    </tests>
     <help><![CDATA[
-    The following filters are available:
+    When paired FASTQ data is given, fastq-filter makes sure the output is in
+    sync. The filters behave as follows for paired-end data:
 
-    + mean_quality:<quality>         The mean quality of the FASTQ record is equal or above the given quality value.
-    + median_quality:<quality>       The median quality of the FASTQ record is equal or above the given quality value.
-    + min_length:<length>            The length of the sequence in the FASTQ record is at least min_length
-    + max_length:<length>            The length of the sequence in the FASTQ record is at most max_length
+    + Average error rate: The average of the combined phred scores is used.
+    + Median quality: The median of the combined phred scores is used.
+    + Minimum length: At least one of the records of the pair must meet the minimum length.
+    + Maximum length: Neither record in the pair may exceed the maximum length.
+
+    The rationale for the length filters is that R1 and R2 both sequence the same
+    molecule and the canonical length is the longer of the two.
 
     ]]></help>
     <citations>