changeset 5:7cfeba6facd1 draft

planemo upload for repository https://github.com/vgl-hub/rdeval commit d55c4b3d6b91d0418950ed6b7806ef779a916099
author richard-burhans
date Fri, 02 May 2025 22:49:30 +0000
parents 7bf95986aaa4
children 24b05d3958d7
files macros.xml rdeval.xml rdeval_report.xml static/images/pipeline.svg test-data/input1.fastq.gz test-data/input1.rd test-data/output1.tabular test-data/output2.fastq.gz test-data/output2.tabular test-data/output3.tabular test-data/test.rd test-data/test1.fasta.gz test-data/test1.fastq.gz test-data/test2.bam
diffstat 14 files changed, 640 insertions(+), 112 deletions(-) [+]
line wrap: on
line diff
--- a/macros.xml	Wed Apr 23 19:31:12 2025 +0000
+++ b/macros.xml	Fri May 02 22:49:30 2025 +0000
@@ -4,9 +4,8 @@
             <requirement type="package" version="@TOOL_VERSION@">rdeval</requirement>
         </requirements>
     </xml>
-    <!-- should be: 0.0.5=r44h35c04b2_2 -->
-    <token name="@TOOL_VERSION@">0.0.5</token>
-    <token name="@VERSION_SUFFIX@">4</token>
+    <token name="@TOOL_VERSION@">0.0.7</token>
+    <token name="@VERSION_SUFFIX@">0</token>
     <token name="@PROFILE@">23.02</token>
     <xml name="citations">
         <citations>
--- a/rdeval.xml	Wed Apr 23 19:31:12 2025 +0000
+++ b/rdeval.xml	Fri May 02 22:49:30 2025 +0000
@@ -15,46 +15,62 @@
     #end for
     #if $output_options.output_type.type_selector == "combined_reads"
         ln -s '$reads_outfile' 'output.${output_type.format_selector}' &&
-    #end if  
-	rdeval --input-reads #echo " ".join([f"'{input}'" for $input in $mangled_inputs])
+    #end if
+	rdeval --input-reads
+    #for $input in $mangled_inputs
+        '$input'
+    #end for
 	#if $expected_gsize
 		'$expected_gsize'
 	#end if
-	#if $input_filter.filter_selector == "exclude_file"
-		--exclude-list '$exclude_file'
-	#else if $input_filter.filter_selector == "include_file"
-		--include-list '$include_file'
-	#end if
-    #if $filter
-		--filter '$filter'
+	#if $input_filter.include_list
+		--include-list '$input_filter.include_list'
+    #end if
+	#if $input_filter.exclude_list
+		--exclude-list '$input_filter.exclude_list'
     #end if
-		--sample '$sample'
-	#if $input_subsample.seed_selector == "yes"
-		--random-seed '$random_seed'
+    #set $filter_exp_type = $input_filter.filter_expression.filter_selector
+    #if $filter_exp_type != "no_exp"
+        #set $l_exp = "l" + str($input_filter.filter_expression.length_comparison) + str($input_filter.filter_expression.length_value)
+        #set $q_exp = "q" + str($input_filter.filter_expression.quality_comparison) + str($input_filter.filter_expression.quality_value)
+        #if $filter_exp_type == "l_exp"
+            #set $filter_exp = $l_exp
+        #else if $filter_exp_type == "q_exp"
+            #set $filter_exp = $q_exp
+        #else if $filter_exp_type == "lq_exp"
+            #set $filter_exp = $l_exp + str($input_filter.filter_expression.exp_operator) + $q_exp
+        #end if
+        --filter '$filter_exp'
+    #end if
+    #if int($input_subsample.sample) != 1
+		--sample '$input_subsample.sample'
+    #end if
+	#if $input_subsample.random_seed.seed_selector == "yes"
+		--random-seed '$input_subsample.random_seed.random_seed'
 	#end if
 	#if $input_compress.compress_selector == "yes"
-		--homopolymer-compress '$homopolymer_compress'
+		--homopolymer-compress '$input_compress.homopolymer_compress'
 	#end if
-    #if $stats_flavor.flavor_selector == "stats"
-        #if $sequence_report
+    #set $stats_type = $output_options.stats_flavor.flavor_selector
+    #if $stats_type == "stats"
+        #if $output_options.stats_flavor.sequence_report
             --sequence-report
         #end if
-    #else if $stats_flavor.flavor_selector == "quality"
-		--quality '$quality'
-    #else if $stats_flavor.flavor_selector == "size"
-		--out-size '$out_size'
+    #else if $stats_type == "quality"
+		--quality '$output_options.stats_flavor.quality'
+    #else if $stats_type == "size"
+		--out-size '$output_options.stats_flavor.out_size'
     #end if
-    #if $output_options.output_type.type_selector == "rd_file"
-        #if $md5
+    #set $output_type = $output_options.output_type.type_selector
+    #if $output_type == "rd_file"
+        #if $output_options.output_type.md5
             --md5
         #end if
         -o output.rd
-    #else if $output_options.output_type.type_selector == "combined_reads"
-        -o 'output.${output_type.format_selector}'
+    #else if $output_type == "combined_reads"
+        -o 'output.${output_options.output_type.format_selector}'
     #end if
-	#if $verbose
 		--verbose
-	#end if
 		--tabular
 		--threads \${GALAXY_SLOTS:-2}
 		> '$stats_outfile'
@@ -63,21 +79,56 @@
         <param argument="--input-reads" type="data" format="bam,cram,fasta,fasta.gz,fastq,fastq.gz" multiple="true" label="Input dataset" help="BAM, CRAM, FASTA, FASTQ, or RD files"/>
         <param name="expected_gsize" type="integer" label="Expected Genome Size" optional="true" help="Integer (e.g., 3000000000 for human)."/>
         <section name="input_filter" title="Filter input reads" expanded="false">
-            <conditional name="file_filter">
-                <param name="filter_selector" type="select" label="Use an exclude or include file">
-                    <option value="no_file" selected="true">no</option>
-                    <option value="exclude_file">Use an exclude file</option>
-                    <option value="include_file">Use an include file</option>
+            <param argument="--include-list" type="data" format="txt" optional="true" label="File containing headers to include"/>
+            <param argument="--exclude-list" type="data" format="txt" optional="true" label="File containing headers to exclude"/>
+            <conditional name="filter_expression">
+                <param name="filter_selector" type="select" label="Filter using length and/or quality" help="Retain only reads that match a filter expression on read length (l) and/or average read quality (q), e.g. l&gt;1000 &amp; q&gt;20. See the Filtering section of the tool help.">
+                    <option value="no_exp" selected="true">No</option>
+                    <option value="l_exp">Read length</option>
+                    <option value="q_exp">Average read quality</option>
+                    <option value="lq_exp">Both read length and average read quality</option>
                 </param>
-                <when value="no_file"/>
-                <when value="exclude_file">
-                    <param argument="--exclude-list" type="data" format="txt" optional="true" label="File containing headers to exclude"/>
+                <when value="no_exp"/>
+                <when value="l_exp">
+                    <param name="length_comparison" type="select" label="Retain reads with length">
+                        <option value="&lt;" selected="true">less than</option>
+                        <option value="=">equal to</option>
+                        <option value=">">greater than</option>
+                        <sanitizer sanitize="false"/>
+                    </param>
+                    <param name="length_value" type="integer" min="0" value="0" label="Length in bp" />
+                </when>
+                <when value="q_exp">
+                    <param name="quality_comparison" type="select" label="Retain reads with average read quality">
+                        <option value="&lt;" selected="true">less than</option>
+                        <option value="=">equal to</option>
+                        <option value=">">greater than</option>
+                        <sanitizer sanitize="false"/>
+                    </param>
+                    <param name="quality_value" type="integer" min="0" value="0" label="Average read quality" />
                 </when>
-                <when value="include_file">
-                    <param argument="--include-list" type="data" format="txt" optional="true" label="File containing headers to include"/>
+                <when value="lq_exp">
+                    <param name="length_comparison" type="select" label="Retain reads with length">
+                        <option value="&lt;" selected="true">less than</option>
+                        <option value="=">equal to</option>
+                        <option value=">">greater than</option>
+                        <sanitizer sanitize="false"/>
+                    </param>
+                    <param name="length_value" type="integer" min="0" value="0" label="Length in bp" />
+                    <param name="exp_operator" type="select" label="Combination operator">
+                        <option value="|" selected="true">or</option>
+                        <option value="&amp;">and</option>
+                        <sanitizer sanitize="false"/>
+                    </param>
+                    <param name="quality_comparison" type="select" label="Average read quality">
+                        <option value="&lt;" selected="true">less than</option>
+                        <option value="=">equal to</option>
+                        <option value=">">greater than</option>
+                        <sanitizer sanitize="false"/>
+                    </param>
+                    <param name="quality_value" type="integer" min="0" value="0" label="Average read quality" />
                 </when>
             </conditional>
-            <param argument="--filter" type="text" optional="true" label="filter" help="e.g. l&gt;1000 &amp; q&gt;20"/>
         </section>
         <section name="input_subsample" title="Subsample input reads" expanded="false">
             <param argument="--sample" type="float" min="0" max="1" value="1" label="fraction of reads to subsample"/>
@@ -144,7 +195,6 @@
                     <param argument="--md5" type="boolean" checked="false" label="Print md5 of .rd files"/>
                 </when>
             </conditional>
-            <param argument="--verbose" type="boolean" checked="false" label="Verbose output"/>
         </section>
     </inputs>
     <outputs>
@@ -164,91 +214,103 @@
     </outputs>
     <tests>
         <test expect_num_outputs="2">
-            <param name="input_reads" value="test1.fasta.gz" ftype="fasta.gz"/>
+            <param name="input_reads" value="input1.fastq.gz" ftype="fastq.gz"/>
             <output name="stats_outfile" file="output1.tabular" ftype="tabular"/>
             <output name="rd_outfile" ftype="binary">
                 <assert_contents>
-                    <has_size size="109" delta="1"/>
+                    <has_size size="119" delta="1"/>
                 </assert_contents>
             </output>
         </test>
         <test expect_num_outputs="2">
-            <param name="input_reads" value="test1.fastq.gz" ftype="fastq.gz"/>
-            <output name="stats_outfile" file="output1.tabular" ftype="tabular" lines_diff="2"/>
+            <param name="input_reads" value="input1.fastq.gz" ftype="fastq.gz"/>
+            <section name="input_filter">
+                <conditional name="filter_expression">
+                    <param name="filter_selector" value="lq_exp"/>
+                    <param name="length_comparison" value=">"/>
+                    <param name="length_value" value="10"/>
+                    <param name="exp_operator" value="&amp;"/>
+                    <param name="quality_comparison" value=">"/>
+                    <param name="quality_value" value="10"/>
+                </conditional>
+            </section>
+            <output name="stats_outfile" file="output2.tabular" ftype="tabular"/>
             <output name="rd_outfile" ftype="binary">
                 <assert_contents>
-                    <has_size size="128" delta="1"/>
+                    <has_size size="100" delta="1"/>
                 </assert_contents>
             </output>
         </test>
         <test expect_num_outputs="2">
-            <param name="input_reads" value="test2.bam" ftype="bam"/>
+            <param name="input_reads" value="input1.fastq.gz" ftype="fastq.gz"/>
+            <section name="input_compress">
+                <param name="compress_selector" value="yes"/>
+                <param name="homopolymer_compress" value="1"/>
+            </section>
             <section name="output_options">
                 <conditional name="output_type">
                     <param name="type_selector" value="combined_reads"/>
                     <param name="format_selector" value="fastq.gz"/>
                 </conditional>
             </section>
-            <output name="stats_outfile" file="output2.tabular" ftype="tabular"/>
-            <output name="reads_outfile" file="output2.fastq.gz" ftype="fastq.gz"/>
+            <output name="stats_outfile" file="output3.tabular" ftype="tabular"/>
+            <output name="reads_outfile" ftype="fastq.gz" md5="23a14631cb075817967752021deb6ec4">
+                <assert_contents>
+                    <has_size size="159"/>
+                </assert_contents>
+            </output>
         </test>
     </tests>
     <help><![CDATA[
-
-**rdeval** is a general-purpose, multithreaded tool for analyzing and manipulating reads (FASTA/FASTQ/BAM/CRAM/RD).
+What it does
+============
 
-        rdeval input.fa*[.gz]|bam|cram|rd [expected genome size]
+**rdeval** accepts an arbitrary number of sequencing files and optionally **filters**, **subsamples**, and/or **compresses homopolymers** within the reads. The retained reads can be saved in multiple formats, and metrics on these reads can be stored in a '*sketch*' file. Statistics can then be efficiently retrieved from these sketch files for further processing.
 
-::
-
-	Dataset report example:
+.. image:: pipeline.svg
 
-	+++Read summary+++:
-	# reads: 10000
-	Total read length: 134014104
-	Average read length: 13401.41
-	Read N50: 14270
-	Smallest read length: 1142
-	Largest read length: 40910
-	Coverage: inf
-	GC content %: 43.78
-	Base composition (A:C:T:G): 37693226:29331833:37655925:29333120
-	Average per base quality: 26.47
+Filtering
+=========
+
+Input reads can be filtered using any combination of the three methods listed below; when more than one is used, they are applied sequentially in the order shown.
 
-::
-	
-	Per sequence/read report (--sequence-report) example:
+1. Retain reads whose header lines are listed in the include dataset.
+2. Discard reads whose header lines are listed in the exclude dataset.
+3. Retain reads that match the provided filter expression.
+
+The filter expression can be used to select reads based on read length (l), average read quality (q), or a combination of both. The grammar for constructing filter expressions is outlined below:
 
-	Header  Comment Length  A       C       G       T       N       GC      Average Quality
-	m54306U_210528_154706/69206614/ccs              22812   6170    5146    4802    6694    0       0.44    89.9705
-	m54306U_210528_154706/25888573/ccs              32200   9162    7270    7112    8656    0       0.45    56.8306
-	m54306U_210528_154706/40634168/ccs              8487    2443    1858    1876    2310    0       0.44    90.3828
-	m54306U_210528_154706/103745617/ccs             16496   4546    3752    3760    4438    0       0.46    88.3554
+     * filter-expression ::= <length-expression> | <quality-expression> | <length-expression> <combination-operator> <quality-expression> | <quality-expression> <combination-operator> <length-expression>
+     * length-expression ::= "l" <comparison-operator> <integer>
+     * quality-expression ::= "q" <comparison-operator> <integer>
+     * combination-operator ::= "&" | "|"
+     * comparison-operator ::= "<" | "=" | ">"
+     * integer ::= <digit> | <digit><integer>
+     * digit ::= "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
 
-::
+Retain reads longer than 10 base pairs
+
+    l>10
+
+Retain reads with average quality greater than 20
+
+     q>20
 
-	Options:
-	--sequence-report generates a per-read report
-	-e --exclude-list <file> generates output on a excluding list of headers.
-	-f --filter <exp> filter reads using <exp> in quotes, e.g. 'l>10' for longer than 10bp or 'l>10 & q>10' to further exclude reads by quality (default: none).
-	-i --include-list <file> generates output on a subset list of headers.
-	-o --out-format <file> output file (fa*[.gz], bam, cram, rd). Optionally write reads to file or generate rd summary file.
-	-q --quality q|a generates list of average quality for each read (q) or both length and quality (a).
-	-r --input-reads <file1> <file2> <file n> input file (fa*[.gz], bam, cram, rd).
-	-s --out-size u|s|h|c  generates size list (unsorted|sorted|histogram|inverse cumulative table).
-	--homopolymer-compress <int> compress all the homopolymers longer than n in the input.
-	--sample <float> fraction of reads to subsample.
-	--random-seed <int> an optional random seed to make subsampling reproducible.
-	--md5 print md5 of .rd files.
-	--tabular tabular output.
-	--verbose verbose output.
-	-j --threads <int> numbers of threads (default:5).
-	-v --version software version.
-	--cmd print $0 to stdout.
+Retain reads longer than 10 base pairs with average quality greater than 20
+
+     l>10 & q>20
+
+.. _sampling-label:
 
-**Attribution**
+Sub-sampling
+============
+
+4. Retain a subsample of the reads by specifying the fraction to be kept. Use the *random seed* option to keep subsampling reproducible.
 
-This tool relies on the gfastar suite and the gfalibs toolkit `vgl-hub/gfalibs <https://github.com/vgl-hub/gfalibs>`_, developed by Giulio Formenti at the Rockefeller University
+Homopolymer Compression
+=======================
+
+5. Runs of repeated nucleotides longer than the specified length are collapsed in each read, with any associated quality data discarded. For example, with a length of 1, CAGGCTTT would become CAGCT.
     ]]></help>
     <expand macro="citations"/>
 </tool>
--- a/rdeval_report.xml	Wed Apr 23 19:31:12 2025 +0000
+++ b/rdeval_report.xml	Fri May 02 22:49:30 2025 +0000
@@ -10,23 +10,46 @@
     export RDEVAL_SHARE_DIR="\$(dirname \$(dirname \$(type -P rdeval)))/share/rdeval" &&
     ln -s "\$RDEVAL_SHARE_DIR/figures.Rmd" &&
     ln -s "\$RDEVAL_SHARE_DIR/rdeval_interface.R" &&
-    #set $num_files = 0
-    #for $input_file in $input_files
-        ln -s '$input_file' '${num_files}.rd' &&
-        #set $num_files += 1
+    #set $input_file_list = []
+    #for $idx, $input_file in enumerate($input_files)
+        ln -s '$input_file' '${idx}.rd' &&
+        #silent $input_file_list.append(f"'{idx}.rd'")
     #end for
-    R -e "rmarkdown::render('figures.Rmd', output_file='$html_outfile')" --args #for $idx in range($num_files)# '${idx}.rd' #end for
+    #set $r_vector = "c(" + ",".join(input_file_list) + ")"
+    #set $interactive_value = "FALSE"
+    #if $output_format.format_selector == "html" and $output_format.interactive
+        #set $interactive_value = "TRUE"
+    #end if
+    R -e "rmarkdown::render('figures.Rmd', output_file='$outfile', output_format='${output_format.format_selector}_document', params=list(input_files=$r_vector, interactive='$interactive_value'))"
 	]]></command>
     <inputs>
         <param name="input_files" type="data" format="data" multiple="true" label="Input dataset" help="RD files"/>
+        <conditional name="output_format">
+            <param name="format_selector" type="select" label="output format" help="output format">
+                <option value="html" selected="true">html</option>
+                <option value="pdf">pdf</option>
+            </param>
+            <when value="html">
+                <param name="interactive" type="boolean" checked="true" label="interactive" help="interactive"/>
+            </when>
+            <when value="pdf"/>
+        </conditional>
     </inputs>
     <outputs>
-        <data name="html_outfile" format="html" label="Rdeval report"/>
+        <data name="outfile" format="html" label="Rdeval report">
+            <change_format>
+                <when input="output_format.format_selector" value="pdf" format="pdf"/>
+            </change_format>
+        </data>
     </outputs>
     <tests>
         <test expect_num_outputs="1">
-            <param name="input_files" value="test.rd"/>
-            <output name="html_outfile" ftype="html">
+            <param name="input_files" value="input1.rd"/>
+            <conditional name="output_format">
+                <param name="format_selector" value="html"/>
+                <param name="interactive" value="false"/>
+            </conditional>
+            <output name="outfile" ftype="html">
                 <assert_contents>
                     <has_size size="833000" delta="8330"/>
                 </assert_contents>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/static/images/pipeline.svg	Fri May 02 22:49:30 2025 +0000
@@ -0,0 +1,434 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!-- Created with Inkscape (http://www.inkscape.org/) -->
+
+<svg
+   width="210mm"
+   height="297mm"
+   viewBox="0 0 210 297"
+   version="1.1"
+   id="svg5"
+   xmlns="http://www.w3.org/2000/svg"
+   xmlns:svg="http://www.w3.org/2000/svg">
+  <defs
+     id="defs2">
+    <marker
+       style="overflow:visible;"
+       id="marker1298"
+       refX="0.0"
+       refY="0.0"
+       orient="auto">
+      <path
+         transform="scale(0.8) rotate(180) translate(12.5,0)"
+         style="fill-rule:evenodd;fill:context-stroke;stroke:context-stroke;stroke-width:1.0pt;"
+         d="M 0.0,0.0 L 5.0,-5.0 L -12.5,0.0 L 5.0,5.0 L 0.0,0.0 z "
+         id="path892" />
+    </marker>
+    <marker
+       style="overflow:visible;"
+       id="Arrow1Lend"
+       refX="0.0"
+       refY="0.0"
+       orient="auto">
+      <path
+         transform="scale(0.8) rotate(180) translate(12.5,0)"
+         style="fill-rule:evenodd;fill:context-stroke;stroke:context-stroke;stroke-width:1.0pt;"
+         d="M 0.0,0.0 L 5.0,-5.0 L -12.5,0.0 L 5.0,5.0 L 0.0,0.0 z "
+         id="path36356" />
+    </marker>
+    <marker
+       style="overflow:visible"
+       id="marker1298-3"
+       refX="0"
+       refY="0"
+       orient="auto">
+      <path
+         transform="matrix(-0.8,0,0,-0.8,-10,0)"
+         style="fill:context-stroke;fill-rule:evenodd;stroke:context-stroke;stroke-width:1pt"
+         d="M 0,0 5,-5 -12.5,0 5,5 Z"
+         id="path892-3" />
+    </marker>
+    <marker
+       style="overflow:visible"
+       id="marker1298-3-8"
+       refX="0"
+       refY="0"
+       orient="auto">
+      <path
+         transform="matrix(-0.8,0,0,-0.8,-10,0)"
+         style="fill:context-stroke;fill-rule:evenodd;stroke:context-stroke;stroke-width:1pt"
+         d="M 0,0 5,-5 -12.5,0 5,5 Z"
+         id="path892-3-6" />
+    </marker>
+    <marker
+       style="overflow:visible"
+       id="marker1298-3-8-4"
+       refX="0"
+       refY="0"
+       orient="auto">
+      <path
+         transform="matrix(-0.8,0,0,-0.8,-10,0)"
+         style="fill:context-stroke;fill-rule:evenodd;stroke:context-stroke;stroke-width:1pt"
+         d="M 0,0 5,-5 -12.5,0 5,5 Z"
+         id="path892-3-6-8" />
+    </marker>
+    <marker
+       style="overflow:visible"
+       id="marker1298-3-8-4-8"
+       refX="0"
+       refY="0"
+       orient="auto">
+      <path
+         transform="matrix(-0.8,0,0,-0.8,-10,0)"
+         style="fill:context-stroke;fill-rule:evenodd;stroke:context-stroke;stroke-width:1pt"
+         d="M 0,0 5,-5 -12.5,0 5,5 Z"
+         id="path892-3-6-8-9" />
+    </marker>
+    <marker
+       style="overflow:visible"
+       id="marker1298-3-8-4-7"
+       refX="0"
+       refY="0"
+       orient="auto">
+      <path
+         transform="matrix(-0.8,0,0,-0.8,-10,0)"
+         style="fill:context-stroke;fill-rule:evenodd;stroke:context-stroke;stroke-width:1pt"
+         d="M 0,0 5,-5 -12.5,0 5,5 Z"
+         id="path892-3-6-8-6" />
+    </marker>
+    <marker
+       style="overflow:visible"
+       id="marker1298-3-8-4-7-3"
+       refX="0"
+       refY="0"
+       orient="auto">
+      <path
+         transform="matrix(-0.8,0,0,-0.8,-10,0)"
+         style="fill:context-stroke;fill-rule:evenodd;stroke:context-stroke;stroke-width:1pt"
+         d="M 0,0 5,-5 -12.5,0 5,5 Z"
+         id="path892-3-6-8-6-0" />
+    </marker>
+    <marker
+       style="overflow:visible"
+       id="marker1298-3-8-4-7-3-0"
+       refX="0"
+       refY="0"
+       orient="auto">
+      <path
+         transform="matrix(-0.8,0,0,-0.8,-10,0)"
+         style="fill:context-stroke;fill-rule:evenodd;stroke:context-stroke;stroke-width:1pt"
+         d="M 0,0 5,-5 -12.5,0 5,5 Z"
+         id="path892-3-6-8-6-0-9" />
+    </marker>
+    <marker
+       style="overflow:visible"
+       id="marker1298-3-8-4-7-3-5"
+       refX="0"
+       refY="0"
+       orient="auto">
+      <path
+         transform="matrix(-0.8,0,0,-0.8,-10,0)"
+         style="fill:context-stroke;fill-rule:evenodd;stroke:context-stroke;stroke-width:1pt"
+         d="M 0,0 5,-5 -12.5,0 5,5 Z"
+         id="path892-3-6-8-6-0-4" />
+    </marker>
+  </defs>
+  <g
+     id="layer1">
+    <g
+       id="g4714">
+      <g
+         id="g30989">
+        <g
+           id="g29274"
+           style="vector-effect:non-scaling-stroke;stroke-width:7e-05;stroke-miterlimit:4;stroke-dasharray:none;-inkscape-stroke:hairline">
+          <path
+             id="rect868-8-8-8-3"
+             style="vector-effect:non-scaling-stroke;fill:#daecf8;stroke:#000000;stroke-width:7e-05;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;-inkscape-stroke:hairline"
+             d="m 105.67591,35.29722 h 11.2448 l 5.62239,5.62241 v 16.20572 h -8.43359 -8.4336 z" />
+          <path
+             id="rect8392-9-6-9-0"
+             style="vector-effect:non-scaling-stroke;fill:#abd4ed;fill-opacity:1;stroke:#000000;stroke-width:7e-05;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;-inkscape-stroke:hairline"
+             d="m 116.9207,35.29723 5.6224,5.6224 v 0 h -5.6224 z" />
+          <rect
+             style="vector-effect:non-scaling-stroke;fill:#abd5ed;fill-opacity:1;stroke:#000000;stroke-width:7e-05;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;-inkscape-stroke:hairline"
+             id="rect8938-6-0-7-3"
+             width="16.867188"
+             height="5.622396"
+             x="105.67591"
+             y="46.376678" />
+          <text
+             xml:space="preserve"
+             style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:5.8947px;line-height:1.25;font-family:'Latin Modern Mono Light';-inkscape-font-specification:'Latin Modern Mono Light, Bold';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;word-spacing:0px;vector-effect:non-scaling-stroke;stroke:#000000;stroke-width:7e-05;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;-inkscape-stroke:hairline"
+             x="111.0649"
+             y="50.956284"
+             id="text50132-4-4-7-0"><tspan
+               id="tspan1187"
+               x="111.0649"
+               y="50.956284">RD</tspan></text>
+        </g>
+        <g
+           id="g29996"
+           transform="translate(-0.76821861,-0.139146)">
+          <path
+             id="rect868-8-8-8-3-2"
+             style="vector-effect:non-scaling-stroke;fill:#daecf8;stroke:#000000;stroke-width:7e-05;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;-inkscape-stroke:hairline"
+             d="m 84.115444,35.43637 h 11.2448 l 5.622386,5.62241 V 57.2645 h -8.433593 -8.433593 z" />
+          <path
+             id="rect8392-9-6-9-0-8"
+             style="vector-effect:non-scaling-stroke;fill:#abd4ed;fill-opacity:1;stroke:#000000;stroke-width:7e-05;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;-inkscape-stroke:hairline"
+             d="m 95.360234,35.43638 5.622396,5.6224 v 0 h -5.622396 z" />
+          <rect
+             style="vector-effect:non-scaling-stroke;fill:#abd5ed;fill-opacity:1;stroke:#000000;stroke-width:7e-05;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;-inkscape-stroke:hairline"
+             id="rect8938-6-0-7-3-3"
+             width="16.867188"
+             height="5.622396"
+             x="84.115448"
+             y="46.515827" />
+          <text
+             xml:space="preserve"
+             style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:5.8947px;line-height:1.25;font-family:'Latin Modern Mono Light';-inkscape-font-specification:'Latin Modern Mono Light, Bold';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;word-spacing:0px;vector-effect:non-scaling-stroke;stroke:#000000;stroke-width:7e-05;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;-inkscape-stroke:hairline"
+             x="84.894775"
+             y="50.765331"
+             id="text50132-4-4-7-0-8"><tspan
+               id="tspan29989"
+               x="84.894775"
+               y="50.765331">FASTQ</tspan></text>
+        </g>
+        <g
+           id="g30165"
+           transform="translate(-2.1639183,0.04923)">
+          <path
+             id="rect868-8-8-8-3-2-4"
+             style="vector-effect:non-scaling-stroke;fill:#daecf8;stroke:#000000;stroke-width:7e-05;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;-inkscape-stroke:hairline"
+             d="m 63.182464,35.247994 h 11.2448 l 5.622386,5.62241 v 16.20572 h -8.433593 -8.433593 z" />
+          <path
+             id="rect8392-9-6-9-0-8-9"
+             style="vector-effect:non-scaling-stroke;fill:#abd4ed;fill-opacity:1;stroke:#000000;stroke-width:7e-05;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;-inkscape-stroke:hairline"
+             d="m 74.427254,35.248004 5.622396,5.6224 v 0 h -5.622396 z" />
+          <rect
+             style="vector-effect:non-scaling-stroke;fill:#abd5ed;fill-opacity:1;stroke:#000000;stroke-width:7e-05;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;-inkscape-stroke:hairline"
+             id="rect8938-6-0-7-3-3-9"
+             width="16.867188"
+             height="5.622396"
+             x="63.182468"
+             y="46.32745" />
+          <text
+             xml:space="preserve"
+             style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:5.8947px;line-height:1.25;font-family:'Latin Modern Mono Light';-inkscape-font-specification:'Latin Modern Mono Light, Bold';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;word-spacing:0px;vector-effect:non-scaling-stroke;stroke:#000000;stroke-width:7e-05;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;-inkscape-stroke:hairline"
+             x="63.879272"
+             y="50.942425"
+             id="text50132-4-4-7-0-8-3"><tspan
+               id="tspan30158"
+               x="63.879272"
+               y="50.942425">FASTA</tspan></text>
+        </g>
+        <g
+           id="g30334"
+           transform="translate(-2.9423309,-0.081337)">
+          <path
+             id="rect868-8-8-8-3-2-4-5"
+             style="vector-effect:non-scaling-stroke;fill:#daecf8;stroke:#000000;stroke-width:7e-05;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;-inkscape-stroke:hairline"
+             d="m 41.632196,35.378561 h 11.2448 l 5.622386,5.62241 v 16.20572 h -8.433593 -8.433593 z" />
+          <path
+             id="rect8392-9-6-9-0-8-9-0"
+             style="vector-effect:non-scaling-stroke;fill:#abd4ed;fill-opacity:1;stroke:#000000;stroke-width:7e-05;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;-inkscape-stroke:hairline"
+             d="m 52.876986,35.378571 5.622396,5.6224 v 0 h -5.622396 z" />
+          <rect
+             style="vector-effect:non-scaling-stroke;fill:#abd5ed;fill-opacity:1;stroke:#000000;stroke-width:7e-05;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;-inkscape-stroke:hairline"
+             id="rect8938-6-0-7-3-3-9-2"
+             width="16.867188"
+             height="5.622396"
+             x="41.632202"
+             y="46.458015" />
+          <text
+             xml:space="preserve"
+             style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:5.8947px;line-height:1.25;font-family:'Latin Modern Mono Light';-inkscape-font-specification:'Latin Modern Mono Light, Bold';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;word-spacing:0px;vector-effect:non-scaling-stroke;stroke:#000000;stroke-width:7e-05;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;-inkscape-stroke:hairline"
+             x="43.811523"
+             y="51.07299"
+             id="text50132-4-4-7-0-8-3-9"><tspan
+               id="tspan30327"
+               x="43.811523"
+               y="51.07299">CRAM</tspan></text>
+        </g>
+        <g
+           id="g30539"
+           transform="translate(0,-5.126281)">
+          <path
+             id="rect868-8-8-8-3-2-4-5-5"
+             style="vector-effect:non-scaling-stroke;fill:#daecf8;stroke:#000000;stroke-width:7e-05;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;-inkscape-stroke:hairline"
+             d="m 16.361184,40.423505 h 11.2448 l 5.622386,5.62241 v 16.20572 h -8.433593 -8.433593 z" />
+          <path
+             id="rect8392-9-6-9-0-8-9-0-1"
+             style="vector-effect:non-scaling-stroke;fill:#abd4ed;fill-opacity:1;stroke:#000000;stroke-width:7e-05;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;-inkscape-stroke:hairline"
+             d="m 27.605974,40.423515 5.622396,5.6224 v 0 h -5.622396 z" />
+          <rect
+             style="vector-effect:non-scaling-stroke;fill:#abd5ed;fill-opacity:1;stroke:#000000;stroke-width:7e-05;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;-inkscape-stroke:hairline"
+             id="rect8938-6-0-7-3-3-9-2-7"
+             width="16.867188"
+             height="5.622396"
+             x="16.361191"
+             y="51.50296" />
+          <text
+             xml:space="preserve"
+             style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:5.8947px;line-height:1.25;font-family:'Latin Modern Mono Light';-inkscape-font-specification:'Latin Modern Mono Light, Bold';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;word-spacing:0px;vector-effect:non-scaling-stroke;stroke:#000000;stroke-width:7e-05;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;-inkscape-stroke:hairline"
+             x="20.137974"
+             y="56.150356"
+             id="text50132-4-4-7-0-8-3-9-4"><tspan
+               id="tspan30532"
+               x="20.137974"
+               y="56.150356">BAM</tspan></text>
+        </g>
+      </g>
+      <g
+         id="g30994"
+         transform="translate(0,3.6326433)">
+        <path
+           id="rect30677"
+           style="vector-effect:non-scaling-stroke;fill:#d8ebf8;stroke:#000000;stroke-width:0.0113528;-inkscape-stroke:hairline"
+           d="M 16.769016,62.697968 H 69.898328 123.02764 V 84.508125 H 69.898328 16.769016 Z" />
+        <text
+           xml:space="preserve"
+           style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:9.64337px;line-height:1.25;font-family:'Latin Modern Mono Light';-inkscape-font-specification:'Latin Modern Mono Light, Bold';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;word-spacing:0px;vector-effect:non-scaling-stroke;stroke-width:0.00700043;stroke-miterlimit:4;stroke-dasharray:none;-inkscape-stroke:hairline"
+           x="39.109085"
+           y="76.636116"
+           id="text3658-1-5-1-1-6"><tspan
+             id="tspan18890"
+             x="39.109085"
+             y="76.636116">include list</tspan></text>
+      </g>
+      <g
+         id="g31018"
+         transform="translate(0,1.6148885e-5)">
+        <rect
+           style="fill:#d8ebf8;fill-opacity:1;stroke-width:0.143519"
+           id="rect1218-6-1-9-9"
+           width="106.259"
+           height="21.81016"
+           x="17.913553"
+           y="190.39183" />
+        <path
+           id="rect30677-1"
+           style="vector-effect:non-scaling-stroke;fill:#d8ebf8;stroke:#000000;stroke-width:0.0113528;-inkscape-stroke:hairline"
+           d="m 18.021629,190.4646 h 53.129311 53.12931 v 21.81016 H 18.021629 Z" />
+        <text
+           xml:space="preserve"
+           style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:9.64337px;line-height:1.25;font-family:'Latin Modern Mono Light';-inkscape-font-specification:'Latin Modern Mono Light, Bold';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;word-spacing:0px;stroke-width:0.241085"
+           x="20.666126"
+           y="203.17255"
+           id="text3658-1-5-1-1"><tspan
+             id="tspan17582"
+             x="20.666126"
+             y="203.17255"
+             style="stroke-width:0.241085">homopolymer compress</tspan></text>
+      </g>
+      <g
+         id="g31012"
+         transform="translate(0,-1.9580039)">
+        <rect
+           style="fill:#d8ebf8;fill-opacity:1;stroke-width:0.143519"
+           id="rect1218-6-1-9"
+           width="106.25862"
+           height="21.81016"
+           x="17.529228"
+           y="161.36182" />
+        <path
+           id="rect30677-1-4"
+           style="vector-effect:non-scaling-stroke;fill:#d8ebf8;stroke:#000000;stroke-width:0.0113528;-inkscape-stroke:hairline"
+           d="m 17.529228,161.36183 h 53.129311 53.129311 v 21.81016 H 70.658539 17.529228 Z" />
+        <text
+           xml:space="preserve"
+           style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:9.64337px;line-height:1.25;font-family:'Latin Modern Mono Light';-inkscape-font-specification:'Latin Modern Mono Light, Bold';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;word-spacing:0px;stroke-width:0.241085"
+           x="47.823051"
+           y="174.17146"
+           id="text3658-1-5-1-1-4"><tspan
+             id="tspan24382"
+             x="47.823051"
+             y="174.17146">subsample</tspan></text>
+      </g>
+      <g
+         id="g31006"
+         transform="translate(0,-3.2617034)">
+        <rect
+           style="fill:#d8ebf8;fill-opacity:1;stroke-width:0.143519"
+           id="rect1218-6-1"
+           width="106.25862"
+           height="21.81016"
+           x="16.716393"
+           y="131.64111" />
+        <path
+           id="rect30677-1-4-6"
+           style="vector-effect:non-scaling-stroke;fill:#d8ebf8;stroke:#000000;stroke-width:0.0113528;-inkscape-stroke:hairline"
+           d="m 16.716393,131.64111 h 53.129309 53.129308 v 21.81016 H 69.845702 16.716393 Z" />
+        <text
+           xml:space="preserve"
+           style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:9.64337px;line-height:1.25;font-family:'Latin Modern Mono Light';-inkscape-font-specification:'Latin Modern Mono Light, Bold';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;word-spacing:0px;stroke-width:0.241085"
+           x="26.682009"
+           y="144.47969"
+           id="text3658-1-5-1-1-0"><tspan
+             id="tspan23044"
+             x="26.682009"
+             y="144.47969">filter expression</tspan></text>
+      </g>
+      <g
+         id="g31000"
+         transform="translate(0,-0.80474739)">
+        <rect
+           style="fill:#d8ebf8;fill-opacity:1;stroke-width:0.143519"
+           id="rect1218-6"
+           width="106.25862"
+           height="21.81016"
+           x="16.769016"
+           y="98.15976" />
+        <path
+           id="rect30677-1-4-6-9"
+           style="vector-effect:non-scaling-stroke;fill:#d8ebf8;stroke:#000000;stroke-width:0.0113528;-inkscape-stroke:hairline"
+           d="m 16.769016,98.15976 h 53.129312 53.129312 v 21.81016 H 69.898328 16.769016 Z" />
+        <text
+           xml:space="preserve"
+           style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:9.64337px;line-height:1.25;font-family:'Latin Modern Mono Light';-inkscape-font-specification:'Latin Modern Mono Light, Bold';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;word-spacing:0px;stroke-width:0.241085"
+           x="39.627808"
+           y="111.98196"
+           id="text3658-1-5-1-1-2"><tspan
+             id="tspan21056"
+             x="39.627808"
+             y="111.98196">exclude list</tspan></text>
+      </g>
+      <path
+         style="fill:none;stroke:#000000;stroke-width:0.264583px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-end:url(#marker1298)"
+         d="m 24.795,57.125 v 8.676833"
+         id="path1295" />
+      <path
+         style="fill:none;stroke:#000000;stroke-width:0.264583px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-end:url(#marker1298-3)"
+         d="m 47.123,57.125 v 8.677"
+         id="path1295-3" />
+      <path
+         style="fill:none;stroke:#000000;stroke-width:0.264583px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-end:url(#marker1298-3-8)"
+         d="m 69.452,57.125 v 8.677"
+         id="path1295-3-0" />
+      <path
+         style="fill:none;stroke:#000000;stroke-width:0.264583px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-end:url(#marker1298-3-8-4)"
+         d="m 91.781,57.125 v 8.677"
+         id="path1295-3-0-8" />
+      <path
+         style="fill:none;stroke:#000000;stroke-width:0.264583px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-end:url(#marker1298-3-8-4-8)"
+         d="m 114.11,57.125 v 8.677"
+         id="path1295-3-0-8-7" />
+      <path
+         style="fill:none;stroke:#000000;stroke-width:0.264583px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-end:url(#marker1298-3-8-4-7)"
+         d="m 69.452,88.114 v 8.711833"
+         id="path1295-3-0-8-4" />
+      <path
+         style="fill:none;stroke:#000000;stroke-width:0.264583px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-end:url(#marker1298-3-8-4-7-3)"
+         d="m 69.452,119.165 v 8.90466"
+         id="path1295-3-0-8-4-3" />
+      <path
+         style="fill:none;stroke:#000000;stroke-width:0.264583px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-end:url(#marker1298-3-8-4-7-3-0)"
+         d="m 69.452,150.19 v 8.68483"
+         id="path1295-3-0-8-4-3-2" />
+      <path
+         style="fill:none;stroke:#000000;stroke-width:0.264583px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-end:url(#marker1298-3-8-4-7-3-5)"
+         d="m 69.452,181.214 -0.027,8.72183"
+         id="path1295-3-0-8-4-3-0" />
+    </g>
+  </g>
+</svg>
Binary file test-data/input1.fastq.gz has changed
Binary file test-data/input1.rd has changed
--- a/test-data/output1.tabular	Wed Apr 23 19:31:12 2025 +0000
+++ b/test-data/output1.tabular	Fri May 02 22:49:30 2025 +0000
@@ -7,4 +7,4 @@
 Coverage	inf
 GC content %	50.00
 Base composition (A:C:T:G)	9:14:11:6
-Average per base quality	0.00
+Average per base quality	11.81
Binary file test-data/output2.fastq.gz has changed
--- a/test-data/output2.tabular	Wed Apr 23 19:31:12 2025 +0000
+++ b/test-data/output2.tabular	Fri May 02 22:49:30 2025 +0000
@@ -1,10 +1,10 @@
-# reads	11
-Total read length	264855
-Average read length	24077.73
-Read N50	24322
-Smallest read length	17465
-Largest read length	36274
+# reads	2
+Total read length	30
+Average read length	15.00
+Read N50	15
+Smallest read length	15
+Largest read length	15
 Coverage	inf
-GC content %	40.81
-Base composition (A:C:T:G)	79479:54455:77277:53644
-Average per base quality	23.58
+GC content %	54.17
+Base composition (A:C:T:G)	5:10:6:3
+Average per base quality	9.81
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output3.tabular	Fri May 02 22:49:30 2025 +0000
@@ -0,0 +1,10 @@
+# reads	5
+Total read length	43
+Average read length	8.60
+Read N50	11
+Smallest read length	5
+Largest read length	12
+Coverage	inf
+GC content %	51.35
+Base composition (A:C:T:G)	8:13:10:6
+Average per base quality	0.00
Binary file test-data/test.rd has changed
Binary file test-data/test1.fasta.gz has changed
Binary file test-data/test1.fastq.gz has changed
Binary file test-data/test2.bam has changed