Mercurial > repos > richard-burhans > rdeval
changeset 5:7cfeba6facd1 draft
planemo upload for repository https://github.com/vgl-hub/rdeval commit d55c4b3d6b91d0418950ed6b7806ef779a916099
author | richard-burhans |
---|---|
date | Fri, 02 May 2025 22:49:30 +0000 |
parents | 7bf95986aaa4 |
children | 24b05d3958d7 |
files | macros.xml rdeval.xml rdeval_report.xml static/images/pipeline.svg test-data/input1.fastq.gz test-data/input1.rd test-data/output1.tabular test-data/output2.fastq.gz test-data/output2.tabular test-data/output3.tabular test-data/test.rd test-data/test1.fasta.gz test-data/test1.fastq.gz test-data/test2.bam |
diffstat | 14 files changed, 640 insertions(+), 112 deletions(-) [+] |
line wrap: on
line diff
--- a/macros.xml Wed Apr 23 19:31:12 2025 +0000 +++ b/macros.xml Fri May 02 22:49:30 2025 +0000 @@ -4,9 +4,8 @@ <requirement type="package" version="@TOOL_VERSION@">rdeval</requirement> </requirements> </xml> - <!-- should be: 0.0.5=r44h35c04b2_2 --> - <token name="@TOOL_VERSION@">0.0.5</token> - <token name="@VERSION_SUFFIX@">4</token> + <token name="@TOOL_VERSION@">0.0.7</token> + <token name="@VERSION_SUFFIX@">0</token> <token name="@PROFILE@">23.02</token> <xml name="citations"> <citations>
--- a/rdeval.xml Wed Apr 23 19:31:12 2025 +0000 +++ b/rdeval.xml Fri May 02 22:49:30 2025 +0000 @@ -15,46 +15,62 @@ #end for #if $output_options.output_type.type_selector == "combined_reads" ln -s '$reads_outfile' 'output.${output_type.format_selector}' && - #end if - rdeval --input-reads #echo " ".join([f"'{input}'" for $input in $mangled_inputs]) + #end if + rdeval --input-reads + #for $input in $mangled_inputs + '$input' + #end for #if $expected_gsize '$expected_gsize' #end if - #if $input_filter.filter_selector == "exclude_file" - --exclude-list '$exclude_file' - #else if $input_filter.filter_selector == "include_file" - --include-list '$include_file' - #end if - #if $filter - --filter '$filter' + #if $input_filter.include_list + --include-list '$input_filter.include_list' + #end if + #if $input_filter.exclude_list + --exclude-list '$input_filter.exclude_list' #end if - --sample '$sample' - #if $input_subsample.seed_selector == "yes" - --random-seed '$random_seed' + #set $filter_exp_type = $input_filter.filter_expression.filter_selector + #if $filter_exp_type != "no_exp" + #set $l_exp = "l" + str($input_filter.filter_expression.length_comparison) + str($input_filter.filter_expression.length_value) + #set $q_exp = "q" + str($input_filter.filter_expression.quality_comparison) + str($input_filter.filter_expression.quality_value) + #if $filter_exp_type == "l_exp" + #set $filter_exp = $l_exp + #else if $filter_exp_type == "q_exp" + #set $filter_exp = $q_exp + #else if $filter_exp_type == "lq_exp" + #set $filter_exp = $l_exp + str($input_filter.filter_expression.exp_operator) + $q_exp + #end if + --filter '$filter_exp' + #end if + #if int($input_subsample.sample) != 1 + --sample '$input_subsample.sample' + #end if + #if $input_subsample.random_seed.seed_selector == "yes" + --random-seed '$input_subsample.random_seed.random_seed' #end if #if $input_compress.compress_selector == "yes" - --homopolymer-compress '$homopolymer_compress' + --homopolymer-compress 
'$input_compress.homopolymer_compress' #end if - #if $stats_flavor.flavor_selector == "stats" - #if $sequence_report + #set $stats_type = $output_options.stats_flavor.flavor_selector + #if $stats_type == "stats" + #if $output_options.stats_flavor.sequence_report --sequence-report #end if - #else if $stats_flavor.flavor_selector == "quality" - --quality '$quality' - #else if $stats_flavor.flavor_selector == "size" - --out-size '$out_size' + #else if $stats_type == "quality" + --quality '$output_options.stats_flavor.quality' + #else if $stats_type == "size" + --out-size '$output_options.stats_flavor.out_size' #end if - #if $output_options.output_type.type_selector == "rd_file" - #if $md5 + #set $output_type = $output_options.output_type.type_selector + #if $output_type == "rd_file" + #if $output_options.output_type.md5 --md5 #end if -o output.rd - #else if $output_options.output_type.type_selector == "combined_reads" - -o 'output.${output_type.format_selector}' + #else if $output_type == "combined_reads" + -o 'output.${output_options.output_type.format_selector}' #end if - #if $verbose --verbose - #end if --tabular --threads \${GALAXY_SLOTS:-2} > '$stats_outfile' @@ -63,21 +79,56 @@ <param argument="--input-reads" type="data" format="bam,cram,fasta,fasta.gz,fastq,fastq.gz" multiple="true" label="Input dataset" help="BAM, CRAM, FASTA, FASTQ, or RD files"/> <param name="expected_gsize" type="integer" label="Expected Genome Size" optional="true" help="Integer (e.g., 3000000000 for human)."/> <section name="input_filter" title="Filter input reads" expanded="false"> - <conditional name="file_filter"> - <param name="filter_selector" type="select" label="Use an exclude or include file"> - <option value="no_file" selected="true">no</option> - <option value="exclude_file">Use an exclude file</option> - <option value="include_file">Use an include file</option> + <param argument="--include-list" type="data" format="txt" optional="true" label="File containing headers to 
include"/> + <param argument="--exclude-list" type="data" format="txt" optional="true" label="File containing headers to exclude"/> + <conditional name="filter_expression"> + <param name="filter_selector" type="select" label="Filter using length and/or quality" help="filter help"> + <option value="no_exp" selected="true">No</option> + <option value="l_exp">Read length</option> + <option value="q_exp">Average read quality</option> + <option value="lq_exp">Both read length and average read quality</option> </param> - <when value="no_file"/> - <when value="exclude_file"> - <param argument="--exclude-list" type="data" format="txt" optional="true" label="File containing headers to exclude"/> + <when value="no_exp"/> + <when value="l_exp"> + <param name="length_comparison" type="select" label="Retain reads with length"> + <option value="<" selected="true">less than</option> + <option value="=">equal to</option> + <option value=">">greater than</option> + <sanitizer sanitize="false"/> + </param> + <param name="length_value" type="integer" min="0" value="0" label="Length in bp" /> + </when> + <when value="q_exp"> + <param name="quality_comparison" type="select" label="Retain reads with average read quality"> + <option value="<" selected="true">less than</option> + <option value="=">equal to</option> + <option value=">">greater than</option> + <sanitizer sanitize="false"/> + </param> + <param name="quality_value" type="integer" min="0" value="0" label="Average read quality" /> </when> - <when value="include_file"> - <param argument="--include-list" type="data" format="txt" optional="true" label="File containing headers to include"/> + <when value="lq_exp"> + <param name="length_comparison" type="select" label="Retain reads with length"> + <option value="<" selected="true">less than</option> + <option value="=">equal to</option> + <option value=">">greater than</option> + <sanitizer sanitize="false"/> + </param> + <param name="length_value" type="integer" min="0" value="0" 
label="Length in bp" /> + <param name="exp_operator" type="select" label="Combination operator"> + <option value="|" selected="true">or</option> + <option value="&">and</option> + <sanitizer sanitize="false"/> + </param> + <param name="quality_comparison" type="select" label="Average read quality"> + <option value="<" selected="true">less than</option> + <option value="=">equal to</option> + <option value=">">greater than</option> + <sanitizer sanitize="false"/> + </param> + <param name="quality_value" type="integer" min="0" value="0" label="average read quality" /> </when> </conditional> - <param argument="--filter" type="text" optional="true" label="filter" help="e.g. l>1000 & q>20"/> </section> <section name="input_subsample" title="Subsample input reads" expanded="false"> <param argument="--sample" type="float" min="0" max="1" value="1" label="fraction of reads to subsample"/> @@ -144,7 +195,6 @@ <param argument="--md5" type="boolean" checked="false" label="Print md5 of .rd files"/> </when> </conditional> - <param argument="--verbose" type="boolean" checked="false" label="Verbose output"/> </section> </inputs> <outputs> @@ -164,91 +214,103 @@ </outputs> <tests> <test expect_num_outputs="2"> - <param name="input_reads" value="test1.fasta.gz" ftype="fasta.gz"/> + <param name="input_reads" value="input1.fastq.gz" ftype="fastq.gz"/> <output name="stats_outfile" file="output1.tabular" ftype="tabular"/> <output name="rd_outfile" ftype="binary"> <assert_contents> - <has_size size="109" delta="1"/> + <has_size size="119" delta="1"/> </assert_contents> </output> </test> <test expect_num_outputs="2"> - <param name="input_reads" value="test1.fastq.gz" ftype="fastq.gz"/> - <output name="stats_outfile" file="output1.tabular" ftype="tabular" lines_diff="2"/> + <param name="input_reads" value="input1.fastq.gz" ftype="fastq.gz"/> + <section name="input_filter"> + <conditional name="filter_expression"> + <param name="filter_selector" value="lq_exp"/> + <param 
name="length_comparison" value=">"/> + <param name="length_value" value="10"/> + <param name="exp_operator" value="&"/> + <param name="quality_comparison" value=">"/> + <param name="quality_value" value="10"/> + </conditional> + </section> + <output name="stats_outfile" file="output2.tabular" ftype="tabular"/> <output name="rd_outfile" ftype="binary"> <assert_contents> - <has_size size="128" delta="1"/> + <has_size size="100" delta="1"/> </assert_contents> </output> </test> <test expect_num_outputs="2"> - <param name="input_reads" value="test2.bam" ftype="bam"/> + <param name="input_reads" value="input1.fastq.gz" ftype="fastq.gz"/> + <section name="input_compress"> + <param name="compress_selector" value="yes"/> + <param name="homopolymer_compress" value="1"/> + </section> <section name="output_options"> <conditional name="output_type"> <param name="type_selector" value="combined_reads"/> <param name="format_selector" value="fastq.gz"/> </conditional> </section> - <output name="stats_outfile" file="output2.tabular" ftype="tabular"/> - <output name="reads_outfile" file="output2.fastq.gz" ftype="fastq.gz"/> + <output name="stats_outfile" file="output3.tabular" ftype="tabular"/> + <output name="reads_outfile" ftype="fastq.gz" md5="23a14631cb075817967752021deb6ec4"> + <assert_contents> + <has_size size="159"/> + </assert_contents> + </output> </test> </tests> <help><![CDATA[ - -**rdeval** is a general-purpose, multithreaded tool for analyzing and manipulating reads (FASTA/FASTQ/BAM/CRAM/RD). +What it does +============ - rdeval input.fa*[.gz]|bam|cram|rd [expected genome size] +**rdeval** accepts an arbitrary number of sequencing files and optionally **filters**, **subsamples**, and/or **compresses homopolymers** within the reads. The retained reads can be saved in multiple formats, and metrics on these reads can be stored in a '*sketch*' file. Statistics can then be efficiently retrieved from these sketch files for further processing. 
-:: - - Dataset report example: +.. image:: pipeline.svg - +++Read summary+++: - # reads: 10000 - Total read length: 134014104 - Average read length: 13401.41 - Read N50: 14270 - Smallest read length: 1142 - Largest read length: 40910 - Coverage: inf - GC content %: 43.78 - Base composition (A:C:T:G): 37693226:29331833:37655925:29333120 - Average per base quality: 26.47 +Filtering +========= + +Input reads can be filtered using one or more of the three methods listed below, applied sequentially in the specified order. -:: - - Per sequence/read report (--sequence-report) example: +1. Retain reads whose header lines are listed in the include dataset. +2. Discard reads whose header lines are listed in the exclude dataset. +3. Retain reads that match the provided filter expression. + +The filter expression can be used to select reads based on read length (l), average read quality (q), or a combination of both. The grammar for constructing filter expressions is outlined below: - Header Comment Length A C G T N GC Average Quality - m54306U_210528_154706/69206614/ccs 22812 6170 5146 4802 6694 0 0.44 89.9705 - m54306U_210528_154706/25888573/ccs 32200 9162 7270 7112 8656 0 0.45 56.8306 - m54306U_210528_154706/40634168/ccs 8487 2443 1858 1876 2310 0 0.44 90.3828 - m54306U_210528_154706/103745617/ccs 16496 4546 3752 3760 4438 0 0.46 88.3554 + * filter-expression ::= <length-expression> | <quality-expression> | <length-expression> <combination-operator> <quality-expression> | <quality-expression> <combination-operator> <length-expression> + * length-expression ::= "l" <comparison-operator> <integer> + * quality-expression ::= "q" <comparison-operator> <integer> + * combination-operator ::= "&" | "|" + * comparison-operator ::= "<" | "=" | ">" + * integer ::= <digit> | <digit><integer> + * digit ::= "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" -:: +Retain reads longer than 10 base pairs + + l>10 + +Retain reads with average quality greater than 20 + + q>20 - Options: - 
--sequence-report generates a per-read report - -e --exclude-list <file> generates output on a excluding list of headers. - -f --filter <exp> filter reads using <exp> in quotes, e.g. 'l>10' for longer than 10bp or 'l>10 & q>10' to further exclude reads by quality (default: none). - -i --include-list <file> generates output on a subset list of headers. - -o --out-format <file> output file (fa*[.gz], bam, cram, rd). Optionally write reads to file or generate rd summary file. - -q --quality q|a generates list of average quality for each read (q) or both length and quality (a). - -r --input-reads <file1> <file2> <file n> input file (fa*[.gz], bam, cram, rd). - -s --out-size u|s|h|c generates size list (unsorted|sorted|histogram|inverse cumulative table). - --homopolymer-compress <int> compress all the homopolymers longer than n in the input. - --sample <float> fraction of reads to subsample. - --random-seed <int> an optional random seed to make subsampling reproducible. - --md5 print md5 of .rd files. - --tabular tabular output. - --verbose verbose output. - -j --threads <int> numbers of threads (default:5). - -v --version software version. - --cmd print $0 to stdout. +Retain reads longer than 10 base pairs with average quality greater than 20 + + l>10 & q>20 + +.. _sampling-label: -**Attribution** +Sub-sampling +============ + +4. Retain a subsample of the reads by specifying the fraction to be kept. Use the *random seed* option to keep subsampling reproducible. -This tool relies on the gfastar suite and the gfalibs toolkit `vgl-hub/gfalibs <https://github.com/vgl-hub/gfalibs>`_, developed by Giulio Formenti at the Rockefeller University +Homopolymer Compression +======================= + +5. Runs of repeated nucleotides in each read are collapsed, with any associated quality data discarded. For example, CAGGCTTT would become CAGCT. ]]></help> <expand macro="citations"/> </tool>
--- a/rdeval_report.xml Wed Apr 23 19:31:12 2025 +0000 +++ b/rdeval_report.xml Fri May 02 22:49:30 2025 +0000 @@ -10,23 +10,46 @@ export RDEVAL_SHARE_DIR="\$(dirname \$(dirname \$(type -P rdeval)))/share/rdeval" && ln -s "\$RDEVAL_SHARE_DIR/figures.Rmd" && ln -s "\$RDEVAL_SHARE_DIR/rdeval_interface.R" && - #set $num_files = 0 - #for $input_file in $input_files - ln -s '$input_file' '${num_files}.rd' && - #set $num_files += 1 + #set $input_file_list = [] + #for $idx, $input_file in enumerate($input_files) + ln -s '$input_file' '${idx}.rd' && + #silent $input_file_list.append(f"'{idx}.rd'") #end for - R -e "rmarkdown::render('figures.Rmd', output_file='$html_outfile')" --args #for $idx in range($num_files)# '${idx}.rd' #end for + #set $r_vector = "c(" + ",".join(input_file_list) + ")" + #set $interactive_value = "FALSE" + #if $output_format.format_selector == "html" and $output_format.interactive + #set $interactive_value = "TRUE" + #end if + R -e "rmarkdown::render('figures.Rmd', output_file='$outfile', output_format='${output_format.format_selector}_document', params=list(input_files=$r_vector, interactive='$interactive_value'))" ]]></command> <inputs> <param name="input_files" type="data" format="data" multiple="true" label="Input dataset" help="RD files"/> + <conditional name="output_format"> + <param name="format_selector" type="select" label="output format" help="output format"> + <option value="html" selected="true">html</option> + <option value="pdf">pdf</option> + </param> + <when value="html"> + <param name="interactive" type="boolean" value="true" label="interactive" help="interactive"/> + </when> + <when value="pdf"/> + </conditional> </inputs> <outputs> - <data name="html_outfile" format="html" label="Rdeval report"/> + <data name="outfile" format="html" label="Rdeval report"> + <change_format> + <when input="output_format" value="pdf" format="pdf"/> + </change_format> + </data> </outputs> <tests> <test expect_num_outputs="1"> - <param name="input_files" 
value="test.rd"/> - <output name="html_outfile" ftype="html"> + <param name="input_files" value="input1.rd"/> + <conditional name="output_format"> + <param name="format_selector" value="html"/> + </conditional> + + <output name="outfile" ftype="html"> <assert_contents> <has_size size="833000" delta="8330"/> </assert_contents>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/static/images/pipeline.svg Fri May 02 22:49:30 2025 +0000 @@ -0,0 +1,434 @@ +<?xml version="1.0" encoding="UTF-8" standalone="no"?> +<!-- Created with Inkscape (http://www.inkscape.org/) --> + +<svg + width="210mm" + height="297mm" + viewBox="0 0 210 297" + version="1.1" + id="svg5" + xmlns="http://www.w3.org/2000/svg" + xmlns:svg="http://www.w3.org/2000/svg"> + <defs + id="defs2"> + <marker + style="overflow:visible;" + id="marker1298" + refX="0.0" + refY="0.0" + orient="auto"> + <path + transform="scale(0.8) rotate(180) translate(12.5,0)" + style="fill-rule:evenodd;fill:context-stroke;stroke:context-stroke;stroke-width:1.0pt;" + d="M 0.0,0.0 L 5.0,-5.0 L -12.5,0.0 L 5.0,5.0 L 0.0,0.0 z " + id="path892" /> + </marker> + <marker + style="overflow:visible;" + id="Arrow1Lend" + refX="0.0" + refY="0.0" + orient="auto"> + <path + transform="scale(0.8) rotate(180) translate(12.5,0)" + style="fill-rule:evenodd;fill:context-stroke;stroke:context-stroke;stroke-width:1.0pt;" + d="M 0.0,0.0 L 5.0,-5.0 L -12.5,0.0 L 5.0,5.0 L 0.0,0.0 z " + id="path36356" /> + </marker> + <marker + style="overflow:visible" + id="marker1298-3" + refX="0" + refY="0" + orient="auto"> + <path + transform="matrix(-0.8,0,0,-0.8,-10,0)" + style="fill:context-stroke;fill-rule:evenodd;stroke:context-stroke;stroke-width:1pt" + d="M 0,0 5,-5 -12.5,0 5,5 Z" + id="path892-3" /> + </marker> + <marker + style="overflow:visible" + id="marker1298-3-8" + refX="0" + refY="0" + orient="auto"> + <path + transform="matrix(-0.8,0,0,-0.8,-10,0)" + style="fill:context-stroke;fill-rule:evenodd;stroke:context-stroke;stroke-width:1pt" + d="M 0,0 5,-5 -12.5,0 5,5 Z" + id="path892-3-6" /> + </marker> + <marker + style="overflow:visible" + id="marker1298-3-8-4" + refX="0" + refY="0" + orient="auto"> + <path + transform="matrix(-0.8,0,0,-0.8,-10,0)" + style="fill:context-stroke;fill-rule:evenodd;stroke:context-stroke;stroke-width:1pt" + d="M 0,0 5,-5 -12.5,0 5,5 Z" + 
id="path892-3-6-8" /> + </marker> + <marker + style="overflow:visible" + id="marker1298-3-8-4-8" + refX="0" + refY="0" + orient="auto"> + <path + transform="matrix(-0.8,0,0,-0.8,-10,0)" + style="fill:context-stroke;fill-rule:evenodd;stroke:context-stroke;stroke-width:1pt" + d="M 0,0 5,-5 -12.5,0 5,5 Z" + id="path892-3-6-8-9" /> + </marker> + <marker + style="overflow:visible" + id="marker1298-3-8-4-7" + refX="0" + refY="0" + orient="auto"> + <path + transform="matrix(-0.8,0,0,-0.8,-10,0)" + style="fill:context-stroke;fill-rule:evenodd;stroke:context-stroke;stroke-width:1pt" + d="M 0,0 5,-5 -12.5,0 5,5 Z" + id="path892-3-6-8-6" /> + </marker> + <marker + style="overflow:visible" + id="marker1298-3-8-4-7-3" + refX="0" + refY="0" + orient="auto"> + <path + transform="matrix(-0.8,0,0,-0.8,-10,0)" + style="fill:context-stroke;fill-rule:evenodd;stroke:context-stroke;stroke-width:1pt" + d="M 0,0 5,-5 -12.5,0 5,5 Z" + id="path892-3-6-8-6-0" /> + </marker> + <marker + style="overflow:visible" + id="marker1298-3-8-4-7-3-0" + refX="0" + refY="0" + orient="auto"> + <path + transform="matrix(-0.8,0,0,-0.8,-10,0)" + style="fill:context-stroke;fill-rule:evenodd;stroke:context-stroke;stroke-width:1pt" + d="M 0,0 5,-5 -12.5,0 5,5 Z" + id="path892-3-6-8-6-0-9" /> + </marker> + <marker + style="overflow:visible" + id="marker1298-3-8-4-7-3-5" + refX="0" + refY="0" + orient="auto"> + <path + transform="matrix(-0.8,0,0,-0.8,-10,0)" + style="fill:context-stroke;fill-rule:evenodd;stroke:context-stroke;stroke-width:1pt" + d="M 0,0 5,-5 -12.5,0 5,5 Z" + id="path892-3-6-8-6-0-4" /> + </marker> + </defs> + <g + id="layer1"> + <g + id="g4714"> + <g + id="g30989"> + <g + id="g29274" + style="vector-effect:non-scaling-stroke;stroke-width:7e-05;stroke-miterlimit:4;stroke-dasharray:none;-inkscape-stroke:hairline"> + <path + id="rect868-8-8-8-3" + 
style="vector-effect:non-scaling-stroke;fill:#daecf8;stroke:#000000;stroke-width:7e-05;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;-inkscape-stroke:hairline" + d="m 105.67591,35.29722 h 11.2448 l 5.62239,5.62241 v 16.20572 h -8.43359 -8.4336 z" /> + <path + id="rect8392-9-6-9-0" + style="vector-effect:non-scaling-stroke;fill:#abd4ed;fill-opacity:1;stroke:#000000;stroke-width:7e-05;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;-inkscape-stroke:hairline" + d="m 116.9207,35.29723 5.6224,5.6224 v 0 h -5.6224 z" /> + <rect + style="vector-effect:non-scaling-stroke;fill:#abd5ed;fill-opacity:1;stroke:#000000;stroke-width:7e-05;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;-inkscape-stroke:hairline" + id="rect8938-6-0-7-3" + width="16.867188" + height="5.622396" + x="105.67591" + y="46.376678" /> + <text + xml:space="preserve" + style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:5.8947px;line-height:1.25;font-family:'Latin Modern Mono Light';-inkscape-font-specification:'Latin Modern Mono Light, Bold';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;word-spacing:0px;vector-effect:non-scaling-stroke;stroke:#000000;stroke-width:7e-05;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;-inkscape-stroke:hairline" + x="111.0649" + y="50.956284" + id="text50132-4-4-7-0"><tspan + id="tspan1187" + x="111.0649" + y="50.956284">RD</tspan></text> + </g> + <g + id="g29996" + transform="translate(-0.76821861,-0.139146)"> + <path + id="rect868-8-8-8-3-2" + style="vector-effect:non-scaling-stroke;fill:#daecf8;stroke:#000000;stroke-width:7e-05;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;-inkscape-stroke:hairline" + d="m 84.115444,35.43637 h 11.2448 l 5.622386,5.62241 V 57.2645 h -8.433593 -8.433593 z" /> + <path + id="rect8392-9-6-9-0-8" + 
style="vector-effect:non-scaling-stroke;fill:#abd4ed;fill-opacity:1;stroke:#000000;stroke-width:7e-05;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;-inkscape-stroke:hairline" + d="m 95.360234,35.43638 5.622396,5.6224 v 0 h -5.622396 z" /> + <rect + style="vector-effect:non-scaling-stroke;fill:#abd5ed;fill-opacity:1;stroke:#000000;stroke-width:7e-05;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;-inkscape-stroke:hairline" + id="rect8938-6-0-7-3-3" + width="16.867188" + height="5.622396" + x="84.115448" + y="46.515827" /> + <text + xml:space="preserve" + style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:5.8947px;line-height:1.25;font-family:'Latin Modern Mono Light';-inkscape-font-specification:'Latin Modern Mono Light, Bold';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;word-spacing:0px;vector-effect:non-scaling-stroke;stroke:#000000;stroke-width:7e-05;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;-inkscape-stroke:hairline" + x="84.894775" + y="50.765331" + id="text50132-4-4-7-0-8"><tspan + id="tspan29989" + x="84.894775" + y="50.765331">FASTQ</tspan></text> + </g> + <g + id="g30165" + transform="translate(-2.1639183,0.04923)"> + <path + id="rect868-8-8-8-3-2-4" + style="vector-effect:non-scaling-stroke;fill:#daecf8;stroke:#000000;stroke-width:7e-05;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;-inkscape-stroke:hairline" + d="m 63.182464,35.247994 h 11.2448 l 5.622386,5.62241 v 16.20572 h -8.433593 -8.433593 z" /> + <path + id="rect8392-9-6-9-0-8-9" + style="vector-effect:non-scaling-stroke;fill:#abd4ed;fill-opacity:1;stroke:#000000;stroke-width:7e-05;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;-inkscape-stroke:hairline" + d="m 74.427254,35.248004 5.622396,5.6224 v 0 h -5.622396 z" /> + <rect + 
style="vector-effect:non-scaling-stroke;fill:#abd5ed;fill-opacity:1;stroke:#000000;stroke-width:7e-05;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;-inkscape-stroke:hairline" + id="rect8938-6-0-7-3-3-9" + width="16.867188" + height="5.622396" + x="63.182468" + y="46.32745" /> + <text + xml:space="preserve" + style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:5.8947px;line-height:1.25;font-family:'Latin Modern Mono Light';-inkscape-font-specification:'Latin Modern Mono Light, Bold';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;word-spacing:0px;vector-effect:non-scaling-stroke;stroke:#000000;stroke-width:7e-05;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;-inkscape-stroke:hairline" + x="63.879272" + y="50.942425" + id="text50132-4-4-7-0-8-3"><tspan + id="tspan30158" + x="63.879272" + y="50.942425">FASTA</tspan></text> + </g> + <g + id="g30334" + transform="translate(-2.9423309,-0.081337)"> + <path + id="rect868-8-8-8-3-2-4-5" + style="vector-effect:non-scaling-stroke;fill:#daecf8;stroke:#000000;stroke-width:7e-05;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;-inkscape-stroke:hairline" + d="m 41.632196,35.378561 h 11.2448 l 5.622386,5.62241 v 16.20572 h -8.433593 -8.433593 z" /> + <path + id="rect8392-9-6-9-0-8-9-0" + style="vector-effect:non-scaling-stroke;fill:#abd4ed;fill-opacity:1;stroke:#000000;stroke-width:7e-05;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;-inkscape-stroke:hairline" + d="m 52.876986,35.378571 5.622396,5.6224 v 0 h -5.622396 z" /> + <rect + style="vector-effect:non-scaling-stroke;fill:#abd5ed;fill-opacity:1;stroke:#000000;stroke-width:7e-05;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;-inkscape-stroke:hairline" + id="rect8938-6-0-7-3-3-9-2" + width="16.867188" + height="5.622396" + x="41.632202" + y="46.458015" /> + <text + xml:space="preserve" + 
style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:5.8947px;line-height:1.25;font-family:'Latin Modern Mono Light';-inkscape-font-specification:'Latin Modern Mono Light, Bold';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;word-spacing:0px;vector-effect:non-scaling-stroke;stroke:#000000;stroke-width:7e-05;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;-inkscape-stroke:hairline" + x="43.811523" + y="51.07299" + id="text50132-4-4-7-0-8-3-9"><tspan + id="tspan30327" + x="43.811523" + y="51.07299">CRAM</tspan></text> + </g> + <g + id="g30539" + transform="translate(0,-5.126281)"> + <path + id="rect868-8-8-8-3-2-4-5-5" + style="vector-effect:non-scaling-stroke;fill:#daecf8;stroke:#000000;stroke-width:7e-05;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;-inkscape-stroke:hairline" + d="m 16.361184,40.423505 h 11.2448 l 5.622386,5.62241 v 16.20572 h -8.433593 -8.433593 z" /> + <path + id="rect8392-9-6-9-0-8-9-0-1" + style="vector-effect:non-scaling-stroke;fill:#abd4ed;fill-opacity:1;stroke:#000000;stroke-width:7e-05;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;-inkscape-stroke:hairline" + d="m 27.605974,40.423515 5.622396,5.6224 v 0 h -5.622396 z" /> + <rect + style="vector-effect:non-scaling-stroke;fill:#abd5ed;fill-opacity:1;stroke:#000000;stroke-width:7e-05;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;-inkscape-stroke:hairline" + id="rect8938-6-0-7-3-3-9-2-7" + width="16.867188" + height="5.622396" + x="16.361191" + y="51.50296" /> + <text + xml:space="preserve" + style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:5.8947px;line-height:1.25;font-family:'Latin Modern Mono Light';-inkscape-font-specification:'Latin Modern Mono Light, 
Bold';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;word-spacing:0px;vector-effect:non-scaling-stroke;stroke:#000000;stroke-width:7e-05;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;-inkscape-stroke:hairline" + x="20.137974" + y="56.150356" + id="text50132-4-4-7-0-8-3-9-4"><tspan + id="tspan30532" + x="20.137974" + y="56.150356">BAM</tspan></text> + </g> + </g> + <g + id="g30994" + transform="translate(0,3.6326433)"> + <path + id="rect30677" + style="vector-effect:non-scaling-stroke;fill:#d8ebf8;stroke:#000000;stroke-width:0.0113528;-inkscape-stroke:hairline" + d="M 16.769016,62.697968 H 69.898328 123.02764 V 84.508125 H 69.898328 16.769016 Z" /> + <text + xml:space="preserve" + style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:9.64337px;line-height:1.25;font-family:'Latin Modern Mono Light';-inkscape-font-specification:'Latin Modern Mono Light, Bold';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;word-spacing:0px;vector-effect:non-scaling-stroke;stroke-width:0.00700043;stroke-miterlimit:4;stroke-dasharray:none;-inkscape-stroke:hairline" + x="39.109085" + y="76.636116" + id="text3658-1-5-1-1-6"><tspan + id="tspan18890" + x="39.109085" + y="76.636116">include list</tspan></text> + </g> + <g + id="g31018" + transform="translate(0,1.6148885e-5)"> + <rect + style="fill:#d8ebf8;fill-opacity:1;stroke-width:0.143519" + id="rect1218-6-1-9-9" + width="106.259" + height="21.81016" + x="17.913553" + y="190.39183" /> + <path + id="rect30677-1" + style="vector-effect:non-scaling-stroke;fill:#d8ebf8;stroke:#000000;stroke-width:0.0113528;-inkscape-stroke:hairline" + d="m 18.021629,190.4646 h 53.129311 53.12931 v 21.81016 H 18.021629 Z" /> + <text + xml:space="preserve" + 
style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:9.64337px;line-height:1.25;font-family:'Latin Modern Mono Light';-inkscape-font-specification:'Latin Modern Mono Light, Bold';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;word-spacing:0px;stroke-width:0.241085" + x="20.666126" + y="203.17255" + id="text3658-1-5-1-1"><tspan + id="tspan17582" + x="20.666126" + y="203.17255" + style="stroke-width:0.241085">homopolymer compress</tspan></text> + </g> + <g + id="g31012" + transform="translate(0,-1.9580039)"> + <rect + style="fill:#d8ebf8;fill-opacity:1;stroke-width:0.143519" + id="rect1218-6-1-9" + width="106.25862" + height="21.81016" + x="17.529228" + y="161.36182" /> + <path + id="rect30677-1-4" + style="vector-effect:non-scaling-stroke;fill:#d8ebf8;stroke:#000000;stroke-width:0.0113528;-inkscape-stroke:hairline" + d="m 17.529228,161.36183 h 53.129311 53.129311 v 21.81016 H 70.658539 17.529228 Z" /> + <text + xml:space="preserve" + style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:9.64337px;line-height:1.25;font-family:'Latin Modern Mono Light';-inkscape-font-specification:'Latin Modern Mono Light, Bold';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;word-spacing:0px;stroke-width:0.241085" + x="47.823051" + y="174.17146" + id="text3658-1-5-1-1-4"><tspan + id="tspan24382" + x="47.823051" + y="174.17146">subsample</tspan></text> + </g> + <g + id="g31006" + transform="translate(0,-3.2617034)"> + <rect + style="fill:#d8ebf8;fill-opacity:1;stroke-width:0.143519" + id="rect1218-6-1" + width="106.25862" + height="21.81016" + x="16.716393" + y="131.64111" /> + <path + id="rect30677-1-4-6" + style="vector-effect:non-scaling-stroke;fill:#d8ebf8;stroke:#000000;stroke-width:0.0113528;-inkscape-stroke:hairline" + d="m 16.716393,131.64111 h 53.129309 53.129308 v 
21.81016 H 69.845702 16.716393 Z" /> + <text + xml:space="preserve" + style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:9.64337px;line-height:1.25;font-family:'Latin Modern Mono Light';-inkscape-font-specification:'Latin Modern Mono Light, Bold';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;word-spacing:0px;stroke-width:0.241085" + x="26.682009" + y="144.47969" + id="text3658-1-5-1-1-0"><tspan + id="tspan23044" + x="26.682009" + y="144.47969">filter expression</tspan></text> + </g> + <g + id="g31000" + transform="translate(0,-0.80474739)"> + <rect + style="fill:#d8ebf8;fill-opacity:1;stroke-width:0.143519" + id="rect1218-6" + width="106.25862" + height="21.81016" + x="16.769016" + y="98.15976" /> + <path + id="rect30677-1-4-6-9" + style="vector-effect:non-scaling-stroke;fill:#d8ebf8;stroke:#000000;stroke-width:0.0113528;-inkscape-stroke:hairline" + d="m 16.769016,98.15976 h 53.129312 53.129312 v 21.81016 H 69.898328 16.769016 Z" /> + <text + xml:space="preserve" + style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:9.64337px;line-height:1.25;font-family:'Latin Modern Mono Light';-inkscape-font-specification:'Latin Modern Mono Light, Bold';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;word-spacing:0px;stroke-width:0.241085" + x="39.627808" + y="111.98196" + id="text3658-1-5-1-1-2"><tspan + id="tspan21056" + x="39.627808" + y="111.98196">exclude list</tspan></text> + </g> + <path + style="fill:none;stroke:#000000;stroke-width:0.264583px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-end:url(#marker1298)" + d="m 24.795,57.125 v 8.676833" + id="path1295" /> + <path + style="fill:none;stroke:#000000;stroke-width:0.264583px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-end:url(#marker1298-3)" + d="m 47.123,57.125 v 
8.677" + id="path1295-3" /> + <path + style="fill:none;stroke:#000000;stroke-width:0.264583px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-end:url(#marker1298-3-8)" + d="m 69.452,57.125 v 8.677" + id="path1295-3-0" /> + <path + style="fill:none;stroke:#000000;stroke-width:0.264583px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-end:url(#marker1298-3-8-4)" + d="m 91.781,57.125 v 8.677" + id="path1295-3-0-8" /> + <path + style="fill:none;stroke:#000000;stroke-width:0.264583px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-end:url(#marker1298-3-8-4-8)" + d="m 114.11,57.125 v 8.677" + id="path1295-3-0-8-7" /> + <path + style="fill:none;stroke:#000000;stroke-width:0.264583px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-end:url(#marker1298-3-8-4-7)" + d="m 69.452,88.114 v 8.711833" + id="path1295-3-0-8-4" /> + <path + style="fill:none;stroke:#000000;stroke-width:0.264583px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-end:url(#marker1298-3-8-4-7-3)" + d="m 69.452,119.165 v 8.90466" + id="path1295-3-0-8-4-3" /> + <path + style="fill:none;stroke:#000000;stroke-width:0.264583px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-end:url(#marker1298-3-8-4-7-3-0)" + d="m 69.452,150.19 v 8.68483" + id="path1295-3-0-8-4-3-2" /> + <path + style="fill:none;stroke:#000000;stroke-width:0.264583px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-end:url(#marker1298-3-8-4-7-3-5)" + d="m 69.452,181.214 -0.027,8.72183" + id="path1295-3-0-8-4-3-0" /> + </g> + </g> +</svg>
--- a/test-data/output1.tabular Wed Apr 23 19:31:12 2025 +0000 +++ b/test-data/output1.tabular Fri May 02 22:49:30 2025 +0000 @@ -7,4 +7,4 @@ Coverage inf GC content % 50.00 Base composition (A:C:T:G) 9:14:11:6 -Average per base quality 0.00 +Average per base quality 11.81
--- a/test-data/output2.tabular Wed Apr 23 19:31:12 2025 +0000 +++ b/test-data/output2.tabular Fri May 02 22:49:30 2025 +0000 @@ -1,10 +1,10 @@ -# reads 11 -Total read length 264855 -Average read length 24077.73 -Read N50 24322 -Smallest read length 17465 -Largest read length 36274 +# reads 2 +Total read length 30 +Average read length 15.00 +Read N50 15 +Smallest read length 15 +Largest read length 15 Coverage inf -GC content % 40.81 -Base composition (A:C:T:G) 79479:54455:77277:53644 -Average per base quality 23.58 +GC content % 54.17 +Base composition (A:C:T:G) 5:10:6:3 +Average per base quality 9.81
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output3.tabular Fri May 02 22:49:30 2025 +0000 @@ -0,0 +1,10 @@ +# reads 5 +Total read length 43 +Average read length 8.60 +Read N50 11 +Smallest read length 5 +Largest read length 12 +Coverage inf +GC content % 51.35 +Base composition (A:C:T:G) 8:13:10:6 +Average per base quality 0.00