# HG changeset patch # User richard-burhans # Date 1746226170 0 # Node ID 7cfeba6facd11c2e8e3af2a2482b4e5281e71106 # Parent 7bf95986aaa4da3b81656686a71903b7d6169d39 planemo upload for repository https://github.com/vgl-hub/rdeval commit d55c4b3d6b91d0418950ed6b7806ef779a916099 diff -r 7bf95986aaa4 -r 7cfeba6facd1 macros.xml --- a/macros.xml Wed Apr 23 19:31:12 2025 +0000 +++ b/macros.xml Fri May 02 22:49:30 2025 +0000 @@ -4,9 +4,8 @@ rdeval - - 0.0.5 - 4 + 0.0.7 + 0 23.02 diff -r 7bf95986aaa4 -r 7cfeba6facd1 rdeval.xml --- a/rdeval.xml Wed Apr 23 19:31:12 2025 +0000 +++ b/rdeval.xml Fri May 02 22:49:30 2025 +0000 @@ -15,46 +15,62 @@ #end for #if $output_options.output_type.type_selector == "combined_reads" ln -s '$reads_outfile' 'output.${output_type.format_selector}' && - #end if - rdeval --input-reads #echo " ".join([f"'{input}'" for $input in $mangled_inputs]) + #end if + rdeval --input-reads + #for $input in $mangled_inputs + '$input' + #end for #if $expected_gsize '$expected_gsize' #end if - #if $input_filter.filter_selector == "exclude_file" - --exclude-list '$exclude_file' - #else if $input_filter.filter_selector == "include_file" - --include-list '$include_file' - #end if - #if $filter - --filter '$filter' + #if $input_filter.include_list + --include-list '$input_filter.include_list' + #end if + #if $input_filter.exclude_list + --exclude-list '$input_filter.exclude_list' #end if - --sample '$sample' - #if $input_subsample.seed_selector == "yes" - --random-seed '$random_seed' + #set $filter_exp_type = $input_filter.filter_expression.filter_selector + #if $filter_exp_type != "no_exp" + #set $l_exp = "l" + str($input_filter.filter_expression.length_comparison) + str($input_filter.filter_expression.length_value) + #set $q_exp = "q" + str($input_filter.filter_expression.quality_comparison) + str($input_filter.filter_expression.quality_value) + #if $filter_exp_type == "l_exp" + #set $filter_exp = $l_exp + #else if $filter_exp_type == "q_exp" + #set $filter_exp = 
$q_exp + #else if $filter_exp_type == "lq_exp" + #set $filter_exp = $l_exp + str($input_filter.filter_expression.exp_operator) + $q_exp + #end if + --filter '$filter_exp' + #end if + #if int($input_subsample.sample) != 1 + --sample '$input_subsample.sample' + #end if + #if $input_subsample.random_seed.seed_selector == "yes" + --random-seed '$input_subsample.random_seed.random_seed' #end if #if $input_compress.compress_selector == "yes" - --homopolymer-compress '$homopolymer_compress' + --homopolymer-compress '$input_compress.homopolymer_compress' #end if - #if $stats_flavor.flavor_selector == "stats" - #if $sequence_report + #set $stats_type = $output_options.stats_flavor.flavor_selector + #if $stats_type == "stats" + #if $output_options.stats_flavor.sequence_report --sequence-report #end if - #else if $stats_flavor.flavor_selector == "quality" - --quality '$quality' - #else if $stats_flavor.flavor_selector == "size" - --out-size '$out_size' + #else if $stats_type == "quality" + --quality '$output_options.stats_flavor.quality' + #else if $stats_type == "size" + --out-size '$output_options.stats_flavor.out_size' #end if - #if $output_options.output_type.type_selector == "rd_file" - #if $md5 + #set $output_type = $output_options.output_type.type_selector + #if $output_type == "rd_file" + #if $output_options.output_type.md5 --md5 #end if -o output.rd - #else if $output_options.output_type.type_selector == "combined_reads" - -o 'output.${output_type.format_selector}' + #else if $output_type == "combined_reads" + -o 'output.${output_options.output_type.format_selector}' #end if - #if $verbose --verbose - #end if --tabular --threads \${GALAXY_SLOTS:-2} > '$stats_outfile' @@ -63,21 +79,56 @@
- - - - - + + + + + + + + - - - + + + + + + + + + + + + + + + + + + - - + + + + + + + + + + + + + + + + + + + + -
@@ -144,7 +195,6 @@ -
@@ -164,91 +214,103 @@ - + - + - - + +
+ + + + + + + + +
+ - +
- + +
+ + +
- - + + + + + +
| | | + * length-expression ::= "l" + * quality-expression ::= "q" + * combination-operator ::= "&" | "|" + * comparison-operator ::= "<" | "=" | ">" + * integer ::= | + * digit ::= "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" -:: +Retain reads longer than 10 base pairs + + l>10 + +Retain reads with average quality greater than 20 + + q>20 - Options: - --sequence-report generates a per-read report - -e --exclude-list generates output on a excluding list of headers. - -f --filter filter reads using in quotes, e.g. 'l>10' for longer than 10bp or 'l>10 & q>10' to further exclude reads by quality (default: none). - -i --include-list generates output on a subset list of headers. - -o --out-format output file (fa*[.gz], bam, cram, rd). Optionally write reads to file or generate rd summary file. - -q --quality q|a generates list of average quality for each read (q) or both length and quality (a). - -r --input-reads input file (fa*[.gz], bam, cram, rd). - -s --out-size u|s|h|c generates size list (unsorted|sorted|histogram|inverse cumulative table). - --homopolymer-compress compress all the homopolymers longer than n in the input. - --sample fraction of reads to subsample. - --random-seed an optional random seed to make subsampling reproducible. - --md5 print md5 of .rd files. - --tabular tabular output. - --verbose verbose output. - -j --threads numbers of threads (default:5). - -v --version software version. - --cmd print $0 to stdout. +Retain reads longer than 10 base pairs with average quality greater than 20 + + l>10 & q>20 + +.. _sampling-label: -**Attribution** +Sub-sampling +============ + +4. Retain a subsample of the reads by specifying the fraction to be kept. Use the *random seed* option to keep subsampling reproducible. -This tool relies on the gfastar suite and the gfalibs toolkit `vgl-hub/gfalibs `_, developed by Giulio Formenti at the Rockefeller University +Homopolymer Compression +======================= + +5. 
Runs of repeated nucleotides in each read are collapsed, with any associated quality data discarded. For example, CAGGCTTT would become CAGCT. ]]> diff -r 7bf95986aaa4 -r 7cfeba6facd1 rdeval_report.xml --- a/rdeval_report.xml Wed Apr 23 19:31:12 2025 +0000 +++ b/rdeval_report.xml Fri May 02 22:49:30 2025 +0000 @@ -10,23 +10,46 @@ export RDEVAL_SHARE_DIR="\$(dirname \$(dirname \$(type -P rdeval)))/share/rdeval" && ln -s "\$RDEVAL_SHARE_DIR/figures.Rmd" && ln -s "\$RDEVAL_SHARE_DIR/rdeval_interface.R" && - #set $num_files = 0 - #for $input_file in $input_files - ln -s '$input_file' '${num_files}.rd' && - #set $num_files += 1 + #set $input_file_list = [] + #for $idx, $input_file in enumerate($input_files) + ln -s '$input_file' '${idx}.rd' && + #silent $input_file_list.append(f"'{idx}.rd'") #end for - R -e "rmarkdown::render('figures.Rmd', output_file='$html_outfile')" --args #for $idx in range($num_files)# '${idx}.rd' #end for + #set $r_vector = "c(" + ",".join(input_file_list) + ")" + #set $interactive_value = "FALSE" + #if $output_format.format_selector == "html" and $output_format.interactive + #set $interactive_value = "TRUE" + #end if + R -e "rmarkdown::render('figures.Rmd', output_file='$outfile', output_format='${output_format.format_selector}_document', params=list(input_files=$r_vector, interactive='$interactive_value'))" ]]> + + + + + + + + + + - + + + + + - - + + + + + + diff -r 7bf95986aaa4 -r 7cfeba6facd1 static/images/pipeline.svg --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/static/images/pipeline.svg Fri May 02 22:49:30 2025 +0000 @@ -0,0 +1,434 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + RD + + + + + + FASTQ + + + + + + FASTA + + + + + + CRAM + + + + + + BAM + + + + + include list + + + + + homopolymer compress + + + + + subsample + + + + + filter expression + + + + + exclude list + + + + + + + + + + + + + diff -r 7bf95986aaa4 -r 7cfeba6facd1 test-data/input1.fastq.gz Binary file 
test-data/input1.fastq.gz has changed diff -r 7bf95986aaa4 -r 7cfeba6facd1 test-data/input1.rd Binary file test-data/input1.rd has changed diff -r 7bf95986aaa4 -r 7cfeba6facd1 test-data/output1.tabular --- a/test-data/output1.tabular Wed Apr 23 19:31:12 2025 +0000 +++ b/test-data/output1.tabular Fri May 02 22:49:30 2025 +0000 @@ -7,4 +7,4 @@ Coverage inf GC content % 50.00 Base composition (A:C:T:G) 9:14:11:6 -Average per base quality 0.00 +Average per base quality 11.81 diff -r 7bf95986aaa4 -r 7cfeba6facd1 test-data/output2.fastq.gz Binary file test-data/output2.fastq.gz has changed diff -r 7bf95986aaa4 -r 7cfeba6facd1 test-data/output2.tabular --- a/test-data/output2.tabular Wed Apr 23 19:31:12 2025 +0000 +++ b/test-data/output2.tabular Fri May 02 22:49:30 2025 +0000 @@ -1,10 +1,10 @@ -# reads 11 -Total read length 264855 -Average read length 24077.73 -Read N50 24322 -Smallest read length 17465 -Largest read length 36274 +# reads 2 +Total read length 30 +Average read length 15.00 +Read N50 15 +Smallest read length 15 +Largest read length 15 Coverage inf -GC content % 40.81 -Base composition (A:C:T:G) 79479:54455:77277:53644 -Average per base quality 23.58 +GC content % 54.17 +Base composition (A:C:T:G) 5:10:6:3 +Average per base quality 9.81 diff -r 7bf95986aaa4 -r 7cfeba6facd1 test-data/output3.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output3.tabular Fri May 02 22:49:30 2025 +0000 @@ -0,0 +1,10 @@ +# reads 5 +Total read length 43 +Average read length 8.60 +Read N50 11 +Smallest read length 5 +Largest read length 12 +Coverage inf +GC content % 51.35 +Base composition (A:C:T:G) 8:13:10:6 +Average per base quality 0.00 diff -r 7bf95986aaa4 -r 7cfeba6facd1 test-data/test.rd Binary file test-data/test.rd has changed diff -r 7bf95986aaa4 -r 7cfeba6facd1 test-data/test1.fasta.gz Binary file test-data/test1.fasta.gz has changed diff -r 7bf95986aaa4 -r 7cfeba6facd1 test-data/test1.fastq.gz Binary file test-data/test1.fastq.gz has changed diff 
-r 7bf95986aaa4 -r 7cfeba6facd1 test-data/test2.bam Binary file test-data/test2.bam has changed