Mercurial > repos > richard-burhans > rdeval
view rdeval.xml @ 1:853c32c31a6c draft
planemo upload for repository https://github.com/vgl-hub/rdeval commit a78bcc1bb205cb7fcf6b984851857455224c37b6
author | richard-burhans |
---|---|
date | Mon, 17 Feb 2025 19:31:33 +0000 |
parents | 425a2aa541df |
children | 1597f4ccde5d |
line wrap: on
line source
<tool id="rdeval" name="rdeval" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <!--
        Galaxy wrapper around the rdeval command line program.
        The @TOOL_VERSION@, @VERSION_SUFFIX@, @PROFILE@ and @ATTRIBUTION@ tokens and the
        "requirements" / "citations" macros are not defined here; they are expected to
        come from the imported macros.xml.
    -->
    <description>Multithreaded read analysis and manipulation tool.</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <!--
        Every parameter that lives inside a section or conditional is referenced by its
        fully qualified path (section.conditional.param) in the template below.
    -->
    <command detect_errors="exit_code"><![CDATA[
        #import re

        ## Symlink every input under a name built from its element identifier
        ## (characters other than word characters, hyphen and whitespace become "_")
        ## followed by the Galaxy datatype extension.
        #set $mangled_inputs = []
        #for $input in $input_reads
            #set $mangled_base = re.sub(r"[^\w\-\s]", "_", str($input.element_identifier))
            #set $mangled_input = $mangled_base + "." + str($input.ext)
            #silent $mangled_inputs.append($mangled_input)
            ln -s '$input' '$mangled_input' &&
        #end for

        ## For combined reads the Galaxy output path is linked to a working directory
        ## name carrying the selected format as its extension; rdeval is then pointed
        ## at that name with -o further down.
        #if $output_options.output_type.type_selector == "combined_reads"
            ln -s '$reads_outfile' 'output.${output_options.output_type.format_selector}' &&
        #end if

        rdeval --input-reads
        #echo " ".join([f"'{input}'" for $input in $mangled_inputs])
        #if $expected_gsize
            '$expected_gsize'
        #end if

        ## Header include / exclude lists. The datasets are optional, so the option is
        ## only emitted when a dataset was actually supplied.
        #if $input_filter.file_filter.filter_selector == "exclude_file" and $input_filter.file_filter.exclude_list
            --exclude-list '$input_filter.file_filter.exclude_list'
        #else if $input_filter.file_filter.filter_selector == "include_file" and $input_filter.file_filter.include_list
            --include-list '$input_filter.file_filter.include_list'
        #end if
        #if $input_filter.filter
            --filter '$input_filter.filter'
        #end if

        ## Subsampling.
            --sample '$input_subsample.sample'
        #if $input_subsample.random_seed.seed_selector == "yes"
            --random-seed '$input_subsample.random_seed.random_seed'
        #end if

        #if $homopolymer_compress.compress_selector == "yes"
            --homopolymer-compress '$homopolymer_compress.homopolymer_compress'
        #end if

        ## Flavor of the report written to stdout.
        #if $output_options.stats_flavor.flavor_selector == "stats"
            #if $output_options.stats_flavor.sequence_report
                --sequence-report
            #end if
        #else if $output_options.stats_flavor.flavor_selector == "quality"
            --quality '$output_options.stats_flavor.quality'
        #else if $output_options.stats_flavor.flavor_selector == "size"
            --out-size '$output_options.stats_flavor.out_size'
        #end if

        ## Secondary output: an rd file (collected through from_work_dir) or the
        ## combined reads (written through the symlink created above).
        #if $output_options.output_type.type_selector == "rd_file"
            #if $output_options.output_type.md5
                --md5
            #end if
            -o output.rd
        #else if $output_options.output_type.type_selector == "combined_reads"
            -o 'output.${output_options.output_type.format_selector}'
        #end if

        #if $output_options.verbose
            --verbose
        #end if
            --tabular
            --threads \${GALAXY_SLOTS:-2}
            > '$stats_outfile'
    ]]></command>
    <inputs>
        <param argument="--input-reads" type="data" format="bam,cram,fasta,fasta.gz,fastq,fastq.gz" multiple="true" label="Input dataset" help="FASTA/FASTQ, BAM, or CRAM files."/>
        <param name="expected_gsize" type="integer" label="Expected Genome Size" optional="true" help="Integer (e.g., 3000000000 for human)."/>
        <section name="input_filter" title="Filter input reads" expanded="false">
            <conditional name="file_filter">
                <param name="filter_selector" type="select" label="Use an exclude or include file">
                    <option value="no_file" selected="true">no</option>
                    <option value="exclude_file">Use an exclude file</option>
                    <option value="include_file">Use an include file</option>
                </param>
                <when value="no_file"/>
                <when value="exclude_file">
                    <param argument="--exclude-list" type="data" format="txt" optional="true" label="File containing headers to exclude"/>
                </when>
                <when value="include_file">
                    <param argument="--include-list" type="data" format="txt" optional="true" label="File containing headers to include"/>
                </when>
            </conditional>
            <param argument="--filter" type="text" optional="true" label="filter" help="e.g. l&gt;1000 &amp; q&gt;20">
                <!--
                    Whitelist the operator characters a filter expression needs so the
                    default text sanitizing does not rewrite them. Quote characters are
                    deliberately left out because the value is placed inside single
                    quotes on the command line.
                -->
                <sanitizer invalid_char="">
                    <valid initial="string.ascii_letters,string.digits">
                        <add value=" "/>
                        <add value="&gt;"/>
                        <add value="&lt;"/>
                        <add value="="/>
                        <add value="!"/>
                        <add value="&amp;"/>
                        <add value="|"/>
                        <add value="."/>
                        <add value="("/>
                        <add value=")"/>
                    </valid>
                </sanitizer>
            </param>
        </section>
        <section name="input_subsample" title="Subsample input reads" expanded="false">
            <param argument="--sample" type="float" min="0" max="1" value="1" label="fraction of reads to subsample"/>
            <conditional name="random_seed">
                <param name="seed_selector" type="select" label="supply random seed to make subsampling reproducible">
                    <option value="no" selected="true">no</option>
                    <option value="yes">yes</option>
                </param>
                <when value="no"/>
                <when value="yes">
                    <param argument="--random-seed" type="integer" min="0" value="0" label="random seed to make subsampling reproducible"/>
                </when>
            </conditional>
        </section>
        <conditional name="homopolymer_compress">
            <param name="compress_selector" type="select" label="Compress homopolymers">
                <option value="no" selected="true">no</option>
                <option value="yes">yes</option>
            </param>
            <when value="no"/>
            <when value="yes">
                <param argument="--homopolymer-compress" type="integer" min="0" label="Compress homopolymers longer than n in the input"/>
            </when>
        </conditional>
        <section name="output_options" title="Output options">
            <conditional name="stats_flavor">
                <param name="flavor_selector" type="select" label="Stats output">
                    <option value="stats" selected="true">Stats</option>
                    <option value="quality">Quality</option>
                    <option value="size">Size</option>
                </param>
                <when value="stats">
                    <param argument="--sequence-report" type="boolean" checked="false" label="Per read sequence report"/>
                </when>
                <when value="quality">
                    <param argument="--quality" type="select" optional="true" label="quality type">
                        <option value="q" selected="true">Average quality for each read</option>
                        <option value="a">Both length and quality for each read</option>
                    </param>
                </when>
                <when value="size">
                    <param argument="--out-size" type="select" optional="true" label="size list type">
                        <option value="u" selected="true">unsorted</option>
                        <option value="s">sorted</option>
                        <option value="h">histogram</option>
                        <option value="c">inverse cumulative table</option>
                    </param>
                </when>
            </conditional>
            <conditional name="output_type">
                <param name="type_selector" type="select" label="output type">
                    <option value="rd_file" selected="true">RD file</option>
                    <option value="combined_reads">Combined reads</option>
                </param>
                <when value="combined_reads">
                    <!-- The option value doubles as the file extension handed to rdeval and as the Galaxy datatype of the output. -->
                    <param name="format_selector" type="select" optional="true" label="Output format">
                        <option value="fasta.gz" selected="true">fasta</option>
                        <option value="fastq.gz">fastq</option>
                        <option value="bam">bam</option>
                        <option value="cram">cram</option>
                    </param>
                </when>
                <when value="rd_file">
                    <param argument="--md5" type="boolean" checked="false" label="Print md5 of .rd files"/>
                </when>
            </conditional>
            <param argument="--verbose" type="boolean" checked="false" label="Verbose output"/>
        </section>
    </inputs>
    <outputs>
        <!-- Standard output of rdeval (the tabular report) is always captured. -->
        <data name="stats_outfile" format="tabular" label="Rdeval summary"/>
        <data name="rd_outfile" from_work_dir="output.rd" format="binary" label="RD File">
            <filter>output_options["output_type"]["type_selector"] == "rd_file"</filter>
        </data>
        <!-- Declared as binary; the real datatype follows the selected output format. -->
        <data name="reads_outfile" format="binary" label="Output reads">
            <filter>output_options["output_type"]["type_selector"] == "combined_reads"</filter>
            <change_format>
                <when input="output_options.output_type.format_selector" value="fasta.gz" format="fasta.gz"/>
                <when input="output_options.output_type.format_selector" value="fastq.gz" format="fastq.gz"/>
                <when input="output_options.output_type.format_selector" value="bam" format="bam"/>
                <when input="output_options.output_type.format_selector" value="cram" format="cram"/>
            </change_format>
        </data>
    </outputs>
    <tests>
        <!-- Gzipped FASTA input, default options: summary plus rd file. -->
        <test expect_num_outputs="2">
            <param name="input_reads" value="test1.fasta.gz" ftype="fasta.gz"/>
            <output name="stats_outfile" file="output1.tabular" ftype="tabular"/>
            <output name="rd_outfile" ftype="binary">
                <assert_contents>
                    <has_size size="109" delta="1"/>
                </assert_contents>
            </output>
        </test>
        <!-- Gzipped FASTQ input, default options; the summary may differ from the FASTA one by up to two lines. -->
        <test expect_num_outputs="2">
            <param name="input_reads" value="test1.fastq.gz" ftype="fastq.gz"/>
            <output name="stats_outfile" file="output1.tabular" ftype="tabular" lines_diff="2"/>
            <output name="rd_outfile" ftype="binary">
                <assert_contents>
                    <has_size size="128" delta="1"/>
                </assert_contents>
            </output>
        </test>
        <!-- BAM input written back out as combined gzipped FASTQ reads. -->
        <test expect_num_outputs="2">
            <param name="input_reads" value="test2.bam" ftype="bam"/>
            <section name="output_options">
                <conditional name="output_type">
                    <param name="type_selector" value="combined_reads"/>
                    <param name="format_selector" value="fastq.gz"/>
                </conditional>
            </section>
            <output name="stats_outfile" file="output2.tabular" ftype="tabular"/>
            <output name="reads_outfile" file="output2.fastq.gz" ftype="fastq.gz"/>
        </test>
    </tests>
    <help><![CDATA[
**rdeval** is a general-purpose, multithreaded tool for analyzing and manipulating reads (FASTA/FASTQ/BAM/CRAM/RD).

::

    rdeval input.fa*[.gz]|bam|cram|rd [expected genome size]

::

    Dataset report example:

    +++Read summary+++:
    # reads: 10000
    Total read length: 134014104
    Average read length: 13401.41
    Read N50: 14270
    Smallest read length: 1142
    Largest read length: 40910
    Coverage: inf
    GC content %: 43.78
    Base composition (A:C:T:G): 37693226:29331833:37655925:29333120
    Average per base quality: 26.47

::

    Per sequence/read report (--sequence-report) example:

    Header                                Comment  Length  A     C     G     T     N  GC    Average Quality
    m54306U_210528_154706/69206614/ccs             22812   6170  5146  4802  6694  0  0.44  89.9705
    m54306U_210528_154706/25888573/ccs             32200   9162  7270  7112  8656  0  0.45  56.8306
    m54306U_210528_154706/40634168/ccs             8487    2443  1858  1876  2310  0  0.44  90.3828
    m54306U_210528_154706/103745617/ccs            16496   4546  3752  3760  4438  0  0.46  88.3554

::

    Options:

    --sequence-report generates a per-read report
    -e --exclude-list <file> generates output on a excluding list of headers.
    -f --filter <exp> filter reads using <exp> in quotes, e.g. 'l>10' for longer than 10bp or 'l>10 & q>10' to further exclude reads by quality (default: none).
    -i --include-list <file> generates output on a subset list of headers.
    -o --out-format <file> output file (fa*[.gz], bam, cram, rd). Optionally write reads to file or generate rd summary file.
    -q --quality q|a generates list of average quality for each read (q) or both length and quality (a).
    -r --input-reads <file1> <file2> <file n> input file (fa*[.gz], bam, cram, rd).
    -s --out-size u|s|h|c generates size list (unsorted|sorted|histogram|inverse cumulative table).
    --homopolymer-compress <int> compress all the homopolymers longer than n in the input.
    --sample <float> fraction of reads to subsample.
    --random-seed <int> an optional random seed to make subsampling reproducible.
    --md5 print md5 of .rd files.
    --tabular tabular output.
    --verbose verbose output.
    -j --threads <int> numbers of threads (default:5).
    -v --version software version.
    --cmd print $0 to stdout.

@ATTRIBUTION@
    ]]></help>
    <expand macro="citations"/>
</tool>