Mercurial > repos > avowinkel > stacks
view process_radtags.xml @ 0:af879fc0d734 draft
Uploaded
author | avowinkel |
---|---|
date | Fri, 26 Jun 2015 16:48:45 -0400 |
parents | |
children | 228d0cbc14f9 |
line wrap: on
line source
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Galaxy wrapper for process_radtags from the Stacks toolbox.

  Builds a process_radtags command line from the form inputs below, captures
  the tool's stdout/stderr in the "log_file" output, and exposes the files
  produced by the run as discovered datasets.
-->
<tool id="process_radtags" name="process_radtags" version="0.1.0">
    <description>from Stacks toolbox</description>
    <macros>
        <import>process_radtags_macros.xml</import>
    </macros>
    <requirements>
        <requirement type="package" version="1.32">stacks</requirement>
    </requirements>
    <command>
<![CDATA[
        ## process_radtags writes its result files into ./output
        mkdir output
        &&
        process_radtags

        ## input reads: one file for single-end, two files for paired-end
        #if $analysis_type.analysis_type_select == 'se'
            -f ${analysis_type.fastq_input1}
        #elif $analysis_type.analysis_type_select == 'pe'
            -1 ${analysis_type.fastq_input1}
            -2 ${analysis_type.fastq_input2}
        #end if

        -b ${barcode_file}
        -o output

        ## barcode layout selected by the user; the option values are the literal flags
        ${analysis_type.barcode_type}

        ## quality encoding follows the Galaxy datatype; nothing is passed for fasta input
        #if $analysis_type.fastq_input1.is_of_type('fastqsanger')
            -E "phred33"
        #elif $analysis_type.fastq_input1.is_of_type('fastqillumina')
            -E "phred64"
        #end if

        #if $double_digest.double_digest_enabled
            --renz_1 ${enzyme}
            --renz_2 ${double_digest.enzyme}
        #else
            -e ${enzyme}
        #end if

        ${c}
        ${q}
        ${r}

        ## 0 means "do not truncate"; compare as a plain int rather than relying on
        ## how the parameter wrapper object compares against a number
        #if int(str($t)) > 0
            -t ${t}
        #end if

        -w ${w}
        -s ${s}
        -D

        #if $analysis_type.adapter_options.adapter_options_enabled
            #if $analysis_type.analysis_type_select == 'se'
                --adapter_1 ${analysis_type.adapter_options.adapter_1}
            #elif $analysis_type.analysis_type_select == 'pe'
                --adapter_1 ${analysis_type.adapter_options.adapter_1}
                --adapter_2 ${analysis_type.adapter_options.adapter_2}
            #end if
            --adapter_mm ${analysis_type.adapter_options.adapter_mm}
        #end if

        #if $advanced_options.advanced_options_enabled
            ${advanced_options.filter_illumina}
            ${advanced_options.disable_rad_check}
            --barcode_dist_1 ${advanced_options.barcode_dist}
        #end if

        > ${log_file} 2>&1
        &&
        ## NOTE(review): presumably prepares the per-sample files for collection
        ## discovery in ./splits; confirm against process_radtags_rename.sh
        bash $__tool_directory__/process_radtags_rename.sh ${analysis_type.fastq_input1.ext}
]]>
    </command>
    <inputs>
        <!-- Read layout; each branch carries its own inputs, barcode layout and adapter options. -->
        <conditional name="analysis_type">
            <param name="analysis_type_select" type="select" label="Analysis type">
                <option value="se" selected="true">Single End Reads</option>
                <option value="pe">Paired End Reads (NOT IMPLEMENTED)</option>
            </param>
            <when value="se">
                <param name="fastq_input1" type="data" format="fasta,fastqsanger,fastqillumina"
                       label="Select the fastq/a file"
                       help="Specify fastq/a file with reads"/>
                <!-- Option values are passed to process_radtags unchanged. -->
                <param name="barcode_type" type="select" label="Barcode type">
                    <option value="--inline_null">inline barcode</option>
                    <option value="--index_null">barcode in header</option>
                </param>
                <expand macro="macro_adapter_options_se"/>
            </when>
            <when value="pe">
                <param name="fastq_input1" type="data" format="fasta,fastqsanger,fastqillumina"
                       label="Select first fastq/a file"
                       help="Specify fastq/a file with forward reads"/>
                <param name="fastq_input2" type="data" format="fasta,fastqsanger,fastqillumina"
                       label="Select second fastq/a file"
                       help="Specify fastq/a file with reverse reads"/>
                <!-- Option values are passed to process_radtags unchanged. -->
                <param name="barcode_type" type="select" label="Barcode type">
                    <option value="--inline_inline">inline barcode</option>
                    <option value="--index_index">barcode in header</option>
                    <option value="--inline_index">forward read: inline; reverse read: header</option>
                    <option value="--index_inline">forward read: header; reverse read: inline</option>
                </param>
                <expand macro="macro_adapter_options_pe"/>
            </when>
        </conditional>

        <param name="barcode_file" type="data" format="tabular" label="Select the barcode file" />

        <expand macro="macro_enzyme_selector"/>
        <!-- When enabled, the first enzyme and the one chosen here are passed as a double digest pair. -->
        <conditional name="double_digest">
            <param name="double_digest_enabled" type="boolean" label="Double Digest was used?" />
            <when value="true">
                <expand macro="macro_enzyme_selector2"/>
            </when>
            <when value="false" />
        </conditional>

        <!-- Boolean flags expand to the flag itself when checked and to nothing otherwise. -->
        <param name="c" type="boolean" truevalue="-c" falsevalue=""
               label="clean data, remove any read with an uncalled base" help="-c" />
        <param name="q" type="boolean" truevalue="-q" falsevalue=""
               label="discard reads with low quality scores" help="-q" />
        <param name="r" type="boolean" truevalue="-r" falsevalue=""
               label="rescue barcodes and RAD-Tags" help="-r" />
        <param name="t" type="integer" value="0" size="4"
               label="truncate final read length to this value"
               help="0 = don't truncate; -t &lt;len&gt;" />
        <param name="w" type="float" value="0.15"
               label="set the size of the sliding window"
               help="... as a fraction of the read length, between 0 and 1 (default 0.15); -w &lt;size&gt;">
            <validator type="expression" message="Window size is a fraction between 0 and 1."><![CDATA[value > 0 and value < 1]]></validator>
        </param>
        <param name="s" type="integer" value="10"
               label="set the score limit"
               help="If the average score within the sliding window drops below this value, the read is discarded (default 10); -s &lt;lim&gt;">
            <validator type="expression" message="Score limit must be between 0 and 40."><![CDATA[value >= 0 and value <= 40]]></validator>
        </param>

        <conditional name="advanced_options">
            <param name="advanced_options_enabled" type="boolean" label="Specify advanced options?" />
            <when value="true">
                <param name="filter_illumina" type="boolean" truevalue="--filter_illumina" falsevalue=""
                       label="discard reads that have been marked by Illumina’s chastity/purity filter as failing" />
                <param name="disable_rad_check" type="boolean" truevalue="--disable_rad_check" falsevalue=""
                       label="disable checking if the RAD site is intact" />
                <param name="barcode_dist" type="integer" value="2" size="1"
                       label="distance between barcodes to allow for barcode rescue (default 2)" />
            </when>
            <when value="false" />
        </conditional>
    </inputs>
    <outputs>
        <!--
          The regular expressions use named groups. A literal "less than" sign is
          not allowed inside an XML attribute value, so the group delimiters are
          written as character entities.
        -->
        <collection name="split_output" type="list" label="@OUTPUT_NAME_PREFIX@ on ${on_string} (Fastq Collection)">
            <discover_datasets pattern="sample_(?P&lt;designation&gt;.+)\.(?P&lt;ext&gt;.+)" directory="splits" />
        </collection>
        <!-- Captured stdout/stderr of the run, plus log and discard files found in ./output. -->
        <data format="txt" name="log_file" label="@OUTPUT_NAME_PREFIX@ on ${on_string} (log)">
            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.log" ext="txt" directory="output" visible="true" />
            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.(?P&lt;ext&gt;.+)\.discards" directory="output" visible="true" />
        </data>
    </outputs>
    <tests>
        <!-- Single-end run with inline barcodes and no adapter filtering. -->
        <test>
            <param name="analysis_type_select" value="se" />
            <param name="fastq_input1" value="input-se-inline.fastqsanger" />
            <param name="barcode_type" value="--inline_null" />
            <param name="adapter_options_enabled" value="false" />
            <param name="barcode_file" value="barcodes.tbl" />
            <param name="enzyme" value="ecoT22I" />
            <param name="double_digest_enabled" value="false" />
            <output name="log_file">
                <assert_contents>
                    <has_line line="Processing single-end data." />
                    <has_line line="Using Phred+33 encoding for quality scores." />
                    <has_line line="Found 1 input file(s)." />
                    <has_line line="Searching for single-end, inlined barcodes." />
                    <has_line line="Loaded 9 barcodes (6-10bp)." />
                    <not_has_text text="Will attempt to recover barcodes" />
                    <has_line line=" 11 total reads; -4 ambiguous barcodes; -0 ambiguous RAD-Tags; +0 recovered; -0 low quality reads; 7 retained reads." />
                </assert_contents>
                <discovered_dataset designation="Report">
                    <assert_contents>
                        <has_line_matching expression="process_radtags version 1.32 executed \d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}" />
                        <has_line_matching expression="dataset_1.dat 7 0 4 0 11" />
                    </assert_contents>
                </discovered_dataset>
            </output>
        </test>
    </tests>
    <help>
<![CDATA[
**Tool website (with examples)**: http://catchenlab.life.illinois.edu/stacks/comp/process_radtags.php

----

**Tool help output**::

    process_radtags 1.32
    process_radtags [-f in_file | -p in_dir [-P] [-I] | -1 pair_1 -2 pair_2] -b barcode_file -o out_dir -e enz [-c] [-q] [-r] [-t len] [-D] [-w size] [-s lim] [-h]
      f: path to the input file if processing single-end sequences.
      i: input file type, either 'bustard' for the Illumina BUSTARD format, 'bam', 'fastq' (default), or 'gzfastq' for gzipped FASTQ.
      y: output type, either 'fastq', 'gzfastq', 'fasta', or 'gzfasta' (default is to match the input file type).
      p: path to a directory of files.
      P: files contained within directory specified by '-p' are paired.
      I: specify that the paired-end reads are interleaved in single files.
      1: first input file in a set of paired-end sequences.
      2: second input file in a set of paired-end sequences.
      o: path to output the processed files.
      b: path to a file containing barcodes for this run.
      c: clean data, remove any read with an uncalled base.
      q: discard reads with low quality scores.
      r: rescue barcodes and RAD-Tags.
      t: truncate final read length to this value.
      E: specify how quality scores are encoded, 'phred33' (Illumina 1.8+, Sanger, default) or 'phred64' (Illumina 1.3 - 1.5).
      D: capture discarded reads to a file.
      w: set the size of the sliding window as a fraction of the read length, between 0 and 1 (default 0.15).
      s: set the score limit. If the average score within the sliding window drops below this value, the read is discarded (default 10).
      h: display this help messsage.

      Barcode options:
        --inline_null: barcode is inline with sequence, occurs only on single-end read (default).
        --index_null: barcode is provded in FASTQ header, occurs only on single-end read.
        --inline_inline: barcode is inline with sequence, occurs on single and paired-end read.
        --index_index: barcode is provded in FASTQ header, occurs on single and paired-end read.
        --inline_index: barcode is inline with sequence on single-end read, occurs in FASTQ header for paired-end read.
        --index_inline: barcode occurs in FASTQ header for single-end read, is inline with sequence on paired-end read.

      Restriction enzyme options:
        -e <enz>, --renz_1 <enz>: provide the restriction enzyme used (cut site occurs on single-end read)
        --renz_2 <enz>: if a double digest was used, provide the second restriction enzyme used (cut site occurs on the paired-end read).
        Currently supported enzymes include:
          'aluI', 'apeKI', 'apoI', 'bamHI', 'bgIII', 'bstYI', 'claI', 'ddeI', 'dpnII', 'eaeI', 'ecoRI', 'ecoRV', 'ecoT22I', 'hindIII', 'kpnI', 'mluCI', 'mseI', 'mspI', 'ndeI', 'nheI', 'nlaIII', 'notI', 'nsiI', 'pstI', 'rsaI', 'sacI', 'sau3AI', 'sbfI', 'sexAI', 'sgrAI', 'speI', 'sphI', 'taqI', 'xbaI', or 'xhoI'

      Adapter options:
        --adapter_1 <sequence>: provide adaptor sequence that may occur on the single-end read for filtering.
        --adapter_2 <sequence>: provide adaptor sequence that may occur on the paired-read for filtering.
        --adapter_mm <mismatches>: number of mismatches allowed in the adapter sequence.

      Output options:
        --merge: if no barcodes are specified, merge all input files into a single output file.

      Advanced options:
        --filter_illumina: discard reads that have been marked by Illumina's chastity/purity filter as failing.
        --disable_rad_check: disable checking if the RAD site is intact.
        --len_limit <limit>: specify a minimum sequence length (useful if your data has already been trimmed).
        --barcode_dist_1: the number of allowed mismatches when rescuing single-end barcodes (default 1).
        --barcode_dist_2: the number of allowed mismatches when rescuing paired-end barcodes (defaults to --barcode_dist_1).
]]>
    </help>
</tool>