Mercurial > repos > cmonjeau > stacks
view STACKS_procrad.xml @ 2:c9e10e0d6c10
fix discard file
author | cmonjeau |
---|---|
date | Mon, 24 Aug 2015 15:39:12 +0000 |
parents | d6ba40f6c824 |
children | 0e0ff9e9c761 |
line wrap: on
line source
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Galaxy tool definition for the STACKS "process radtags" step.

  The tool runs the wrapper script STACKS_procrad.py (see <command>) and exposes:
    * a log file (always),
    * the datasets discovered in the "galaxy_outputs" directory,
    * an optional zip archive (output choice "2" or "3"),
    * an optional file with the discarded reads (when "capture" is checked).
-->
<tool id="STACKSprocrad" name="STACKS : Process radtags" force_history_refresh="True" version="1.1.0">
    <description>Run the STACKS cleaning script</description>

    <macros>
        <!-- Restriction enzymes offered by every enzyme selector of this tool.
             Defined once so the three selectors cannot drift apart. -->
        <xml name="enzyme_options">
            <option value="apeKI">apeKI</option>
            <option value="bamHI">bamHI</option>
            <option value="claI">claI</option>
            <option value="dpnII">dpnII</option>
            <option value="eaeI">eaeI</option>
            <option value="ecoRI">ecoRI</option>
            <option value="ecoT22I">ecoT22I</option>
            <option value="hindIII">hindIII</option>
            <option value="mluCI">mluCI</option>
            <option value="mseI">mseI</option>
            <option value="mspI">mspI</option>
            <option value="ndeI">ndeI</option>
            <option value="nlaIII">nlaIII</option>
            <option value="notI">notI</option>
            <option value="nsiI">nsiI</option>
            <option value="pstI">pstI</option>
            <option value="sau3AI">sau3AI</option>
            <option value="sbfI">sbfI</option>
            <option value="sexAI">sexAI</option>
            <option value="sgrAI">sgrAI</option>
            <option value="sphI">sphI</option>
            <option value="taqI">taqI</option>
            <option value="xbaI">xbaI</option>
        </xml>
    </macros>

    <configfiles>
        <!-- Each config file lists the selected datasets, one "display name::dataset" entry per line.
             The template bodies are kept flush-left on purpose: any indentation placed in front of
             the ${...} lines would be written verbatim into the generated file. -->
        <configfile name="input_single">
#if str( $options_type.options_type_selector ) == "single":
#for $input in $options_type.inputs_single:
${input.display_name}::${input}
#end for
#end if
</configfile>
        <configfile name="input_paired1">
#if str( $options_type.options_type_selector ) == "paired":
#for $input in $options_type.inputs_paired1:
${input.display_name}::${input}
#end for
#end if
</configfile>
        <configfile name="input_paired2">
#if str( $options_type.options_type_selector ) == "paired":
#for $input in $options_type.inputs_paired2:
${input.display_name}::${input}
#end for
#end if
</configfile>
    </configfiles>

    <requirements>
        <requirement type="package" version="1.18">stacks</requirement>
    </requirements>

    <command interpreter="python"><![CDATA[
STACKS_procrad.py
    --input_type $options_type.options_type_selector
    ## Inputs are handed over through the config files declared above.
    #if str( $options_type.options_type_selector ) == "single":
        --input_single $input_single
    #else
        --input_paired1 $input_paired1
        --input_paired2 $input_paired2
    #end if
    --inputype $inputype
    --barcode $barcode
    ## Free-text value: quoted so that a name containing spaces stays a single argument.
    --sample_name "$sample_name"
    --output_choice $options_output_infos_selector
    #if str( $options_output_infos_selector ) != "1":
        --output_archive $output_archive
    #end if
    --input_enzyme $options_enzyme.options_enzyme_selector
    --enzyme1 $options_enzyme.enzyme
    #if str( $options_enzyme.options_enzyme_selector ) == "2":
        --enzyme2 $options_enzyme.enzyme2
    #end if
    --outype $outype
    --qualitenc $options_quality.qualitenc
    #if $capture:
        -D
        --discard_file $discard_file
    #end if
    ## The advanced values below are always passed, whatever the state of the checkbox;
    ## presumably the script only honours them when this flag is true (TODO confirm in STACKS_procrad.py).
    --activate_advanced_options $activate_advanced_options
    -t $options_advanced.truncate
    #if $options_advanced.discard:
        -q
    #end if
    #if $options_advanced.rescue:
        -r
    #end if
    -w $options_advanced.sliding
    -s $options_advanced.score
    #if $options_advanced.remove:
        -c
    #end if
    #if $options_advanced.inline:
        --inline_null
    #end if
    #if $options_advanced.index:
        --index_null
    #end if
    #if $options_advanced.inlinein:
        --inline_inline
    #end if
    #if $options_advanced.indexind:
        --index_index
    #end if
    #if $options_advanced.inlineind:
        --inline_index
    #end if
    #if $options_advanced.indexin:
        --index_inline
    #end if
    --logfile $output
    ]]></command>

    <inputs>
        <!-- Read files: one list for single-end data, two lists for paired-end data. -->
        <conditional name="options_type">
            <param name="options_type_selector" type="select" label="Single-end or paired-end reads files">
                <option value="single" selected="True">Single-end files</option>
                <option value="paired">Paired-end files</option>
            </param>
            <when value="single">
                <param name="inputs_single" format="fastq,fastq.gz" type="data" multiple="true" label="singles-end reads infile(s)" help="input files" />
            </when>
            <when value="paired">
                <param name="inputs_paired1" format="fastq,fastq.gz" type="data" multiple="true" label="paired-end reads infile(s) 1" help="Files must have this syntax : name_R1_001.fastq" />
                <param name="inputs_paired2" format="fastq,fastq.gz" type="data" multiple="true" label="paired-end reads infile(s) 2" help="Files must have this syntax : name_R2_001.fastq" />
            </when>
        </conditional>

        <param name="inputype" type="select" label="Inputs format">
            <option value="fastq" selected="True">fastq</option>
            <option value="gzfastq">fastq.gz</option>
            <option value="bustard">Illumina BUSTARD</option>
        </param>

        <param name="barcode" type="data" format="tabular,txt" label="Barcode file" help="Barcode file" />

        <param name="sample_name" type="text" value="sample" label="Sample name" help="Precise the sample name if using several NGS runs" />

        <!-- One or two restriction enzymes; "enzyme" exists in both branches, "enzyme2" only in the second. -->
        <conditional name="options_enzyme">
            <param name="options_enzyme_selector" type="select" label="Number of enzymes">
                <option value="1">One</option>
                <option value="2">Two</option>
            </param>
            <when value="1">
                <param name="enzyme" type="select" label="Enzyme" help="provide the restriction enzyme used">
                    <expand macro="enzyme_options" />
                </param>
            </when>
            <when value="2">
                <param name="enzyme" type="select" label="Enzyme" help="provide the restriction enzyme used">
                    <expand macro="enzyme_options" />
                </param>
                <param name="enzyme2" type="select" label="Second enzyme" help="provide the second restriction enzyme used">
                    <expand macro="enzyme_options" />
                </param>
            </when>
        </conditional>

        <!-- When checked, the "discard_file" output is created and -D / discard_file options are added to the command. -->
        <param name="capture" type="boolean" label="Capture discarded reads to a file" />

        <section name="options_quality" title="quality options" expanded="False">
            <param name="qualitenc" type="select" label="Quality encoded type" help="specify how quality scores are encoded, 'phred33' (Illumina 1.8+, Sanger, default) or 'phred64' (Illumina 1.3 - 1.5)">
                <option value="phred33">phred33</option>
                <option value="phred64">phred64</option>
            </param>
        </section>

        <param name="activate_advanced_options" type="boolean" label="Activate advanced options" help="advanced options are defined below" />

        <section name="options_advanced" title="advanced options" expanded="False">
            <param name="sliding" type="float" value="0.15" label="set the size of the sliding window as a fraction of the read length, between 0 and 1 (default 0.15)" />
            <param name="score" type="integer" value="10" label="Set the score limit. If the average score within the sliding window drops below this value, the read is discarded (default 10)" />
            <param name="remove" type="boolean" checked="false" label="Clean data, remove any read with an uncalled base" />
            <param name="discard" type="boolean" checked="false" label="Discard reads with low quality scores" />
            <param name="rescue" type="boolean" checked="false" label="Rescue barcodes and RAD-Tags?" />
            <param name="truncate" type="integer" value="-1" label="Truncate final read length to this value" help="default = -1" />
            <!-- NOTE(review): the six barcode layouts below are independent checkboxes (inline_null is
                 checked by default), so several can be selected at once; confirm the wrapper script
                 copes with that. -->
            <param name="inline" type="boolean" checked="true" label="Barcode options -> inline_null" help="barcode is inline with sequence, occurs only on single-end read" />
            <param name="index" type="boolean" checked="false" label="Barcode options -> index_null" help="barcode is provided in FASTQ header, occurs only on single-end read" />
            <param name="inlinein" type="boolean" checked="false" label="Barcode options -> inline_inline" help="barcode is inline with sequence, occurs on single and paired-end read" />
            <param name="indexind" type="boolean" checked="false" label="Barcode options -> index_index" help="barcode is provided in FASTQ header, occurs on single and paired-end read" />
            <param name="inlineind" type="boolean" checked="false" label="Barcode options -> inline_index" help="barcode is inline with sequence on single-end read, occurs in FASTQ header for paired-end read" />
            <param name="indexin" type="boolean" checked="false" label="Barcode options -> index_inline" help="barcode occurs in FASTQ header for single-end read, is inline with sequence on paired-end read" />
        </section>

        <param name="outype" type="select" label="Output format" help="output type, either 'fastq' or 'fasta' (default fastq)">
            <option value="fastq">fastq</option>
            <option value="fasta">fasta</option>
        </param>

        <!-- Any value other than "1" enables the "output_archive" dataset (see its <filter>). -->
        <param name="options_output_infos_selector" type="select" label="Output type">
            <option value="1">Normal (a fastq file by barcode)</option>
            <option value="2" selected="True">Additional zip archive with all files (Normal + one archive with all fastq files)</option>
            <option value="3">Only a zip archive with all files (one archive with all fastq files)</option>
        </param>
    </inputs>

    <outputs>
        <!-- Log file, passed to the script as the logfile option. -->
        <data format="txt" name="output" label="results.log with ${tool.name} on ${on_string}: demultiplexed and cleaned reads" />
        <!-- Hidden placeholder; the visible datasets are discovered in the "galaxy_outputs" directory. -->
        <data format="txt" name="additional" label="fast(a/q) file with ${tool.name}" hidden="true">
            <discover_datasets pattern="__designation_and_ext__" directory="galaxy_outputs" visible="true" />
        </data>
        <data format="zip" name="output_archive" label="all_files.zip with ${tool.name} on ${on_string}: demultiplexed and cleaned reads ">
            <filter>options_output_infos_selector != "1"</filter>
        </data>
        <data format="fastq" name="discard_file" label="discard.fastq with ${tool.name} on ${on_string}: demultiplexed and cleaned reads ">
            <filter>capture</filter>
        </data>
    </outputs>

    <stdio>
        <!-- NOTE(review): only exit code 1 is matched by this rule; other non-zero exit codes are not
             declared fatal here. Confirm that this is intended. -->
        <exit_code range="1" level="fatal" description="Error in Stacks Process radtag execution" />
    </stdio>

    <help><![CDATA[
.. class:: infomark

**What it does**

This program examines raw reads from an Illumina sequencing run and first, checks that the barcode and the RAD cutsite are intact, and demultiplexes the data. If there are errors in the barcode or the RAD site within a certain allowance process_radtags can correct them. Second, it slides a window down the length of the read and checks the average quality score within the window. If the score drops below 90% probability of being correct (a raw phred score of 10), the read is discarded. This allows for some sequencing errors while eliminating reads where the sequence is degrading as it is being sequenced. By default the sliding window is 15% of the length of the read, but can be specified on the command line (the threshold and window size can be adjusted).

The process_radtags program can:

- handle data that is barcoded, either inline or using an index, or unbarcoded.
- use combinatorial barcodes.
- check and correct for a restriction enzyme cutsite for single or double-digested data.
- filter adapter sequence while allowing for sequencing error in the adapter pattern.
- process individual files or whole directories of files.
- directly read gzipped data
- filter reads based on Illumina's Chastity filter

--------

**Help**

Input files:

- FASTQ, FASTA, zip, tar.gz

- Barcode File Format

The barcode file is a very simple format : one barcode per line.

::

    CGATA
    CGGCG
    GAAGC
    GAGAT
    CGATA
    CGGCG
    GAAGC
    GAGAT

Combinatorial barcodes are specified, one per column, separated by a tab::

    CGATA    ACGTA
    CGGCG    CGTA
    GAAGC    CGTA
    GAGAT    CGTA
    CGATA    AGCA
    CGGCG    AGCA
    GAAGC    AGCA
    GAGAT    AGCA

Instructions to add the functionality of archives management in Galaxy on the `eBiogenouest HUB wiki <https://www.e-biogenouest.org/wiki/ManArchiveGalaxy>`_ .

--------

**Created by:**

Stacks was developed by Julian Catchen with contributions from Angel Amores, Paul Hohenlohe, and Bill Cresko

--------

**Project links:**

`STACKS website <http://creskolab.uoregon.edu/stacks/>`_ .

`STACKS manual <http://creskolab.uoregon.edu/stacks/stacks_manual.pdf>`_ .

`STACKS google group <https://groups.google.com/forum/#!forum/stacks-users>`_ .

--------

**References:**

-J. Catchen, P. Hohenlohe, S. Bassham, A. Amores, and W. Cresko. Stacks: an analysis tool set for population genomics. Molecular Ecology. 2013.

-J. Catchen, S. Bassham, T. Wilson, M. Currey, C. O'Brien, Q. Yeates, and W. Cresko. The population structure and recent colonization history of Oregon threespine stickleback determined using restriction-site associated DNA-sequencing. Molecular Ecology. 2013.

-J. Catchen, A. Amores, P. Hohenlohe, W. Cresko, and J. Postlethwait. Stacks: building and genotyping loci de novo from short-read sequences. G3: Genes, Genomes, Genetics, 1:171-182, 2011.

-A. Amores, J. Catchen, A. Ferrara, Q. Fontenot and J. Postlethwait. Genome evolution and meiotic maps by massively parallel DNA sequencing: Spotted gar, an outgroup for the teleost genome duplication. Genetics, 188:799-808, 2011.

-P. Hohenlohe, S. Amish, J. Catchen, F. Allendorf, G. Luikart. RAD sequencing identifies thousands of SNPs for assessing hybridization between rainbow trout and westslope cutthroat trout. Molecular Ecology Resources, 11(s1):117-122, 2011.

-K. Emerson, C. Merz, J. Catchen, P. Hohenlohe, W. Cresko, W. Bradshaw, C. Holzapfel. Resolving postglacial phylogeography using high-throughput sequencing. Proceedings of the National Academy of Science, 107(37):16196-200, 2010.

--------

**Integrated by:**

Yvan Le Bras and Cyril Monjeaud

GenOuest Bio-informatics Core Facility

UMR 6074 IRISA INRIA-CNRS-UR1 Rennes (France)

support@genouest.org

If you use this tool in Galaxy, please cite :

`Y. Le Bras, A. Roult, C. Monjeaud, M. Bahin, O. Quenez, C. Heriveau, A. Bretaudeau, O. Sallou, O. Collin, Towards a Life Sciences Virtual Research Environment : an e-Science initiative in Western France. JOBIM 2013. <https://www.e-biogenouest.org/resources/128>`_
    ]]></help>

    <citations>
        <citation type="doi">10.1111/mec.12354</citation>
        <citation type="doi">10.1111/mec.12330</citation>
        <citation type="doi">10.1534/g3.111.000240</citation>
        <citation type="doi">10.1534/genetics.111.127324</citation>
        <citation type="doi">10.1111/j.1755-0998.2010.02967.x</citation>
        <citation type="doi">10.1073/pnas.1006538107</citation>
        <citation type="bibtex">@INPROCEEDINGS{JOBIM2013,
            author = {Le Bras, Y. and ROULT, A. and Monjeaud, C. and Bahin, M. and Quenez, O. and Heriveau, C. and Bretaudeau, A. and Sallou, O. and Collin, O.},
            title = {Towards a Life Sciences Virtual Research Environment: An e-Science initiative in Western France},
            booktitle = {JOBIM 2013 Proceedings},
            year = {2013},
            url = {https://www.e-biogenouest.org/resources/128},
            pages = {97-106}
        }</citation>
    </citations>
</tool>