view STACKS_procrad.xml @ 3:0e0ff9e9c761 default tip

fix inputs clean name
author cmonjeau
date Mon, 28 Sep 2015 13:21:35 +0000
parents c9e10e0d6c10
children
line wrap: on
line source

<?xml version="1.0"?>
<tool id="STACKSprocrad" name="STACKS : Process radtags"  force_history_refresh="True" version="1.1.0">
<description>Run the STACKS cleaning script</description>
<configfiles>
<!-- Cheetah-rendered helper files referenced from the command template as
     $input_single, $input_paired1 and $input_paired2.
     Each selected dataset contributes one line of the form
     "display name::value of the dataset variable" (presumably the dataset
     path on disk; confirm against STACKS_procrad.py).
     A file whose read-type condition is not met gets no entries. -->
<configfile name="input_single">
#if str( $options_type.options_type_selector ) == "single":
#for $input in $options_type.inputs_single:
${input.display_name}::${input}
#end for
#end if
</configfile>
<configfile name="input_paired1">
#if str( $options_type.options_type_selector ) == "paired":
#for $input in $options_type.inputs_paired1:
${input.display_name}::${input}
#end for
#end if
</configfile>
<configfile name="input_paired2">
#if str( $options_type.options_type_selector ) == "paired":
#for $input in $options_type.inputs_paired2:
${input.display_name}::${input}
#end for
#end if
</configfile>
</configfiles>  
<requirements>
    <!-- Tool dependency: the "stacks" package, pinned to version 1.18. -->
    <requirement type="package" version="1.18">stacks</requirement>
</requirements>
<!-- Cheetah command template for the STACKS_procrad.py wrapper.
     Read inputs are handed over through the config files declared above
     (single or paired, depending on options_type_selector).
     The archive output is only passed when the output choice is not "1",
     and the discard file only when "capture" is checked.
     The advanced values (truncate, sliding, score and the barcode layout
     flags) are always emitted; activate_advanced_options is forwarded so the
     wrapper can decide whether to honour them (assumption, confirm against
     the script).
     sample_name is free text, so it is single-quoted to keep a value that
     contains whitespace as one shell argument. -->
<command interpreter="python">

STACKS_procrad.py
    --input_type $options_type.options_type_selector
    #if str( $options_type.options_type_selector ) == "single":
        --input_single $input_single
    #else
        --input_paired1 $input_paired1
        --input_paired2 $input_paired2
    #end if
    --inputype $inputype
    --barcode $barcode
    --sample_name '$sample_name'
    --output_choice $options_output_infos_selector
    #if str( $options_output_infos_selector ) != "1":
        --output_archive $output_archive
    #end if
    --input_enzyme $options_enzyme.options_enzyme_selector
    --enzyme1 $options_enzyme.enzyme
    #if str( $options_enzyme.options_enzyme_selector ) == "2":
        --enzyme2 $options_enzyme.enzyme2
    #end if
    --outype $outype
    --qualitenc $options_quality.qualitenc
    #if $capture:
        -D
        --discard_file $discard_file
    #end if
    --activate_advanced_options $activate_advanced_options
    -t $options_advanced.truncate
    #if $options_advanced.discard:
        -q
    #end if
    #if $options_advanced.rescue:
        -r
    #end if
    -w $options_advanced.sliding
    -s $options_advanced.score
    #if $options_advanced.remove:
        -c
    #end if
    #if $options_advanced.inline:
        --inline_null
    #end if
    #if $options_advanced.index:
        --index_null
    #end if
    #if $options_advanced.inlinein:
        --inline_inline
    #end if
    #if $options_advanced.indexind:
        --index_index
    #end if
    #if $options_advanced.inlineind:
        --inline_index
    #end if
    #if $options_advanced.indexin:
        --index_inline
    #end if
    --logfile $output

</command>

<inputs>

    <!-- Read layout: one list of single-end files, or two lists of paired-end files. -->
    <conditional name="options_type">
        <param name="options_type_selector" type="select" label="Single-end or paired-end reads files">
            <option value="single" selected="True">Single-end files</option>
            <option value="paired">Paired-end files</option>
        </param>
        <when value="single">
            <param name="inputs_single" type="data" format="fastq,fastq.gz" multiple="true" label="singles-end reads infile(s)" help="input files" />
        </when>
        <when value="paired">
            <param name="inputs_paired1" type="data" format="fastq,fastq.gz" multiple="true" label="paired-end reads infile(s) 1" help="Files must have this syntax : name_R1_001.fastq" />
            <param name="inputs_paired2" type="data" format="fastq,fastq.gz" multiple="true" label="paired-end reads infile(s) 2" help="Files must have this syntax : name_R2_001.fastq" />
        </when>
    </conditional>

    <!-- Declared format of the read files, forwarded as the inputype option. -->
    <param name="inputype" type="select" format="text" label="Inputs format">
        <option value="fastq" selected="True">fastq</option>
        <option value="gzfastq">fastq.gz</option>
        <option value="bustard">Illumina BUSTARD</option>
    </param>

    <param name="barcode" type="data" format="tabular,txt" label="Barcode file" help="Barcode file" />

    <param name="sample_name" type="text" value="sample" label="Sample name" help="Precise the sample name if using several NGS runs" />

    <!-- Restriction enzymes: the "2" branch repeats the first enzyme list and adds a second one. -->
    <conditional name="options_enzyme">
        <param name="options_enzyme_selector" type="select" label="Number of enzymes">
            <option value="1">One</option>
            <option value="2">Two</option>
        </param>
        <when value="1">
            <param name="enzyme" type="select" format="text" label="Enzyme" help="provide the restriction enzyme used">
                <option value="apeKI">apeKI</option>
                <option value="bamHI">bamHI</option>
                <option value="claI">claI</option>
                <option value="dpnII">dpnII</option>
                <option value="eaeI">eaeI</option>
                <option value="ecoRI">ecoRI</option>
                <option value="ecoT22I">ecoT22I</option>
                <option value="hindIII">hindIII</option>
                <option value="mluCI">mluCI</option>
                <option value="mseI">mseI</option>
                <option value="mspI">mspI</option>
                <option value="ndeI">ndeI</option>
                <option value="nlaIII">nlaIII</option>
                <option value="notI">notI</option>
                <option value="nsiI">nsiI</option>
                <option value="pstI">pstI</option>
                <option value="sau3AI">sau3AI</option>
                <option value="sbfI">sbfI</option>
                <option value="sexAI">sexAI</option>
                <option value="sgrAI">sgrAI</option>
                <option value="sphI">sphI</option>
                <option value="taqI">taqI</option>
                <option value="xbaI">xbaI</option>
            </param>
        </when>
        <when value="2">
            <param name="enzyme" type="select" format="text" label="Enzyme" help="provide the restriction enzyme used">
                <option value="apeKI">apeKI</option>
                <option value="bamHI">bamHI</option>
                <option value="claI">claI</option>
                <option value="dpnII">dpnII</option>
                <option value="eaeI">eaeI</option>
                <option value="ecoRI">ecoRI</option>
                <option value="ecoT22I">ecoT22I</option>
                <option value="hindIII">hindIII</option>
                <option value="mluCI">mluCI</option>
                <option value="mseI">mseI</option>
                <option value="mspI">mspI</option>
                <option value="ndeI">ndeI</option>
                <option value="nlaIII">nlaIII</option>
                <option value="notI">notI</option>
                <option value="nsiI">nsiI</option>
                <option value="pstI">pstI</option>
                <option value="sau3AI">sau3AI</option>
                <option value="sbfI">sbfI</option>
                <option value="sexAI">sexAI</option>
                <option value="sgrAI">sgrAI</option>
                <option value="sphI">sphI</option>
                <option value="taqI">taqI</option>
                <option value="xbaI">xbaI</option>
            </param>
            <param name="enzyme2" type="select" format="text" label="Second enzyme" help="provide the second restriction enzyme used">
                <option value="apeKI">apeKI</option>
                <option value="bamHI">bamHI</option>
                <option value="claI">claI</option>
                <option value="dpnII">dpnII</option>
                <option value="eaeI">eaeI</option>
                <option value="ecoRI">ecoRI</option>
                <option value="ecoT22I">ecoT22I</option>
                <option value="hindIII">hindIII</option>
                <option value="mluCI">mluCI</option>
                <option value="mseI">mseI</option>
                <option value="mspI">mspI</option>
                <option value="ndeI">ndeI</option>
                <option value="nlaIII">nlaIII</option>
                <option value="notI">notI</option>
                <option value="nsiI">nsiI</option>
                <option value="pstI">pstI</option>
                <option value="sau3AI">sau3AI</option>
                <option value="sbfI">sbfI</option>
                <option value="sexAI">sexAI</option>
                <option value="sgrAI">sgrAI</option>
                <option value="sphI">sphI</option>
                <option value="taqI">taqI</option>
                <option value="xbaI">xbaI</option>
            </param>
        </when>
    </conditional>

    <!-- When checked, the command template adds the discard file output. -->
    <param name="capture" type="boolean" label="Capture discarded reads to a file" />

    <section name="options_quality" title="quality options" expanded="False">
        <param name="qualitenc" type="select" format="text" label="Quality encoded type" help="specify how quality scores are encoded, 'phred33' (Illumina 1.8+, Sanger, default) or 'phred64' (Illumina 1.3 - 1.5)">
            <option value="phred33">phred33</option>
            <option value="phred64">phred64</option>
        </param>
    </section>

    <param name="activate_advanced_options" type="boolean" label="Activate advanced options" help="advanced options are defined below" />

    <!-- Filtering thresholds followed by the six barcode layout switches. -->
    <section name="options_advanced" title="advanced options" expanded="False">
        <param name="sliding" type="float" value="0.15" label="set the size of the sliding window as a fraction of the read length, between 0 and 1 (default 0.15)" />
        <param name="score" type="integer" value="10" label="Set the score limit. If the average score within the sliding window drops below this value, the read is discarded (default 10)" />
        <param name="remove" type="boolean" checked="false" default="false" label="Clean data, remove any read with an uncalled base" />
        <param name="discard" type="boolean" checked="false" default="false" label="Discard reads with low quality scores" />
        <param name="rescue" type="boolean" checked="false" default="false" label="Rescue barcodes and RAD-Tags?" />
        <param name="truncate" type="integer" value="-1" label="Truncate final read length to this value" help="default = -1" />
        <param name="inline" type="boolean" checked="true" default="true" label="Barcode options -> inline_null" help="barcode is inline with sequence, occurs only on single-end read" />
        <param name="index" type="boolean" checked="false" default="false" label="Barcode options -> index_null" help="barcode is provided in FASTQ header, occurs only on single-end read" />
        <param name="inlinein" type="boolean" checked="false" default="false" label="Barcode options -> inline_inline" help="barcode is inline with sequence, occurs on single and paired-end read" />
        <param name="indexind" type="boolean" checked="false" default="false" label="Barcode options -> index_index" help="barcode is provided in FASTQ header, occurs on single and paired-end read" />
        <param name="inlineind" type="boolean" checked="false" default="false" label="Barcode options -> inline_index" help="barcode is inline with sequence on single-end read, occurs in FASTQ header for paired-end read" />
        <param name="indexin" type="boolean" checked="false" default="false" label="Barcode options -> index_inline" help="barcode occurs in FASTQ header for single-end read, is inline with sequence on paired-end read" />
    </section>

    <param name="outype" type="select" format="text" label="Output format" help="output type, either 'fastq' or 'fasta' (default fastq)">
        <option value="fastq">fastq</option>
        <option value="fasta">fasta</option>
    </param>

    <!-- Any value other than "1" enables the zip archive output. -->
    <param name="options_output_infos_selector" type="select" label="Output type">
        <option value="1">Normal (a fastq file by barcode)</option>
        <option value="2" selected="True">Additional zip archive with all files (Normal + one archive with all fastq files)</option>
        <option value="3">Only a zip archive with all files (one archive with all fastq files)</option>
    </param>

</inputs>
<outputs>

    <!-- Log dataset; the command template passes it as the logfile option. -->
    <data name="output" format="txt" label="results.log with ${tool.name} on ${on_string}: demultiplexed and cleaned reads" />

    <!-- Hidden placeholder whose discovered datasets are collected from the galaxy_outputs directory. -->
    <data name="additional" format="txt" label="fasta/fastq file with ${tool.name}" hidden="true">
        <discover_datasets pattern="__designation_and_ext__" directory="galaxy_outputs" visible="true" />
    </data>

    <!-- Zip archive, created unless the output type is "1". -->
    <data name="output_archive" format="zip" label="all_files.zip with ${tool.name} on ${on_string}: demultiplexed and cleaned reads ">
        <filter>options_output_infos_selector != "1"</filter>
    </data>

    <!-- Discarded reads, created only when "capture" is checked. -->
    <data name="discard_file" format="fastq" label="discard.fastq with ${tool.name} on ${on_string}: demultiplexed and cleaned reads ">
        <filter>capture</filter>
    </data>

</outputs>

<stdio>
   <!-- Treat every positive exit status as a fatal error. The previous
        range="1" matched status 1 only, so a wrapper that ended with any
        other non-zero status (for example 2) was reported as successful. -->
   <exit_code range="1:" level="fatal" description="Error in Stacks Process radtag execution" />
</stdio>


<help>

.. class:: infomark

**What it does**

This program examines raw reads from an Illumina sequencing run and first, checks that the barcode and the RAD cutsite are intact, and demultiplexes the data. If there are 
errors in the barcode or the RAD site within a certain allowance process_radtags can correct them. Second, it slides a window down the length of the read and checks the 
average quality score within the window. If the score drops below 90% probability of being correct (a raw phred score of 10), the read is discarded. This allows for some 
sequencing errors while eliminating reads where the sequence is degrading as it is being sequenced. By default the sliding window is 15% of the length of the read, but can be 
specified on the command line (the threshold and window size can be adjusted).

The process_radtags program can:

- handle data that is barcoded, either inline or using an index, or unbarcoded.
- use combinatorial barcodes.
- check and correct for a restriction enzyme cutsite for single or double-digested data.
- filter adapter sequence while allowing for sequencing error in the adapter pattern.
- process individual files or whole directories of files.
- directly read gzipped data.
- filter reads based on Illumina's Chastity filter.

--------

**Help**

Input files:

- FASTQ, FASTA, zip, tar.gz

- Barcode File Format

The barcode file is a very simple format, one barcode per line::

	CGATA
	CGGCG
	GAAGC
	GAGAT
	CGATA
	CGGCG
	GAAGC
	GAGAT

Combinatorial barcodes are specified, one per column, separated by a tab::

	CGATA	ACGTA
	CGGCG	CGTA
	GAAGC	CGTA
	GAGAT	CGTA
	CGATA	AGCA 
	CGGCG	AGCA
	GAAGC	AGCA
	GAGAT	AGCA


Instructions to add the functionality of archives management in Galaxy on the `eBiogenouest HUB wiki &lt;https://www.e-biogenouest.org/wiki/ManArchiveGalaxy&gt;`_ .

--------


**Created by:**

Stacks was developed by Julian Catchen with contributions from Angel Amores, Paul Hohenlohe, and Bill Cresko

--------

**Project links:**

`STACKS website &lt;http://creskolab.uoregon.edu/stacks/&gt;`_ .

`STACKS manual &lt;http://creskolab.uoregon.edu/stacks/stacks_manual.pdf&gt;`_ .

`STACKS google group &lt;https://groups.google.com/forum/#!forum/stacks-users&gt;`_ .

--------

**References:**

- J. Catchen, P. Hohenlohe, S. Bassham, A. Amores, and W. Cresko. Stacks: an analysis tool set for population genomics. Molecular Ecology. 2013.

- J. Catchen, S. Bassham, T. Wilson, M. Currey, C. O'Brien, Q. Yeates, and W. Cresko. The population structure and recent colonization history of Oregon threespine stickleback determined using restriction-site associated DNA-sequencing. Molecular Ecology. 2013.

- J. Catchen, A. Amores, P. Hohenlohe, W. Cresko, and J. Postlethwait. Stacks: building and genotyping loci de novo from short-read sequences. G3: Genes, Genomes, Genetics, 1:171-182, 2011.

- A. Amores, J. Catchen, A. Ferrara, Q. Fontenot and J. Postlethwait. Genome evolution and meiotic maps by massively parallel DNA sequencing: Spotted gar, an outgroup for the teleost genome duplication. Genetics, 188:799-808, 2011.

- P. Hohenlohe, S. Amish, J. Catchen, F. Allendorf, G. Luikart. RAD sequencing identifies thousands of SNPs for assessing hybridization between rainbow trout and westslope cutthroat trout. Molecular Ecology Resources, 11(s1):117-122, 2011.

- K. Emerson, C. Merz, J. Catchen, P. Hohenlohe, W. Cresko, W. Bradshaw, C. Holzapfel. Resolving postglacial phylogeography using high-throughput sequencing. Proceedings of the National Academy of Sciences, 107(37):16196-200, 2010.

--------

**Integrated by:**

Yvan Le Bras and Cyril Monjeaud

GenOuest Bio-informatics Core Facility

UMR 6074 IRISA INRIA-CNRS-UR1 Rennes (France)

support@genouest.org

If you use this tool in Galaxy, please cite :

`Y. Le Bras, A. Roult, C. Monjeaud, M. Bahin, O. Quenez, C. Heriveau, A. Bretaudeau, O. Sallou, O. Collin, Towards a Life Sciences Virtual Research Environment : an e-Science initiative in Western France. JOBIM 2013. &lt;https://www.e-biogenouest.org/resources/128&gt;`_



</help>
<citations>
    <!-- Six DOI citations followed by one BibTeX entry for the JOBIM 2013
         paper that the help text asks users to cite. -->
    <citation type="doi">10.1111/mec.12354</citation>
    <citation type="doi">10.1111/mec.12330</citation>
    <citation type="doi">10.1534/g3.111.000240</citation>
    <citation type="doi">10.1534/genetics.111.127324</citation>
    <citation type="doi">10.1111/j.1755-0998.2010.02967.x</citation>
    <citation type="doi">10.1073/pnas.1006538107</citation>

    <citation type="bibtex">@INPROCEEDINGS{JOBIM2013,
    author = {Le Bras, Y. and ROULT, A. and Monjeaud, C. and Bahin, M. and Quenez, O. and Heriveau, C. and Bretaudeau, A. and Sallou, O. and Collin, O.},
    title = {Towards a Life Sciences Virtual Research Environment: An e-Science initiative in Western France},
    booktitle = {JOBIM 2013 Proceedings},
    year = {2013},
    url = {https://www.e-biogenouest.org/resources/128},
    pages = {97-106}
    }</citation>
</citations>
</tool>