view demultiplex.xml @ 0:834843ebe569 draft

"planemo upload for repository https://github.com/geraldinepascal/FROGS-wrappers/tree/v3.2.2 commit 6150f59723cf8b0d3c0a2e35208078473f0ec5cf"
author frogs
date Thu, 08 Apr 2021 14:30:48 +0000
parents
children 2ed857799fd5
line wrap: on
line source

<?xml version="1.0"?>
<!--
# Copyright (C) 2015 INRA
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
-->
<tool id="FROGS_demultiplex" name="FROGS Demultiplex reads" version="3.2.2">
	<description>Assign reads to samples according to their inner barcode.</description>
        <!-- Declared dependencies: the perl binary and the frogs package,
             pinned to 3.2.2 (the same version as this wrapper).
             NOTE(review): type="binary" is a legacy requirement type; confirm it is still needed. -->
        <requirements>
            <requirement type="binary">perl</requirement>
            <requirement type="package" version="3.2.2">frogs</requirement>
        </requirements>
        <!-- Error detection: every non-zero exit code, positive or negative,
             marks the job as failed. -->
        <stdio>
            <exit_code range="1:" level="fatal" />
            <exit_code range=":-1" level="fatal" />
        </stdio>
	<!-- Command line for demultiplex.py.
	     The first fastq dataset is always passed as R1; the second one is added
	     as R2 only when the "paired" layout is selected. Every interpolated value
	     is single-quoted so that dataset paths cannot be split or interpreted by
	     the shell. -->
	<command><![CDATA[
		demultiplex.py
		    --input-R1 '${fastq_input.fastq_input1}'
		#if str( $fastq_input.fastq_input_selector ) == "paired":
		    --input-R2 '${fastq_input.fastq_input2}'
		#end if
		    --input-barcode '${barcode_file}'
		    --mismatches '${mismatches}'
		    --end '${end}'
		    --summary '${summary}'
		    --output-demultiplexed '${demultiplexed_archive}'
		    --output-excluded '${undemultiplexed_archive}'
	]]></command>
	<inputs>
		<!-- Input files -->
		<!-- Tabular file that associates each sample with its barcode sequence(s) -->
		<param format="tabular" name="barcode_file" type="data" label="Barcode file" help="This file describes barcodes and samples (one line by sample tabulated separated from barcode sequence(s)). See Help section" optional="false" />

		<!-- Read layout. The "paired" branch asks for two fastq datasets and the
		     "single" branch for one. Both branches call their first dataset
		     fastq_input1, and the command references it in either case. -->
		<conditional name="fastq_input">
			<param name="fastq_input_selector" type="select" label="Single or Paired-end reads" help="Select between paired and single-end data">
				<option value="single">Single</option>
				<option value="paired">Paired</option>
			</param>
			<when value="paired">
				<param name="fastq_input1" type="data" format="fastq" label="Select first set of reads" help="Specify dataset of your forward reads"/>
				<param name="fastq_input2" type="data" format="fastq" label="Select second set of reads" help="Specify dataset of your reverse reads"/>
			</when>
			<when value="single">
				<param name="fastq_input1" type="data" format="fastq" label="Select fastq dataset" help="Specify dataset of your single end reads"/>
			</when>
		</conditional>

		<!-- Options -->
		<!-- min="0": a negative number of mismatches is meaningless, so the form rejects it -->
		<param name="mismatches" type="integer" label="Barcode mismatches" help="Number of mismatches allowed in barcode" value="0" min="0" optional="false" />
		<!-- Barcode location; the selected value is passed as-is to the end option of the command -->
		<param name="end" type="select" label="Barcode on which end ?" help="The barcode is placed either at the beginning of the forward end or of the reverse end or both?">
			<option value="bol" selected="true">Forward</option>
			<option value="eol">Reverse</option>
			<option value="both">Both ends</option>
		</param>
	</inputs>
	<!-- Result datasets: two archives (demultiplexed and undemultiplexed reads)
	     and one tabular report.
	     NOTE(review): the command also passes each dataset path explicitly to
	     demultiplex.py, so the from_work_dir names may be redundant; confirm
	     which mechanism is intended before changing either. -->
	<outputs>
		<data
			name="demultiplexed_archive"
			format="tar"
			label="${tool.name}: demultiplexed.tar.gz"
			from_work_dir="demultiplexed.tar.gz"/>
		<data
			name="undemultiplexed_archive"
			format="tar"
			label="${tool.name}: undemultiplexed.tar.gz"
			from_work_dir="undemultiplexed.tar.gz"/>
		<data
			name="summary"
			format="tabular"
			label="${tool.name}: report"
			from_work_dir="report.tsv"/>
	</outputs>
    <tests>
        <!-- Paired-end run with barcodes on both ends and one mismatch allowed.
             Only the summary report is checked, and only by size: sim_size with
             delta="0" requires the same size as the reference file.
             The two fastq datasets are declared inside the fastq_input
             conditional they belong to, next to the selector that enables them. -->
        <test>
            <param name="barcode_file" value="input/demultiplex_barcode.txt"/>
            <conditional name="fastq_input">
                <param name="fastq_input_selector" value="paired"/>
                <param name="fastq_input1" value="input/demultiplex_test2_R1.fq.gz"/>
                <param name="fastq_input2" value="input/demultiplex_test2_R2.fq.gz"/>
            </conditional>
            <param name="mismatches" value="1"/>
            <param name="end" value="both"/>
            <output name="summary" file="references/demultiplex_summary.txt" compare="sim_size" delta="0" />
        </test>
    </tests>
	<help>
.. class:: infomark page-header h2

What it does

This tool assigns single or paired-end reads to samples according to the forward or reverse barcode found in the first read, the second read, or both.

**Command line**::

  demultiplex.py --input-R1 *FQ_INPUT1* [--input-R2 *FQ_INPUT2*] --input-barcode *TXT_BARCODE* --mismatches *MISMATCH* --end *END* --summary *TXT_SUMMARY_OUTPUT* --output-demultiplexed *TARGZ_DEMULT_ARCHIVE_OUTPUT* --output-excluded *TARGZ_UNDEMULT_ARCHIVE_OUTPUT*

**Inputs**

.. csv-table:: 
   :header: "Input name", "Meaning"
   :widths: 20, 80
   :class: table table-striped

   "FQ_INPUT1", "Fastq input file for the first read (single-end or forward read of paired-end sequences)"
   "FQ_INPUT2", "Fastq input file for the second read (only for paired-end sequences)"
   "TXT_BARCODE", "Tabulated text file that describes barcode sequences used to multiplex samples: SAMPLE_NAME  BARCODE1  [BARCODE2]"

**Options**

.. csv-table::
   :header: "Option name", "Meaning"
   :widths: 20, 80
   :class: table table-striped

   "-m/--mismatches MISMATCH", "Number of mismatches allowed in each barcode"
   "-e/--end END", "End at which the barcode must be found: forward (beginning of the (first) read), reverse (end of the (second) read) or both"

**Outputs**

.. csv-table:: 
   :header: "Output name", "Meaning"
   :widths: 20, 80
   :class: table table-striped

   "TXT_SUMMARY_OUTPUT", "A tabulated text file which summarises the number of sequences (single or paired) for each sample"
   "TARGZ_DEMULT_ARCHIVE_OUTPUT", "A TAR.GZ archive that contains all fastq files for each sample"
   "TARGZ_UNDEMULT_ARCHIVE_OUTPUT", "A TAR.GZ archive that contains all fastq files for undemultiplexed reads"

.. class:: h3

Format

INPUT FILE :
 Sequence input file(s) are expected to be in `fastq format &lt;https://en.wikipedia.org/wiki/FASTQ_format&gt;`_

BARCODE_FILE :
 This file is expected to be tabulated
		
 - first column corresponds to the sample name

 - second column corresponds to the sequence barcode used

 - third column (optional) corresponds to the reverse sequence barcode
 
.. class:: warningmark

 Take care to give each barcode sequence in the orientation in which it appears in the read, so you may need to reverse-complement the reverse barcode sequence

.. class:: warningmark

 All barcode sequences must have the same length
 
Example of barcode file: Here the sample is multiplexed by both fragment ends.

 .. image:: static/images/FROGS_demultiplex_barcode.png
   :height: 60
   :width: 316

.. class:: infomark page-header h2

How it works

For each sequence or sequence pair, the sequence fragment at the beginning (forward multiplexing) of the (first) read or at the end (reverse multiplexing) of the (second) read will be compared to all barcodes of the barcode file.

If this fragment is found once and only once (within the mismatch threshold), the fragment is trimmed and the sequence is assigned to the corresponding sample.

Finally fastq files (or pair of fastq files) for each sample are included in an archive and a report, describing how many sequences are attributed for each sample, is created.


.. class:: infomark page-header h2

Advice

Do not forget to give the barcode sequences exactly as they appear in the fastq sequence file, especially if you have multiplexed data via the reverse strand.

For the mismatch threshold, we advise starting with 0. If you are not satisfied with the result, try 1. The appropriate number of mismatches depends on the length of the barcode, but these sequences are frequently very short, so 1 mismatch is already more than the sequencing error rate.

If you have barcodes of different lengths, you must demultiplex your data in several steps, beginning with the longest barcode set. Then, to trim the shorter barcodes, run the tool on the "unmatched" sequences file from the previous step with the next shorter barcode set, and so on.

If you have Roche 454 sequences in sff format, you must first convert them with a program such as `sff2fastq &lt;https://github.com/indraniel/sff2fastq&gt;`_ or sff_to_fastq (installable in Galaxy).


----

**Contact**

Contacts: frogs-support@inrae.fr

Repositories: https://github.com/geraldinepascal/FROGS, https://github.com/geraldinepascal/FROGS-wrappers

Website: http://frogs.toulouse.inrae.fr/

Please cite the **FROGS article**: `Escudie F., et al. Bioinformatics, 2018. FROGS: Find, Rapidly, OTUs with Galaxy Solution. &lt;https://doi.org/10.1093/bioinformatics/btx791&gt;`_

	</help>
</tool>