comparison demultiplex.xml @ 0:76c750c5f0d1 draft default tip

planemo upload for repository https://github.com/oinizan/FROGS-wrappers commit 0b900a51e220ce6f17c1e76292c06a5f4d934055-dirty
author frogs
date Thu, 25 Oct 2018 05:01:13 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:76c750c5f0d1
1 <?xml version="1.0"?>
2 <!--
3 # Copyright (C) 2015 INRA
4 #
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU General Public License as published by
7 # the Free Software Foundation, either version 3 of the License, or
8 # (at your option) any later version.
9 #
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
14 #
15 # You should have received a copy of the GNU General Public License
16 # along with this program. If not, see <http://www.gnu.org/licenses/>.
17 -->
18 <tool id="FROGS_demultiplex" name="FROGS Demultiplex reads" version="2.0.0">
19 <description>Attribute reads to samples in function of inner barcode.</description>
20 <requirements>
21 <!--requirement type="binary">perl</requirement-->
22 <requirement type="package" version="2.0.1">frogs</requirement>
23 <requirement type="package" version="1.10">perl-io-zlib</requirement>
24 <requirement type="package" version="0.20">perl-io-gzip</requirement>
25 <requirement type="package">perl</requirement>
26 </requirements>
27 <stdio>
28 <exit_code range="1:" />
29 <exit_code range=":-1" />
30 </stdio>
31 <command>
32 demultiplex.py
33 #if str( $fastq_input.fastq_input_selector ) == "paired":
34 --input-R1 "${fastq_input.fastq_input1}"
35 --input-R2 "${fastq_input.fastq_input2}"
36 #else:
37 --input-R1 "${fastq_input.fastq_input1}"
38 #end if
39 --input-barcode $barcode_file
40 --mismatches $mismatches
41 --end $end
42 --summary $summary
43 --output-demultiplexed $demultiplexed_archive
44 --output-excluded $undemultiplexed_archive
45 </command>
46 <inputs>
47 <!-- Input file -->
48 <param format="tabular" name="barcode_file" type="data" label="Barcode file" help="This file describes barcodes and samples (one line by sample tabulated separated from barcode sequence(s)). See Help section" optional="false" />
49
50 <conditional name="fastq_input">
51 <param name="fastq_input_selector" type="select" label="Single or Paired-end reads" help="Select between paired and single-end data">
52 <option value="single">Single</option>
53 <option value="paired">Paired</option>
54 </param>
55 <when value="paired">
56 <param name="fastq_input1" type="data" format="fastq" label="Select first set of reads" help="Specify dataset of your forward reads"/>
57 <param name="fastq_input2" type="data" format="fastq" label="Select second set of reads" help="Specify dataset of your reverse reads"/>
58 </when>
59 <when value="single">
60 <param name="fastq_input1" type="data" format="fastq" label="Select fastq dataset" help="Specify dataset of your single end reads"/>
61 </when>
62 </conditional>
63
64 <!-- Option -->
65 <param name="mismatches" type="integer" label="Barcode mismatches" help="Number of mismatches allowed in barcode" value="0" optional="false" />
66 <param name="end" type="select" label="Barcode on which end ?" help="The barcode is placed either at the beginning of the forward end or of the reverse end or both?">
67 <option value="bol" selected="true">Forward</option>
68 <option value="eol">Reverse</option>
69 <option value="both">Both ends</option>
70 </param>
71 </inputs>
72 <outputs>
73 <data name="demultiplexed_archive" format="tar" label="${tool.name}: demultiplexed.tar.gz" from_work_dir="demultiplexed.tar.gz"/>
74 <data name="undemultiplexed_archive" format="tar" label="${tool.name}: undemultiplexed.tar.gz" from_work_dir="undemultiplexed.tar.gz"/>
75 <data name="summary" format="tabular" label="${tool.name}: report" from_work_dir="report.tsv"/>
76 </outputs>
77 <tests>
78 <test>
79 <param name="barcode_file" value="references/demultiplex_barcode.txt"/>
80 <conditional name="fastq_input">
81 <param name="fastq_input_selector" value="paired"/>
82 </conditional>
83 <param name="fastq_input1" value="references/demultiplex_test2_R1.fq"/>
84 <param name="fastq_input2" value="references/demultiplex_test2_R2.fq"/>
85 <param name="mismatches" value="1"/>
86 <param name="end" value="both"/>
87 <output name="summary" file="references/demultiplex_log.txt" compare="sim_size" delta="0" />
88 </test>
89 </tests>
90 <help>
91 .. class:: infomark page-header h2
92
93 What it does
94
95 This tool assigns single or paired-end reads to samples according to the forward and/or reverse barcode found in the first read or in both reads.
96
97 **Command line**::
98
99 demultiplex.py --input-R1 *FQ_INPUT1* [--input-R2 *FQ_INPUT2*] --input-barcode *TXT_BARCODE* --mismatches *MISMATCH* --end *END* --summary *TXT_SUMMARY_OUTPUT* --output-demultiplexed *TARGZ_DEMULT_ARCHIVE_OUTPUT* --output-excluded *TARGZ_UNDEMULT_ARCHIVE_OUTPUT*
100
101 .. csv-table:: Inputs
102 :header: "Input name", "Meaning"
103 :widths: 20, 80
104 :class: table table-striped
105
106 "FQ_INPUT1", "Fastq input file for the first read (single-end or forward read of paired-end sequences)"
107 "FQ_INPUT2", "Fastq input file for the second read (only for paired-end sequences)"
108 "TXT_BARCODE", "Tabulated text file that describes barcode sequences used to multiplex samples: SAMPLE_NAME BARCODE1 [BARCODE2]"
109
110 .. csv-table:: Options
111 :header: "Option name", "Meaning"
112 :widths: 20, 80
113 :class: table table-striped
114
115 "-m/--mismatches MISMATCH", "Number of mismatches allowed in each barcode"
116 "-e/--end END", "At which end the barcode must be found: forward (beginning of the (first) read), reverse (end of the (second) read) or both"
117
118 .. csv-table:: Outputs
119 :header: "Output name", "Meaning"
120 :widths: 20, 80
121 :class: table table-striped
122
123 "TXT_SUMMARY_OUTPUT", "A tabulated text file which summarises the number of sequences (single or paired) for each sample"
124 "TARGZ_DEMULT_ARCHIVE_OUTPUT", "A TAR.GZ archive that contains all fastq files for each sample"
125 "TARGZ_UNDEMULT_ARCHIVE_OUTPUT", "A TAR.GZ archive that contains all fastq files for undemultiplexed reads"
126
127 .. class:: h3
128
129 Format
130
131 BARCODE_FILE :
132 This file is expected to be tabulated
133
134 -first column corresponds to the sample name
135
136 -second column corresponds to the sequence barcode used
137
138 -third column (optional) corresponds to the reverse sequence barcode
139
140 .. class:: warningmark
141
142 Take care to give each barcode sequence in the same strand as the read; you may need to reverse complement the reverse barcode sequence
143
144 .. class:: warningmark
145
146 All barcode sequences must have the same length
147
148 Example of barcode file: Here the sample is multiplexed by both fragment ends.
149
150 .. image:: static/images/demultiplex_barcode.png
151 :height: 18
152 :width: 286
153
154 FASTQ :
155 Text file describing biological sequences in a 4 line format:
156
157 -first line starts with "@" and contains the sequence identifier and, optionally, the sequence description
158
159 -second line is the sequence itself
160
161 -third line is a "+", optionally followed by the sequence identifier depending on the version
162
163 -fourth line is the quality sequence, one code per base. The code depends on its version and the sequencer
164
165 `Click here for more details on the fastq format &lt;https://en.wikipedia.org/wiki/FASTQ_format&gt;`_
166
167 Example of fastq read corresponding to the previous barcode file
168
169 .. image:: static/images/demultiplex_fastq_ex.png
170 :height: 57
171 :width: 420
172
173
174 .. class:: infomark page-header h2
175
176 How it works
177
178 For each sequence or sequence pair, the sequence fragment at the beginning (forward multiplexing) of the (first) read or at the end (reverse multiplexing) of the (second) read will be compared to all barcodes of the barcode file.
179
180 If this fragment is found once and only once (regarding the mismatch threshold), the fragment is trimmed and the sequence will be attributed to the corresponding sample.
181
182 Finally fastq files (or pair of fastq files) for each sample are included in an archive and a report, describing how many sequences are attributed for each sample, is created.
183
184
185 .. class:: infomark page-header h2
186
187 Advice
188
189 Do not forget to indicate barcode sequences as they really appear in the fastq sequence file, especially if you have multiplexed data via the reverse strand.
190
191 For the mismatch threshold, we advise leaving the threshold at 0. Then, if you are not satisfied with the result, try 1. The appropriate number of mismatches depends on the length of the barcode, but these sequences are frequently very short, so 1 mismatch is already more than the sequencing error rate.
192
193 If you have different barcode lengths, you must demultiplex your data in several steps, beginning with the longest barcode set. Then, to trim the shorter barcodes, use the "unmatched" or "ambiguous" sequence file with the shorter barcodes, and so on.
194
195 If you have Roche 454 sequences in sff format, you must convert them with a program such as `sff2fastq &lt;https://github.com/indraniel/sff2fastq&gt;`_ or sff_to_fastq (installable in Galaxy)
196
197
198 ----
199
200 **Contact**
201
202 Contacts: frogs@inra.fr
203
204 Repository: https://github.com/geraldinepascal/FROGS
205
206 Please cite the FROGS Publication: *Escudie F., Auer L., Bernard M., Cauquil L., Vidal K., Maman S., Mariadassou M., Combes S., Hernandez-Raquet G., Pascal G., 2016. FROGS: Find Rapidly OTU with Galaxy Solution. In: ISME-2016 Montreal, CANADA ,* http://bioinfo.genotoul.fr/wp-content/uploads/FROGS_ISME2016_poster.pdf
207
208 Depending on the help provided you can cite us in acknowledgements, references or both.
209 </help>
210 <citations>
211 <citation type="doi">10.7287/peerj.preprints.386v1</citation>
212 </citations>
213 </tool>