comparison ALFA/alfa_wrapper.xml @ 18:a1e2ab10b317 draft

Uploaded
author charles-bernard
date Tue, 11 Oct 2016 09:18:48 -0400
parents
children a74f6350f32e
comparison
equal deleted inserted replaced
17:e3d439570972 18:a1e2ab10b317
1 <tool id="alfa" name="ALFA" version="0.1.0">
2 <description>- Compute and display distribution of reads by genomic categories</description>
3
4 <!-- ALFA requires bedtools suite v2.20.0 and above; samtools and matplotlib are declared below as package requirements as well -->
5 <requirements>
6 <requirement type="package" version="2.24">bedtools</requirement>
7 <requirement type="package" version="1.2">samtools</requirement>
8 <requirement type="package" version="1.4">matplotlib</requirement>
9 </requirements>
10
11 <command interpreter="bash">
12 alfa_wrapper.sh '${ALFA_config}' '$logReport'
13 </command> <!-- The wrapper receives the generated ALFA_config file and the log report path; both are single-quoted so a path with spaces or shell metacharacters stays one argument -->
14
15 <inputs>
16 <param name="projectName" type="text" label="Project Name" value="ALFA" size="20">
17 <validator message="Please, specify a name for your project." type="empty_field"/>
18 </param> <!-- projectName defaults to ALFA, must not be empty, and prefixes every output label -->
19
20 <section name="annotation" title="INPUT 1: Annotation File (GTF format)" expanded="True"> <!-- First input: the annotation comes from a GTF file, from user-supplied ALFA indexes, or from a built-in index -->
21 <conditional name="annotationSource">
22 <param name="annotationSourceSelection" type="select" label="Select the source of your annotated sequence/genome">
23 <option value="personal_gtf" selected="true">Personal annotation file (GTF format)</option>
24 <option value="index">Stranded and Unstranded Indexes previously generated by ALFA (Index format)</option>
25 <option value="built_in_index">Built-in indexes among a list of referenced genome (Index format)</option>
26 </param>
27 <when value="personal_gtf">
28 <param name="annotationFile" type="data" format="gff,gtf" label="Select your personal annotation file (GTF format)">
29 </param>
30 </when>
31 <when value="index"> <!-- NOTE(review): this tool declares its own index outputs as format="txt" while these inputs require format="index"; confirm that previously generated indexes are selectable here -->
32 <param name="strandedIndex" type="data" format="index" label="Select your ALFA stranded index file (index format)"/>
33 <param name="unstrandedIndex" type="data" format="index" label="Select your ALFA Unstranded index file (index format)"/>
34 </when>
35 <when value="built_in_index"> <!-- Genome list is read from the alfa_indexes data table -->
36 <param name="built_in_index_prefix" type="select" label="Select Genome">
37 <options from_data_table="alfa_indexes">
38 <validator type="no_options" message="No indexes are available for the selected input dataset" />
39 </options>
40 </param>
41 </when>
42 </conditional>
43 </section>
44
45 <section name="reads" title="INPUT 2: Aligned Reads File(s) of the annotated sequence (BAM or BEDGRAPH format)" expanded="True"> <!-- Second input: one or more reads files, all in the same format, plus the strandness setting -->
46 <conditional name="readsType">
47 <param name="readsTypeSelection" type="select" label="Select the format of the reads file(s)">
48 <option value="bam" selected="true">BAM</option>
49 <option value="bedgraph">BEDGRAPH</option>
50 </param>
51 <when value="bam">
52 <repeat name="readsList" title="Reads File" min="1">
53 <param name="readsFile" type="data" format="bam" label="Select the reads file of your annotated sequence (BAM format)"/>
54 <param name="readsLabel" type="text" size="20" value="" label="Label of the reads" optional="True"/>
55 </repeat>
56 </when>
57 <when value="bedgraph"> <!-- bedgraph is listed first so bedgraph-typed datasets can be selected; bed is kept so datasets accepted before this change still are -->
58 <repeat name="readsList" title="Reads File" min="1">
59 <param name="readsFile" type="data" format="bedgraph,bed" label="Select the reads file of your annotated sequence (BEDGRAPH format)"/>
60 <param name="readsLabel" type="text" size="20" value="" label="Label of the reads" optional="True"/>
61 </repeat>
62 </when>
63 </conditional>
64 <param name="strandness" type="select" label="Select the strandness of your mapped reads dataset">
65 <option value="unstranded" selected="true">Unstranded (reads will match genomic features on both forward and reverse strands of the annotated sequence)</option>
66 <option value="forward">Forward (reads will match only genomic features on the forward strand of the annotated sequence)</option>
67 <option value="reverse">Reverse (reads will match only genomic features on the reverse strand of the annotated sequence)</option>
68 </param>
69 </section>
70
71 <section name="outputFiles" title="OUTPUT FILES: Choose the output files" expanded="False"> <!-- Three switches; plots and count file are on by default, indexes are off -->
72 <param name="plot" type="boolean" checked="True" truevalue="True" falsevalue="False" label="Categories and Biotypes Histograms" help="Plot the nucleotides distribution of the reads per genomic categories and biotypes"/>
73 <param name="countFile" type="boolean" checked="True" truevalue="True" falsevalue="False" label="Categories Count File" help="Edit the exact count of nucleotides in the reads per genomic categories and biotypes"/>
74 <param name="index" type="boolean" checked="False" truevalue="True" falsevalue="False" label="Indexes" help="Print the resulting stranded and unstranded indexes from the gtf input file (useful if you plan to run ALFA again with this annotated sequence)"/>
75 </section>
76
77 <section name="outputOptions" title="ADVANCED OPTIONS" expanded="False"> <!-- Category depth, plot file format and optional y axis range -->
78 <param name="categoriesDepth" type="select" label="Categories to Display">
79 <option value="1">gene | intergenic</option>
80 <option value="2">exon | intron | intergenic</option>
81 <option value="3" selected="true">5’-UTR | CDS | 3’-UTR | intron | intergenic</option>
82 <option value="4">5’-UTR | start_codon | CDS | stop_codon | 3’-UTR | intron | intergenic</option>
83 </param>
84 <param name="plotFormat" type="select" label="Plot Options: Select graph format" help="Ignore if you did not choose the histograms output file">
85 <option value="pdf" selected="true">pdf</option>
86 <option value="svg">svg</option>
87 <option value="png">png</option>
88 </param>
89 <conditional name="plotThreshold">
90 <param name="plotThresholdChoice" type="boolean" truevalue="True" falsevalue="False" checked="False" label="Plot Options: Modify y axis range of the normalized counts of bio-features" help="Ignore if you did not choose the histograms output file"/>
91 <when value="True">
92 <param name="yMin" type="float" value="-2.0" label="y min"/>
93 <param name="yMax" type="float" value="2.0" label="y max"/>
94 </when> <when value="False"/> <!-- explicit empty case so the default (unchecked) value of the test parameter has a matching when -->
95 </conditional>
96 </section>
97 </inputs>
98
99 <outputs> <!-- Every output label starts with the project name; all datasets except logReport carry a filter on the outputFiles and outputOptions parameters -->
100 <data name="logReport" format="txt" label="${projectName}-Log Report"/> <!-- declared without a filter -->
101 <data name="outputPdf" format="pdf" label="${projectName}-BioFeatures Distribution">
102 <filter>outputFiles['plot'] is True and outputOptions['plotFormat'] == 'pdf'</filter>
103 </data> <!-- pdf plot format: one dataset -->
104 <data name="outputCategoriesPng" format="png" label="${projectName}-Categories Distribution">
105 <filter>outputFiles['plot'] is True and outputOptions['plotFormat'] == 'png'</filter>
106 </data>
107 <data name="outputBiotypesPng" format="png" label="${projectName}-Biotypes Distribution">
108 <filter>outputFiles['plot'] is True and outputOptions['plotFormat'] == 'png'</filter>
109 </data> <!-- png plot format: separate categories and biotypes datasets -->
110 <data name="outputCategoriesSvg" format="svg" label="${projectName}-Categories Distribution">
111 <filter>outputFiles['plot'] is True and outputOptions['plotFormat'] == 'svg'</filter>
112 </data>
113 <data name="outputBiotypesSvg" format="svg" label="${projectName}-Biotypes Distribution">
114 <filter>outputFiles['plot'] is True and outputOptions['plotFormat'] == 'svg'</filter>
115 </data> <!-- svg plot format: separate categories and biotypes datasets -->
116 <data name="outputCountFile" format="txt" label="${projectName}-Categories Count">
117 <filter>outputFiles['countFile'] is True</filter>
118 </data>
119 <data name="outputStrandedIndex" format="txt" label="${projectName}-Stranded Index">
120 <filter>outputFiles['index'] is True</filter>
121 </data>
122 <data name="outputUnstrandedIndex" format="txt" label="${projectName}-Unstranded Index">
123 <filter>outputFiles['index'] is True</filter>
124 </data> <!-- both index datasets share the same filter on the index switch -->
125 </outputs>
126
127 <configfiles> <!-- ALFA_config is a key=value file written from the tool parameters and passed as the first argument of the command. NOTE(review): the three annotation branches do not write the same keys (built_in_index_prefix appears only in the built_in_index branch, strandedIndex and unstrandedIndex are absent from it); confirm the wrapper script tolerates missing keys -->
128 <configfile name="ALFA_config">
129 projectName=$projectName
130
131 ##__INPUT 1__##
132 annotationSource=$annotation.annotationSource['annotationSourceSelection']
133 #if str ( $annotation.annotationSource['annotationSourceSelection'] ) == "index"
134 annotationFile=None
135 strandedIndex=$annotation.annotationSource['strandedIndex']
136 unstrandedIndex=$annotation.annotationSource['unstrandedIndex']
137 #else if str ( $annotation.annotationSource['annotationSourceSelection'] ) == "built_in_index"
138 annotationFile=None
139 built_in_index_prefix=$annotation.annotationSource.built_in_index_prefix.fields.prefix
140 #else
141 annotationFile=$annotation.annotationSource['annotationFile']
142 strandedIndex=None
143 unstrandedIndex=None
144 #end if
145
146 ##__INPUT 2__##
147 readsType=$reads.readsType['readsTypeSelection']
148 #for $i, $r in enumerate ( $reads.readsType['readsList'] )
149 readsFile[$i]=$r.readsFile
150 readsLabel[$i]=$r.readsLabel
151 #end for
152 strandness=$reads['strandness']
153
154 ##__OUTPUT FILES__##
155 plotChoice=$outputFiles['plot']
156 countFileChoice=$outputFiles['countFile']
157 indexChoice=$outputFiles['index']
158
159 outputPdf=$outputPdf
160 outputCategoriesPng=$outputCategoriesPng
161 outputBiotypesPng=$outputBiotypesPng
162 outputCategoriesSvg=$outputCategoriesSvg
163 outputBiotypesSvg=$outputBiotypesSvg
164 outputCountFile=$outputCountFile
165 outputStrandedIndex=$outputStrandedIndex
166 outputUnstrandedIndex=$outputUnstrandedIndex
167
168 ##__OUTPUT OPTIONS__##
169 categoriesDepth=$outputOptions['categoriesDepth']
170 plotFormat=$outputOptions['plotFormat']
171 plotThresholdChoice=$outputOptions.plotThreshold['plotThresholdChoice']
172 #if str ( $outputOptions.plotThreshold['plotThresholdChoice'] ) == "True"
173 yMin=$outputOptions.plotThreshold.yMin
174 yMax=$outputOptions.plotThreshold.yMax
175 #else
176 yMin=None
177 yMax=None
178 #end if
179 </configfile>
180 </configfiles> <!-- Values that do not apply to the selected branch are written as the literal None; reads files and labels are written as indexed readsFile[i] / readsLabel[i] pairs -->
181
182 <tests>
183 <test> <!-- Toy dataset run: personal GTF annotation, one BAM reads file, unstranded, pdf plot, count file and both indexes requested -->
184 <param name="projectName" value="alfa_toy" />
185 <section name="annotation">
186 <conditional name="annotationSource">
187 <param name="annotationSourceSelection" value="personal_gtf" />
188 <param name="annotationFile" value="alfa_toy.gtf" ftype="gtf" />
189 </conditional>
190 </section>
191 <section name="reads">
192 <conditional name="readsType">
193 <param name="readsTypeSelection" value="bam" />
194 <repeat name="readsList">
195 <param name="readsFile" value="alfa_toy.bam" ftype="bam" />
196 <param name="readsLabel" value="alfa_toy" />
197 </repeat>
198 </conditional>
199 <param name="strandness" value="unstranded" /> <!-- strandness belongs to section reads, not to the readsType conditional, so it is set at section level as in inputs -->
200 </section>
201 <section name="outputFiles">
202 <param name="plot" value="True" />
203 <param name="countFile" value="True" />
204 <param name="index" value="True" />
205 </section>
206 <section name="outputOptions">
207 <param name="categoriesDepth" value="3" />
208 <param name="plotFormat" value="pdf" />
209 <conditional name="plotThreshold">
210 <param name="plotThresholdChoice" value="False" />
211 </conditional>
212 </section>
213 <output name="outputPdf" file="alfa_toy-Biofeatures Distribution.pdf" ftype="pdf" />
214 <output name="outputCountFile" file="alfa_toy.categories_count" ftype="txt" />
215 <output name="outputStrandedIndex" file="alfa_toy.stranded.index" ftype="txt" />
216 <output name="outputUnstrandedIndex" file="alfa_toy.unstranded.index" ftype="txt" />
217 <assert_stdout>
218 <has_text text="### End of the program" />
219 </assert_stdout>
220 </test>
221 </tests>
222
223 <help>
224 **ALFA acronym**
225
226 - Annotation.Landscape.For.Aligned reads
227
228 ----
229
230 **What it does**
231
232 | ALFA provides a global overview of the distribution of features in Next Generation Sequencing dataset(s).
233 |
234 | Given a set of aligned reads (BAM files) and an annotation file (GTF format), the tool produces plots of the raw and normalized distributions of those reads among genomic categories (stop codon, 5'-UTR, CDS, intergenic, etc.) and biotypes (protein coding genes, miRNA, tRNA, etc.). It works regardless of the sequencing technique or the organism.
235
236 ----
237
238 **Official documentation of the tool**
239
240
241 - https://github.com/biocompibens/ALFA
242
243 ----
244
245 **Detailed example**
246
247 - https://github.com/biocompibens/ALFA#detailed-example
248
249 ----
250
251 **Nota Bene**
252
253 1. **Input 1: Annotation File**
254
255
256 | ALFA requires as first input an annotation file (sequence, genome...) in gtf format in order to generate indexes that will be used in a 2nd step of the program.
257 | Indexes are files which list all the coordinates of all categories (stop codon, 5'-UTR, CDS, intergenic...) and biotypes (protein coding genes, miRNA, tRNA, ...) encountered in the annotated sequence.
258
259 .. class:: warningmark
260
261 Gtf File must be sorted.
262
263 .. class:: infomark
264
265 Generation of indexes from an annotation file may be time consuming (e.g. ~10 min for the human genome). Thus, ALFA allows the user to directly submit indexes generated in previous runs.
266
267 2. **Input 2: Reads**
268
269 | ALFA requires as second input a single or a set of mapped reads file(s) in either bam or bedgraph format. These files are required to intersect the coordinates of the mapped reads with the associated categories and biotypes on the annotated sequence.
270 | The strandness option determines which strand of the annotated sequence will be taken into account during this intersection.
271
272 .. class:: warningmark
273
274 Bam or Bedgraph file(s) must be sorted.
275
276 <!--
277 .. class:: warningmark
278
279 For oriented reads (either matching the forward strand or the reverse strand), 'forward' or 'reverse' strandness must be selected.
280
281 .. class:: warningmark
282
283 For paired-end or non-oriented reads, 'unstranded' strandness must be selected.
284 -->
285
286 3. **Output files**
287
288 | The result of the intersection is a count file displaying the count of nucleotides in the reads for each genomic categories and biotypes. From this count file, plots of the raw and normalized distributions of the reads among these categories are generated.
289 | In the output files section, the user can choose which files ALFA should produce. The Categories Count File and the Plots are selected by default.
290
291 .. class:: infomark
292
293 The user can also select the 'indexes' option as output. This option is interesting if you plan to run ALFA again with the same submitted annotation file. *See Nota Bene/Input 1: Annotation File for more information.*
294
295
296 - `What the plots look like`_
297
298 .. _What the plots look like: https://github.com/biocompibens/ALFA#plots
299
300 - `How they are generated`_
301
302 .. _How they are generated: https://github.com/biocompibens/ALFA#detailed-example
303
304 ----
305
306 **ALFA Developers**
307
308 | Benoît Noël and Mathieu Bahin: *compbio team, Institut de Biologie de l'Ecole Normale Supérieure de Paris*
309
310
311 </help>
312
313 <citations>
314 <citation type="bibtex">@MISC{alfa,
315 author = "Benoît Noël and Mathieu Bahin",
316 title = "ALFA: Annotation Landscape For Aligned reads",
317 url = "https://github.com/biocompibens/ALFA",
318 institution = "Institut de Biologie de l'Ecole Normale Supérieure de Paris"
319 }
320 </citation>
321 </citations> <!-- The BibTeX entry needs a citation key and comma-separated fields; the project URL is stored in url because crossref is reserved for the key of another entry -->
322 </tool>