comparison falco.xml @ 2:eee1a2f6abd8 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/falco commit e1af25e42839c772b023f700210013658e42dc15
author iuc
date Sun, 30 Jun 2024 11:28:30 +0000
parents da336e3ead38
children babbcf02d35c
comparison
equal deleted inserted replaced
1:da336e3ead38 2:eee1a2f6abd8
1 <tool id="falco" name="Falco" version="1.2.2+galaxy0" profile="21.05"> 1 <tool id="falco" name="Falco" version="1.2.2+galaxy1" profile="21.05">
2 <description>A high throughput sequence QC analysis tool</description> 2 <description>An alternative, more performant implementation of FastQC for high throughput sequence quality control</description>
3 <xrefs> 3 <xrefs>
4 <xref type="bio.tools">falco</xref> 4 <xref type="bio.tools">falco</xref>
5 </xrefs> 5 </xrefs>
6 <requirements> 6 <requirements>
7 <requirement type="package" version="1.2.2">falco</requirement> 7 <requirement type="package" version="1.2.2">falco</requirement>
8 </requirements> 8 </requirements>
9 <command detect_errors="aggressive"><![CDATA[ 9 <command detect_errors="aggressive"><![CDATA[
10 #import re 10 #import re
11 #set input_name = re.sub('[^\w\-\s]', '_', str($input_file.element_identifier)) 11 #set input_name_sl = re.sub('[^\w\-\s]', '_', str($input_file.element_identifier))
12
13 #if $input_file.ext.endswith('.gz'):
14 #set input_file_sl = $input_name + '.gz'
15 #elif $input_file.ext.endswith('.bz2'):
16 #set input_file_sl = $input_name + '.bz2'
17 #else
18 #set input_file_sl = $input_name
19 #end if
20 12
21 #if 'bam' in $input_file.ext: 13 #if 'bam' in $input_file.ext:
22 #set format = 'bam' 14 #set format = 'bam'
23 #elif 'sam' in $input_file.ext: 15 #elif 'sam' in $input_file.ext:
24 #set format = 'sam' 16 #set format = 'sam'
26 #set format = 'fastq.gz' 18 #set format = 'fastq.gz'
27 #else 19 #else
28 #set format = 'fastq' 20 #set format = 'fastq'
29 #end if 21 #end if
30 22
31 ln -s '${input_file}' '${input_file_sl}' && 23 ln -s '${input_file}' '${input_name_sl}' &&
32 falco 24 falco
33 #if $contaminants: 25 #if $contaminants:
34 --contaminants '${contaminants}' 26 --contaminants '${contaminants}'
35 #end if 27 #end if
36 28
41 #if $limits.dataset and str($limits) > '' 33 #if $limits.dataset and str($limits) > ''
42 --limits '${limits}' 34 --limits '${limits}'
43 #end if 35 #end if
44 --threads \${GALAXY_SLOTS:-2} 36 --threads \${GALAXY_SLOTS:-2}
45 --quiet 37 --quiet
46 --extract
47 ## #if $min_length: 38 ## #if $min_length:
48 ## --min_length $min_length 39 ## --min_length $min_length
49 ## #end if 40 ## #end if
50 $nogroup 41 $nogroup
51 ## --kmers $kmers 42 ## --kmers $kmers
52 -f '${format}' 43 -f '${format}'
53 '${input_file_sl}' 44 '${input_name_sl}'
54 -subsample $subsample 45 #if $subsample > 1:
46 -subsample $subsample
47 #end if
55 $bisulfite 48 $bisulfite
56 $reverse_complement 49 $reverse_complement
57 50 $generate_summary
58 ]]></command> 51 ]]></command>
59 <inputs> 52 <inputs>
60 <param format="fastq,fastq.gz,fastq.bz2,bam,sam" name="input_file" type="data" label="Raw read data from your current history"/> 53 <param format="fastq,fastq.gz,bam,sam" name="input_file" type="data" label="Raw read data from your current history"/>
61 <param name="contaminants" type="data" format="tabular" optional="true" label="Contaminant list" help="tab delimited file with 2 columns: name and sequence. For example: Illumina Small RNA RT Primer&#x9;CAAGCAGAAGACGGCATACGA"/> 54 <param name="contaminants" type="data" format="tabular" optional="true" label="Contaminant list" help="tab delimited file with 2 columns: name and sequence. For example: Illumina Small RNA RT Primer&#x9;CAAGCAGAAGACGGCATACGA"/>
62 <param argument="--adapters" type="data" format="tabular" optional="true" label="Adapter list" help="List of adapter sequences which will be explicitly searched against the library. It should be a tab-delimited file with 2 columns: name and sequence."/> 55 <param argument="--adapters" type="data" format="tabular" optional="true" label="Adapter list" help="List of adapter sequences which will be explicitly searched against the library. It should be a tab-delimited file with 2 columns: name and sequence."/>
63 <param name="limits" type="data" format="txt" optional="true" label="Submodule and Limit specifying file" help="a file that specifies which submodules are to be executed (default=all) and also specifies the thresholds for each submodule's warning parameter."/> 56 <param name="limits" type="data" format="txt" optional="true" label="Submodule and Limit specifying file" help="a file that specifies which submodules are to be executed (default=all) and also specifies the thresholds for each submodule's warning parameter."/>
64 <param argument="--nogroup" type="boolean" truevalue="--nogroup" falsevalue="" checked="False" label="Disable grouping of bases for reads &gt;50bp" help=" Using this option, your plots may end up a ridiculous size. You have been warned!"/> 57 <param argument="--nogroup" type="boolean" truevalue="--nogroup" falsevalue="" checked="False" label="Disable grouping of bases for reads &gt;50bp" help=" Using this option, your plots may end up a ridiculous size. You have been warned!"/>
65 <!-- Not implemented in falco yet <param argument="-min_length" type="integer" value="" optional="true" label="Lower limit on the length of the sequence to be shown in the report" help=" [NOT YET IMPLEMENTED IN FALCO]. Sets an artificial lower limit on the length of the sequence to be shown in the report. As long as you set this to a value greater or equal to your longest read length then this will be the sequence length used to create your read groups. This can be useful for making directly comparable statistics from datasets with somewhat variable read length."/> --> 58 <!-- Not implemented in falco yet <param argument="-min_length" type="integer" value="" optional="true" label="Lower limit on the length of the sequence to be shown in the report" help=" [NOT YET IMPLEMENTED IN FALCO]. Sets an artificial lower limit on the length of the sequence to be shown in the report. As long as you set this to a value greater or equal to your longest read length then this will be the sequence length used to create your read groups. This can be useful for making directly comparable statistics from datasets with somewhat variable read length."/> -->
66 <!-- Ignored by falco and always set to 7 <param argument="-kmers" type="integer" value="7" min="2" max="10" label="Length of Kmer to look for" help="IGNORED BY FALCO AND ALWAYS SET TO 7. Specifies the length of Kmer to look for in the Kmer content module. Specified Kmer length must be between 2 and 10. Default length is 7 if not specified." /> --> 59 <!-- Ignored by falco and always set to 7 <param argument="-kmers" type="integer" value="7" min="2" max="10" label="Length of Kmer to look for" help="IGNORED BY FALCO AND ALWAYS SET TO 7. Specifies the length of Kmer to look for in the Kmer content module. Specified Kmer length must be between 2 and 10. Default length is 7 if not specified." /> -->
67 <param argument="-subsample" type="integer" value="1" min="1" optional="true" label="Subsampling Factor" help="This makes falco faster (but possibly less accurate) by only processing reads that are multiple of this value (using 0-based indexing to number reads)"/> 60 <param argument="-subsample" type="integer" value="1" min="1" label="Subsampling Factor" help="This makes falco faster (but possibly less accurate) by only processing reads that are multiple of this value (using 0-based indexing to number reads)"/>
68 <param argument="-bisulfite" type="boolean" truevalue="-bisulfite" falsevalue="" checked="False" label="Bisulfite Sequencing" help="This parameter indicates whether the reads are from whole genome bisulfite sequencing. When enabled, Falco will account for the expected increase in Ts and decrease in Cs in the base content."/> 61 <param argument="-bisulfite" type="boolean" truevalue="-bisulfite" falsevalue="" checked="False" label="Bisulfite Sequencing" help="This parameter indicates whether the reads are from whole genome bisulfite sequencing. When enabled, Falco will account for the expected increase in Ts and decrease in Cs in the base content."/>
69 <param argument="reverse_complement" type="boolean" truevalue="-reverse-complement" falsevalue="" checked="False" label="Reverse Complement" help="This parameter specifies whether the input sequences are reverse-complemented. When enabled, all modules in Falco will be tested by swapping A/T and C/G."/> 62 <param argument="reverse_complement" type="boolean" truevalue="-reverse-complement" falsevalue="" checked="False" label="Reverse Complement" help="This parameter specifies whether the input sequences are reverse-complemented. When enabled, all modules in Falco will be tested by swapping A/T and C/G."/>
63 <param name="generate_summary" type="boolean" truevalue="" falsevalue="-skip-summary" checked="False" label="Generate summary output of QC test results" />
70 </inputs> 64 </inputs>
71 <outputs> 65 <outputs>
72 <data format="html" name="html_file" from_work_dir="fastqc_report.html" label="${tool.name} on ${on_string}: Webpage"/> 66 <data format="html" name="html_file" from_work_dir="fastqc_report.html" label="${tool.name} on ${on_string}: Webpage"/>
73 <data format="txt" name="text_file" from_work_dir="fastqc_data.txt" label="${tool.name} on ${on_string}: RawData"/> 67 <data format="txt" name="text_file" from_work_dir="fastqc_data.txt" label="${tool.name} on ${on_string}: RawData"/>
74 <data format="txt" name="summary_file" from_work_dir="summary.txt" label="${tool.name} on ${on_string}: SummaryData"/> 68 <data format="txt" name="summary_file" from_work_dir="summary.txt" label="${tool.name} on ${on_string}: SummaryData">
69 <filter>generate_summary</filter>
70 </data>
75 </outputs> 71 </outputs>
76 <tests> 72 <tests>
77 <test> 73 <test expect_num_outputs="2">
78 <param name="input_file" value="1000trimmed.fastq"/> 74 <param name="input_file" value="1000trimmed.fastq"/>
79 <output name="html_file" file="fastqc_report.html" ftype="html" lines_diff="2"/> 75 <output name="html_file" file="fastqc_report.html" ftype="html" lines_diff="2"/>
80 <output name="text_file" file="fastqc_data.txt" ftype="txt"/> 76 <output name="text_file" file="fastqc_data.txt" ftype="txt"/>
81 <output name="summary_file" file="summary.txt" ftype="txt"/> 77 </test>
82 </test> 78 <test expect_num_outputs="2">
83 <test>
84 <param name="input_file" value="1000trimmed.fastq"/> 79 <param name="input_file" value="1000trimmed.fastq"/>
85 <param name="contaminants" value="contaminant_list.txt" ftype="tabular"/> 80 <param name="contaminants" value="contaminant_list.txt" ftype="tabular"/>
86 <output name="html_file" file="fastqc_report_contaminants.html" ftype="html" lines_diff="2"/> 81 <output name="html_file" file="fastqc_report_contaminants.html" ftype="html" lines_diff="2"/>
87 <output name="text_file" file="fastqc_data_contaminants.txt" ftype="txt"/> 82 <output name="text_file" file="fastqc_data_contaminants.txt" ftype="txt"/>
88 <output name="summary_file" file="fastqc_data_contaminant_summary.txt" ftype="txt"/> 83 </test>
89 </test> 84 <test expect_num_outputs="2">
90 <test>
91 <param name="input_file" value="1000trimmed.fastq"/> 85 <param name="input_file" value="1000trimmed.fastq"/>
92 <param name="adapters" value="adapter_list.txt" ftype="tabular"/> 86 <param name="adapters" value="adapter_list.txt" ftype="tabular"/>
93 <output name="html_file" file="fastqc_report_adapters.html" ftype="html" lines_diff="2"/> 87 <output name="html_file" file="fastqc_report_adapters.html" ftype="html" lines_diff="2"/>
94 <output name="text_file" file="fastqc_data_adapters.txt" ftype="txt"/> 88 <output name="text_file" file="fastqc_data_adapters.txt" ftype="txt"/>
95 <output name="summary_file" file="fastqc_data_adapters_summary.txt" ftype="txt"/> 89 </test>
96 </test> 90 <test expect_num_outputs="2">
97 <test>
98 <param name="input_file" value="1000trimmed.fastq"/> 91 <param name="input_file" value="1000trimmed.fastq"/>
99 <param name="limits" value="limits.txt" ftype="txt"/> 92 <param name="limits" value="limits.txt" ftype="txt"/>
100 <output name="html_file" file="fastqc_report_customlimits.html" ftype="html" lines_diff="2"/> 93 <output name="html_file" file="fastqc_report_customlimits.html" ftype="html" lines_diff="2"/>
101 <output name="text_file" file="fastqc_data_customlimits.txt" ftype="txt"/> 94 <output name="text_file" file="fastqc_data_customlimits.txt" ftype="txt"/>
102 <output name="summary_file" file="fastqc_data_customlimits_summary.txt" ftype="txt"/> 95 </test>
103 </test> 96
104 <!-- ## This feature has not yet been implemented in Falco, but if it is, it may go uncommented in the future. 97 <!-- ## The kmers param is ignored in Falco and always set to 7. If this ever gets reconsidered, this test could be uncommented.
105 <test> 98 <test expect_num_outputs="2">
106 <param name="input_file" value="1000trimmed.fastq" ftype="fastq"/> 99 <param name="input_file" value="1000trimmed.fastq" ftype="fastq"/>
107 <param name="kmers" value="7"/> 100 <param name="kmers" value="7"/>
108 <param name="limits" value="limits.txt" ftype="txt"/> 101 <param name="limits" value="limits.txt" ftype="txt"/>
109 <output name="html_file" file="fastqc_report_kmer.html" ftype="html" lines_diff="2"/> 102 <output name="html_file" file="fastqc_report_kmer.html" ftype="html" lines_diff="2"/>
110 <output name="text_file" file="fastqc_data_kmer.txt" ftype="txt"/> 103 <output name="text_file" file="fastqc_data_kmer.txt" ftype="txt"/>
111 <output name="summary_file" file="fastqc_data_kmer_summary.txt" ftype="txt"/>
112 <assert_command> 104 <assert_command>
113 <has_text text="kmers 7"/> 105 <has_text text="kmers 7"/>
114 </assert_command> 106 </assert_command>
115 </test> 107 </test>
116 108 ## The min_length param is not yet implemented in Falco.
117 <test> ##This feature is ignored in Falco and always set to 7. If this will be considered, may go uncommented in the future" 109 Once it is, this test may be uncommented.
110 <test expect_num_outputs="2">
118 <param name="input_file" value="1000trimmed.fastq"/> 111 <param name="input_file" value="1000trimmed.fastq"/>
119 <param name="min_length" value="108"/> 112 <param name="min_length" value="108"/>
120 <output name="html_file" file="fastqc_report_min_length.html" ftype="html" lines_diff="2"/> 113 <output name="html_file" file="fastqc_report_min_length.html" ftype="html" lines_diff="2"/>
121 <output name="text_file" file="fastqc_data_min_length.txt" ftype="txt"/> 114 <output name="text_file" file="fastqc_data_min_length.txt" ftype="txt"/>
122 <output name="summary_file" file="fastqc_data_min_length_summary.txt" ftype="txt"/>
123 </test> --> 115 </test> -->
124 116
125 <test> 117 <test expect_num_outputs="3">
126 <param name="input_file" value="1000trimmed.fastq" ftype="fastq"/> 118 <param name="input_file" value="1000trimmed.fastq" ftype="fastq"/>
127 <param name="nogroup" value="--nogroup"/> 119 <param name="nogroup" value="--nogroup"/>
120 <param name="generate_summary" value="true"/>
128 <output name="html_file" file="fastqc_report_nogroup.html" ftype="html" lines_diff="2"/> 121 <output name="html_file" file="fastqc_report_nogroup.html" ftype="html" lines_diff="2"/>
129 <output name="text_file" file="fastqc_data_nogroup.txt" ftype="txt"/> 122 <output name="text_file" file="fastqc_data_nogroup.txt" ftype="txt"/>
130 <output name="summary_file" file="fastqc_data_nogroup_summary.txt" ftype="txt"/> 123 <output name="summary_file" file="fastqc_data_nogroup_summary.txt" ftype="txt"/>
131 <assert_command> 124 <assert_command>
132 <has_text text="--nogroup"/> 125 <has_text text="--nogroup"/>
133 </assert_command> 126 </assert_command>
134 </test> 127 </test>
135 <test> 128 <test expect_num_outputs="3">
136 <param name="input_file" value="1000trimmed.fastq"/> 129 <param name="input_file" value="1000trimmed.fastq"/>
137 <param name="subsample" value="10"/> 130 <param name="subsample" value="10"/>
131 <param name="generate_summary" value="true"/>
138 <output name="html_file" file="fastqc_report_subsample.html" ftype="html" lines_diff="2"/> 132 <output name="html_file" file="fastqc_report_subsample.html" ftype="html" lines_diff="2"/>
139 <output name="text_file" file="fastqc_report_subsample.txt" ftype="txt"/> 133 <output name="text_file" file="fastqc_report_subsample.txt" ftype="txt"/>
140 <output name="summary_file" file="fastqc_report_subsample_summary.txt" ftype="txt"/> 134 <output name="summary_file" file="fastqc_report_subsample_summary.txt" ftype="txt"/>
141 </test> 135 </test>
142 <test> 136 <test expect_num_outputs="3">
143 <param name="input_file" value="1000trimmed.fastq"/> 137 <param name="input_file" value="1000trimmed.fastq"/>
144 <param name="bisulfite" value="-bisulfite"/> 138 <param name="bisulfite" value="-bisulfite"/>
139 <param name="generate_summary" value="true"/>
145 <output name="html_file" file="fastqc_report_bisulfite.html" ftype="html" lines_diff="2"/> 140 <output name="html_file" file="fastqc_report_bisulfite.html" ftype="html" lines_diff="2"/>
146 <output name="text_file" file="fastqc_report_bisulfite.txt" ftype="txt"/> 141 <output name="text_file" file="fastqc_report_bisulfite.txt" ftype="txt"/>
147 <output name="summary_file" file="fastqc_report_bisulfite_summary.txt" ftype="txt"/> 142 <output name="summary_file" file="fastqc_report_bisulfite_summary.txt" ftype="txt"/>
148 </test> 143 </test>
149 <test> 144 <test expect_num_outputs="3">
150 <param name="input_file" value="1000trimmed.fastq"/> 145 <param name="input_file" value="1000trimmed.fastq"/>
151 <param name="reverse_complement" value="-reverse-complement"/> 146 <param name="reverse_complement" value="-reverse-complement"/>
147 <param name="generate_summary" value="true"/>
152 <output name="html_file" file="fastqc_report_reverse_complement.html" ftype="html" lines_diff="2"/> 148 <output name="html_file" file="fastqc_report_reverse_complement.html" ftype="html" lines_diff="2"/>
153 <output name="text_file" file="fastqc_report_reverse_complement.txt" ftype="txt"/> 149 <output name="text_file" file="fastqc_report_reverse_complement.txt" ftype="txt"/>
154 <output name="summary_file" file="fastqc_report_reverse_complement_summary.txt" ftype="txt"/> 150 <output name="summary_file" file="fastqc_report_reverse_complement_summary.txt" ftype="txt"/>
155 </test> 151 </test>
156 </tests> 152 </tests>
157 <help><![CDATA[ 153 <help><![CDATA[
158 .. class:: infomark 154 **What it does**
159 155
160 **Purpose** 156 Falco_ is a high-speed emulation of the popular FastQC software for quality control of sequencing data.
161 157
162 Falco is an emulation of the popular FastQC software to check large sequencing reads for common problems. 158 💚️ With its superior performance Falco saves computational resources and gives you back results faster than FastQC.
163 159
164 The main functions of Falco are: 160 We recommend it for most use cases (but see below for exceptions). 💚️
161
162 The main functions of Falco are very similar to those of FastQC:
165 163
166 - Import of data from BAM, SAM or FastQ/FastQ.gz files (any variant), 164 - Import of data from BAM, SAM or FastQ/FastQ.gz files (any variant),
167 - Providing a quick overview to tell you in which areas there may be problems 165 - Providing a quick overview to tell you in which areas there may be problems
168 - Summary graphs and tables to quickly assess your data 166 - Summary graphs and tables to quickly assess your data
169 - Export of results to an HTML based permanent report 167 - Export of results to an HTML based permanent report
170 - Offline operation to allow automated generation of reports without running the interactive application 168 - Offline operation to allow automated generation of reports without running the interactive application
171 169
170 .. class:: infomark
171
172 The plain text report generated by Falco can be used as a "FastQC" report in MultiQC and its data is very similar though not 100% identical to that generated by FastQC on the same inputs.
173
174 .. class:: warningmark
175
176 In the following situations, FastQC is still a better solution than this version of Falco:
177
178 - your input is bz2-compressed fastq
179
180 Falco doesn't currently support fastq.bz2 as an input format, so Galaxy has to perform a relatively slow format conversion before running the tool; together this makes the analysis slower than with FastQC.
181
182 - you are interested in PolyA and PolyG statistics in the Adapter Content section of the quality report
183
184 Falco doesn't currently calculate statistics for these "Adapters" by default.
185
186 - your input consists of *mapped* reads in SAM/BAM format
187
188 Due to a bug in the current version of Falco, reads mapped to the reverse strand of the reference genome are not handled correctly and reported metrics are wrong!
189
172 ----- 190 -----
173 191
174 .. class:: infomark
175
176 **Inputs and outputs** 192 **Inputs and outputs**
177 193
178 Falco_ is the best place to look for documentation - it's very good. 194 The Falco_ development repo includes very good documentation.
179 A summary follows below for those in a tearing hurry. 195 A summary of it follows below for those in a tearing hurry.
180 196
181 This wrapper will accept a Galaxy fastq, fastq.gz, sam or bam as the input read file to check. 197 This wrapper will accept a Galaxy fastq, fastq.gz, sam or bam as the input read file to check.
182 It will also take an optional file containing a list of contaminants information, in the form of 198 It will also take an optional file containing a list of contaminants information, in the form of
183 a tab-delimited file with 2 columns, name and sequence. As another option the tool takes a custom 199 a tab-delimited file with 2 columns, name and sequence. As another option the tool takes a custom
184 limits.txt file that allows setting the warning thresholds for the different modules and also specifies 200 limits.txt file that allows setting the warning thresholds for the different modules and also specifies
198 - Overrepresented sequences 214 - Overrepresented sequences
199 - Adapter Content 215 - Adapter Content
200 216
201 All except Basic Statistics and Overrepresented sequences are plots. 217 All except Basic Statistics and Overrepresented sequences are plots.
202 .. _Falco: https://github.com/smithlabcode/falco/ 218 .. _Falco: https://github.com/smithlabcode/falco/
203 .. _Picard-tools: https://broadinstitute.github.io/picard/
204 ]]></help> 219 ]]></help>
205 <citations> 220 <citations>
206 <citation type="bibtex"> 221 <citation type="doi">10.12688/f1000research.21142.2</citation>
207 @article{deSenaBrandine2021,
208 author = {de Sena Brandine, Gabriel and Smith, Andrew D.},
209 title = {Falco: high-speed FastQC emulation for quality control of sequencing data},
210 journal = {F1000Research},
211 year = {2021},
212 volume = {8},
213 pages = {1874},
214 url = {https://doi.org/10.12688/f1000research.21142.2},
215 doi = {10.12688/f1000research.21142.2},
216 note = {Version 2; peer review: 2 approved},
217 }
218
219 </citation>
220 </citations> 222 </citations>
221 </tool> 223 </tool>