comparison falco.xml @ 0:e462044ece67 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/falco commit 8593dacde03b50726e9ca12fa15e4a104531708c
author iuc
date Tue, 04 Jun 2024 14:14:49 +0000
parents
children da336e3ead38
comparison
equal deleted inserted replaced
-1:000000000000 0:e462044ece67
1 <tool id="falco" name="Falco" version="1.2.2+galaxy0" profile="21.05">
2 <description>A high throughput sequence QC analysis tool</description>
3 <xrefs>
4 <xref type="bio.tools">falco</xref>
5 </xrefs>
6 <requirements> <!-- the package version below matches the 1.2.2 prefix of the tool version attribute -->
7 <requirement type="package" version="1.2.2">falco</requirement>
8 </requirements>
9 <command detect_errors="aggressive"><![CDATA[
10 #import re
11 ## Link name: every character of the element identifier other than word characters, hyphens and whitespace becomes '_'; a .gz/.bz2 suffix is re-appended from the datatype.
12 #set input_name = re.sub('[^\w\-\s]', '_', str($input_file.element_identifier))
13 #if $input_file.ext.endswith('.gz'):
14 #set input_file_sl = $input_name + '.gz'
15 #elif $input_file.ext.endswith('.bz2'):
16 #set input_file_sl = $input_name + '.bz2'
17 #else
18 #set input_file_sl = $input_name
19 #end if
20 ## Value passed to -f, chosen from the Galaxy datatype of the input (fastq unless the extension mentions bam or sam).
21 #if 'bam' in $input_file.ext:
22 #set format = 'bam'
23 #elif 'sam' in $input_file.ext:
24 #set format = 'sam'
25 #else
26 #set format = 'fastq'
27 #end if
28
29 ln -s '${input_file}' '${input_file_sl}' &&
30 falco
31 #if $contaminants:
32 --contaminants '${contaminants}'
33 #end if
34 #if $adapters.dataset and str($adapters) > ''
35 --adapters '${adapters}'
36 #end if
37 #if $limits.dataset and str($limits) > ''
38 --limits '${limits}'
39 #end if
40 --threads \${GALAXY_SLOTS:-2}
41 --quiet
42 --extract
43 ## #if $min_length:
44 ## --min_length $min_length
45 ## #end if
46 $nogroup
47 ## --kmers $kmers
48 -f '${format}'
49 '${input_file_sl}'
50 ## subsample is an optional parameter: only emit the flag when a value was supplied, otherwise the flag would be left without an argument.
51 #if str($subsample) != '':
52 -subsample $subsample
53 #end if
54 $bisulfite
55 $reverse_complement
56 ]]></command>
57 <inputs> <!-- Parameters declared with "argument" take their name from it (leading dashes dropped, inner dashes turned into underscores); the others set "name" explicitly -->
58 <param format="fastq,fastq.gz,fastq.bz2,bam,sam" name="input_file" type="data" label="Raw read data from your current history"/>
59 <param name="contaminants" type="data" format="tabular" optional="true" label="Contaminant list" help="tab delimited file with 2 columns: name and sequence. For example: Illumina Small RNA RT Primer&#x9;CAAGCAGAAGACGGCATACGA"/>
60 <param argument="--adapters" type="data" format="tabular" optional="true" label="Adapter list" help="List of adapter sequences which will be explicitly searched against the library. It should be a tab-delimited file with 2 columns: name and sequence."/>
61 <param name="limits" type="data" format="txt" optional="true" label="Submodule and Limit specifying file" help="a file that specifies which submodules are to be executed (default=all) and also specifies the thresholds for each submodule's warning parameter."/>
62 <param argument="--nogroup" type="boolean" truevalue="--nogroup" falsevalue="" checked="False" label="Disable grouping of bases for reads &gt;50bp" help=" Using this option, your plots may end up a ridiculous size. You have been warned!"/>
63 <!-- Not implemented in falco yet <param argument="-min_length" type="integer" value="" optional="true" label="Lower limit on the length of the sequence to be shown in the report" help=" [NOT YET IMPLEMENTED IN FALCO]. Sets an artificial lower limit on the length of the sequence to be shown in the report. As long as you set this to a value greater or equal to your longest read length then this will be the sequence length used to create your read groups. This can be useful for making directly comparable statistics from datasets with somewhat variable read length."/> -->
64 <!-- Ignored by falco and always set to 7 <param argument="-kmers" type="integer" value="7" min="2" max="10" label="Length of Kmer to look for" help="IGNORED BY FALCO AND ALWAYS SET TO 7. Specifies the length of Kmer to look for in the Kmer content module. Specified Kmer length must be between 2 and 10. Default length is 7 if not specified." /> -->
65 <param argument="-subsample" type="integer" value="1" min="1" optional="true" label="Subsampling Factor" help="This makes falco faster (but possibly less accurate) by only processing reads that are multiple of this value (using 0-based indexing to number reads)"/>
66 <param argument="-bisulfite" type="boolean" truevalue="-bisulfite" falsevalue="" checked="False" label="Bisulfite Sequencing" help="This parameter indicates whether the reads are from whole genome bisulfite sequencing. When enabled, Falco will account for the expected increase in Ts and decrease in Cs in the base content."/>
67 <param argument="-reverse-complement" type="boolean" truevalue="-reverse-complement" falsevalue="" checked="False" label="Reverse Complement" help="This parameter specifies whether the input sequences are reverse-complemented. When enabled, all modules in Falco will be tested by swapping A/T and C/G."/>
68 </inputs>
69 <outputs> <!-- three report files, each picked up from the job working directory by file name -->
70 <data name="html_file" format="html" label="${tool.name} on ${on_string}: Webpage" from_work_dir="fastqc_report.html"/>
71 <data name="text_file" format="txt" label="${tool.name} on ${on_string}: RawData" from_work_dir="fastqc_data.txt"/>
72 <data name="summary_file" format="txt" label="${tool.name} on ${on_string}: SummaryData" from_work_dir="summary.txt"/>
73 </outputs>
74 <tests> <!-- boolean parameters are set with true/false below rather than with their truevalue strings -->
75 <test>
76 <param name="input_file" value="1000trimmed.fastq"/>
77 <output name="html_file" file="fastqc_report.html" ftype="html" lines_diff="2"/>
78 <output name="text_file" file="fastqc_data.txt" ftype="txt"/>
79 <output name="summary_file" file="summary.txt" ftype="txt"/>
80 </test>
81 <test>
82 <param name="input_file" value="1000trimmed.fastq"/>
83 <param name="contaminants" value="contaminant_list.txt" ftype="tabular"/>
84 <output name="html_file" file="fastqc_report_contaminants.html" ftype="html" lines_diff="2"/>
85 <output name="text_file" file="fastqc_data_contaminants.txt" ftype="txt"/>
86 <output name="summary_file" file="fastqc_data_contaminant_summary.txt" ftype="txt"/>
87 </test>
88 <test>
89 <param name="input_file" value="1000trimmed.fastq"/>
90 <param name="adapters" value="adapter_list.txt" ftype="tabular"/>
91 <output name="html_file" file="fastqc_report_adapters.html" ftype="html" lines_diff="2"/>
92 <output name="text_file" file="fastqc_data_adapters.txt" ftype="txt"/>
93 <output name="summary_file" file="fastqc_data_adapters_summary.txt" ftype="txt"/>
94 </test>
95 <test>
96 <param name="input_file" value="1000trimmed.fastq"/>
97 <param name="limits" value="limits.txt" ftype="txt"/>
98 <output name="html_file" file="fastqc_report_customlimits.html" ftype="html" lines_diff="2"/>
99 <output name="text_file" file="fastqc_data_customlimits.txt" ftype="txt"/>
100 <output name="summary_file" file="fastqc_data_customlimits_summary.txt" ftype="txt"/>
101 </test>
102 <!-- The kmers option is currently ignored by Falco and always set to 7; the test below may be uncommented if Falco starts honouring it.
103 <test>
104 <param name="input_file" value="1000trimmed.fastq" ftype="fastq"/>
105 <param name="kmers" value="7"/>
106 <param name="limits" value="limits.txt" ftype="txt"/>
107 <output name="html_file" file="fastqc_report_kmer.html" ftype="html" lines_diff="2"/>
108 <output name="text_file" file="fastqc_data_kmer.txt" ftype="txt"/>
109 <output name="summary_file" file="fastqc_data_kmer_summary.txt" ftype="txt"/>
110 <assert_command>
111 <has_text text="kmers 7"/>
112 </assert_command>
113 </test>
114 The min_length option has not been implemented in Falco yet; the test below may be uncommented once it is.
115 <test>
116 <param name="input_file" value="1000trimmed.fastq"/>
117 <param name="min_length" value="108"/>
118 <output name="html_file" file="fastqc_report_min_length.html" ftype="html" lines_diff="2"/>
119 <output name="text_file" file="fastqc_data_min_length.txt" ftype="txt"/>
120 <output name="summary_file" file="fastqc_data_min_length_summary.txt" ftype="txt"/>
121 </test> -->
122
123 <test>
124 <param name="input_file" value="1000trimmed.fastq" ftype="fastq"/>
125 <param name="nogroup" value="true"/>
126 <output name="html_file" file="fastqc_report_nogroup.html" ftype="html" lines_diff="2"/>
127 <output name="text_file" file="fastqc_data_nogroup.txt" ftype="txt"/>
128 <output name="summary_file" file="fastqc_data_nogroup_summary.txt" ftype="txt"/>
129 <assert_command>
130 <has_text text="--nogroup"/>
131 </assert_command>
132 </test>
133 <test>
134 <param name="input_file" value="1000trimmed.fastq"/>
135 <param name="subsample" value="10"/>
136 <output name="html_file" file="fastqc_report_subsample.html" ftype="html" lines_diff="2"/>
137 <output name="text_file" file="fastqc_report_subsample.txt" ftype="txt"/>
138 <output name="summary_file" file="fastqc_report_subsample_summary.txt" ftype="txt"/>
139 </test>
140 <test>
141 <param name="input_file" value="1000trimmed.fastq"/>
142 <param name="bisulfite" value="true"/>
143 <output name="html_file" file="fastqc_report_bisulfite.html" ftype="html" lines_diff="2"/>
144 <output name="text_file" file="fastqc_report_bisulfite.txt" ftype="txt"/>
145 <output name="summary_file" file="fastqc_report_bisulfite_summary.txt" ftype="txt"/>
146 </test>
147 <test>
148 <param name="input_file" value="1000trimmed.fastq"/>
149 <param name="reverse_complement" value="true"/>
150 <output name="html_file" file="fastqc_report_reverse_complement.html" ftype="html" lines_diff="2"/>
151 <output name="text_file" file="fastqc_report_reverse_complement.txt" ftype="txt"/>
152 <output name="summary_file" file="fastqc_report_reverse_complement_summary.txt" ftype="txt"/>
153 </test>
154 </tests>
155 <help><![CDATA[
156 .. class:: infomark
157
158 **Purpose**
159
160 Falco is an emulation of the popular FastQC software to check large sequencing reads for common problems.
161
162 The main functions of Falco are:
163
164 - Import of data from BAM, SAM or FastQ/FastQ.gz files (any variant),
165 - Providing a quick overview to tell you in which areas there may be problems
166 - Summary graphs and tables to quickly assess your data
167 - Export of results to an HTML based permanent report
168 - Offline operation to allow automated generation of reports without running the interactive application
169
170 -----
171
172 .. class:: infomark
173
174 **Inputs and outputs**
175
176 Falco_ is the best place to look for documentation - it's very good.
177 A summary follows below for those in a tearing hurry.
178
179 This wrapper will accept a Galaxy fastq, fastq.gz, sam or bam as the input read file to check.
180 It will also take an optional file containing a list of contaminants information, in the form of
181 a tab-delimited file with 2 columns, name and sequence. As another option the tool takes a custom
182 limits.txt file that allows setting the warning thresholds for the different modules and also specifies
183 which modules to include in the output.
184
185 The tool produces a basic text and a HTML output file that contain all of the results, including the following:
186
187 - Basic Statistics
188 - Per base sequence quality
189 - Per sequence quality scores
190 - Per base sequence content
191 - Per base GC content
192 - Per sequence GC content
193 - Per base N content
194 - Sequence Length Distribution
195 - Sequence Duplication Levels
196 - Overrepresented sequences
197 - Adapter Content
198
199 All except Basic Statistics and Overrepresented sequences are plots.

200 .. _Falco: https://github.com/smithlabcode/falco/
201 .. _Picard-tools: https://broadinstitute.github.io/picard/
202 ]]></help>
203 <citations> <!-- BibTeX record for the Falco article in F1000Research -->
204 <citation type="bibtex">
205 @article{deSenaBrandine2021,
206 author = {de Sena Brandine, Gabriel and Smith, Andrew D.},
207 title = {Falco: high-speed FastQC emulation for quality control of sequencing data},
208 journal = {F1000Research},
209 year = {2021},
210 volume = {8},
211 pages = {1874},
212 url = {https://doi.org/10.12688/f1000research.21142.2},
213 doi = {10.12688/f1000research.21142.2},
214 note = {Version 2; peer review: 2 approved},
215 }
216
217 </citation>
218 </citations>
219 </tool>