comparison PanExplorer.xml @ 14:5a5c9a6b047b draft

Uploaded
author dereeper
date Tue, 10 Dec 2024 16:20:53 +0000
parents 152d7c43478b
children dbde253606c5
comparison
equal deleted inserted replaced
13:152d7c43478b 14:5a5c9a6b047b
1 <tool id="PanExplorer" name="PanExplorer" version="1.0"> 1 <tool id="PanExplorer2" name="PanExplorer2" version="2.0">
2 <description> Bacterial pan-genome analysis </description> 2 <description> Bacterial pan-genome analysis </description>
3 <requirements> 3 <requirements>
4 <!-- 4 <!--
5 <requirement type="package" version="2.2.26">blast-legacy</requirement> 5 <requirement type="package" version="2.2.26">blast-legacy</requirement>
6 <requirement type="package" version="7.480">mafft</requirement> 6 <requirement type="package" version="7.480">mafft</requirement>
15 15
16 <command><![CDATA[ 16 <command><![CDATA[
17 17
18 export PANEX_PATH=${__tool_directory__}; 18 export PANEX_PATH=${__tool_directory__};
19 19
20 perl ${__tool_directory__}/Perl/generateConfig.pl '$private_genomes' '$input' config.yaml '$private_genomes_fasta'; 20
21 #if str($mode.mode) == "accessions":
22 perl ${__tool_directory__}/Perl/generateConfig.pl 'None' '$input' config.yaml 'None';
23 #else if str($mode.mode) == "genbanks":
24 perl ${__tool_directory__}/Perl/generateConfig.pl '$private_genomes' 'None' config.yaml 'None';
25 #else if str($mode.mode) == "fasta":
26 perl ${__tool_directory__}/Perl/generateConfig.pl '$private_genomes' 'None' config.yaml '$private_genomes_fasta';
27 #end if
21 28
22 cat config.yaml >$logfile; 29 cat config.yaml >$logfile;
23 30
24 31
25 32
72 #end if 79 #end if
73 ]]></command> 80 ]]></command>
74 81
75 82
76 <inputs> 83 <inputs>
77 <param name="input" type="text" multiple="true" label="List of genbank identifiers" help="Coma separated list (ex: CP000235.1,CP001079.1,CP001759.1,CP015994.2)"/> 84 <conditional name="mode">
78 <param name="private_genomes" type="data" format="zip" label="Zip of genbank or GFF files" optional="true"/> 85 <param name = "mode" type="select" label="What is your inputs?">
79 <param name="private_genomes_fasta" type="data" format="zip" label="Zip of Fasta files" optional="true"/> 86 <option value="accessions">Prokaryote genomes: List of Genbank assembly accessions (GCA)</option>
87 <option value="genbanks">Prokaryote genomes: Genbank files</option>
88 <option value="fasta">Eukaryote genomes: FASTA + GFF files</option>
89 </param>
90 <when value="accessions">
91 <param name="input" type="text" multiple="true" label="List of genbank identifiers" help="Coma separated list (ex: GCA_000007385.1,GCA_000010025.1,GCA_000019585.2)"/>
92 <param type="select" name="software" label="Choose the pan-genome software">
93 <option value="roary">Roary</option>
94 <option value="panacota">PanACoTA</option>
95 <option value="pggb">PanGenome Graph Builder (PGGB)</option>
96 </param>
97 </when>
98 <when value="genbanks">
99 <param name="private_genomes" type="data" format="zip" label="Zip of genbank files" optional="true"/>
100 <param type="select" name="software" label="Choose the pan-genome software">
101 <option value="roary">Roary</option>
102 <option value="panacota">PanACoTA</option>
103 <option value="pggb">PanGenome Graph Builder (PGGB)</option>
104 </param>
105 </when>
106 <when value="fasta">
107 <param name="private_genomes_fasta" type="data" format="zip" label="Zip of Fasta files" optional="true"/>
108 <param name="private_genomes" type="data" format="zip" label="Zip of GFF files" optional="true"/>
109 <param type="select" name="software" label="Choose the pan-genome software">
110 <option value="orthofinder">OrthoFinder</option>
111 <option value="cactus">Minigraph-Cactus</option>
112 <option value="pggb">PanGenome Graph Builder (PGGB)</option>
113 </param>
114 </when>
115 </conditional>
116
80 <param name="min_identity" type="text" value="80" label="Minimum percentage identity for BlastP" /> 117 <param name="min_identity" type="text" value="80" label="Minimum percentage identity for BlastP" />
81 <param type="select" name="software" label="Choose the pan-genome software">
82 <option value="pgap">PGAP</option>
83 <option value="roary">Roary</option>
84 <option value="panacota">PanACoTA</option>
85 <option value="orthofinder">OrthoFinder</option>
86 <option value="cactus">Minigraph-Cactus</option>
87 <option value="pggb">PanGenome Graph Builder (PGGB)</option>
88 </param>
89 </inputs> 118 </inputs>
90 119
91 <outputs> 120 <outputs>
92 <data format="txt" name="output" label="Pangenome presence absence matrix"/> 121 <data format="txt" name="output" label="Pangenome presence absence matrix"/>
93 <data format="newick" name="njtree" label="PanBased NJ tree"/> 122 <data format="newick" name="njtree" label="PanBased NJ tree"/>
108 <data format="txt" name="distance_matrix" label="Accessory based distance matrix"/> 137 <data format="txt" name="distance_matrix" label="Accessory based distance matrix"/>
109 <data format="vcf" name="vcf" label="VCF file"/> 138 <data format="vcf" name="vcf" label="VCF file"/>
110 <data format="txt" name="logfile" label="Logfile"/> 139 <data format="txt" name="logfile" label="Logfile"/>
111 <data format="txt" name="roary_log" label="Roary Logfile"/> 140 <data format="txt" name="roary_log" label="Roary Logfile"/>
112 </outputs> 141 </outputs>
142 <tests>
143 <test>
144 <param name="input" value="GCA_000007385.1,GCA_000010025.1,GCA_000019585.2"/>
145 <param name="min_identity" value="80"/>
146 <param name="software" value="panacota"/>
147 <param name="private_genomes" value=""/>
148 <param name="private_genomes_fasta" value=""/>
149 <output name="distance_matrix" value="Accessory_based_distance_matrix.txt"/>
150 <output name="fastani" value="ANI.txt"/>
151 </test>
152 </tests>
153 <help>
154
155 PanExplorer
156 ===========
157
158 The PanExplorer workflow is a Snakemake workflow that can be run in the backend of the PanExplorer web application.
159
160 Homepage: https://panexplorer.southgreen.fr/
161
162 It performs a pan-genome analysis of published, annotated bacterial genomes using one of several tools: Roary, PGAP, PanACoTA.
163
164 Pangenome graph builders have recently been added to the pipeline: Minigraph-Cactus and PGGB (PanGenome Graph Builder).
165
166 It provides a presence/absence matrix of genes, an UpSetR diagram summarizing the matrix information, and a COG assignment summary for each strain.
167
168 Please visit the GitHub page for the PanExplorer workflow at: https://github.com/SouthGreenPlatform/PanExplorer_workflow
169
170
171 Inputs
172 ------
173
174 Inputs can be provided as one of the following:
175
176 * **List of GenBank assembly identifiers**: comma-separated (e.g. GCA_000007385.1,GCA_000010025.1,GCA_000019585.2)
177 * **Zip of GenBank files**: They must include the gene annotation and the complete sequence data
178 * **Zip of FASTA file of genomes + Zip of GFF annotation files**: In order to make the association between sequence and annotation, they must be named with the same basename as follows: genome1.fasta, genome1.gff, myspeciesXXX.fasta, myspeciesXXX.gff...
179
180
181
182 Outputs
183 -------
184
185 Among the outputs:
186
187 * **Pangenome presence absence matrix**: Pangene presence/absence matrix indicating the PAV (Presence Absence Variation) of clustered genes.
188 * **PanBased NJ tree**: Distance tree based on PAV data.
189 * **Heaps law alpha**: Estimates whether a pan-genome is open or closed, based on a Heaps law model.
190 * **Rarefaction curves**: A rarefaction curve is the cumulative number of gene clusters observed as more and more genomes are considered.
191 * **ANI**: Average Nucleotide Identity between genomes.
192 * **ANI heatmap**: Heatmap image in SVG format.
193 * **VCF file**: If a pan-genome graph software has been selected, it provides a VCF of variations among all samples.
194
195
196
197 </help>
198 <citations>
199 <citation type="doi">10.1093/bioinformatics/btac504</citation>
200 </citations>
113 201
114 </tool> 202 </tool>