Mercurial > repos > dereeper > pangenome_explorer
comparison PanExplorer.xml @ 14:5a5c9a6b047b draft
Uploaded
author | dereeper |
---|---|
date | Tue, 10 Dec 2024 16:20:53 +0000 |
parents | 152d7c43478b |
children | dbde253606c5 |
comparison
equal
deleted
inserted
replaced
13:152d7c43478b | 14:5a5c9a6b047b |
---|---|
1 <tool id="PanExplorer" name="PanExplorer" version="1.0"> | 1 <tool id="PanExplorer2" name="PanExplorer2" version="2.0"> |
2 <description> Bacterial pan-genome analysis </description> | 2 <description> Bacterial pan-genome analysis </description> |
3 <requirements> | 3 <requirements> |
4 <!-- | 4 <!-- |
5 <requirement type="package" version="2.2.26">blast-legacy</requirement> | 5 <requirement type="package" version="2.2.26">blast-legacy</requirement> |
6 <requirement type="package" version="7.480">mafft</requirement> | 6 <requirement type="package" version="7.480">mafft</requirement> |
15 | 15 |
16 <command><![CDATA[ | 16 <command><![CDATA[ |
17 | 17 |
18 export PANEX_PATH=${__tool_directory__}; | 18 export PANEX_PATH=${__tool_directory__}; |
19 | 19 |
20 perl ${__tool_directory__}/Perl/generateConfig.pl '$private_genomes' '$input' config.yaml '$private_genomes_fasta'; | 20 |
21 #if str($mode.mode) == "accessions": | |
22 perl ${__tool_directory__}/Perl/generateConfig.pl 'None' '$input' config.yaml 'None'; | |
23 #else if str($mode.mode) == "genbanks": | |
24 perl ${__tool_directory__}/Perl/generateConfig.pl '$private_genomes' 'None' config.yaml 'None'; | |
25 #else if str($mode.mode) == "fasta": | |
26 perl ${__tool_directory__}/Perl/generateConfig.pl '$private_genomes' 'None' config.yaml '$private_genomes_fasta'; | |
27 #end if | |
21 | 28 |
22 cat config.yaml >$logfile; | 29 cat config.yaml >$logfile; |
23 | 30 |
24 | 31 |
25 | 32 |
72 #end if | 79 #end if |
73 ]]></command> | 80 ]]></command> |
74 | 81 |
75 | 82 |
76 <inputs> | 83 <inputs> |
77 <param name="input" type="text" multiple="true" label="List of genbank identifiers" help="Coma separated list (ex: CP000235.1,CP001079.1,CP001759.1,CP015994.2)"/> | 84 <conditional name="mode"> |
78 <param name="private_genomes" type="data" format="zip" label="Zip of genbank or GFF files" optional="true"/> | 85 <param name = "mode" type="select" label="What is your inputs?"> |
79 <param name="private_genomes_fasta" type="data" format="zip" label="Zip of Fasta files" optional="true"/> | 86 <option value="accessions">Prokaryote genomes: List of Genbank assembly accessions (GCA)</option> |
87 <option value="genbanks">Prokaryote genomes: Genbank files</option> | |
88 <option value="fasta">Eukaryote genomes: FASTA + GFF files</option> | |
89 </param> | |
90 <when value="accessions"> | |
91 <param name="input" type="text" multiple="true" label="List of genbank identifiers" help="Coma separated list (ex: GCA_000007385.1,GCA_000010025.1,GCA_000019585.2)"/> | |
92 <param type="select" name="software" label="Choose the pan-genome software"> | |
93 <option value="roary">Roary</option> | |
94 <option value="panacota">PanACoTA</option> | |
95 <option value="pggb">PanGenome Graph Builder (PGGB)</option> | |
96 </param> | |
97 </when> | |
98 <when value="genbanks"> | |
99 <param name="private_genomes" type="data" format="zip" label="Zip of genbank files" optional="true"/> | |
100 <param type="select" name="software" label="Choose the pan-genome software"> | |
101 <option value="roary">Roary</option> | |
102 <option value="panacota">PanACoTA</option> | |
103 <option value="pggb">PanGenome Graph Builder (PGGB)</option> | |
104 </param> | |
105 </when> | |
106 <when value="fasta"> | |
107 <param name="private_genomes_fasta" type="data" format="zip" label="Zip of Fasta files" optional="true"/> | |
108 <param name="private_genomes" type="data" format="zip" label="Zip of GFF files" optional="true"/> | |
109 <param type="select" name="software" label="Choose the pan-genome software"> | |
110 <option value="orthofinder">OrthoFinder</option> | |
111 <option value="cactus">Minigraph-Cactus</option> | |
112 <option value="pggb">PanGenome Graph Builder (PGGB)</option> | |
113 </param> | |
114 </when> | |
115 </conditional> | |
116 | |
80 <param name="min_identity" type="text" value="80" label="Minimum percentage identity for BlastP" /> | 117 <param name="min_identity" type="text" value="80" label="Minimum percentage identity for BlastP" /> |
81 <param type="select" name="software" label="Choose the pan-genome software"> | |
82 <option value="pgap">PGAP</option> | |
83 <option value="roary">Roary</option> | |
84 <option value="panacota">PanACoTA</option> | |
85 <option value="orthofinder">OrthoFinder</option> | |
86 <option value="cactus">Minigraph-Cactus</option> | |
87 <option value="pggb">PanGenome Graph Builder (PGGB)</option> | |
88 </param> | |
89 </inputs> | 118 </inputs> |
90 | 119 |
91 <outputs> | 120 <outputs> |
92 <data format="txt" name="output" label="Pangenome presence absence matrix"/> | 121 <data format="txt" name="output" label="Pangenome presence absence matrix"/> |
93 <data format="newick" name="njtree" label="PanBased NJ tree"/> | 122 <data format="newick" name="njtree" label="PanBased NJ tree"/> |
108 <data format="txt" name="distance_matrix" label="Accessory based distance matrix"/> | 137 <data format="txt" name="distance_matrix" label="Accessory based distance matrix"/> |
109 <data format="vcf" name="vcf" label="VCF file"/> | 138 <data format="vcf" name="vcf" label="VCF file"/> |
110 <data format="txt" name="logfile" label="Logfile"/> | 139 <data format="txt" name="logfile" label="Logfile"/> |
111 <data format="txt" name="roary_log" label="Roary Logfile"/> | 140 <data format="txt" name="roary_log" label="Roary Logfile"/> |
112 </outputs> | 141 </outputs> |
142 <tests> | |
143 <test> | |
144 <param name="input" value="GCA_000007385.1,GCA_000010025.1,GCA_000019585.2"/> | |
145 <param name="min_identity" value="80"/> | |
146 <param name="software" value="panacota"/> | |
147 <param name="private_genomes" value=""/> | |
148 <param name="private_genomes_fasta" value=""/> | |
149 <output name="distance_matrix" value="Accessory_based_distance_matrix.txt"/> | |
150 <output name="fastani" value="ANI.txt"/> | |
151 </test> | |
152 </tests> | |
153 <help> | |
154 | |
155 PanExplorer | |
156 =========== | |
157 | |
158 PanExplorer workflow is a Snakemake workflow that can be run in the backend of the PanExplorer web application. | |
159 | |
160 Homepage: https://panexplorer.southgreen.fr/ | |
161 | |
162 It performs a pan-genome analysis on published and annotated bacterial genomes, using one of several tools that can be invoked: Roary, PGAP, PanACoTA. | |
163 | |
164 Pangenome graph-building software has recently been added to the pipeline: Minigraph-Cactus and PGGB (PanGenome Graph Builder). | |
165 | |
166 It provides a presence/absence matrix of genes, an UpSetR diagram summarizing the matrix information, and a COG assignment summary for each strain. | |
167 | |
168 Please visit the GitHub page for the PanExplorer workflow at: https://github.com/SouthGreenPlatform/PanExplorer_workflow | |
169 | |
170 | |
171 Inputs | |
172 ------ | |
173 | |
174 Inputs can be provided as one of the following: | |
175 | |
176 * **List of genbank assembly identifiers** comma-separated (ex: GCA_000007385.1,GCA_000010025.1,GCA_000019585.2) | |
177 * **Zip of genbank files** They must include the gene annotation and the complete sequence data | |
178 * **Zip of FASTA file of genomes + Zip of GFF annotation files**: In order to make the association between sequence and annotation, they must be named with the same basename as follows: genome1.fasta, genome1.gff, myspeciesXXX.fasta, myspeciesXXX.gff... | |
179 | |
180 | |
181 | |
182 Outputs | |
183 ------- | |
184 | |
185 Among the outputs: | |
186 | |
187 * **Pangenome presence absence matrix** Pangene presence/absence matrix indicating the PAV (Presence Absence Variation) of clustered genes. | |
188 * **PanBased NJ tree** Distance tree based on PAV data | |
189 * **Heaps law alpha** Estimating if a pan-genome is open or closed based on a Heaps law model. | |
190 * **Rarefaction curves** A rarefaction curve is the cumulative number of gene clusters we observe as more and more genomes are being considered | |
191 * **ANI** Average Nucleotide Identity between genomes | |
192 * **ANI heatmap** image as SVG | |
193 * **VCF file** If a pan-genome graph software has been selected, it provides a VCF of variations among all samples. | |
194 | |
195 | |
196 | |
197 </help> | |
198 <citations> | |
199 <citation type="doi">10.1093/bioinformatics/btac504</citation> | |
200 </citations> | |
113 | 201 |
114 </tool> | 202 </tool> |