<tool id="pangenome_explorer" name="PanExplorer2" version="2.0">
    <!--
        Galaxy wrapper for the PanExplorer Snakemake workflow.
        The workflow itself runs inside a Singularity image (panexplorer.sif) that is
        downloaded next to this file on first use; this wrapper only builds config.yaml,
        selects the Snakefile matching the chosen software and collects the results.
    -->
    <description>Bacterial pan-genome analysis</description>
    <requirements>
        <!-- Formerly installed individually; kept for reference only.
        <requirement type="package" version="2.2.26">blast-legacy</requirement>
        <requirement type="package" version="7.480">mafft</requirement>
        <requirement type="package" version="14.137">mcl</requirement>
        <requirement type="package" version="3.697">phylip</requirement>
        <requirement type="package" version="1.7.2">perl-bioperl</requirement>
        -->
        <requirement type="package" version="1.30">perl-yaml</requirement>
        <requirement type="package" version="3.8.7">singularity</requirement>
        <!-- <container type="docker">ghcr.io/pangenome/pggb:latest</container> -->
    </requirements>

    <command><![CDATA[
## Path of the workflow inside the Singularity image and location of the image itself.
export PANEX_PATH=/usr/local/bin/PanExplorer_workflow;
PANEX_SIF='${__tool_directory__}/panexplorer.sif';

## Every input parameter except min_identity is nested in the "mode" conditional,
## so it has to be addressed as mode.<name>.
#set $input_mode = str($mode.mode)
#set $chosen_software = str($mode.software)

## 1. Build config.yaml from the inputs of the selected mode.
#if $input_mode == "accessions"
    perl '${__tool_directory__}/generateConfig.pl' 'None' '$mode.input' config.yaml 'None';
#else if $input_mode == "genbanks"
    perl '${__tool_directory__}/generateConfig.pl' '$mode.private_genomes' 'None' config.yaml 'None';
#else if $input_mode == "fasta"
    perl '${__tool_directory__}/generateConfig.pl' '$mode.private_genomes' 'None' config.yaml '$mode.private_genomes_fasta';
#end if

cat config.yaml >'$logfile';

## 2. Fetch the Singularity image on first use.
if [ ! -f "\$PANEX_SIF" ]; then wget -O "\$PANEX_SIF" https://panexplorer.southgreen.fr/singularity/panexplorer.sif >>'$logfile' 2>&1; fi;

## 3. Copy the Snakefile of the chosen software out of the image, substituting the identity threshold.
#if $chosen_software == "pgap"
    singularity exec "\$PANEX_SIF" sed "s/identity\=80/identity\=${min_identity}/g" \$PANEX_PATH/Snakemake_files/Snakefile_wget_PGAP_heatmap_upset_COG 1>snakefile 2>>'$logfile';
#else if $chosen_software == "roary"
    singularity exec "\$PANEX_SIF" sed "s/identity\=80/identity\=${min_identity}/g" \$PANEX_PATH/Snakemake_files/Snakefile_wget_roary_heatmap_upset_COG 1>snakefile 2>>'$logfile';
#else if $chosen_software == "orthofinder"
    singularity exec "\$PANEX_SIF" sed "s/identity\=80/identity\=${min_identity}/g" \$PANEX_PATH/Snakemake_files/Snakefile_orthofinder_heatmap_upset 1>snakefile 2>>'$logfile';
#else if $chosen_software == "cactus"
    ## No identity substitution here: the Snakefile is streamed unchanged.
    ## "cat" is required because the content is captured through stdout ("cp" with a single path fails).
    singularity exec "\$PANEX_SIF" cat \$PANEX_PATH/Snakemake_files/Snakefile_wget_cactus_heatmap_upset_COG2 1>snakefile 2>>'$logfile';
#else if $chosen_software == "pggb"
    ## The PGGB Snakefile is matched on identity=30 rather than identity=80.
    singularity exec "\$PANEX_SIF" sed "s/identity\=30/identity\=${min_identity}/g" \$PANEX_PATH/Snakemake_files/Snakefile_wget_pggb_heatmap_upset_COG 1>snakefile 2>>'$logfile';
#else
    ## Any other value (in practice "panacota") uses the PanACoTA Snakefile.
    singularity exec "\$PANEX_SIF" sed "s/identity\=80/identity\=${min_identity}/g" \$PANEX_PATH/Snakemake_files/Snakefile_wget_panacota_heatmap_upset_COG 1>snakefile 2>>'$logfile';
#end if

## 4. Run the workflow.
singularity exec "\$PANEX_SIF" snakemake --cores 1 -s snakefile >>'$logfile' 2>&1;

## 5. Collect the results that are copied for every software.
cp -rf outputs/upsetr.svg '$upset';
cp -rf outputs/pav_matrix.tsv '$output';
cp -rf outputs/heatmap.svg.gz '$heatmap';
cp -rf outputs/heatmap.svg.heatmap_plotly.html '$heatmap_html';
cp -rf outputs/rarefaction_curves.txt '$rarefaction_curves';
cp -rf outputs/rarefaction_curves.svg '$rarefaction_curves_svg';
cp -rf outputs/heaps.tsv '$heaps';
cp -rf outputs/heatmap.svg.complete.pdf.distance_matrix.txt '$distance_matrix';
cp -rf outputs/heatmap.svg.complete.pdf.distance_matrix.hclust.newick '$njtree';

## COG, GC skew, gene list and ANI results are collected for every software except OrthoFinder.
#if $chosen_software != "orthofinder"
    cp -rf outputs/GCskew.txt '$gcfile';
    cp -rf outputs/cog_output.txt '$cogfile';
    cp -rf outputs/cog_stats.txt '$outcog_stat';
    cp -rf outputs/cog_stats2.txt '$outcog_stat2';
    cp -rf outputs/cog_of_clusters.txt '$outcog_clusters';
    cp -rf outputs/genomes/genes.txt '$genes';
    cp -rf outputs/fastani.out.matrix.complete '$fastani';
    cp -rf outputs/fastani.out.svg '$ani_svg';
#end if

## A VCF is only copied for PGGB; otherwise an empty file is created.
#if $chosen_software == "pggb"
    cp -rf outputs/all_genomes.vcf '$vcf';
#else
    touch '$vcf';
#end if
    ]]></command>

    <inputs>
        <conditional name="mode">
            <param name="mode" type="select" label="What are your inputs?">
                <option value="accessions">Prokaryote genomes: List of Genbank assembly accessions (GCA)</option>
                <option value="genbanks">Prokaryote genomes: Genbank files</option>
                <option value="fasta">Eukaryote genomes: FASTA + GFF files</option>
            </param>
            <when value="accessions">
                <param name="input" type="text" label="List of genbank identifiers" help="Comma-separated list (ex: GCA_000007385.1,GCA_000010025.1,GCA_000019585.2)"/>
                <param name="software" type="select" label="Choose the pan-genome software">
                    <option value="roary">Roary</option>
                    <option value="panacota">PanACoTA</option>
                    <option value="pggb">PanGenome Graph Builder (PGGB)</option>
                </param>
            </when>
            <when value="genbanks">
                <param name="private_genomes" type="data" format="zip" label="Zip of genbank files" optional="true"/>
                <param name="software" type="select" label="Choose the pan-genome software">
                    <option value="roary">Roary</option>
                    <option value="panacota">PanACoTA</option>
                    <option value="pggb">PanGenome Graph Builder (PGGB)</option>
                </param>
            </when>
            <when value="fasta">
                <param name="private_genomes_fasta" type="data" format="zip" label="Zip of Fasta files" optional="true"/>
                <param name="private_genomes" type="data" format="zip" label="Zip of GFF files" optional="true"/>
                <param name="software" type="select" label="Choose the pan-genome software">
                    <option value="orthofinder">OrthoFinder</option>
                    <option value="cactus">Minigraph-Cactus</option>
                    <option value="pggb">PanGenome Graph Builder (PGGB)</option>
                </param>
            </when>
        </conditional>

        <!-- Integer with bounds: the value is substituted into a sed expression on the command line. -->
        <param name="min_identity" type="integer" min="0" max="100" value="80" label="Minimum percentage identity for BlastP"/>
    </inputs>

    <outputs>
        <data format="txt" name="output" label="Pangenome presence absence matrix"/>
        <data format="newick" name="njtree" label="PanBased NJ tree"/>
        <!-- Outputs carrying the orthofinder filter are not written by the command for OrthoFinder runs. -->
        <data format="txt" name="genes" label="Genes">
            <filter>mode['software'] != 'orthofinder'</filter>
        </data>
        <data format="txt" name="cogfile" label="COG assignation">
            <filter>mode['software'] != 'orthofinder'</filter>
        </data>
        <data format="txt" name="gcfile" label="GC_percent">
            <filter>mode['software'] != 'orthofinder'</filter>
        </data>
        <data format="svg" name="upset" label="Upset Diagram"/>
        <!-- NOTE(review): filled from outputs/heatmap.svg.gz; confirm whether the content is gzip-compressed. -->
        <data format="svg" name="heatmap" label="Presence Absence Heatmap"/>
        <data format="html" name="heatmap_html" label="Presence Absence Heatmap interactive"/>
        <data format="tabular" name="outcog_stat" label="COG category counts">
            <filter>mode['software'] != 'orthofinder'</filter>
        </data>
        <data format="tabular" name="outcog_stat2" label="COG category 2 counts">
            <filter>mode['software'] != 'orthofinder'</filter>
        </data>
        <data format="tabular" name="outcog_clusters" label="COG of clusters">
            <filter>mode['software'] != 'orthofinder'</filter>
        </data>
        <data format="tabular" name="fastani" label="ANI">
            <filter>mode['software'] != 'orthofinder'</filter>
        </data>
        <data format="svg" name="ani_svg" label="ANI heatmap">
            <filter>mode['software'] != 'orthofinder'</filter>
        </data>
        <data format="txt" name="rarefaction_curves" label="Rarefaction curves data"/>
        <data format="svg" name="rarefaction_curves_svg" label="Rarefaction curves"/>
        <data format="txt" name="heaps" label="Heaps law alpha"/>
        <data format="txt" name="distance_matrix" label="Accessory based distance matrix"/>
        <data format="vcf" name="vcf" label="VCF file"/>
        <data format="txt" name="logfile" label="Logfile"/>
        <!-- NOTE(review): nothing in the command writes to this output; kept so existing workflows still resolve it. -->
        <data format="txt" name="roary_log" label="Roary Logfile"/>
    </outputs>

    <tests>
        <test>
            <conditional name="mode">
                <param name="mode" value="accessions"/>
                <param name="input" value="GCA_000007385.1,GCA_000010025.1,GCA_000019585.2"/>
                <param name="software" value="panacota"/>
            </conditional>
            <param name="min_identity" value="80"/>
            <output name="distance_matrix" value="Accessory_based_distance_matrix.txt"/>
            <output name="fastani" value="ANI.txt"/>
        </test>
    </tests>

    <help>

PanExplorer
===========

The PanExplorer workflow is a Snakemake workflow that can be run in the backend of the PanExplorer web application.

Homepage: https://panexplorer.southgreen.fr/

It performs a pan-genome analysis of published and annotated bacterial genomes, using one of several tools: Roary, PGAP, PanACoTA.

Pangenome graph builders have recently been added to the pipeline: Minigraph-Cactus and PGGB (PanGenome Graph Builder).

It provides a presence/absence matrix of genes, an UpsetR diagram summarizing the matrix information and a COG assignment summary for each strain.

Please visit the GitHub page for the PanExplorer workflow at: https://github.com/SouthGreenPlatform/PanExplorer_workflow


Inputs
------

Inputs can be provided as one of the following:

* **List of genbank assembly identifiers**: comma-separated (ex: GCA_000007385.1,GCA_000010025.1,GCA_000019585.2)
* **Zip of genbank files**: they must include the gene annotation and the complete sequence data
* **Zip of FASTA file of genomes + Zip of GFF annotation files**: in order to associate each sequence with its annotation, the files must share the same basename, as follows: genome1.fasta, genome1.gff, myspeciesXXX.fasta, myspeciesXXX.gff...


Outputs
-------

Among the outputs:

* **Pangenome presence absence matrix**: pangene presence/absence matrix indicating the PAV (Presence Absence Variation) of clustered genes.
* **PanBased NJ tree**: distance tree based on PAV data.
* **Heaps law alpha**: estimates whether a pan-genome is open or closed, based on a Heaps law model.
* **Rarefaction curves**: a rarefaction curve is the cumulative number of gene clusters observed as more and more genomes are considered.
* **ANI**: Average Nucleotide Identity between genomes.
* **ANI heatmap**: image as SVG.
* **VCF file**: if a pan-genome graph software has been selected, it provides a VCF of variations among all samples.

    </help>
    <citations>
        <citation type="doi">10.1093/bioinformatics/btac504</citation>
    </citations>
</tool>