Mercurial > repos > dereeper > pangenome_explorer
view PanExplorer.xml @ 19:b8ea3d2945b5 draft
Uploaded
author | dereeper |
---|---|
date | Mon, 27 Jan 2025 13:02:48 +0000 |
parents | 716821f47909 |
children | e23149f1f55c |
line wrap: on
line source
<tool id="pangenome_explorer" name="PanExplorer2" version="2.0">
    <!--
        Galaxy wrapper for the PanExplorer Snakemake workflow.
        The command builds a config.yaml with generateConfig.pl (shipped next to this
        file), then runs the Snakefile matching the selected pan-genome software.
        Result files are collected from the "outputs/" folder of the job working directory.
    -->
    <description>Bacterial pan-genome analysis</description>
    <requirements>
        <requirement type="package" version="1.30">perl-yaml</requirement>
        <requirement type="package" version="3.8.7">singularity</requirement>
        <!-- NOTE(review): the "latest" tag is not pinned, so runs may not be reproducible
             over time. Consider pinning a released image tag. -->
        <container type="docker">docker.io/dereeper/panexplorer_workflow:latest</container>
    </requirements>
    <command><![CDATA[
## Location of the workflow inside the container image.
export PANEX_PATH=/usr/local/bin/PanExplorer_workflow &&

## Step 1: write config.yaml. Positional arguments passed to generateConfig.pl:
## <private genomes zip> <accession list> <output yaml> <fasta zip>; the literal
## string 'None' is passed for the slots that the selected mode does not use.
## Parameters declared inside the "mode" conditional are addressed as $mode.<name>.
#if str($mode.mode) == "accessions"
    perl '${__tool_directory__}/generateConfig.pl' 'None' '$mode.input' config.yaml 'None' &&
#else if str($mode.mode) == "genbanks"
    perl '${__tool_directory__}/generateConfig.pl' '$mode.private_genomes' 'None' config.yaml 'None' &&
#else if str($mode.mode) == "fasta"
    perl '${__tool_directory__}/generateConfig.pl' '$mode.private_genomes' 'None' config.yaml '$mode.private_genomes_fasta' &&
#end if

## Step 2: start the log with the generated configuration and make sure the VCF
## output exists even when the chosen Snakefile does not produce one.
cat config.yaml > '$logfile' &&
touch '$vcf' &&

## Step 3: pick the Snakefile for the requested software. "pgap" is not offered by
## any select list below but is kept so that older saved parameters still resolve;
## any other unknown value falls back to PanACoTA, as before.
#set $engine = str($mode.software)
#if $engine == "pgap"
    #set $snakefile = "Snakefile_wget_PGAP_heatmap_upset_COG"
#else if $engine == "roary"
    #set $snakefile = "Snakefile_wget_roary_heatmap_upset_COG"
#else if $engine == "orthofinder"
    #set $snakefile = "Snakefile_orthofinder_heatmap_upset"
#else if $engine == "cactus"
    #set $snakefile = "Snakefile_wget_cactus_heatmap_upset_COG2"
#else if $engine == "pggb"
    #set $snakefile = "Snakefile_wget_pggb_heatmap_upset_COG"
#else
    #set $snakefile = "Snakefile_wget_panacota_heatmap_upset_COG"
#end if

## Step 4: run the workflow; stdout and stderr are appended to the log dataset.
snakemake --cores 1 --config identity='$min_identity' -s "\$PANEX_PATH/Snakemake_files/${snakefile}" >> '$logfile' 2>&1
    ]]></command>
    <inputs>
        <!-- The input mode decides which genome inputs and which software choices are shown. -->
        <conditional name="mode">
            <param name="mode" type="select" label="What is your inputs?">
                <option value="accessions">Prokaryote genomes: List of Genbank assembly accessions (GCA)</option>
                <option value="genbanks">Prokaryote genomes: Genbank files</option>
                <option value="fasta">Eukaryote genomes: FASTA + GFF files</option>
            </param>
            <when value="accessions">
                <param name="input" type="text" label="List of genbank identifiers" help="Coma separated list (ex: GCA_000007385.1,GCA_000010025.1,GCA_000019585.2)"/>
                <param name="software" type="select" label="Choose the pan-genome software">
                    <option value="roary">Roary</option>
                    <option value="panacota">PanACoTA</option>
                    <option value="pggb">PanGenome Graph Builder (PGGB)</option>
                </param>
            </when>
            <when value="genbanks">
                <param name="private_genomes" type="data" format="zip" label="Zip of genbank files" optional="true"/>
                <param name="software" type="select" label="Choose the pan-genome software">
                    <option value="roary">Roary</option>
                    <option value="panacota">PanACoTA</option>
                    <option value="pggb">PanGenome Graph Builder (PGGB)</option>
                </param>
            </when>
            <when value="fasta">
                <param name="private_genomes_fasta" type="data" format="zip" label="Zip of Fasta files" optional="true"/>
                <param name="private_genomes" type="data" format="zip" label="Zip of GFF files" optional="true"/>
                <param name="software" type="select" label="Choose the pan-genome software">
                    <option value="orthofinder">OrthoFinder</option>
                    <option value="cactus">Minigraph-Cactus</option>
                    <option value="pggb">PanGenome Graph Builder (PGGB)</option>
                </param>
            </when>
        </conditional>
        <!-- Kept as a text parameter for compatibility with saved workflows; the validator
             restricts it to a plain number because the value is passed on the command line. -->
        <param name="min_identity" type="text" value="80" label="Minimum percentage identity for BlastP">
            <validator type="regex" message="Please enter a number, e.g. 80">^[0-9]+(\.[0-9]+)?$</validator>
        </param>
    </inputs>
    <outputs>
        <!-- Every output with from_work_dir is collected from the workflow's "outputs/" folder. -->
        <data name="output" format="txt" from_work_dir="outputs/pav_matrix.tsv" label="Pangenome presence absence matrix"/>
        <data name="njtree" format="newick" from_work_dir="outputs/heatmap.svg.complete.pdf.distance_matrix.hclust.newick" label="PanBased NJ tree"/>
        <data name="genes" format="txt" from_work_dir="outputs/genomes/genes.txt" label="Genes"/>
        <data name="cogfile" format="txt" from_work_dir="outputs/cog_output.txt" label="COG assignation"/>
        <data name="gcfile" format="txt" from_work_dir="outputs/GCskew.txt" label="GC_percent"/>
        <data name="upset" format="svg" from_work_dir="outputs/upsetr.svg" label="Upset Diagram"/>
        <!-- NOTE(review): the collected file name ends in .gz while the declared format is
             svg; confirm whether the workflow really writes a gzip-compressed file here. -->
        <data name="heatmap" format="svg" from_work_dir="outputs/heatmap.svg.gz" label="Presence Absence Heatmap"/>
        <data name="heatmap_html" format="html" from_work_dir="outputs/heatmap.svg.heatmap_plotly.html" label="Presence Absence Heatmap interactive"/>
        <data name="outcog_stat" format="tabular" from_work_dir="outputs/cog_stats.txt" label="COG category counts"/>
        <data name="outcog_stat2" format="tabular" from_work_dir="outputs/cog_stats2.txt" label="COG category 2 counts"/>
        <data name="outcog_clusters" format="tabular" from_work_dir="outputs/cog_of_clusters.txt" label="COG of clusters"/>
        <data name="fastani" format="tabular" from_work_dir="outputs/fastani.out.matrix.complete" label="ANI"/>
        <data name="ani_svg" format="svg" from_work_dir="outputs/fastani.out.svg" label="ANI heatmap"/>
        <data name="rarefaction_curves" format="txt" from_work_dir="outputs/rarefaction_curves.txt" label="Rarefaction curves data"/>
        <data name="rarefaction_curves_svg" format="svg" from_work_dir="outputs/rarefaction_curves.svg" label="Rarefaction curves"/>
        <data name="heaps" format="txt" from_work_dir="outputs/heaps.tsv" label="Heaps law alpha"/>
        <data name="distance_matrix" format="txt" from_work_dir="outputs/heatmap.svg.complete.pdf.distance_matrix.txt" label="Accessory based distance matrix"/>
        <!-- The command touches this dataset first, so it exists even if no VCF is produced. -->
        <data name="vcf" format="vcf" from_work_dir="outputs/all_genomes.vcf" label="VCF file"/>
        <!-- Receives config.yaml followed by the snakemake stdout/stderr. -->
        <data name="logfile" format="txt" label="Logfile"/>
        <!-- NOTE(review): the command section never writes to this dataset and it has no
             from_work_dir; kept so existing workflows that reference it do not break. -->
        <data name="roary_log" format="txt" label="Roary Logfile"/>
    </outputs>
    <tests>
        <!-- Accession mode with PanACoTA; parameters are nested exactly like the inputs. -->
        <test>
            <conditional name="mode">
                <param name="mode" value="accessions"/>
                <param name="input" value="GCA_000007385.1,GCA_000010025.1,GCA_000019585.2"/>
                <param name="software" value="panacota"/>
            </conditional>
            <param name="min_identity" value="80"/>
            <output name="distance_matrix" value="Accessory_based_distance_matrix.txt"/>
            <output name="fastani" value="ANI.txt"/>
        </test>
    </tests>
    <help><![CDATA[
PanExplorer
===========

PanExplorer workflow is a snakemake workflow that can be run in the backend of the PanExplorer web application.

Homepage: https://panexplorer.southgreen.fr/

It performs a pan-genome analysis using published and annotated bacteria genomes, using different tools that can be invoked: Roary, PGAP, PanACoTA.

Pangenome graph builder tools have been implemented recently in the pipeline: Minigraph-Cactus and PGGB (PanGenome Graph Builder)

It provides a presence/absence matrix of genes, an UpsetR Diagram for synthetizing the matrix information and a COG assignation summary for each strain.

Please visit the GitHub page for the PanExplorer workflow at: https://github.com/SouthGreenPlatform/PanExplorer_workflow

Inputs
------

Inputs can be provided as one of the following:

* **List of genbank assembly identifiers** comma-separated (ex: GCA_000007385.1,GCA_000010025.1,GCA_000019585.2)
* **Zip of genbank files** They must include the gene annotation and the complete sequence data
* **Zip of FASTA file of genomes + Zip of GFF annotation files**: In order to make the association between sequence and annotation, they must be named with the same basename as follows: genome1.fasta, genome1.gff, myspeciesXXX.fasta, myspeciesXXX.gff...

Outputs
-------

Among the outputs:

* **Pangenome presence absence matrix** Pangene presence/absence matrix indicating the PAV (Presence Absence Variation) of clustered genes.
* **PanBased NJ tree** Distance tree based on PAV data
* **Heaps law alpha** Estimating if a pan-genome is open or closed based on a Heaps law model.
* **Rarefaction curves** A rarefaction curve is the cumulative number of gene clusters we observe as more and more genomes are being considered
* **ANI** Average Nucleotide Identity between genomes
* **ANI heatmap** image as SVG
* **VCF file** If a pan-genome graph software has been selected, it provides a VCF of variations among all samples.
    ]]></help>
    <citations>
        <citation type="doi">10.1093/bioinformatics/btac504</citation>
    </citations>
</tool>