Mercurial > repos > iuc > ppanggolin_projection
view ppanggolin_projection.xml @ 2:ded401aa8b09 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/ppanggolin commit 29098ae57dcc42db8f9adf321d31ed37fb999d17
| author | iuc |
|---|---|
| date | Mon, 24 Nov 2025 12:56:19 +0000 |
| parents | a87adcb122b4 |
| children |
line wrap: on
line source
<tool id="ppanggolin_projection" name="PPanGGOLiN projection" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="23.0">
    <description>annotates external genomes using an existing pangenome</description>
    <!--
        Galaxy wrapper around the "ppanggolin projection" command.
        Version tokens, xrefs, requirements, the shared input macros, the
        genome-list token used in the command and the citation are all
        imported from macros.xml.
    -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="xrefs"/>
    <expand macro="requirements"/>
    <!--
        Command outline:
          1. create the working directories;
          2. expand the genome-list token from macros.xml;
          3. run the projection into ./tmp_ppanggolin/projection;
          4. move every requested per-genome file out of its genome
             sub-directory so that discover_datasets can pick it up;
          5. copy the global summary table to the summary_projection output.
    -->
    <command detect_errors="exit_code"><![CDATA[
#import re

## Working directories: projection results, temporary files and the genome list.
mkdir -p ./tmp_ppanggolin/projection &&
mkdir -p ./tmp_ppanggolin/tmpdir_projection &&
mkdir -p ./tmp_ppanggolin/organism_list &&

## NOTE(review): the token below comes from macros.xml; it is presumably what writes
## ./tmp_ppanggolin/organism_list/organism.list and sets extension_input_files,
## and it must end with a command separator - confirm in macros.xml.
@ORGANISM_LIST@

ppanggolin projection
    --pangenome '$pangenome_h5'
    ## The genome list is passed as sequences or as annotations depending on the input type.
    #if $extension_input_files == "fasta":
        --fasta
    #elif $extension_input_files == "genbank":
        --anno
    #end if
    ./tmp_ppanggolin/organism_list/organism.list
    --output ./tmp_ppanggolin/projection
    --tmpdir ./tmp_ppanggolin/tmpdir_projection
    --force
    --cpu "\${GALAXY_SLOTS:-4}"
    --disable_prog_bar
    --coverage $coverage
    --identity $identity
    --table
    $do_defrag
    --soft_core $soft_core
    #if "output_genome_gff" in $advanced_pangenome_optional_files:
        --gff
    #end if
    #if "output_genome_proksee" in $advanced_pangenome_optional_files:
        --proksee
    #end if

## Flatten the per-genome results: each selected file is moved from
## ./tmp_ppanggolin/projection/<genome>/ up into ./tmp_ppanggolin/projection/
## with the genome name as a prefix, which is what the discover_datasets
## patterns of the output collections match on.
#for $counter_input_files, $file in enumerate($genomes):
    ## Every character of the element identifier other than word characters, '-', '_' and '.' becomes '_'.
    ## NOTE(review): this presumably mirrors the genome names written to organism.list - confirm in macros.xml.
    #set base_name = re.sub('[^\w\-_\.]', '_', str($file.element_identifier))
    #if "output_gene_to_gene_family" in $advanced_pangenome_optional_files:
        && mv './tmp_ppanggolin/projection/${base_name}/gene_to_gene_family.tsv' './tmp_ppanggolin/projection/${base_name}_gene_to_gene_family.tsv'
    #end if
    #if "output_input_genome_rgp_to_spot" in $advanced_pangenome_optional_files:
        && mv './tmp_ppanggolin/projection/${base_name}/input_genome_rgp_to_spot.tsv' './tmp_ppanggolin/projection/${base_name}_input_genome_rgp_to_spot.tsv'
    #end if
    #if "output_modules_in_input_genome" in $advanced_pangenome_optional_files:
        && mv './tmp_ppanggolin/projection/${base_name}/modules_in_input_genome.tsv' './tmp_ppanggolin/projection/${base_name}_modules_in_input_genome.tsv'
    #end if
    #if "output_genome_gff" in $advanced_pangenome_optional_files:
        && mv './tmp_ppanggolin/projection/${base_name}/${base_name}.gff' './tmp_ppanggolin/projection/${base_name}_genome_gff.gff'
    #end if
    ## The per-genome table is always moved: the genome_tsv collection has no filter.
    && mv './tmp_ppanggolin/projection/${base_name}/${base_name}.tsv' './tmp_ppanggolin/projection/${base_name}_genome_tsv.tsv'
    #if "output_genome_proksee" in $advanced_pangenome_optional_files:
        && mv './tmp_ppanggolin/projection/${base_name}/${base_name}_proksee.json' './tmp_ppanggolin/projection/${base_name}_genome_proksee.json'
    #end if
    #if "output_projection_summary" in $advanced_pangenome_optional_files:
        && mv './tmp_ppanggolin/projection/${base_name}/projection_summary.yaml' './tmp_ppanggolin/projection/${base_name}_projection_summary.yaml'
    #end if
    #if "output_regions_of_genomic_plasticity" in $advanced_pangenome_optional_files:
        && mv './tmp_ppanggolin/projection/${base_name}/regions_of_genomic_plasticity.tsv' './tmp_ppanggolin/projection/${base_name}_regions_of_genomic_plasticity.tsv'
    #end if
    #if "output_sequences_partition_projection" in $advanced_pangenome_optional_files:
        && mv './tmp_ppanggolin/projection/${base_name}/sequences_partition_projection.tsv' './tmp_ppanggolin/projection/${base_name}_sequences_partition_projection.tsv'
    #end if
    #if "output_specific_genes" in $advanced_pangenome_optional_files:
        && mv './tmp_ppanggolin/projection/${base_name}/specific_genes.tsv' './tmp_ppanggolin/projection/${base_name}_specific_genes.tsv'
    #end if
#end for

&& cat './tmp_ppanggolin/projection/summary_projection.tsv' > '${summary_projection}'
    ]]></command>
    <inputs>
        <expand macro="inputs_pangenome"/>
        <expand macro="inputs_genomes"/>
        <expand macro="inputs_identity_coverage_do_defrag"/>
        <expand macro="inputs_soft_core"/>
        <!-- Each option value below is tested in the command template and in the filter of the matching output collection. -->
        <param name="advanced_pangenome_optional_files" type="select" label="Add the following output files in the Galaxy history" multiple="true" optional="true" display="checkboxes">
            <option value="output_gene_to_gene_family" selected="true">gene_to_gene_family.tsv: provide the mapping of genes to gene families of the pangenome</option>
            <option value="output_input_genome_rgp_to_spot" selected="true">input_genome_rgp_to_spot.tsv: provide information about the association between RGPs and insertion spots in the input genome</option>
            <option value="output_modules_in_input_genome" selected="true">modules_in_input_genome.tsv: list the modules that have been found in the input genome</option>
            <option value="output_genome_gff" selected="true">genome.gff: generate GFF files with projected pangenome annotations for each input genome</option>
            <option value="output_genome_proksee" selected="true">genome_proksee.json: Generate JSON map files for PROKSEE with projected pangenome annotations for each input genome ; PROKSEE (https://proksee.ca) is a user friendly interface to visualize your ppanggolin analysed genome as a circular genome plot</option>
            <option value="output_projection_summary" selected="true">projection_summary.yaml: provide an overview of the projection in the input genome</option>
            <option value="output_regions_of_genomic_plasticity" selected="true">regions_of_genomic_plasticity.tsv: contain information about RGPs within the input genome</option>
            <option value="output_sequences_partition_projection" selected="true">sequences_partition_projection.tsv: map the input genes to their partition (Persistent, Shell or Cloud)</option>
            <option value="output_specific_genes" selected="true">specific_genes.tsv: list the genes of the input genomes that do not align to any gene of the pangenome. These genes are assigned to the Cloud partition</option>
        </param>
    </inputs>
    <!--
        One list collection per per-genome file type. Elements are discovered in
        ./tmp_ppanggolin/projection by filename suffix; the part of the filename
        before the suffix becomes the element identifier. Collections with a
        filter are only created when the matching option is selected; genome_tsv
        and the summary_projection dataset are always produced.
    -->
    <outputs>
        <collection name="gene_to_gene_family" type="list" label="PPanGGOLiN projection on ${on_string}: Gene to gene family">
            <discover_datasets pattern="(?P&lt;designation&gt;.+)_gene_to_gene_family\.tsv" format="tabular" directory="./tmp_ppanggolin/projection"/>
            <filter>advanced_pangenome_optional_files and "output_gene_to_gene_family" in advanced_pangenome_optional_files</filter>
        </collection>
        <collection name="input_genome_rgp_to_spot" type="list" label="PPanGGOLiN projection on ${on_string}: Input genome rgp to spot">
            <discover_datasets pattern="(?P&lt;designation&gt;.+)_input_genome_rgp_to_spot\.tsv" format="tabular" directory="./tmp_ppanggolin/projection"/>
            <filter>advanced_pangenome_optional_files and "output_input_genome_rgp_to_spot" in advanced_pangenome_optional_files</filter>
        </collection>
        <collection name="modules_in_input_genome" type="list" label="PPanGGOLiN projection on ${on_string}: Modules in input genome">
            <discover_datasets pattern="(?P&lt;designation&gt;.+)_modules_in_input_genome\.tsv" format="tabular" directory="./tmp_ppanggolin/projection"/>
            <filter>advanced_pangenome_optional_files and "output_modules_in_input_genome" in advanced_pangenome_optional_files</filter>
        </collection>
        <collection name="genome_gff" type="list" label="PPanGGOLiN projection on ${on_string}: Genome gff">
            <discover_datasets pattern="(?P&lt;designation&gt;.+)_genome_gff\.gff" format="gff" directory="./tmp_ppanggolin/projection"/>
            <filter>advanced_pangenome_optional_files and "output_genome_gff" in advanced_pangenome_optional_files</filter>
        </collection>
        <collection name="genome_tsv" type="list" label="PPanGGOLiN projection on ${on_string}: Genome tsv">
            <discover_datasets pattern="(?P&lt;designation&gt;.+)_genome_tsv\.tsv" format="tabular" directory="./tmp_ppanggolin/projection"/>
        </collection>
        <collection name="genome_proksee" type="list" label="PPanGGOLiN projection on ${on_string}: Genome proksee">
            <discover_datasets pattern="(?P&lt;designation&gt;.+)_genome_proksee\.json" format="json" directory="./tmp_ppanggolin/projection"/>
            <filter>advanced_pangenome_optional_files and "output_genome_proksee" in advanced_pangenome_optional_files</filter>
        </collection>
        <collection name="projection_summary" type="list" label="PPanGGOLiN projection on ${on_string}: Genome projection summary">
            <discover_datasets pattern="(?P&lt;designation&gt;.+)_projection_summary\.yaml" format="yaml" directory="./tmp_ppanggolin/projection"/>
            <filter>advanced_pangenome_optional_files and "output_projection_summary" in advanced_pangenome_optional_files</filter>
        </collection>
        <collection name="regions_of_genomic_plasticity" type="list" label="PPanGGOLiN projection on ${on_string}: Regions of genomic plasticity">
            <discover_datasets pattern="(?P&lt;designation&gt;.+)_regions_of_genomic_plasticity\.tsv" format="tabular" directory="./tmp_ppanggolin/projection"/>
            <filter>advanced_pangenome_optional_files and "output_regions_of_genomic_plasticity" in advanced_pangenome_optional_files</filter>
        </collection>
        <collection name="sequences_partition_projection" type="list" label="PPanGGOLiN projection on ${on_string}: Sequences partition projection">
            <discover_datasets pattern="(?P&lt;designation&gt;.+)_sequences_partition_projection\.tsv" format="tabular" directory="./tmp_ppanggolin/projection"/>
            <filter>advanced_pangenome_optional_files and "output_sequences_partition_projection" in advanced_pangenome_optional_files</filter>
        </collection>
        <collection name="specific_genes" type="list" label="PPanGGOLiN projection on ${on_string}: Specific genes">
            <discover_datasets pattern="(?P&lt;designation&gt;.+)_specific_genes\.tsv" format="tabular" directory="./tmp_ppanggolin/projection"/>
            <filter>advanced_pangenome_optional_files and "output_specific_genes" in advanced_pangenome_optional_files</filter>
        </collection>
        <data name="summary_projection" format="tabular" label="PPanGGOLiN projection on ${on_string}: Global summary projection"/>
    </outputs>
    <tests>
        <!-- Test 1: three gzipped FASTA genomes, all optional outputs left at their default (selected). -->
        <test expect_num_outputs="11">
            <param name="pangenome_h5" value="h5/test_data.h5" ftype="h5"/>
            <param name="genomes" value="fasta/PROJECT1_984801_1194801.fasta.gz,fasta/PROJECT5_1290693_1440693.fasta.gz,fasta/PROJECT8_1022972_1232972.fasta.gz" ftype="fasta"/>
            <param name="coverage" value="0.8"/>
            <param name="identity" value="0.8"/>
            <param name="soft_core" value="0.95"/>
            <output_collection name="gene_to_gene_family" type="list" count="3">
                <element name="PROJECT1_984801_1194801.fasta.gz" ftype="tabular">
                    <assert_contents>
                        <has_text text="CDS_0062"/>
                    </assert_contents>
                </element>
                <element name="PROJECT5_1290693_1440693.fasta.gz" ftype="tabular">
                    <assert_contents>
                        <has_text text="CDS_0093"/>
                    </assert_contents>
                </element>
                <element name="PROJECT8_1022972_1232972.fasta.gz" ftype="tabular">
                    <assert_contents>
                        <has_text text="CDS_0152"/>
                    </assert_contents>
                </element>
            </output_collection>
            <output_collection name="input_genome_rgp_to_spot" type="list" count="3">
                <element name="PROJECT1_984801_1194801.fasta.gz" ftype="tabular">
                    <assert_contents>
                        <has_text text="region"/>
                    </assert_contents>
                </element>
                <element name="PROJECT5_1290693_1440693.fasta.gz" ftype="tabular">
                    <assert_contents>
                        <has_text text="region"/>
                    </assert_contents>
                </element>
                <element name="PROJECT8_1022972_1232972.fasta.gz" ftype="tabular">
                    <assert_contents>
                        <has_text text="region"/>
                    </assert_contents>
                </element>
            </output_collection>
            <output_collection name="modules_in_input_genome" type="list" count="3">
                <element name="PROJECT1_984801_1194801.fasta.gz" ftype="tabular">
                    <assert_contents>
                        <has_text text="module_id"/>
                    </assert_contents>
                </element>
                <element name="PROJECT5_1290693_1440693.fasta.gz" ftype="tabular">
                    <assert_contents>
                        <has_text text="module_id"/>
                    </assert_contents>
                </element>
                <element name="PROJECT8_1022972_1232972.fasta.gz" ftype="tabular">
                    <assert_contents>
                        <has_text text="module_id"/>
                    </assert_contents>
                </element>
            </output_collection>
            <output_collection name="genome_gff" type="list" count="3">
                <element name="PROJECT1_984801_1194801.fasta.gz" ftype="gff">
                    <assert_contents>
                        <has_text text="sequence-region"/>
                    </assert_contents>
                </element>
                <element name="PROJECT5_1290693_1440693.fasta.gz" ftype="gff">
                    <assert_contents>
                        <has_text text="sequence-region"/>
                    </assert_contents>
                </element>
                <element name="PROJECT8_1022972_1232972.fasta.gz" ftype="gff">
                    <assert_contents>
                        <has_text text="sequence-region"/>
                    </assert_contents>
                </element>
            </output_collection>
            <output_collection name="genome_tsv" type="list" count="3">
                <element name="PROJECT1_984801_1194801.fasta.gz" ftype="tabular">
                    <assert_contents>
                        <has_text text="gene"/>
                    </assert_contents>
                </element>
                <element name="PROJECT5_1290693_1440693.fasta.gz" ftype="tabular">
                    <assert_contents>
                        <has_text text="gene"/>
                    </assert_contents>
                </element>
                <element name="PROJECT8_1022972_1232972.fasta.gz" ftype="tabular">
                    <assert_contents>
                        <has_text text="gene"/>
                    </assert_contents>
                </element>
            </output_collection>
            <output_collection name="genome_proksee" type="list" count="3">
                <element name="PROJECT1_984801_1194801.fasta.gz" ftype="json">
                    <assert_contents>
                        <has_text text="cgview"/>
                    </assert_contents>
                </element>
                <element name="PROJECT5_1290693_1440693.fasta.gz" ftype="json">
                    <assert_contents>
                        <has_text text="cgview"/>
                    </assert_contents>
                </element>
                <element name="PROJECT8_1022972_1232972.fasta.gz" ftype="json">
                    <assert_contents>
                        <has_text text="cgview"/>
                    </assert_contents>
                </element>
            </output_collection>
            <output_collection name="projection_summary" type="list" count="3">
                <element name="PROJECT1_984801_1194801.fasta.gz" ftype="yaml">
                    <assert_contents>
                        <has_text text="Projection_summary"/>
                    </assert_contents>
                </element>
                <element name="PROJECT5_1290693_1440693.fasta.gz" ftype="yaml">
                    <assert_contents>
                        <has_text text="Projection_summary"/>
                    </assert_contents>
                </element>
                <element name="PROJECT8_1022972_1232972.fasta.gz" ftype="yaml">
                    <assert_contents>
                        <has_text text="Projection_summary"/>
                    </assert_contents>
                </element>
            </output_collection>
            <output_collection name="regions_of_genomic_plasticity" type="list" count="3">
                <element name="PROJECT1_984801_1194801.fasta.gz" ftype="tabular">
                    <assert_contents>
                        <has_text text="region"/>
                    </assert_contents>
                </element>
                <element name="PROJECT5_1290693_1440693.fasta.gz" ftype="tabular">
                    <assert_contents>
                        <has_text text="region"/>
                    </assert_contents>
                </element>
                <element name="PROJECT8_1022972_1232972.fasta.gz" ftype="tabular">
                    <assert_contents>
                        <has_text text="region"/>
                    </assert_contents>
                </element>
            </output_collection>
            <output_collection name="sequences_partition_projection" type="list" count="3">
                <element name="PROJECT1_984801_1194801.fasta.gz" ftype="tabular">
                    <assert_contents>
                        <has_text text="shell"/>
                    </assert_contents>
                </element>
                <element name="PROJECT5_1290693_1440693.fasta.gz" ftype="tabular">
                    <assert_contents>
                        <has_text text="shell"/>
                    </assert_contents>
                </element>
                <element name="PROJECT8_1022972_1232972.fasta.gz" ftype="tabular">
                    <assert_contents>
                        <has_text text="shell"/>
                    </assert_contents>
                </element>
            </output_collection>
            <output_collection name="specific_genes" type="list" count="3">
                <element name="PROJECT1_984801_1194801.fasta.gz" ftype="tabular">
                    <assert_contents>
                        <has_text text="CDS_0112"/>
                    </assert_contents>
                </element>
                <element name="PROJECT5_1290693_1440693.fasta.gz" ftype="tabular">
                    <assert_contents>
                        <has_text text="CDS_0137"/>
                    </assert_contents>
                </element>
                <element name="PROJECT8_1022972_1232972.fasta.gz" ftype="tabular">
                    <assert_contents>
                        <has_text text="CDS_0017"/>
                    </assert_contents>
                </element>
            </output_collection>
            <output name="summary_projection">
                <assert_contents>
                    <has_text text="Genome_name"/>
                </assert_contents>
            </output>
        </test>
        <!-- Test 2: the same three genomes as gzipped GenBank files; the content of the specific_genes collection is not asserted here. -->
        <test expect_num_outputs="11">
            <param name="pangenome_h5" value="h5/test_data.h5" ftype="h5"/>
            <param name="genomes" value="genbank/PROJECT1_984801_1194801.gb.gz,genbank/PROJECT5_1290693_1440693.gb.gz,genbank/PROJECT8_1022972_1232972.gb.gz" ftype="genbank"/>
            <param name="coverage" value="0.8"/>
            <param name="identity" value="0.8"/>
            <param name="soft_core" value="0.95"/>
            <output_collection name="gene_to_gene_family" type="list" count="3">
                <element name="PROJECT1_984801_1194801.gb.gz" ftype="tabular">
                    <assert_contents>
                        <has_text text="TKY121527_11890"/>
                    </assert_contents>
                </element>
                <element name="PROJECT5_1290693_1440693.gb.gz" ftype="tabular">
                    <assert_contents>
                        <has_text text="OTU47_07365"/>
                    </assert_contents>
                </element>
                <element name="PROJECT8_1022972_1232972.gb.gz" ftype="tabular">
                    <assert_contents>
                        <has_text text="ODS73_05675"/>
                    </assert_contents>
                </element>
            </output_collection>
            <output_collection name="input_genome_rgp_to_spot" type="list" count="3">
                <element name="PROJECT1_984801_1194801.gb.gz" ftype="tabular">
                    <assert_contents>
                        <has_text text="region"/>
                    </assert_contents>
                </element>
                <element name="PROJECT5_1290693_1440693.gb.gz" ftype="tabular">
                    <assert_contents>
                        <has_text text="region"/>
                    </assert_contents>
                </element>
                <element name="PROJECT8_1022972_1232972.gb.gz" ftype="tabular">
                    <assert_contents>
                        <has_text text="region"/>
                    </assert_contents>
                </element>
            </output_collection>
            <output_collection name="modules_in_input_genome" type="list" count="3">
                <element name="PROJECT1_984801_1194801.gb.gz" ftype="tabular">
                    <assert_contents>
                        <has_text text="module_id"/>
                    </assert_contents>
                </element>
                <element name="PROJECT5_1290693_1440693.gb.gz" ftype="tabular">
                    <assert_contents>
                        <has_text text="module_id"/>
                    </assert_contents>
                </element>
                <element name="PROJECT8_1022972_1232972.gb.gz" ftype="tabular">
                    <assert_contents>
                        <has_text text="module_id"/>
                    </assert_contents>
                </element>
            </output_collection>
            <output_collection name="genome_gff" type="list" count="3">
                <element name="PROJECT1_984801_1194801.gb.gz" ftype="gff">
                    <assert_contents>
                        <has_text text="sequence-region"/>
                    </assert_contents>
                </element>
                <element name="PROJECT5_1290693_1440693.gb.gz" ftype="gff">
                    <assert_contents>
                        <has_text text="sequence-region"/>
                    </assert_contents>
                </element>
                <element name="PROJECT8_1022972_1232972.gb.gz" ftype="gff">
                    <assert_contents>
                        <has_text text="sequence-region"/>
                    </assert_contents>
                </element>
            </output_collection>
            <output_collection name="genome_tsv" type="list" count="3">
                <element name="PROJECT1_984801_1194801.gb.gz" ftype="tabular">
                    <assert_contents>
                        <has_text text="gene"/>
                    </assert_contents>
                </element>
                <element name="PROJECT5_1290693_1440693.gb.gz" ftype="tabular">
                    <assert_contents>
                        <has_text text="gene"/>
                    </assert_contents>
                </element>
                <element name="PROJECT8_1022972_1232972.gb.gz" ftype="tabular">
                    <assert_contents>
                        <has_text text="gene"/>
                    </assert_contents>
                </element>
            </output_collection>
            <output_collection name="genome_proksee" type="list" count="3">
                <element name="PROJECT1_984801_1194801.gb.gz" ftype="json">
                    <assert_contents>
                        <has_text text="cgview"/>
                    </assert_contents>
                </element>
                <element name="PROJECT5_1290693_1440693.gb.gz" ftype="json">
                    <assert_contents>
                        <has_text text="cgview"/>
                    </assert_contents>
                </element>
                <element name="PROJECT8_1022972_1232972.gb.gz" ftype="json">
                    <assert_contents>
                        <has_text text="cgview"/>
                    </assert_contents>
                </element>
            </output_collection>
            <output_collection name="projection_summary" type="list" count="3">
                <element name="PROJECT1_984801_1194801.gb.gz" ftype="yaml">
                    <assert_contents>
                        <has_text text="Projection_summary"/>
                    </assert_contents>
                </element>
                <element name="PROJECT5_1290693_1440693.gb.gz" ftype="yaml">
                    <assert_contents>
                        <has_text text="Projection_summary"/>
                    </assert_contents>
                </element>
                <element name="PROJECT8_1022972_1232972.gb.gz" ftype="yaml">
                    <assert_contents>
                        <has_text text="Projection_summary"/>
                    </assert_contents>
                </element>
            </output_collection>
            <output_collection name="regions_of_genomic_plasticity" type="list" count="3">
                <element name="PROJECT1_984801_1194801.gb.gz" ftype="tabular">
                    <assert_contents>
                        <has_text text="region"/>
                    </assert_contents>
                </element>
                <element name="PROJECT5_1290693_1440693.gb.gz" ftype="tabular">
                    <assert_contents>
                        <has_text text="region"/>
                    </assert_contents>
                </element>
                <element name="PROJECT8_1022972_1232972.gb.gz" ftype="tabular">
                    <assert_contents>
                        <has_text text="region"/>
                    </assert_contents>
                </element>
            </output_collection>
            <output_collection name="sequences_partition_projection" type="list" count="3">
                <element name="PROJECT1_984801_1194801.gb.gz" ftype="tabular">
                    <assert_contents>
                        <has_text text="shell"/>
                    </assert_contents>
                </element>
                <element name="PROJECT5_1290693_1440693.gb.gz" ftype="tabular">
                    <assert_contents>
                        <has_text text="shell"/>
                    </assert_contents>
                </element>
                <element name="PROJECT8_1022972_1232972.gb.gz" ftype="tabular">
                    <assert_contents>
                        <has_text text="shell"/>
                    </assert_contents>
                </element>
            </output_collection>
            <output name="summary_projection">
                <assert_contents>
                    <has_text text="Genome_name"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
PPanGGOLiN_ (Gautreau et al. 2020) is a software suite used to create and manipulate prokaryotic pangenomes from a set of either assembled genomic DNA sequences or provided genome annotations.

PPanGGOLiN builds pangenomes through a graphical model and a statistical method to partition gene families in persistent, shell and cloud genomes. It integrates both information on protein-coding genes and their genomic neighborhood to build a graph of gene families where each node is a gene family, and each edge is a relation of genetic contiguity.

The `ppanggolin projection` command annotates external genomes using an existing pangenome. This process eliminates the need to recompute all components, streamlining the annotation process. Input genomes are expected to belong to the same species.

Genes within the input genome are aligned with genes in the pangenome to determine their gene families and partitions. Genes that do not align with any existing gene in the pangenome are considered specific to the input genome and are assigned to the “Cloud” partition. The number of these specific cloud families is detailed in the summary table.

Based on the alignment and partition assignment, Regions of Genomic Plasticity (RGPs) within the input genome are predicted. Each RGP that is not located on a contig border is assigned to a spot of insertion.

Finally, conserved modules of the pangenome found in the input genome are reported in the output files.

More details are available in the PPanGGOLiN projection documentation_.

.. _PPanGGOLiN: https://github.com/labgem/PPanGGOLiN
.. _documentation: https://ppanggolin.readthedocs.io/en/latest/user/projection.html#projection
    ]]></help>
    <expand macro="citation"/>
</tool>
