Mercurial > repos > iuc > ppanggolin_projection

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml	Thu Aug 14 17:09:45 2025 +0000
@@ -0,0 +1,86 @@
+<macros>
+    <!-- Upstream ppanggolin release: pins the package requirement and forms the first part of the tool version. -->
+    <token name="@TOOL_VERSION@">2.2.4</token>
+    <!-- Revision of the Galaxy wrapper for that release, appended to the tool version after "+galaxy". -->
+    <token name="@VERSION_SUFFIX@">1</token>
+    <!-- Publications to cite for the tool, both identified by DOI. -->
+    <xml name="citation">
+        <citations>
+            <citation type="doi">10.1371/journal.pcbi.1007732</citation>
+            <citation type="doi">10.1093/bioinformatics/btaa792</citation>
+        </citations>
+    </xml>
+    <!-- Package dependency: ppanggolin at the version held by the TOOL_VERSION token. -->
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="@TOOL_VERSION@">ppanggolin</requirement>
+        </requirements>
+    </xml>
+    <!-- Cross-reference to the "ppanggolin" entry of bio.tools. -->
+    <xml name="xrefs">
+        <xrefs>
+            <xref type="bio.tools">ppanggolin</xref>
+        </xrefs>
+    </xml>
+    <token name="@ORGANISM_LIST@"><![CDATA[
+        ## Builds the genome list file: one "sanitized name TAB dataset path" line per input genome.
+        ## The calling command must import "re" and create ./tmp_ppanggolin/organism_list beforehand.
+        ## On exit, extension_input_files holds the datatype extension shared by all the inputs.
+        touch ./tmp_ppanggolin/organism_list/organism.list &&
+        #set extension_input_files = ""
+        #set seen_base_names = []
+        #for $counter_input_files, $file in enumerate($genomes):
+            ## Every genome must have the same datatype as the first one.
+            #if $counter_input_files == 0:
+                #set extension_input_files = $file.ext
+            #elif $file.ext != $extension_input_files:
+                #raise Exception("All the genome files must be of the same datatype, either all genbank files or all fasta files.")
+            #end if
+
+            #set base_name = re.sub('[^\w\-_\.]', '_', str($file.element_identifier))
+            ## Inputs whose sanitized names collide would yield duplicated genome names and clashing output files.
+            #if $base_name in $seen_base_names:
+                #raise Exception("Several genome files share the name '%s' once special characters are replaced by underscores; please rename them so that every genome has a unique name." % $base_name)
+            #end if
+            #silent $seen_base_names.append($base_name)
+            echo -e '${base_name}\t${file}' >> ./tmp_ppanggolin/organism_list/organism.list &&
+        #end for
+    ]]></token>
+    <!-- Alignment identity and coverage thresholds (both default to 0.8) plus the defragmentation switch, which emits the no_defrag flag only when unchecked. -->
+    <xml name="inputs_identity_coverage_do_defrag">
+        <param argument="--identity" type="float" value="0.8" min="0" max="1" label="Minimum alignment identity"/>
+        <param argument="--coverage" type="float" value="0.8" min="0" max="1" label="Minimum alignment coverage"/>
+        <param name="do_defrag" type="boolean" checked="true" label="Gene family defragmentation" truevalue="" falsevalue="--no_defrag" help="Realign gene families to link fragments with their non-fragmented gene family. (--no_defrag)"/>
+    </xml>
+    <!-- Optional number of partitions. The expression validator only checks the lower bound (2) of a non-empty value; the upper bound (20) comes from the max attribute. -->
+    <xml name="inputs_nb_of_partitions">
+        <param argument="--nb_of_partitions" type="integer" max="20" label="Number of classes used to partition the pangenome" optional="true" help="If empty value (default), it will be automatically determined. Otherwise, the value needs to be between 2 and 20 and it is advised to set a value of 3. See the link for the documentation in the help section for more details.">
+            <validator type="expression" message="Value needs to be empty or an integer between 2 and 20">value == "" or int(value) >= 2</validator>
+        </param>
+    </xml>
+    <!-- Genetic code selection for the translation_table argument; table 1 is selected by default. -->
+    <xml name="inputs_translation_table">
+        <param argument="--translation_table" type="select" label="Translation table">
+            <option value="1" selected="true">1 - Standard Code</option>
+            <option value="2">2 - Vertebrate Mitochondrial</option>
+            <option value="3">3 - Yeast Mitochondrial</option>
+            <option value="4">4 - Mold, Protozoan, and Coelenterate Mitochondrial</option>
+            <option value="5">5 - Invertebrate Mitochondrial</option>
+            <option value="6">6 - Ciliate Nuclear</option>
+            <option value="9">9 - Echinoderm Mitochondrial</option>
+            <option value="10">10 - Euplotid Nuclear</option>
+            <option value="11">11 - Bacterial and Plant Plastid</option>
+            <option value="12">12 - Alternative Yeast Nuclear</option>
+            <option value="13">13 - Ascidian Mitochondrial</option>
+            <option value="14">14 - Flatworm Mitochondrial</option>
+            <option value="15">15 - Blepharisma Nuclear</option>
+            <option value="16">16 - Chlorophycean Mitochondrial</option>
+            <option value="21">21 - Trematode Mitochondrial</option>
+            <option value="22">22 - Scenedesmus obliquus Mitochondrial</option>
+            <option value="23">23 - Thraustochytrium Mitochondrial</option>
+            <option value="24">24 - Pterobranchia Mitochondrial</option>
+            <option value="25">25 - Candidate Division SR1 and Gracilibacteria</option>
+            <option value="26">26 - Pachysolen tannophilus Nuclear</option>
+            <option value="27">27 - Karyorelict Nuclear</option>
+            <option value="28">28 - Condylostoma Nuclear</option>
+            <option value="29">29 - Mesodinium Nuclear</option>
+            <option value="30">30 - Peritrich Nuclear</option>
+            <option value="31">31 - Blastocrithidia Nuclear</option>
+            <option value="32">32 - Balanophoraceae Plastid</option>
+            <option value="33">33 - Cephalodiscidae Mitochondrial</option>
+        </param>
+    </xml>
+    <!-- Pangenome input in h5 format; the projection command template reads it as $pangenome_h5. -->
+    <xml name="inputs_pangenome">
+        <param argument="--pangenome" name="pangenome_h5" type="data" format="h5" label="Input pangenome h5 file"/>
+    </xml>
+    <!-- Soft core threshold, a fraction between 0 and 1 that defaults to 0.95. -->
+    <xml name="inputs_soft_core">
+        <param argument="--soft_core" type="float" value="0.95" min="0" max="1" label="Soft core threshold used when generating general statistics on the projected genome" help="Default=0.95. This threshold does not influence PPanGGOLiN's partitioning. The value determines the minimum fraction of genomes that must possess a gene family for it to be considered part of the soft core."/>
+    </xml>
+    <!-- Genome inputs (fasta or genbank). The token_min and token_extratexthelp attributes give the default values of the MIN and EXTRATEXTHELP placeholders used in the parameter below. -->
+    <xml name="inputs_genomes" token_min="1" token_extratexthelp="">
+        <param name="genomes" type="data" multiple="true"  min="@MIN@" label="Genome files" format="fasta,genbank" help="All the genome files must be of the same format, either all genbank files or all fasta files. A minimum of @MIN@ genome file(s) is mandatory.@EXTRATEXTHELP@ Special characters and spaces are replaced by underscore."/>
+    </xml>
+</macros>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ppanggolin_projection.xml	Thu Aug 14 17:09:45 2025 +0000
@@ -0,0 +1,538 @@
+<tool id="ppanggolin_projection" name="PPanGGOLiN projection" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="23.0">
+    <description>annotates external genomes using an existing pangenome</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="xrefs"/>
+    <expand macro="requirements"/>
+
+    <command detect_errors="exit_code"><![CDATA[
+        #import re
+
+        ## Optional outputs ticked by the user, as a plain list of option values. Going
+        ## through str() keeps the membership tests below valid when nothing is selected.
+        #set selected_outputs = str($advanced_pangenome_optional_files).split(",")
+
+        ## Working directories: projection results, temporary files and the genome list.
+        mkdir -p ./tmp_ppanggolin/projection &&
+        mkdir -p ./tmp_ppanggolin/tmpdir_projection &&
+        mkdir -p ./tmp_ppanggolin/organism_list &&
+
+        ## The token below (from macros.xml) writes the genome list file and sets extension_input_files.
+        @ORGANISM_LIST@
+
+        ppanggolin projection
+        --pangenome '$pangenome_h5'
+
+        ## The genome list is passed through the option matching the datatype of the inputs.
+        #if $extension_input_files == "fasta":
+            --fasta
+        #elif $extension_input_files == "genbank":
+            --anno
+        #else:
+            #raise Exception("Unsupported genome datatype '%s': the genome files must be either fasta or genbank." % $extension_input_files)
+        #end if
+        ./tmp_ppanggolin/organism_list/organism.list
+
+        --output ./tmp_ppanggolin/projection
+        --tmpdir ./tmp_ppanggolin/tmpdir_projection
+        --force
+        --cpu "\${GALAXY_SLOTS:-4}"
+        --disable_prog_bar
+
+        --coverage $coverage
+        --identity $identity
+        --table
+
+        $do_defrag
+
+        --soft_core $soft_core
+
+        #if "output_genome_gff" in $selected_outputs:
+            --gff
+        #end if
+        #if "output_genome_proksee" in $selected_outputs:
+            --proksee
+        #end if
+
+        ## Results are read from one sub-directory per genome. Each requested file is moved up
+        ## into the projection directory with the genome name as prefix, which is the naming
+        ## scheme the discover_datasets patterns of the output collections look for.
+        #for $file in $genomes:
+            #set base_name = re.sub('[^\w\-_\.]', '_', str($file.element_identifier))
+
+            #if "output_gene_to_gene_family" in $selected_outputs:
+                && mv './tmp_ppanggolin/projection/${base_name}/gene_to_gene_family.tsv' './tmp_ppanggolin/projection/${base_name}_gene_to_gene_family.tsv'
+            #end if
+
+            #if "output_input_genome_rgp_to_spot" in $selected_outputs:
+                && mv './tmp_ppanggolin/projection/${base_name}/input_genome_rgp_to_spot.tsv' './tmp_ppanggolin/projection/${base_name}_input_genome_rgp_to_spot.tsv'
+            #end if
+
+            #if "output_modules_in_input_genome" in $selected_outputs:
+                && mv './tmp_ppanggolin/projection/${base_name}/modules_in_input_genome.tsv' './tmp_ppanggolin/projection/${base_name}_modules_in_input_genome.tsv'
+            #end if
+
+            #if "output_genome_gff" in $selected_outputs:
+                && mv './tmp_ppanggolin/projection/${base_name}/${base_name}.gff' './tmp_ppanggolin/projection/${base_name}_genome_gff.gff'
+            #end if
+
+            ## The per-genome table is always collected (its output collection has no filter).
+            && mv './tmp_ppanggolin/projection/${base_name}/${base_name}.tsv' './tmp_ppanggolin/projection/${base_name}_genome_tsv.tsv'
+
+            #if "output_genome_proksee" in $selected_outputs:
+                && mv './tmp_ppanggolin/projection/${base_name}/${base_name}_proksee.json' './tmp_ppanggolin/projection/${base_name}_genome_proksee.json'
+            #end if
+
+            #if "output_projection_summary" in $selected_outputs:
+                && mv './tmp_ppanggolin/projection/${base_name}/projection_summary.yaml' './tmp_ppanggolin/projection/${base_name}_projection_summary.yaml'
+            #end if
+
+            #if "output_regions_of_genomic_plasticity" in $selected_outputs:
+                && mv './tmp_ppanggolin/projection/${base_name}/regions_of_genomic_plasticity.tsv' './tmp_ppanggolin/projection/${base_name}_regions_of_genomic_plasticity.tsv'
+            #end if
+
+            #if "output_sequences_partition_projection" in $selected_outputs:
+                && mv './tmp_ppanggolin/projection/${base_name}/sequences_partition_projection.tsv' './tmp_ppanggolin/projection/${base_name}_sequences_partition_projection.tsv'
+            #end if
+
+            #if "output_specific_genes" in $selected_outputs:
+                && mv './tmp_ppanggolin/projection/${base_name}/specific_genes.tsv' './tmp_ppanggolin/projection/${base_name}_specific_genes.tsv'
+            #end if
+
+        #end for
+
+        ## Global summary covering all the projected genomes.
+        && cat './tmp_ppanggolin/projection/summary_projection.tsv' > '${summary_projection}'
+
+    ]]></command>
+
+    <inputs>
+
+        <expand macro="inputs_pangenome"/>
+
+        <expand macro="inputs_genomes" />
+
+        <expand macro="inputs_identity_coverage_do_defrag"/>
+
+        <expand macro="inputs_soft_core"/>
+
+        <!-- Optional per-genome outputs. The option values are tested in the command template and in the output filters, so all three places must stay in sync. -->
+        <param name="advanced_pangenome_optional_files" type="select" label="Add the following output files in the Galaxy history" multiple="true" optional="true" display="checkboxes" >
+            <option value="output_gene_to_gene_family" selected="true">gene_to_gene_family.tsv: provide the mapping of genes to gene families of the pangenome</option>
+            <option value="output_input_genome_rgp_to_spot" selected="true">input_genome_rgp_to_spot.tsv: provide information about the association between RGPs and insertion spots in the input genome</option>
+            <option value="output_modules_in_input_genome" selected="true">modules_in_input_genome.tsv: list the modules that have been found in the input genome</option>
+            <option value="output_genome_gff" selected="true">genome.gff: generate GFF files with projected pangenome annotations for each input genome</option>
+            <option value="output_genome_proksee" selected="true">genome_proksee.json: Generate JSON map files for PROKSEE with projected pangenome annotations for each input genome ; PROKSEE (https://proksee.ca) is a user friendly interface to visualize your ppanggolin analysed genome as a circular genome plot</option>
+            <option value="output_projection_summary" selected="true">projection_summary.yaml: provide an overview of the projection in the input genome</option>
+            <option value="output_regions_of_genomic_plasticity" selected="true">regions_of_genomic_plasticity.tsv: contain information about RGPs within the input genome</option>
+            <option value="output_sequences_partition_projection" selected="true">sequences_partition_projection.tsv: map the input genes to their partition (Persistent, Shell or Cloud)</option>
+            <option value="output_specific_genes" selected="true">specific_genes.tsv: list the genes of the input genomes that do not align to any gene of the pangenome. These genes are assigned to the Cloud partition</option>
+        </param>
+
+    </inputs>
+
+    <outputs>
+
+        <!-- One list collection per kind of per-genome file. Files are looked up in ./tmp_ppanggolin/projection by name suffix, and the text before the suffix becomes the element identifier. Collections with a filter are only created when the matching option is selected. -->
+        <collection name="gene_to_gene_family" type="list" label="PPanGGOLiN projection on ${on_string}: Gene to gene family">
+            <discover_datasets pattern="(?P&lt;designation&gt;.+)_gene_to_gene_family\.tsv" format="tabular" directory="./tmp_ppanggolin/projection"/>
+            <filter>advanced_pangenome_optional_files and "output_gene_to_gene_family" in advanced_pangenome_optional_files</filter>
+        </collection>
+        <collection name="input_genome_rgp_to_spot" type="list" label="PPanGGOLiN projection on ${on_string}: Input genome rgp to spot">
+            <discover_datasets pattern="(?P&lt;designation&gt;.+)_input_genome_rgp_to_spot\.tsv" format="tabular" directory="./tmp_ppanggolin/projection"/>
+            <filter>advanced_pangenome_optional_files and "output_input_genome_rgp_to_spot" in advanced_pangenome_optional_files</filter>
+        </collection>
+        <collection name="modules_in_input_genome" type="list" label="PPanGGOLiN projection on ${on_string}: Modules in input genome">
+            <discover_datasets pattern="(?P&lt;designation&gt;.+)_modules_in_input_genome\.tsv" format="tabular" directory="./tmp_ppanggolin/projection"/>
+            <filter>advanced_pangenome_optional_files and "output_modules_in_input_genome" in advanced_pangenome_optional_files</filter>
+        </collection>
+        <collection name="genome_gff" type="list" label="PPanGGOLiN projection on ${on_string}: Genome gff">
+            <discover_datasets pattern="(?P&lt;designation&gt;.+)_genome_gff\.gff" format="gff" directory="./tmp_ppanggolin/projection"/>
+            <filter>advanced_pangenome_optional_files and "output_genome_gff" in advanced_pangenome_optional_files</filter>
+        </collection>
+        <!-- No filter: the per-genome table is always collected. -->
+        <collection name="genome_tsv" type="list" label="PPanGGOLiN projection on ${on_string}: Genome tsv">
+            <discover_datasets pattern="(?P&lt;designation&gt;.+)_genome_tsv\.tsv" format="tabular" directory="./tmp_ppanggolin/projection"/>
+        </collection>
+        <collection name="genome_proksee" type="list" label="PPanGGOLiN projection on ${on_string}: Genome proksee">
+            <discover_datasets pattern="(?P&lt;designation&gt;.+)_genome_proksee\.json" format="json" directory="./tmp_ppanggolin/projection"/>
+            <filter>advanced_pangenome_optional_files and "output_genome_proksee" in advanced_pangenome_optional_files</filter>
+        </collection>
+        <collection name="projection_summary" type="list" label="PPanGGOLiN projection on ${on_string}: Genome projection summary">
+            <discover_datasets pattern="(?P&lt;designation&gt;.+)_projection_summary\.yaml" format="yaml" directory="./tmp_ppanggolin/projection"/>
+            <filter>advanced_pangenome_optional_files and "output_projection_summary" in advanced_pangenome_optional_files</filter>
+        </collection>
+        <collection name="regions_of_genomic_plasticity" type="list" label="PPanGGOLiN projection on ${on_string}: Regions of genomic plasticity">
+            <discover_datasets pattern="(?P&lt;designation&gt;.+)_regions_of_genomic_plasticity\.tsv" format="tabular" directory="./tmp_ppanggolin/projection"/>
+            <filter>advanced_pangenome_optional_files and "output_regions_of_genomic_plasticity" in advanced_pangenome_optional_files</filter>
+        </collection>
+        <collection name="sequences_partition_projection" type="list" label="PPanGGOLiN projection on ${on_string}: Sequences partition projection">
+            <discover_datasets pattern="(?P&lt;designation&gt;.+)_sequences_partition_projection\.tsv" format="tabular" directory="./tmp_ppanggolin/projection"/>
+            <filter>advanced_pangenome_optional_files and "output_sequences_partition_projection" in advanced_pangenome_optional_files</filter>
+        </collection>
+        <collection name="specific_genes" type="list" label="PPanGGOLiN projection on ${on_string}: Specific genes">
+            <discover_datasets pattern="(?P&lt;designation&gt;.+)_specific_genes\.tsv" format="tabular" directory="./tmp_ppanggolin/projection"/>
+            <filter>advanced_pangenome_optional_files and "output_specific_genes" in advanced_pangenome_optional_files</filter>
+        </collection>
+
+        <!-- Single table summarising the projection of all the genomes; always produced. -->
+        <data name="summary_projection" format="tabular" label="PPanGGOLiN projection on ${on_string}: Global summary projection" />
+
+    </outputs>
+
+    <tests>
+        <test expect_num_outputs="11">
+            <param name="pangenome_h5" value="h5/test_data.h5" ftype="h5"/>
+            <param name="genomes" value="fasta/PROJECT1_984801_1194801.fasta.gz,fasta/PROJECT5_1290693_1440693.fasta.gz,fasta/PROJECT8_1022972_1232972.fasta.gz" ftype="fasta"/>
+            <param name="coverage" value="0.8"/>
+            <param name="identity" value="0.8"/>
+            <param name="soft_core" value="0.95"/>
+            <output_collection name="gene_to_gene_family" type="list" count="3">
+                <element name="PROJECT1_984801_1194801.fasta.gz" ftype="tabular">
+                    <assert_contents>
+                        <has_text text="CDS_0062" />
+                    </assert_contents>
+                </element>
+                <element name="PROJECT5_1290693_1440693.fasta.gz" ftype="tabular">
+                    <assert_contents>
+                        <has_text text="CDS_0093" />
+                    </assert_contents>
+                </element>
+                <element name="PROJECT8_1022972_1232972.fasta.gz" ftype="tabular">
+                    <assert_contents>
+                        <has_text text="CDS_0152" />
+                    </assert_contents>
+                </element>
+            </output_collection>
+            <output_collection name="input_genome_rgp_to_spot" type="list" count="3">
+                <element name="PROJECT1_984801_1194801.fasta.gz" ftype="tabular">
+                    <assert_contents>
+                        <has_text text="region" />
+                    </assert_contents>
+                </element>
+                <element name="PROJECT5_1290693_1440693.fasta.gz" ftype="tabular">
+                    <assert_contents>
+                        <has_text text="region" />
+                    </assert_contents>
+                </element>
+                <element name="PROJECT8_1022972_1232972.fasta.gz" ftype="tabular">
+                    <assert_contents>
+                        <has_text text="region" />
+                    </assert_contents>
+                </element>
+            </output_collection>
+            <output_collection name="modules_in_input_genome" type="list" count="3">
+                <element name="PROJECT1_984801_1194801.fasta.gz" ftype="tabular">
+                    <assert_contents>
+                        <has_text text="module_id" />
+                    </assert_contents>
+                </element>
+                <element name="PROJECT5_1290693_1440693.fasta.gz" ftype="tabular">
+                    <assert_contents>
+                        <has_text text="module_id" />
+                    </assert_contents>
+                </element>
+                <element name="PROJECT8_1022972_1232972.fasta.gz" ftype="tabular">
+                    <assert_contents>
+                        <has_text text="module_id" />
+                    </assert_contents>
+                </element>
+            </output_collection>
+            <output_collection name="genome_gff" type="list" count="3">
+                <element name="PROJECT1_984801_1194801.fasta.gz" ftype="gff">
+                    <assert_contents>
+                        <has_text text="sequence-region" />
+                    </assert_contents>
+                </element>
+                <element name="PROJECT5_1290693_1440693.fasta.gz" ftype="gff">
+                    <assert_contents>
+                        <has_text text="sequence-region" />
+                    </assert_contents>
+                </element>
+                <element name="PROJECT8_1022972_1232972.fasta.gz" ftype="gff">
+                    <assert_contents>
+                        <has_text text="sequence-region" />
+                    </assert_contents>
+                </element>
+            </output_collection>
+            <output_collection name="genome_tsv" type="list" count="3">
+                <element name="PROJECT1_984801_1194801.fasta.gz" ftype="tabular">
+                    <assert_contents>
+                        <has_text text="gene" />
+                    </assert_contents>
+                </element>
+                <element name="PROJECT5_1290693_1440693.fasta.gz" ftype="tabular">
+                    <assert_contents>
+                        <has_text text="gene" />
+                    </assert_contents>
+                </element>
+                <element name="PROJECT8_1022972_1232972.fasta.gz" ftype="tabular">
+                    <assert_contents>
+                        <has_text text="gene" />
+                    </assert_contents>
+                </element>
+            </output_collection>
+            <output_collection name="genome_proksee" type="list" count="3">
+                <element name="PROJECT1_984801_1194801.fasta.gz" ftype="json">
+                    <assert_contents>
+                        <has_text text="cgview" />
+                    </assert_contents>
+                </element>
+                <element name="PROJECT5_1290693_1440693.fasta.gz" ftype="json">
+                    <assert_contents>
+                        <has_text text="cgview" />
+                    </assert_contents>
+                </element>
+                <element name="PROJECT8_1022972_1232972.fasta.gz" ftype="json">
+                    <assert_contents>
+                        <has_text text="cgview" />
+                    </assert_contents>
+                </element>
+            </output_collection>
+            <output_collection name="projection_summary" type="list" count="3">
+                <element name="PROJECT1_984801_1194801.fasta.gz" ftype="yaml">
+                    <assert_contents>
+                        <has_text text="Projection_summary" />
+                    </assert_contents>
+                </element>
+                <element name="PROJECT5_1290693_1440693.fasta.gz" ftype="yaml">
+                    <assert_contents>
+                        <has_text text="Projection_summary" />
+                    </assert_contents>
+                </element>
+                <element name="PROJECT8_1022972_1232972.fasta.gz" ftype="yaml">
+                    <assert_contents>
+                        <has_text text="Projection_summary" />
+                    </assert_contents>
+                </element>
+            </output_collection>
+            <output_collection name="regions_of_genomic_plasticity" type="list" count="3">
+                <element name="PROJECT1_984801_1194801.fasta.gz" ftype="tabular">
+                    <assert_contents>
+                        <has_text text="region" />
+                    </assert_contents>
+                </element>
+                <element name="PROJECT5_1290693_1440693.fasta.gz" ftype="tabular">
+                    <assert_contents>
+                        <has_text text="region" />
+                    </assert_contents>
+                </element>
+                <element name="PROJECT8_1022972_1232972.fasta.gz" ftype="tabular">
+                    <assert_contents>
+                        <has_text text="region" />
+                    </assert_contents>
+                </element>
+            </output_collection>
+            <output_collection name="sequences_partition_projection" type="list" count="3">
+                <element name="PROJECT1_984801_1194801.fasta.gz" ftype="tabular">
+                    <assert_contents>
+                        <has_text text="shell" />
+                    </assert_contents>
+                </element>
+                <element name="PROJECT5_1290693_1440693.fasta.gz" ftype="tabular">
+                    <assert_contents>
+                        <has_text text="shell" />
+                    </assert_contents>
+                </element>
+                <element name="PROJECT8_1022972_1232972.fasta.gz" ftype="tabular">
+                    <assert_contents>
+                        <has_text text="shell" />
+                    </assert_contents>
+                </element>
+            </output_collection>
+            <output_collection name="specific_genes" type="list" count="3">
+                <element name="PROJECT1_984801_1194801.fasta.gz" ftype="tabular">
+                    <assert_contents>
+                        <has_text text="CDS_0112" />
+                    </assert_contents>
+                </element>
+                <element name="PROJECT5_1290693_1440693.fasta.gz" ftype="tabular">
+                    <assert_contents>
+                        <has_text text="CDS_0137" />
+                    </assert_contents>
+                </element>
+                <element name="PROJECT8_1022972_1232972.fasta.gz" ftype="tabular">
+                    <assert_contents>
+                        <has_text text="CDS_0017" />
+                    </assert_contents>
+                </element>
+            </output_collection>
+            <output name="summary_projection" >
+                <assert_contents>
+                    <has_text text="Genome_name" />
+                </assert_contents>
+            </output>
+        </test>
+        <test expect_num_outputs="11">
+            <param name="pangenome_h5" value="h5/test_data.h5" ftype="h5"/>
+            <param name="genomes" value="genbank/PROJECT1_984801_1194801.gb.gz,genbank/PROJECT5_1290693_1440693.gb.gz,genbank/PROJECT8_1022972_1232972.gb.gz" ftype="genbank"/>
+            <param name="coverage" value="0.8"/>
+            <param name="identity" value="0.8"/>
+            <param name="soft_core" value="0.95"/>
+            <output_collection name="gene_to_gene_family" type="list" count="3">
+                <element name="PROJECT1_984801_1194801.gb.gz" ftype="tabular">
+                    <assert_contents>
+                        <has_text text="TKY121527_11890" />
+                    </assert_contents>
+                </element>
+                <element name="PROJECT5_1290693_1440693.gb.gz" ftype="tabular">
+                    <assert_contents>
+                        <has_text text="OTU47_07365" />
+                    </assert_contents>
+                </element>
+                <element name="PROJECT8_1022972_1232972.gb.gz" ftype="tabular">
+                    <assert_contents>
+                        <has_text text="ODS73_05675" />
+                    </assert_contents>
+                </element>
+            </output_collection>
+            <output_collection name="input_genome_rgp_to_spot" type="list" count="3">
+                <element name="PROJECT1_984801_1194801.gb.gz" ftype="tabular">
+                    <assert_contents>
+                        <has_text text="region" />
+                    </assert_contents>
+                </element>
+                <element name="PROJECT5_1290693_1440693.gb.gz" ftype="tabular">
+                    <assert_contents>
+                        <has_text text="region" />
+                    </assert_contents>
+                </element>
+                <element name="PROJECT8_1022972_1232972.gb.gz" ftype="tabular">
+                    <assert_contents>
+                        <has_text text="region" />
+                    </assert_contents>
+                </element>
+            </output_collection>
+            <output_collection name="modules_in_input_genome" type="list" count="3">
+                <element name="PROJECT1_984801_1194801.gb.gz" ftype="tabular">
+                    <assert_contents>
+                        <has_text text="module_id" />
+                    </assert_contents>
+                </element>
+                <element name="PROJECT5_1290693_1440693.gb.gz" ftype="tabular">
+                    <assert_contents>
+                        <has_text text="module_id" />
+                    </assert_contents>
+                </element>
+                <element name="PROJECT8_1022972_1232972.gb.gz" ftype="tabular">
+                    <assert_contents>
+                        <has_text text="module_id" />
+                    </assert_contents>
+                </element>
+            </output_collection>
+            <output_collection name="genome_gff" type="list" count="3">
+                <element name="PROJECT1_984801_1194801.gb.gz" ftype="gff">
+                    <assert_contents>
+                        <has_text text="sequence-region" />
+                    </assert_contents>
+                </element>
+                <element name="PROJECT5_1290693_1440693.gb.gz" ftype="gff">
+                    <assert_contents>
+                        <has_text text="sequence-region" />
+                    </assert_contents>
+                </element>
+                <element name="PROJECT8_1022972_1232972.gb.gz" ftype="gff">
+                    <assert_contents>
+                        <has_text text="sequence-region" />
+                    </assert_contents>
+                </element>
+            </output_collection>
+            <output_collection name="genome_tsv" type="list" count="3">
+                <element name="PROJECT1_984801_1194801.gb.gz" ftype="tabular">
+                    <assert_contents>
+                        <has_text text="gene" />
+                    </assert_contents>
+                </element>
+                <element name="PROJECT5_1290693_1440693.gb.gz" ftype="tabular">
+                    <assert_contents>
+                        <has_text text="gene" />
+                    </assert_contents>
+                </element>
+                <element name="PROJECT8_1022972_1232972.gb.gz" ftype="tabular">
+                    <assert_contents>
+                        <has_text text="gene" />
+                    </assert_contents>
+                </element>
+            </output_collection>
+            <output_collection name="genome_proksee" type="list" count="3">
+                <element name="PROJECT1_984801_1194801.gb.gz" ftype="json">
+                    <assert_contents>
+                        <has_text text="cgview" />
+                    </assert_contents>
+                </element>
+                <element name="PROJECT5_1290693_1440693.gb.gz" ftype="json">
+                    <assert_contents>
+                        <has_text text="cgview" />
+                    </assert_contents>
+                </element>
+                <element name="PROJECT8_1022972_1232972.gb.gz" ftype="json">
+                    <assert_contents>
+                        <has_text text="cgview" />
+                    </assert_contents>
+                </element>
+            </output_collection>
+            <!-- projection_summary: the collection must be a list of exactly three elements, one
+                 for each PROJECT*.gb.gz identifier listed below. Every element is checked to be a
+                 yaml dataset whose content includes the text "Projection_summary". -->
+            <output_collection name="projection_summary" type="list" count="3">
+                <element name="PROJECT1_984801_1194801.gb.gz" ftype="yaml">
+                    <assert_contents>
+                        <has_text text="Projection_summary" />
+                    </assert_contents>
+                </element>
+                <element name="PROJECT5_1290693_1440693.gb.gz" ftype="yaml">
+                    <assert_contents>
+                        <has_text text="Projection_summary" />
+                    </assert_contents>
+                </element>
+                <element name="PROJECT8_1022972_1232972.gb.gz" ftype="yaml">
+                    <assert_contents>
+                        <has_text text="Projection_summary" />
+                    </assert_contents>
+                </element>
+            </output_collection>
+            <!-- regions_of_genomic_plasticity: the collection must be a list of exactly three
+                 elements, one for each PROJECT*.gb.gz identifier listed below. Every element is
+                 checked to be a tabular dataset whose content includes the text "region". -->
+            <output_collection name="regions_of_genomic_plasticity" type="list" count="3">
+                <element name="PROJECT1_984801_1194801.gb.gz" ftype="tabular">
+                    <assert_contents>
+                        <has_text text="region" />
+                    </assert_contents>
+                </element>
+                <element name="PROJECT5_1290693_1440693.gb.gz" ftype="tabular">
+                    <assert_contents>
+                        <has_text text="region" />
+                    </assert_contents>
+                </element>
+                <element name="PROJECT8_1022972_1232972.gb.gz" ftype="tabular">
+                    <assert_contents>
+                        <has_text text="region" />
+                    </assert_contents>
+                </element>
+            </output_collection>
+            <!-- sequences_partition_projection: the collection must be a list of exactly three
+                 elements, one for each PROJECT*.gb.gz identifier listed below. Every element is
+                 checked to be a tabular dataset whose content includes the text "shell". -->
+            <output_collection name="sequences_partition_projection" type="list" count="3">
+                <element name="PROJECT1_984801_1194801.gb.gz" ftype="tabular">
+                    <assert_contents>
+                        <has_text text="shell" />
+                    </assert_contents>
+                </element>
+                <element name="PROJECT5_1290693_1440693.gb.gz" ftype="tabular">
+                    <assert_contents>
+                        <has_text text="shell" />
+                    </assert_contents>
+                </element>
+                <element name="PROJECT8_1022972_1232972.gb.gz" ftype="tabular">
+                    <assert_contents>
+                        <has_text text="shell" />
+                    </assert_contents>
+                </element>
+            </output_collection>
+            <!-- summary_projection: single (non-collection) output; only its content is checked,
+                 which must include the text "Genome_name". No ftype is asserted here.
+                 NOTE(review): the sibling collections all assert an ftype; consider adding one
+                 once the declared format of this output is confirmed. -->
+            <output name="summary_projection" >
+                <assert_contents>
+                    <has_text text="Genome_name" />
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+
+    <help><![CDATA[
+
+        PPanGGOLiN_ (Gautreau et al. 2020) is a software suite used to create and manipulate prokaryotic pangenomes from a set of either assembled
+        genomic DNA sequences or provided genome annotations. PPanGGOLiN builds pangenomes through a graphical model and a statistical method to partition gene
+        families in persistent, shell and cloud genomes. It integrates both information on protein-coding genes and their genomic neighborhood to build a graph
+        of gene families where each node is a gene family, and each edge is a relation of genetic contiguity.
+
+        The ``ppanggolin projection`` command annotates external genomes using an existing pangenome. This process eliminates the need to recompute all components, streamlining the annotation process. Input genomes are expected to belong to the same species. See the documentation_ for more details.
+
+        Genes within the input genome are aligned with genes in the pangenome to determine their gene families and partitions. Genes that do not align with any existing gene in the pangenome are considered specific to the input genome and are assigned to the “Cloud” partition. The number of these genome-specific cloud families is reported in the summary table.
+
+        Based on the alignment and partition assignment, Regions of Genomic Plasticity (RGPs) within the input genome are predicted. Each RGP that is not located on a contig border is assigned to a spot of insertion. Finally, conserved modules of the pangenome found in the input genome are reported in the output files.
+
+        .. _PPanGGOLiN: https://github.com/labgem/PPanGGOLiN
+        .. _documentation: https://ppanggolin.readthedocs.io/en/latest/user/projection.html#projection
+
+    ]]></help>
+
+    <expand macro="citation"/>
+
+</tool>
+
+
Binary file test-data/fasta/AP028611_984801_1194801.fasta.gz has changed
Binary file test-data/fasta/CP107038_1022972_1232972.fasta.gz has changed
Binary file test-data/fasta/CP113115_1290693_1440693.fasta.gz has changed
Binary file test-data/fasta/LN831051_1254175_1464175.fasta.gz has changed
Binary file test-data/fasta/NC_012467_959209_1169209.fasta.gz has changed
Binary file test-data/fasta/PROJECT1_984801_1194801.fasta.gz has changed
Binary file test-data/fasta/PROJECT5_1290693_1440693.fasta.gz has changed
Binary file test-data/fasta/PROJECT8_1022972_1232972.fasta.gz has changed
Binary file test-data/genbank/AP028611_984801_1194801.gb.gz has changed
Binary file test-data/genbank/CP107038_1022972_1232972.gb.gz has changed
Binary file test-data/genbank/CP113115_1290693_1440693.gb.gz has changed
Binary file test-data/genbank/LN831051_1254175_1464175.gb.gz has changed
Binary file test-data/genbank/NC_012467_959209_1169209.gb.gz has changed
Binary file test-data/genbank/PROJECT1_984801_1194801.gb.gz has changed
Binary file test-data/genbank/PROJECT5_1290693_1440693.gb.gz has changed
Binary file test-data/genbank/PROJECT8_1022972_1232972.gb.gz has changed
Binary file test-data/h5/test_data.h5 has changed