changeset 5:d2b0073ef8d6 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ppanggolin commit e21d81020b381293e58e72ad54b782ce4a30ab56
author iuc
date Thu, 14 Aug 2025 17:09:17 +0000
parents 79f2d2f0ca2c
children
files macros.xml ppanggolin_all.xml test-data/fasta/PROJECT1_984801_1194801.fasta.gz test-data/fasta/PROJECT5_1290693_1440693.fasta.gz test-data/fasta/PROJECT8_1022972_1232972.fasta.gz test-data/genbank/PROJECT1_984801_1194801.gb.gz test-data/genbank/PROJECT5_1290693_1440693.gb.gz test-data/genbank/PROJECT8_1022972_1232972.gb.gz
diffstat 8 files changed, 106 insertions(+), 68 deletions(-) [+]
line wrap: on
line diff
--- a/macros.xml	Mon Jul 14 07:48:53 2025 +0000
+++ b/macros.xml	Thu Aug 14 17:09:17 2025 +0000
@@ -1,6 +1,6 @@
 <macros>
     <token name="@TOOL_VERSION@">2.2.4</token>
-    <token name="@VERSION_SUFFIX@">0</token>
+    <token name="@VERSION_SUFFIX@">1</token>
     <xml name="citation">
         <citations>
             <citation type="doi">10.1371/journal.pcbi.1007732</citation> 
@@ -17,4 +17,70 @@
             <xref type="bio.tools">ppanggolin</xref>
         </xrefs>
     </xml>
+    <token name="@ORGANISM_LIST@"><![CDATA[
+        touch ./tmp_ppanggolin/organism_list/organism.list &&
+        #set extension_input_files = ""
+        #for $counter_input_files, $file in enumerate($genomes):
+            #if $counter_input_files == 0:
+                #set extension_input_files = $file.ext
+            #else:
+                #if $file.ext != $extension_input_files:
+                    #raise Exception("All the genome files must be of the same datatype, either all genbank files or all fasta files.")
+                #end if
+            #end if
+            ## One "name<TAB>path" line per genome; characters other than word characters, '-' and '.' in the element identifier become '_'. Uses re, which the including command must import.
+            #set base_name = re.sub('[^\w\-_\.]', '_', str($file.element_identifier))
+            echo -e '${base_name}\t${file}' >> ./tmp_ppanggolin/organism_list/organism.list &&
+        #end for
+    ]]></token>
+    <xml name="inputs_identity_coverage_do_defrag">
+        <param argument="--identity" type="float" value="0.8" min="0" max="1" label="Minimum alignment identity"/>
+        <param argument="--coverage" type="float" value="0.8" min="0" max="1" label="Minimum alignment coverage"/>
+        <param name="do_defrag" type="boolean" checked="true" label="Gene family defragmentation" truevalue="" falsevalue="--no_defrag" help="Realign gene families to link fragments with their non-fragmented gene family. (--no_defrag)"/>
+    </xml>
+    <xml name="inputs_nb_of_partitions">
+        <param argument="--nb_of_partitions" type="integer" max="20" label="Number of classes used to partition the pangenome" optional="true" help="If left empty (default), the number of partitions is determined automatically. Otherwise, the value must be between 2 and 20; a value of 3 is advised. See the documentation link in the help section for more details.">
+            <validator type="expression" message="Value needs to be empty or an integer between 2 and 20">value == "" or int(value) >= 2</validator>
+        </param>
+    </xml>
+    <xml name="inputs_translation_table">
+        <param argument="--translation_table" type="select" label="Translation table">
+            <option value="1" selected="true">1 - Standard Code</option>
+            <option value="2">2 - Vertebrate Mitochondrial</option>
+            <option value="3">3 - Yeast Mitochondrial</option>
+            <option value="4">4 - Mold, Protozoan, and Coelenterate Mitochondrial</option>
+            <option value="5">5 - Invertebrate Mitochondrial</option>
+            <option value="6">6 - Ciliate Nuclear</option>
+            <option value="9">9 - Echinoderm Mitochondrial</option>
+            <option value="10">10 - Euplotid Nuclear</option>
+            <option value="11">11 - Bacterial and Plant Plastid</option>
+            <option value="12">12 - Alternative Yeast Nuclear</option>
+            <option value="13">13 - Ascidian Mitochondrial</option>
+            <option value="14">14 - Flatworm Mitochondrial</option>
+            <option value="15">15 - Blepharisma Nuclear</option>
+            <option value="16">16 - Chlorophycean Mitochondrial</option>
+            <option value="21">21 - Trematode Mitochondrial</option>
+            <option value="22">22 - Scenedesmus obliquus Mitochondrial</option>
+            <option value="23">23 - Thraustochytrium Mitochondrial</option>
+            <option value="24">24 - Pterobranchia Mitochondrial</option>
+            <option value="25">25 - Candidate Division SR1 and Gracilibacteria</option>
+            <option value="26">26 - Pachysolen tannophilus Nuclear</option>
+            <option value="27">27 - Karyorelict Nuclear</option>
+            <option value="28">28 - Condylostoma Nuclear</option>
+            <option value="29">29 - Mesodinium Nuclear</option>
+            <option value="30">30 - Peritrich Nuclear</option>
+            <option value="31">31 - Blastocrithidia Nuclear</option>
+            <option value="32">32 - Balanophoraceae Plastid</option>
+            <option value="33">33 - Cephalodiscidae Mitochondrial</option>
+        </param>
+    </xml>
+    <xml name="inputs_pangenome">
+        <param argument="--pangenome" name="pangenome_h5" type="data" format="h5" label="Input pangenome h5 file"/>
+    </xml>
+    <xml name="inputs_soft_core">
+        <param argument="--soft_core" type="float" value="0.95" min="0" max="1" label="Soft core threshold used when generating general statistics on the projected genome" help="Default: 0.95. This threshold does not influence PPanGGOLiN's partitioning. The value determines the minimum fraction of genomes that must possess a gene family for it to be considered part of the soft core."/>
+    </xml>
+    <xml name="inputs_genomes" token_min="1" token_extratexthelp="">
+        <param name="genomes" type="data" multiple="true"  min="@MIN@" label="Genome files" format="fasta,genbank" help="All the genome files must be of the same format, either all genbank files or all fasta files. A minimum of @MIN@ genome file(s) is mandatory.@EXTRATEXTHELP@ Special characters and spaces in genome names are replaced by underscores."/>
+    </xml>
 </macros>
--- a/ppanggolin_all.xml	Mon Jul 14 07:48:53 2025 +0000
+++ b/ppanggolin_all.xml	Thu Aug 14 17:09:17 2025 +0000
@@ -7,27 +7,12 @@
     <expand macro="requirements"/>
 
     <command detect_errors="exit_code"><![CDATA[
-        
-        mkdir -p "./tmp_ppanggolin/all" &&
-        mkdir -p "./tmp_ppanggolin/organism_list" &&
-        mkdir -p "./tmp_ppanggolin/ln_input_genomes" &&
-        
-        touch "./tmp_ppanggolin/organism_list/organism.list" &&
+        #import re
         
-        #set extension_input_files = ""
-        #for $counter_input_files, $file in enumerate($genomes):
-            #if $counter_input_files == 0:
-            	#set extension_input_files = $file.ext
-            #else:
-                #if $file.ext != $extension_input_files:
-                    #raise Exception("All the genome files must be of the same datatype, either all genbank files or all fasta files.")
-            	#end if
-            #end if
-            
-            #set base_name = str($file.element_identifier).replace(" ", "_")
-            echo -e '${base_name}\t${file}' >> "./tmp_ppanggolin/organism_list/organism.list" &&
-            
-        #end for
+        mkdir -p ./tmp_ppanggolin/all &&
+        mkdir -p ./tmp_ppanggolin/organism_list &&
+        
+        @ORGANISM_LIST@
 
         ppanggolin all
 
@@ -36,11 +21,12 @@
         #elif $extension_input_files == "genbank":
             --anno
         #end if
-        "./tmp_ppanggolin/organism_list/organism.list"
+        ./tmp_ppanggolin/organism_list/organism.list
         
-        -o ./tmp_ppanggolin/all
+        --output ./tmp_ppanggolin/all
         --force
         --cpu "\${GALAXY_SLOTS:-4}"
+        --disable_prog_bar
         
         --coverage $coverage
         --identity $identity
@@ -53,6 +39,11 @@
         
         $do_defrag
         
+        
+        && ppanggolin info
+        --pangenome ./tmp_ppanggolin/all/pangenome.h5
+        > ./tmp_ppanggolin/all/ppanggolin_info.txt
+        
 
         #if "output_functional_modules" in $advanced_pangenome_optional_files:
 		&& cat ./tmp_ppanggolin/all/functional_modules.tsv > '${functional_modules}'
@@ -85,7 +76,7 @@
         #if "output_tile_plot" in $advanced_pangenome_optional_files:
         	&& cat ./tmp_ppanggolin/all/tile_plot.html > '${tile_plot}'
         #end if
-        #if "output_ushaped_plot" in $advanced_pangenome_optional_files:
+        #if "output_Ushaped_plot" in $advanced_pangenome_optional_files:
         	&& cat ./tmp_ppanggolin/all/Ushaped_plot.html > '${Ushaped_plot}'
         #end if
         
@@ -112,7 +103,7 @@
 		&& cat ./tmp_ppanggolin/all/gene_presence_absence.Rtab > '${gene_presence_absence}'
         #end if
         
-        
+        && cat ./tmp_ppanggolin/all/ppanggolin_info.txt > '${ppanggolin_info}'
         && cat ./tmp_ppanggolin/all/regions_of_genomic_plasticity.tsv > '${regions_of_genomic_plasticity}'
         && cat ./tmp_ppanggolin/all/pangenome.h5 > '${pangenome_h5}'
         && cat ./tmp_ppanggolin/all/genomes_statistics.tsv > '${genomes_statistics}'
@@ -121,49 +112,15 @@
     ]]></command>
 
     <inputs>
-    
-        <param name="genomes" type="data" multiple="true"  min="2" label="Select genome files" format="fasta,genbank" help="All the genome files must be of similar format, either all genbank files or all fasta files. Processing of at least 15 genomes files is recommended, a minimum of 2 genomes files is mandatory. Space is not allowed in filename.">
-        </param>
         
-        <param argument="--identity" type="float" value="0.8" min="0" max="1" label="Minimum alignment identity"/>
-        <param argument="--coverage" type="float" value="0.8" min="0" max="1" label="Minimum alignment coverage"/>
+        <expand macro="inputs_genomes" min="2" extratexthelp=" Processing of at least 15 genome files is recommended." />
+        
+        <expand macro="inputs_identity_coverage_do_defrag"/>
         
-        <param argument="--nb_of_partitions" type="integer" max="20" label="Number of classes used to partition the pangenome" optional="true" help="If empty value (default), it will be automatically determined. Otherwise, the value needs to be between 2 and 20 and it is advised to set a value of 3. See the link for the documentation in the help section for more details.">
-            <validator type="expression" message="Value needs to be empty or an integer between 2 and 20">value == "" or int(value) >= 2</validator>
-        </param>
-         
-        <param argument="--translation_table" type="select" label="Translation table">
-            <option value="1">1 - Standard Code</option>
-            <option value="2">2 - Vertebrate Mitochondrial</option>
-            <option value="3">3 - Yeast Mitochondrial</option>
-            <option value="4">4 - Mold, Protozoan, and Coelenterate Mitochondrial</option>
-            <option value="5">5 - Invertebrate Mitochondrial</option>
-            <option value="6">6 - Ciliate Nuclear</option>
-            <option value="9">9 - Echinoderm Mitochondrial</option>
-            <option value="10">10 - Euplotid Nuclear</option>
-            <option value="11">11 - Bacterial and Plant Plastid</option>
-            <option value="12">12 - Alternative Yeast Nuclear</option>
-            <option value="13">13 - Ascidian Mitochondrial</option>
-            <option value="14">14 - Flatworm Mitochondrial</option>
-            <option value="15">15 - Blepharisma Nuclear</option>
-            <option value="16">16 - Chlorophycean Mitochondrial</option>
-            <option value="21">21 - Trematode Mitochondrial</option>
-            <option value="22">22 - Scenedesmus obliquus Mitochondrial</option>
-            <option value="23">23 - Thraustochytrium Mitochondrial</option>
-            <option value="24">24 - Pterobranchia Mitochondrial</option>
-            <option value="25">25 - Candidate Division SR1 and Gracilibacteria</option>
-            <option value="26">26 - Pachysolen tannophilus Nuclear</option>
-            <option value="27">27 - Karyorelict Nuclear</option>
-            <option value="28">28 - Condylostoma Nuclear</option>
-            <option value="29">29 - Mesodinium Nuclear</option>
-            <option value="30">30 - Peritrich Nuclear</option>
-            <option value="31">31 - Blastocrithidia Nuclear</option>
-            <option value="32">32 - Balanophoraceae Plastid</option>
-            <option value="33">33 - Cephalodiscidae Mitochondrial</option>
-        </param>
-        <param name="do_defrag" type="boolean" checked="true" label="Gene family defragmentation" truevalue="" falsevalue="--no_defrag"/>
+        <expand macro="inputs_nb_of_partitions"/>
         
-
+        <expand macro="inputs_translation_table"/>
+        
         <param name="advanced_pangenome_optional_files" type="select" label="Add the following pangenome output files in the Galaxy history" multiple="true" optional="true" display="checkboxes" >
         	<!-- Basic files -->
             <option value="output_gene_presence_absence" selected="true">Gene presence absence</option>
@@ -252,7 +209,7 @@
           <filter>advanced_pangenome_optional_files and "output_gene_presence_absence" in advanced_pangenome_optional_files</filter>
         </data>
         
-        
+        <data name="ppanggolin_info" format="txt" label="PPanGGOLiN all on ${on_string}: PPanGGOLiN info" />
         <data name="regions_of_genomic_plasticity" format="tsv" label="PPanGGOLiN all on ${on_string}: Regions of genomic plasticity" />
         <data name="pangenome_h5" format="h5" label="PPanGGOLiN all on ${on_string}: PanGenome HDF5 file" />
         <data name="genomes_statistics" format="tsv" label="PPanGGOLiN all on ${on_string}: Genome statistics" />
@@ -260,7 +217,7 @@
     </outputs>
 
     <tests>
-        <test expect_num_outputs="21">
+        <test expect_num_outputs="22">
             <param name="nb_of_partitions" value="3"/>
             <param name="coverage" value="0.8"/>
             <param name="identity" value="0.8"/>
@@ -277,8 +234,18 @@
                     <has_text text="region" />
                 </assert_contents>
             </output>
+            <output name="ppanggolin_info" >
+                <assert_contents>
+                    <has_text text="Content:" />
+                </assert_contents>
+            </output>
+            <output name="Ushaped_plot" >
+                <assert_contents>
+                    <has_text text="html" />
+                </assert_contents>
+            </output>
         </test>
-        <test expect_num_outputs="21">
+        <test expect_num_outputs="22">
             <param name="nb_of_partitions" value="3"/>
             <param name="coverage" value="0.8"/>
             <param name="identity" value="0.8"/>
@@ -295,6 +262,11 @@
                     <has_text text="region" />
                 </assert_contents>
             </output>
+            <output name="ppanggolin_info" >
+                <assert_contents>
+                    <has_text text="Content:" />
+                </assert_contents>
+            </output>
         </test>
         <test expect_failure="true">
             <param name="nb_of_partitions" value="3"/>
Binary file test-data/fasta/PROJECT1_984801_1194801.fasta.gz has changed
Binary file test-data/fasta/PROJECT5_1290693_1440693.fasta.gz has changed
Binary file test-data/fasta/PROJECT8_1022972_1232972.fasta.gz has changed
Binary file test-data/genbank/PROJECT1_984801_1194801.gb.gz has changed
Binary file test-data/genbank/PROJECT5_1290693_1440693.gb.gz has changed
Binary file test-data/genbank/PROJECT8_1022972_1232972.gb.gz has changed