Repository 'ppanggolin_all'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/ppanggolin_all

Changeset 5:d2b0073ef8d6 (2025-08-14)
Previous changeset 4:79f2d2f0ca2c (2025-07-14) Next changeset 6:ce6377ab5aca (2025-09-16)
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ppanggolin commit e21d81020b381293e58e72ad54b782ce4a30ab56
modified:
macros.xml
ppanggolin_all.xml
added:
test-data/fasta/PROJECT1_984801_1194801.fasta.gz
test-data/fasta/PROJECT5_1290693_1440693.fasta.gz
test-data/fasta/PROJECT8_1022972_1232972.fasta.gz
test-data/genbank/PROJECT1_984801_1194801.gb.gz
test-data/genbank/PROJECT5_1290693_1440693.gb.gz
test-data/genbank/PROJECT8_1022972_1232972.gb.gz
b
diff -r 79f2d2f0ca2c -r d2b0073ef8d6 macros.xml
--- a/macros.xml Mon Jul 14 07:48:53 2025 +0000
+++ b/macros.xml Thu Aug 14 17:09:17 2025 +0000
[
@@ -1,6 +1,6 @@
 <macros>
     <token name="@TOOL_VERSION@">2.2.4</token>
-    <token name="@VERSION_SUFFIX@">0</token>
+    <token name="@VERSION_SUFFIX@">1</token>
     <xml name="citation">
         <citations>
             <citation type="doi">10.1371/journal.pcbi.1007732</citation> 
@@ -17,4 +17,70 @@
             <xref type="bio.tools">ppanggolin</xref>
         </xrefs>
     </xml>
+    <token name="@ORGANISM_LIST@"><![CDATA[
+        touch ./tmp_ppanggolin/organism_list/organism.list &&
+        #set extension_input_files = ""
+        #for $counter_input_files, $file in enumerate($genomes):
+            #if $counter_input_files == 0:
+                #set extension_input_files = $file.ext
+            #else:
+                #if $file.ext != $extension_input_files:
+                    #raise Exception("All the genome files must be of the same datatype, either all genbank files or all fasta files.")
+                #end if
+            #end if
+            ## Organism name: the element identifier with every character other than word characters, "-", "_" and "." replaced by "_".
+            #set base_name = re.sub('[^\w\-_\.]', '_', str($file.element_identifier))
+            printf '%s\t%s\n' '${base_name}' '${file}' >> ./tmp_ppanggolin/organism_list/organism.list &&
+        #end for
+    ]]></token> <!-- The expanding command must "#import re" and create ./tmp_ppanggolin/organism_list beforehand; $extension_input_files stays set afterwards. -->
+    <xml name="inputs_identity_coverage_do_defrag"> <!-- Two alignment thresholds (fractions in [0, 1]) plus the defragmentation switch. -->
+        <param argument="--identity" label="Minimum alignment identity" type="float" min="0" max="1" value="0.8"/>
+        <param argument="--coverage" label="Minimum alignment coverage" type="float" min="0" max="1" value="0.8"/>
+        <param name="do_defrag" label="Gene family defragmentation" type="boolean" checked="true" truevalue="" falsevalue="--no_defrag" help="Realign gene families to link fragments with their non-fragmented gene family. (--no_defrag)"/>
+    </xml>
+    <xml name="inputs_nb_of_partitions"> <!-- Optional partition count: max="20" caps the value, the expression validator rejects values below 2. -->
+        <param argument="--nb_of_partitions" label="Number of classes used to partition the pangenome" type="integer" optional="true" max="20" help="If empty value (default), it will be automatically determined. Otherwise, the value needs to be between 2 and 20 and it is advised to set a value of 3. See the link for the documentation in the help section for more details.">
+            <validator message="Value needs to be empty or an integer between 2 and 20" type="expression">value == "" or int(value) >= 2</validator>
+        </param>
+    </xml>
+    <xml name="inputs_translation_table"> <!-- Select for the translation_table argument: each option value is a numeric table id, table 1 is preselected; ids 7, 8 and 17 to 20 are not offered. -->
+        <param argument="--translation_table" type="select" label="Translation table">
+            <option value="1" selected="true">1 - Standard Code</option>
+            <option value="2">2 - Vertebrate Mitochondrial</option>
+            <option value="3">3 - Yeast Mitochondrial</option>
+            <option value="4">4 - Mold, Protozoan, and Coelenterate Mitochondrial</option>
+            <option value="5">5 - Invertebrate Mitochondrial</option>
+            <option value="6">6 - Ciliate Nuclear</option>
+            <option value="9">9 - Echinoderm Mitochondrial</option>
+            <option value="10">10 - Euplotid Nuclear</option>
+            <option value="11">11 - Bacterial and Plant Plastid</option>
+            <option value="12">12 - Alternative Yeast Nuclear</option>
+            <option value="13">13 - Ascidian Mitochondrial</option>
+            <option value="14">14 - Flatworm Mitochondrial</option>
+            <option value="15">15 - Blepharisma Nuclear</option>
+            <option value="16">16 - Chlorophycean Mitochondrial</option>
+            <option value="21">21 - Trematode Mitochondrial</option>
+            <option value="22">22 - Scenedesmus obliquus Mitochondrial</option>
+            <option value="23">23 - Thraustochytrium Mitochondrial</option>
+            <option value="24">24 - Pterobranchia Mitochondrial</option>
+            <option value="25">25 - Candidate Division SR1 and Gracilibacteria</option>
+            <option value="26">26 - Pachysolen tannophilus Nuclear</option>
+            <option value="27">27 - Karyorelict Nuclear</option>
+            <option value="28">28 - Condylostoma Nuclear</option>
+            <option value="29">29 - Mesodinium Nuclear</option>
+            <option value="30">30 - Peritrich Nuclear</option>
+            <option value="31">31 - Blastocrithidia Nuclear</option>
+            <option value="32">32 - Balanophoraceae Plastid</option>
+            <option value="33">33 - Cephalodiscidae Mitochondrial</option>
+        </param>
+    </xml>
+    <xml name="inputs_pangenome"> <!-- Single h5 dataset input; the parameter is named pangenome_h5. -->
+        <param name="pangenome_h5" argument="--pangenome" label="Input pangenome h5 file" type="data" format="h5"/>
+    </xml>
+    <xml name="inputs_soft_core"> <!-- Soft core threshold as a fraction in [0, 1]; defaults to 0.95. -->
+        <param argument="--soft_core" type="float" value="0.95" min="0" max="1" label="Soft core threshold used when generating general statistics on the projected genome" help="Default=0.95. This threshold does not influence PPanGGOLiN's partitioning. The value determines the minimum fraction of genomes that must possess a gene family for it to be considered part of the soft core."/>
+    </xml>
+    <xml name="inputs_genomes" token_extratexthelp="" token_min="1"> <!-- @MIN@ and @EXTRATEXTHELP@ come from the min / extratexthelp attributes of the expand call; defaults are 1 and empty. -->
+        <param name="genomes" label="Genome files" type="data" format="fasta,genbank" multiple="true" min="@MIN@" help="All the genome files must be of the same format, either all genbank files or all fasta files. A minimum of @MIN@ genome file(s) is mandatory.@EXTRATEXTHELP@ Special characters and spaces are replaced by underscore."/>
+    </xml>
 </macros>
b
diff -r 79f2d2f0ca2c -r d2b0073ef8d6 ppanggolin_all.xml
--- a/ppanggolin_all.xml Mon Jul 14 07:48:53 2025 +0000
+++ b/ppanggolin_all.xml Thu Aug 14 17:09:17 2025 +0000
[
b'@@ -7,27 +7,12 @@\n     <expand macro="requirements"/>\n \n     <command detect_errors="exit_code"><![CDATA[\n-        \n-        mkdir -p "./tmp_ppanggolin/all" &&\n-        mkdir -p "./tmp_ppanggolin/organism_list" &&\n-        mkdir -p "./tmp_ppanggolin/ln_input_genomes" &&\n-        \n-        touch "./tmp_ppanggolin/organism_list/organism.list" &&\n+        #import re\n         \n-        #set extension_input_files = ""\n-        #for $counter_input_files, $file in enumerate($genomes):\n-            #if $counter_input_files == 0:\n-            \t#set extension_input_files = $file.ext\n-            #else:\n-                #if $file.ext != $extension_input_files:\n-                    #raise Exception("All the genome files must be of the same datatype, either all genbank files or all fasta files.")\n-            \t#end if\n-            #end if\n-            \n-            #set base_name = str($file.element_identifier).replace(" ", "_")\n-            echo -e \'${base_name}\\t${file}\' >> "./tmp_ppanggolin/organism_list/organism.list" &&\n-            \n-        #end for\n+        mkdir -p ./tmp_ppanggolin/all &&\n+        mkdir -p ./tmp_ppanggolin/organism_list &&\n+        \n+        @ORGANISM_LIST@\n \n         ppanggolin all\n \n@@ -36,11 +21,12 @@\n         #elif $extension_input_files == "genbank":\n             --anno\n         #end if\n-        "./tmp_ppanggolin/organism_list/organism.list"\n+        ./tmp_ppanggolin/organism_list/organism.list\n         \n-        -o ./tmp_ppanggolin/all\n+        --output ./tmp_ppanggolin/all\n         --force\n         --cpu "\\${GALAXY_SLOTS:-4}"\n+        --disable_prog_bar\n         \n         --coverage $coverage\n         --identity $identity\n@@ -53,6 +39,11 @@\n         \n         $do_defrag\n         \n+        \n+        && ppanggolin info\n+        --pangenome ./tmp_ppanggolin/all/pangenome.h5\n+        > ./tmp_ppanggolin/all/ppanggolin_info.txt\n+        \n \n         #if "output_functional_modules" in 
$advanced_pangenome_optional_files:\n \t\t&& cat ./tmp_ppanggolin/all/functional_modules.tsv > \'${functional_modules}\'\n@@ -85,7 +76,7 @@\n         #if "output_tile_plot" in $advanced_pangenome_optional_files:\n         \t&& cat ./tmp_ppanggolin/all/tile_plot.html > \'${tile_plot}\'\n         #end if\n-        #if "output_ushaped_plot" in $advanced_pangenome_optional_files:\n+        #if "output_Ushaped_plot" in $advanced_pangenome_optional_files:\n         \t&& cat ./tmp_ppanggolin/all/Ushaped_plot.html > \'${Ushaped_plot}\'\n         #end if\n         \n@@ -112,7 +103,7 @@\n \t\t&& cat ./tmp_ppanggolin/all/gene_presence_absence.Rtab > \'${gene_presence_absence}\'\n         #end if\n         \n-        \n+        && cat ./tmp_ppanggolin/all/ppanggolin_info.txt > \'${ppanggolin_info}\'\n         && cat ./tmp_ppanggolin/all/regions_of_genomic_plasticity.tsv > \'${regions_of_genomic_plasticity}\'\n         && cat ./tmp_ppanggolin/all/pangenome.h5 > \'${pangenome_h5}\'\n         && cat ./tmp_ppanggolin/all/genomes_statistics.tsv > \'${genomes_statistics}\'\n@@ -121,49 +112,15 @@\n     ]]></command>\n \n     <inputs>\n-    \n-        <param name="genomes" type="data" multiple="true"  min="2" label="Select genome files" format="fasta,genbank" help="All the genome files must be of similar format, either all genbank files or all fasta files. Processing of at least 15 genomes files is recommended, a minimum of 2 genomes files is mandatory. Space is not allowed in filename.">\n-        </param>\n         \n-        <param argument="--identity" type="float" value="0.8" min="0" max="1" label="Minimum alignment identity"/>\n-        <param argument="--coverage" type="float" value="0.8" min="0" max="1" label="Minimum alignment coverage"/>\n+        <expand macro="inputs_genomes" min="2" extratexthelp=" Processing of at least 15 genomes files is recommended." 
/>\n+        \n+        <expand macro="inputs_identity_coverage_do_defrag"/>\n         \n-        <param argument="--nb_of_partitions" type="integer" max="20" label="Number of classes used to partition the pangenome" optional="true" help="If emp'..b've Yeast Nuclear</option>\n-            <option value="13">13 - Ascidian Mitochondrial</option>\n-            <option value="14">14 - Flatworm Mitochondrial</option>\n-            <option value="15">15 - Blepharisma Nuclear</option>\n-            <option value="16">16 - Chlorophycean Mitochondrial</option>\n-            <option value="21">21 - Trematode Mitochondrial</option>\n-            <option value="22">22 - Scenedesmus obliquus Mitochondrial</option>\n-            <option value="23">23 - Thraustochytrium Mitochondrial</option>\n-            <option value="24">24 - Pterobranchia Mitochondrial</option>\n-            <option value="25">25 - Candidate Division SR1 and Gracilibacteria</option>\n-            <option value="26">26 - Pachysolen tannophilus Nuclear</option>\n-            <option value="27">27 - Karyorelict Nuclear</option>\n-            <option value="28">28 - Condylostoma Nuclear</option>\n-            <option value="29">29 - Mesodinium Nuclear</option>\n-            <option value="30">30 - Peritrich Nuclear</option>\n-            <option value="31">31 - Blastocrithidia Nuclear</option>\n-            <option value="32">32 - Balanophoraceae Plastid</option>\n-            <option value="33">33 - Cephalodiscidae Mitochondrial</option>\n-        </param>\n-        <param name="do_defrag" type="boolean" checked="true" label="Gene family defragmentation" truevalue="" falsevalue="--no_defrag"/>\n+        <expand macro="inputs_nb_of_partitions"/>\n         \n-\n+        <expand macro="inputs_translation_table"/>\n+        \n         <param name="advanced_pangenome_optional_files" type="select" label="Add the following pangenome output files in the Galaxy history" multiple="true" optional="true" 
display="checkboxes" >\n         \t<!-- Basic files -->\n             <option value="output_gene_presence_absence" selected="true">Gene presence absence</option>\n@@ -252,7 +209,7 @@\n           <filter>advanced_pangenome_optional_files and "output_gene_presence_absence" in advanced_pangenome_optional_files</filter>\n         </data>\n         \n-        \n+        <data name="ppanggolin_info" format="txt" label="PPanGGOLiN all on ${on_string}: PPanGGOLiN info" />\n         <data name="regions_of_genomic_plasticity" format="tsv" label="PPanGGOLiN all on ${on_string}: Regions of genomic plasticity" />\n         <data name="pangenome_h5" format="h5" label="PPanGGOLiN all on ${on_string}: PanGenome HDF5 file" />\n         <data name="genomes_statistics" format="tsv" label="PPanGGOLiN all on ${on_string}: Genome statistics" />\n@@ -260,7 +217,7 @@\n     </outputs>\n \n     <tests>\n-        <test expect_num_outputs="21">\n+        <test expect_num_outputs="22">\n             <param name="nb_of_partitions" value="3"/>\n             <param name="coverage" value="0.8"/>\n             <param name="identity" value="0.8"/>\n@@ -277,8 +234,18 @@\n                     <has_text text="region" />\n                 </assert_contents>\n             </output>\n+            <output name="ppanggolin_info" >\n+                <assert_contents>\n+                    <has_text text="Content:" />\n+                </assert_contents>\n+            </output>\n+            <output name="Ushaped_plot" >\n+                <assert_contents>\n+                    <has_text text="html" />\n+                </assert_contents>\n+            </output>\n         </test>\n-        <test expect_num_outputs="21">\n+        <test expect_num_outputs="22">\n             <param name="nb_of_partitions" value="3"/>\n             <param name="coverage" value="0.8"/>\n             <param name="identity" value="0.8"/>\n@@ -295,6 +262,11 @@\n                     <has_text text="region" />\n                 
</assert_contents>\n             </output>\n+            <output name="ppanggolin_info" >\n+                <assert_contents>\n+                    <has_text text="Content:" />\n+                </assert_contents>\n+            </output>\n         </test>\n         <test expect_failure="true">\n             <param name="nb_of_partitions" value="3"/>\n'
b
diff -r 79f2d2f0ca2c -r d2b0073ef8d6 test-data/fasta/PROJECT1_984801_1194801.fasta.gz
b
Binary file test-data/fasta/PROJECT1_984801_1194801.fasta.gz has changed
b
diff -r 79f2d2f0ca2c -r d2b0073ef8d6 test-data/fasta/PROJECT5_1290693_1440693.fasta.gz
b
Binary file test-data/fasta/PROJECT5_1290693_1440693.fasta.gz has changed
b
diff -r 79f2d2f0ca2c -r d2b0073ef8d6 test-data/fasta/PROJECT8_1022972_1232972.fasta.gz
b
Binary file test-data/fasta/PROJECT8_1022972_1232972.fasta.gz has changed
b
diff -r 79f2d2f0ca2c -r d2b0073ef8d6 test-data/genbank/PROJECT1_984801_1194801.gb.gz
b
Binary file test-data/genbank/PROJECT1_984801_1194801.gb.gz has changed
b
diff -r 79f2d2f0ca2c -r d2b0073ef8d6 test-data/genbank/PROJECT5_1290693_1440693.gb.gz
b
Binary file test-data/genbank/PROJECT5_1290693_1440693.gb.gz has changed
b
diff -r 79f2d2f0ca2c -r d2b0073ef8d6 test-data/genbank/PROJECT8_1022972_1232972.gb.gz
b
Binary file test-data/genbank/PROJECT8_1022972_1232972.gb.gz has changed