Mercurial > repos > iuc > ppanggolin_all
changeset 5:d2b0073ef8d6 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ppanggolin commit e21d81020b381293e58e72ad54b782ce4a30ab56
author | iuc |
---|---|
date | Thu, 14 Aug 2025 17:09:17 +0000 |
parents | 79f2d2f0ca2c |
children | |
files | macros.xml ppanggolin_all.xml test-data/fasta/PROJECT1_984801_1194801.fasta.gz test-data/fasta/PROJECT5_1290693_1440693.fasta.gz test-data/fasta/PROJECT8_1022972_1232972.fasta.gz test-data/genbank/PROJECT1_984801_1194801.gb.gz test-data/genbank/PROJECT5_1290693_1440693.gb.gz test-data/genbank/PROJECT8_1022972_1232972.gb.gz |
diffstat | 8 files changed, 106 insertions(+), 68 deletions(-) [+] |
line wrap: on
line diff
--- a/macros.xml Mon Jul 14 07:48:53 2025 +0000 +++ b/macros.xml Thu Aug 14 17:09:17 2025 +0000 @@ -1,6 +1,6 @@ <macros> <token name="@TOOL_VERSION@">2.2.4</token> - <token name="@VERSION_SUFFIX@">0</token> + <token name="@VERSION_SUFFIX@">1</token> <xml name="citation"> <citations> <citation type="doi">10.1371/journal.pcbi.1007732</citation> @@ -17,4 +17,70 @@ <xref type="bio.tools">ppanggolin</xref> </xrefs> </xml> + <token name="@ORGANISM_LIST@"><![CDATA[ + touch ./tmp_ppanggolin/organism_list/organism.list && + #set extension_input_files = "" + #for $counter_input_files, $file in enumerate($genomes): + #if $counter_input_files == 0: + #set extension_input_files = $file.ext + #else: + #if $file.ext != $extension_input_files: + #raise Exception("All the genome files must be of the same datatype, either all genbank files or all fasta files.") + #end if + #end if + + #set base_name = re.sub('[^\w\-_\.]', '_', str($file.element_identifier)) + echo -e '${base_name}\t${file}' >> ./tmp_ppanggolin/organism_list/organism.list && + #end for + ]]></token> + <xml name="inputs_identity_coverage_do_defrag"> + <param argument="--identity" type="float" value="0.8" min="0" max="1" label="Minimum alignment identity"/> + <param argument="--coverage" type="float" value="0.8" min="0" max="1" label="Minimum alignment coverage"/> + <param name="do_defrag" type="boolean" checked="true" label="Gene family defragmentation" truevalue="" falsevalue="--no_defrag" help="Realign gene families to link fragments with their non-fragmented gene family. (--no_defrag)"/> + </xml> + <xml name="inputs_nb_of_partitions"> + <param argument="--nb_of_partitions" type="integer" max="20" label="Number of classes used to partition the pangenome" optional="true" help="If empty value (default), it will be automatically determined. Otherwise, the value needs to be between 2 and 20 and it is advised to set a value of 3. See the link for the documentation in the help section for more details."> + <validator type="expression" message="Value needs to be empty or an integer between 2 and 20">value == "" or int(value) >= 2</validator> + </param> + </xml> + <xml name="inputs_translation_table"> + <param argument="--translation_table" type="select" label="Translation table"> + <option value="1" selected="true">1 - Standard Code</option> + <option value="2">2 - Vertebrate Mitochondrial</option> + <option value="3">3 - Yeast Mitochondrial</option> + <option value="4">4 - Mold, Protozoan, and Coelenterate Mitochondrial</option> + <option value="5">5 - Invertebrate Mitochondrial</option> + <option value="6">6 - Ciliate Nuclear</option> + <option value="9">9 - Echinoderm Mitochondrial</option> + <option value="10">10 - Euplotid Nuclear</option> + <option value="11">11 - Bacterial and Plant Plastid</option> + <option value="12">12 - Alternative Yeast Nuclear</option> + <option value="13">13 - Ascidian Mitochondrial</option> + <option value="14">14 - Flatworm Mitochondrial</option> + <option value="15">15 - Blepharisma Nuclear</option> + <option value="16">16 - Chlorophycean Mitochondrial</option> + <option value="21">21 - Trematode Mitochondrial</option> + <option value="22">22 - Scenedesmus obliquus Mitochondrial</option> + <option value="23">23 - Thraustochytrium Mitochondrial</option> + <option value="24">24 - Pterobranchia Mitochondrial</option> + <option value="25">25 - Candidate Division SR1 and Gracilibacteria</option> + <option value="26">26 - Pachysolen tannophilus Nuclear</option> + <option value="27">27 - Karyorelict Nuclear</option> + <option value="28">28 - Condylostoma Nuclear</option> + <option value="29">29 - Mesodinium Nuclear</option> + <option value="30">30 - Peritrich Nuclear</option> + <option value="31">31 - Blastocrithidia Nuclear</option> + <option value="32">32 - Balanophoraceae Plastid</option> + <option value="33">33 - Cephalodiscidae Mitochondrial</option> + </param> + </xml> + <xml name="inputs_pangenome"> + <param argument="--pangenome" name="pangenome_h5" type="data" format="h5" label="Input pangenome h5 file"/> + </xml> + <xml name="inputs_soft_core"> + <param argument="--soft_core" type="float" value="0.95" min="0" max="1" label="Soft core threshold used when generating general statistics on the projected genome" help="Default=0.95 .This threshold does not influence PPanGGOLiN's partitioning. The value determines the minimum fraction of genomes that must possess a gene family for it to be considered part of the soft core."/> + </xml> + <xml name="inputs_genomes" token_min="1" token_extratexthelp=""> + <param name="genomes" type="data" multiple="true" min="@MIN@" label="Genome files" format="fasta,genbank" help="All the genome files must be of the same format, either all genbank files or all fasta files. A minimum of @MIN@ genome file(s) is mandatory.@EXTRATEXTHELP@ Special characters and spaces are replaced by underscore."/> + </xml> </macros>
--- a/ppanggolin_all.xml Mon Jul 14 07:48:53 2025 +0000 +++ b/ppanggolin_all.xml Thu Aug 14 17:09:17 2025 +0000 @@ -7,27 +7,12 @@ <expand macro="requirements"/> <command detect_errors="exit_code"><![CDATA[ - - mkdir -p "./tmp_ppanggolin/all" && - mkdir -p "./tmp_ppanggolin/organism_list" && - mkdir -p "./tmp_ppanggolin/ln_input_genomes" && - - touch "./tmp_ppanggolin/organism_list/organism.list" && + #import re - #set extension_input_files = "" - #for $counter_input_files, $file in enumerate($genomes): - #if $counter_input_files == 0: - #set extension_input_files = $file.ext - #else: - #if $file.ext != $extension_input_files: - #raise Exception("All the genome files must be of the same datatype, either all genbank files or all fasta files.") - #end if - #end if - - #set base_name = str($file.element_identifier).replace(" ", "_") - echo -e '${base_name}\t${file}' >> "./tmp_ppanggolin/organism_list/organism.list" && - - #end for + mkdir -p ./tmp_ppanggolin/all && + mkdir -p ./tmp_ppanggolin/organism_list && + + @ORGANISM_LIST@ ppanggolin all @@ -36,11 +21,12 @@ #elif $extension_input_files == "genbank": --anno #end if - "./tmp_ppanggolin/organism_list/organism.list" + ./tmp_ppanggolin/organism_list/organism.list - -o ./tmp_ppanggolin/all + --output ./tmp_ppanggolin/all --force --cpu "\${GALAXY_SLOTS:-4}" + --disable_prog_bar --coverage $coverage --identity $identity @@ -53,6 +39,11 @@ $do_defrag + + && ppanggolin info + --pangenome ./tmp_ppanggolin/all/pangenome.h5 + > ./tmp_ppanggolin/all/ppanggolin_info.txt + #if "output_functional_modules" in $advanced_pangenome_optional_files: && cat ./tmp_ppanggolin/all/functional_modules.tsv > '${functional_modules}' @@ -85,7 +76,7 @@ #if "output_tile_plot" in $advanced_pangenome_optional_files: && cat ./tmp_ppanggolin/all/tile_plot.html > '${tile_plot}' #end if - #if "output_ushaped_plot" in $advanced_pangenome_optional_files: + #if "output_Ushaped_plot" in $advanced_pangenome_optional_files: && cat ./tmp_ppanggolin/all/Ushaped_plot.html > '${Ushaped_plot}' #end if @@ -112,7 +103,7 @@ && cat ./tmp_ppanggolin/all/gene_presence_absence.Rtab > '${gene_presence_absence}' #end if - + && cat ./tmp_ppanggolin/all/ppanggolin_info.txt > '${ppanggolin_info}' && cat ./tmp_ppanggolin/all/regions_of_genomic_plasticity.tsv > '${regions_of_genomic_plasticity}' && cat ./tmp_ppanggolin/all/pangenome.h5 > '${pangenome_h5}' && cat ./tmp_ppanggolin/all/genomes_statistics.tsv > '${genomes_statistics}' @@ -121,49 +112,15 @@ ]]></command> <inputs> - - <param name="genomes" type="data" multiple="true" min="2" label="Select genome files" format="fasta,genbank" help="All the genome files must be of similar format, either all genbank files or all fasta files. Processing of at least 15 genomes files is recommended, a minimum of 2 genomes files is mandatory. Space is not allowed in filename."> - </param> - <param argument="--identity" type="float" value="0.8" min="0" max="1" label="Minimum alignment identity"/> - <param argument="--coverage" type="float" value="0.8" min="0" max="1" label="Minimum alignment coverage"/> + <expand macro="inputs_genomes" min="2" extratexthelp=" Processing of at least 15 genomes files is recommended." /> + + <expand macro="inputs_identity_coverage_do_defrag"/> - <param argument="--nb_of_partitions" type="integer" max="20" label="Number of classes used to partition the pangenome" optional="true" help="If empty value (default), it will be automatically determined. Otherwise, the value needs to be between 2 and 20 and it is advised to set a value of 3. See the link for the documentation in the help section for more details."> - <validator type="expression" message="Value needs to be empty or an integer between 2 and 20">value == "" or int(value) >= 2</validator> - </param> - - <param argument="--translation_table" type="select" label="Translation table"> - <option value="1">1 - Standard Code</option> - <option value="2">2 - Vertebrate Mitochondrial</option> - <option value="3">3 - Yeast Mitochondrial</option> - <option value="4">4 - Mold, Protozoan, and Coelenterate Mitochondrial</option> - <option value="5">5 - Invertebrate Mitochondrial</option> - <option value="6">6 - Ciliate Nuclear</option> - <option value="9">9 - Echinoderm Mitochondrial</option> - <option value="10">10 - Euplotid Nuclear</option> - <option value="11">11 - Bacterial and Plant Plastid</option> - <option value="12">12 - Alternative Yeast Nuclear</option> - <option value="13">13 - Ascidian Mitochondrial</option> - <option value="14">14 - Flatworm Mitochondrial</option> - <option value="15">15 - Blepharisma Nuclear</option> - <option value="16">16 - Chlorophycean Mitochondrial</option> - <option value="21">21 - Trematode Mitochondrial</option> - <option value="22">22 - Scenedesmus obliquus Mitochondrial</option> - <option value="23">23 - Thraustochytrium Mitochondrial</option> - <option value="24">24 - Pterobranchia Mitochondrial</option> - <option value="25">25 - Candidate Division SR1 and Gracilibacteria</option> - <option value="26">26 - Pachysolen tannophilus Nuclear</option> - <option value="27">27 - Karyorelict Nuclear</option> - <option value="28">28 - Condylostoma Nuclear</option> - <option value="29">29 - Mesodinium Nuclear</option> - <option value="30">30 - Peritrich Nuclear</option> - <option value="31">31 - Blastocrithidia Nuclear</option> - <option value="32">32 - Balanophoraceae Plastid</option> - <option value="33">33 - Cephalodiscidae Mitochondrial</option> - </param> - <param name="do_defrag" type="boolean" checked="true" label="Gene family defragmentation" truevalue="" falsevalue="--no_defrag"/> + <expand macro="inputs_nb_of_partitions"/> - + <expand macro="inputs_translation_table"/> + <param name="advanced_pangenome_optional_files" type="select" label="Add the following pangenome output files in the Galaxy history" multiple="true" optional="true" display="checkboxes" > <!-- Basic files --> <option value="output_gene_presence_absence" selected="true">Gene presence absence</option> @@ -252,7 +209,7 @@ <filter>advanced_pangenome_optional_files and "output_gene_presence_absence" in advanced_pangenome_optional_files</filter> </data> - + <data name="ppanggolin_info" format="txt" label="PPanGGOLiN all on ${on_string}: PPanGGOLiN info" /> <data name="regions_of_genomic_plasticity" format="tsv" label="PPanGGOLiN all on ${on_string}: Regions of genomic plasticity" /> <data name="pangenome_h5" format="h5" label="PPanGGOLiN all on ${on_string}: PanGenome HDF5 file" /> <data name="genomes_statistics" format="tsv" label="PPanGGOLiN all on ${on_string}: Genome statistics" /> @@ -260,7 +217,7 @@ </outputs> <tests> - <test expect_num_outputs="21"> + <test expect_num_outputs="22"> <param name="nb_of_partitions" value="3"/> <param name="coverage" value="0.8"/> <param name="identity" value="0.8"/> @@ -277,8 +234,18 @@ <has_text text="region" /> </assert_contents> </output> + <output name="ppanggolin_info" > + <assert_contents> + <has_text text="Content:" /> + </assert_contents> + </output> + <output name="Ushaped_plot" > + <assert_contents> + <has_text text="html" /> + </assert_contents> + </output> </test> - <test expect_num_outputs="21"> + <test expect_num_outputs="22"> <param name="nb_of_partitions" value="3"/> <param name="coverage" value="0.8"/> <param name="identity" value="0.8"/> @@ -295,6 +262,11 @@ <has_text text="region" /> </assert_contents> </output> + <output name="ppanggolin_info" > + <assert_contents> + <has_text text="Content:" /> + </assert_contents> + </output> </test> <test expect_failure="true"> <param name="nb_of_partitions" value="3"/>