Mercurial > repos > iuc > ppanggolin_all
changeset 0:6fdee3720de7 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ppanggolin commit f51871dfce652ad0bbb9d4967d02338d03b9e2a3
author | iuc |
---|---|
date | Wed, 22 Jan 2025 10:34:50 +0000 |
parents | |
children | a83148dac159 |
files | macros.xml ppanggolin_all.xml test-data/fasta/AP028611_984801_1194801.fasta.gz test-data/fasta/CP107038_1022972_1232972.fasta.gz test-data/fasta/CP113115_1290693_1440693.fasta.gz test-data/fasta/LN831051_1254175_1464175.fasta.gz test-data/fasta/NC_012467_959209_1169209.fasta.gz test-data/genbank/AP028611_984801_1194801.gb.gz test-data/genbank/CP107038_1022972_1232972.gb.gz test-data/genbank/CP113115_1290693_1440693.gb.gz test-data/genbank/LN831051_1254175_1464175.gb.gz test-data/genbank/NC_012467_959209_1169209.gb.gz test-data/h5/test_data.h5 |
diffstat | 13 files changed, 343 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros.xml Wed Jan 22 10:34:50 2025 +0000 @@ -0,0 +1,19 @@ +<macros> + <token name="@TOOL_VERSION@">2.2.1</token> + <token name="@VERSION_SUFFIX@">0</token> + <xml name="citation"> + <citations> + <citation type="doi">10.1371/journal.pcbi.1007732</citation> + </citations> + </xml> + <xml name="requirements"> + <requirements> + <requirement type="package" version="@TOOL_VERSION@">ppanggolin</requirement> + </requirements> + </xml> + <xml name="xrefs"> + <xrefs> + <xref type="bio.tools">ppanggolin</xref> + </xrefs> + </xml> +</macros>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ppanggolin_all.xml Wed Jan 22 10:34:50 2025 +0000 @@ -0,0 +1,324 @@ +<tool id="ppanggolin_all" name="PPanGGOLiN all" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="23.0"> + <description>generates a partitioned pangenome</description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="xrefs"/> + <expand macro="requirements"/> + + <command detect_errors="exit_code"><![CDATA[ + + mkdir -p "./tmp_ppanggolin/all" && + mkdir -p "./tmp_ppanggolin/organism_list" && + mkdir -p "./tmp_ppanggolin/ln_input_genomes" && + + touch "./tmp_ppanggolin/organism_list/organism.list" && + + #set extension_input_files = "" + #for $counter_input_files, $file in enumerate($genomes): + #if $counter_input_files == 0: + #set extension_input_files = $file.ext + #else: + #if $file.ext != $extension_input_files: + #raise Exception("All the genome files must be of the same datatype, either all genbank files or all fasta files.") + #end if + #end if + + #set base_name = str($file.element_identifier).replace(" ", "_") + echo -e '${base_name}\t${file}' >> "./tmp_ppanggolin/organism_list/organism.list" && + + #end for + + ppanggolin all + + #if $extension_input_files == "fasta": + --fasta + #elif $extension_input_files == "genbank": + --anno + #end if + "./tmp_ppanggolin/organism_list/organism.list" + + -o ./tmp_ppanggolin/all + --force + --cpu "\${GALAXY_SLOTS:-4}" + + --coverage $coverage + --identity $identity + --translation_table $translation_table + + + #if str($nb_of_partitions) != "": + --nb_of_partitions $nb_of_partitions + #end if + + $do_defrag + + + #if "output_functional_modules" in $advanced_pangenome_optional_files: + && cat ./tmp_ppanggolin/all/functional_modules.tsv > '${functional_modules}' + #end if + #if "output_modules_RGP_lists" in $advanced_pangenome_optional_files: + && cat ./tmp_ppanggolin/all/modules_RGP_lists.tsv > '${modules_RGP_lists}' + #end if + #if "output_modules_spots" in $advanced_pangenome_optional_files: + && cat ./tmp_ppanggolin/all/modules_spots.tsv > '${modules_spots}' + #end if + #if "output_modules_in_genomes" in $advanced_pangenome_optional_files: + && cat ./tmp_ppanggolin/all/modules_in_genomes.tsv > '${modules_in_genomes}' + #end if + #if "output_modules_summary" in $advanced_pangenome_optional_files: + && cat ./tmp_ppanggolin/all/modules_summary.tsv > '${modules_summary}' + #end if + #if "output_spot_borders" in $advanced_pangenome_optional_files: + && cat ./tmp_ppanggolin/all/spot_borders.tsv > '${spot_borders}' + #end if + #if "output_spots" in $advanced_pangenome_optional_files: + && cat ./tmp_ppanggolin/all/spots.tsv > '${spots}' + #end if + #if "output_summarize_spots" in $advanced_pangenome_optional_files: + && cat ./tmp_ppanggolin/all/summarize_spots.tsv > '${summarize_spots}' + #end if + #if "output_border_protein_genes" in $advanced_pangenome_optional_files: + && cat ./tmp_ppanggolin/all/border_protein_genes.fasta > '${border_protein_genes}' + #end if + + #if "output_tile_plot" in $advanced_pangenome_optional_files: + && cat ./tmp_ppanggolin/all/tile_plot.html > '${tile_plot}' + #end if + #if "output_ushaped_plot" in $advanced_pangenome_optional_files: + && cat ./tmp_ppanggolin/all/Ushaped_plot.html > '${Ushaped_plot}' + #end if + + #if "output_pangenomeGraph_json" in $advanced_pangenome_optional_files: + && cat ./tmp_ppanggolin/all/pangenomeGraph.json > '${pangenomeGraph_json}' + #end if + #if "output_pangenomeGraph_gexf" in $advanced_pangenome_optional_files: + && cat ./tmp_ppanggolin/all/pangenomeGraph.gexf > '${pangenomeGraph_gexf}' + #end if + #if "output_pangenomeGraph_light_gexf" in $advanced_pangenome_optional_files: + && cat ./tmp_ppanggolin/all/pangenomeGraph_light.gexf > '${pangenomeGraph_light_gexf}' + #end if + + #if "output_matrix" in $advanced_pangenome_optional_files: + && cat ./tmp_ppanggolin/all/matrix.csv > '${matrix}' + #end if + #if "output_mean_persistent_duplication" in $advanced_pangenome_optional_files: + && cat ./tmp_ppanggolin/all/mean_persistent_duplication.tsv > '${mean_persistent_duplication}' + #end if + #if "output_gene_families" in $advanced_pangenome_optional_files: + && cat ./tmp_ppanggolin/all/gene_families.tsv > '${gene_families}' + #end if + #if "output_gene_presence_absence" in $advanced_pangenome_optional_files: + && cat ./tmp_ppanggolin/all/gene_presence_absence.Rtab > '${gene_presence_absence}' + #end if + + + && cat ./tmp_ppanggolin/all/regions_of_genomic_plasticity.tsv > '${regions_of_genomic_plasticity}' + && cat ./tmp_ppanggolin/all/pangenome.h5 > '${pangenome_h5}' + && cat ./tmp_ppanggolin/all/genomes_statistics.tsv > '${genomes_statistics}' + + + ]]></command> + + <inputs> + + <param name="genomes" type="data" multiple="true" min="2" label="Select genome files" format="fasta,genbank" help="All the genome files must be of similar format, either all genbank files or all fasta files. Processing of at least 15 genomes files is recommended, a minimum of 2 genomes files is mandatory. Space is not allowed in filename."> + </param> + + <param argument="--identity" type="float" value="0.8" min="0" max="1" label="Minimum alignment identity"/> + <param argument="--coverage" type="float" value="0.8" min="0" max="1" label="Minimum alignment coverage"/> + + <param argument="--nb_of_partitions" type="integer" max="20" label="Number of classes used to partition the pangenome" optional="true" help="If empty value (default), it will be automatically determined. Otherwise, the value needs to be between 2 and 20 and it is advised to set a value of 3. See the link for the documentation in the help section for more details."> + <validator type="expression" message="Value needs to be empty or an integer between 2 and 20">value == "" or int(value) >= 2</validator> + </param> + + <param argument="--translation_table" type="select" label="Translation table"> + <option value="1">1 - Standard Code</option> + <option value="2">2 - Vertebrate Mitochondrial</option> + <option value="3">3 - Yeast Mitochondrial</option> + <option value="4">4 - Mold, Protozoan, and Coelenterate Mitochondrial</option> + <option value="5">5 - Invertebrate Mitochondrial</option> + <option value="6">6 - Ciliate Nuclear</option> + <option value="9">9 - Echinoderm Mitochondrial</option> + <option value="10">10 - Euplotid Nuclear</option> + <option value="11">11 - Bacterial and Plant Plastid</option> + <option value="12">12 - Alternative Yeast Nuclear</option> + <option value="13">13 - Ascidian Mitochondrial</option> + <option value="14">14 - Flatworm Mitochondrial</option> + <option value="15">15 - Blepharisma Nuclear</option> + <option value="16">16 - Chlorophycean Mitochondrial</option> + <option value="21">21 - Trematode Mitochondrial</option> + <option value="22">22 - Scenedesmus obliquus Mitochondrial</option> + <option value="23">23 - Thraustochytrium Mitochondrial</option> + <option value="24">24 - Pterobranchia Mitochondrial</option> + <option value="25">25 - Candidate Division SR1 and Gracilibacteria</option> + <option value="26">26 - Pachysolen tannophilus Nuclear</option> + <option value="27">27 - Karyorelict Nuclear</option> + <option value="28">28 - Condylostoma Nuclear</option> + <option value="29">29 - Mesodinium Nuclear</option> + <option value="30">30 - Peritrich Nuclear</option> + <option value="31">31 - Blastocrithidia Nuclear</option> + <option value="32">32 - Balanophoraceae Plastid</option> + <option value="33">33 - Cephalodiscidae Mitochondrial</option> + </param> + <param name="do_defrag" type="boolean" checked="true" label="Gene family defragmentation" truevalue="" falsevalue="--no_defrag"/> + + + <param name="advanced_pangenome_optional_files" type="select" label="Add the following pangenome output files in the Galaxy history" multiple="true" optional="true" display="checkboxes" > + <!-- Basic files --> + <option value="output_gene_presence_absence" selected="true">Gene presence absence</option> + <option value="output_gene_families" selected="true">Gene families</option> + <option value="output_mean_persistent_duplication" selected="true">Mean persistent duplication</option> + <option value="output_matrix" selected="true">Matrix</option> + <!-- plot files --> + <option value="output_Ushaped_plot" selected="true">Ushaped plot</option> + <option value="output_tile_plot" selected="true">Tile plot</option> + <!-- graph files --> + <option value="output_pangenomeGraph_light_gexf" selected="true">PanGenome Graph Light (Gexf)</option> + <option value="output_pangenomeGraph_gexf" selected="true">PanGenome Graph (Gexf)</option> + <option value="output_pangenomeGraph_json" selected="true">PanGenome Graph (Json)</option> + <!-- advanced files --> + <option value="output_summarize_spots" selected="true">Summarize spots</option> + <option value="output_spots" selected="true">Spots</option> + <option value="output_spot_borders" selected="true">Spot borders</option> + <option value="output_border_protein_genes" selected="true">Border protein genes</option> + <option value="output_modules_summary" selected="true">Modules summary</option> + <option value="output_modules_in_genomes" selected="true">Modules in genomes</option> + <option value="output_modules_spots" selected="true">Modules spots</option> + <option value="output_modules_RGP_lists" selected="true">Modules RGP lists</option> + <option value="output_functional_modules" selected="true">Functional modules</option> + </param> + + </inputs> + + <outputs> + + <data name="functional_modules" format="tsv" label="PPanGGOLiN all on ${on_string}: Functional modules" > + <filter>advanced_pangenome_optional_files and "output_functional_modules" in advanced_pangenome_optional_files</filter> + </data> + <data name="modules_RGP_lists" format="tsv" label="PPanGGOLiN all on ${on_string}: Modules RGP lists" > + <filter>advanced_pangenome_optional_files and "output_modules_RGP_lists" in advanced_pangenome_optional_files</filter> + </data> + <data name="modules_spots" format="tsv" label="PPanGGOLiN all on ${on_string}: Modules spots" > + <filter>advanced_pangenome_optional_files and "output_modules_spots" in advanced_pangenome_optional_files</filter> + </data> + <data name="modules_in_genomes" format="tsv" label="PPanGGOLiN all on ${on_string}: Modules in genomes" > + <filter>advanced_pangenome_optional_files and "output_modules_in_genomes" in advanced_pangenome_optional_files</filter> + </data> + <data name="modules_summary" format="tsv" label="PPanGGOLiN all on ${on_string}: Modules summary" > + <filter>advanced_pangenome_optional_files and "output_modules_summary" in advanced_pangenome_optional_files</filter> + </data> + <data name="border_protein_genes" format="fasta" label="PPanGGOLiN all on ${on_string}: Border protein genes" > + <filter>advanced_pangenome_optional_files and "output_border_protein_genes" in advanced_pangenome_optional_files</filter> + </data> + <data name="spot_borders" format="tsv" label="PPanGGOLiN all on ${on_string}: Spot borders" > + <filter>advanced_pangenome_optional_files and "output_spot_borders" in advanced_pangenome_optional_files</filter> + </data> + <data name="spots" format="tsv" label="PPanGGOLiN all on ${on_string}: Spots" > + <filter>advanced_pangenome_optional_files and "output_spots" in advanced_pangenome_optional_files</filter> + </data> + <data name="summarize_spots" format="tsv" label="PPanGGOLiN all on ${on_string}: Summarized spots" > + <filter>advanced_pangenome_optional_files and "output_summarize_spots" in advanced_pangenome_optional_files</filter> + </data> + + <data name="pangenomeGraph_json" format="json" label="PPanGGOLiN all on ${on_string}: PanGenome Graph (JSON)" > + <filter>advanced_pangenome_optional_files and "output_pangenomeGraph_json" in advanced_pangenome_optional_files</filter> + </data> + <data name="pangenomeGraph_gexf" format="xml" label="PPanGGOLiN all on ${on_string}: PanGenome Graph (GEXF)" > + <filter>advanced_pangenome_optional_files and "output_pangenomeGraph_gexf" in advanced_pangenome_optional_files</filter> + </data> + <data name="pangenomeGraph_light_gexf" format="xml" label="PPanGGOLiN all on ${on_string}: PanGenome Graph Light (GEXF)" > + <filter>advanced_pangenome_optional_files and "output_pangenomeGraph_light_gexf" in advanced_pangenome_optional_files</filter> + </data> + + + <data name="tile_plot" format="html" label="PPanGGOLiN all on ${on_string}: Tile plot" > + <filter>advanced_pangenome_optional_files and "output_tile_plot" in advanced_pangenome_optional_files</filter> + </data> + <data name="Ushaped_plot" format="html" label="PPanGGOLiN all on ${on_string}: U-shaped plot" > + <filter>advanced_pangenome_optional_files and "output_Ushaped_plot" in advanced_pangenome_optional_files</filter> + </data> + + <data name="matrix" format="csv" label="PPanGGOLiN all on ${on_string}: Matrix" > + <filter>advanced_pangenome_optional_files and "output_matrix" in advanced_pangenome_optional_files</filter> + </data> + <data name="mean_persistent_duplication" format="tsv" label="PPanGGOLiN all on ${on_string}: Mean persistent duplication" > + <filter>advanced_pangenome_optional_files and "output_mean_persistent_duplication" in advanced_pangenome_optional_files</filter> + </data> + <data name="gene_families" format="tsv" label="PPanGGOLiN all on ${on_string}: Gene families" > + <filter>advanced_pangenome_optional_files and "output_gene_families" in advanced_pangenome_optional_files</filter> + </data> + <data name="gene_presence_absence" format="tsv" label="PPanGGOLiN all on ${on_string}: Gene presence/absence matrix" > + <filter>advanced_pangenome_optional_files and "output_gene_presence_absence" in advanced_pangenome_optional_files</filter> + </data> + + + <data name="regions_of_genomic_plasticity" format="tsv" label="PPanGGOLiN all on ${on_string}: Regions of genomic plasticity" /> + <data name="pangenome_h5" format="h5" label="PPanGGOLiN all on ${on_string}: PanGenome HDF5 file" /> + <data name="genomes_statistics" format="tsv" label="PPanGGOLiN all on ${on_string}: Genome statistics" /> + + </outputs> + + <tests> + <test expect_num_outputs="21"> + <param name="nb_of_partitions" value="3"/> + <param name="coverage" value="0.8"/> + <param name="identity" value="0.8"/> + <param name="translation_table" value="1"/> + <param name="genomes" value="fasta/AP028611_984801_1194801.fasta.gz,fasta/CP107038_1022972_1232972.fasta.gz,fasta/CP113115_1290693_1440693.fasta.gz,fasta/LN831051_1254175_1464175.fasta.gz,fasta/NC_012467_959209_1169209.fasta.gz" ftype="fasta"/> + <output name="genomes_statistics" > + <assert_contents> + <has_text text="#soft_core=" /> + <has_text text="#duplication_margin=" /> + </assert_contents> + </output> + <output name="regions_of_genomic_plasticity" > + <assert_contents> + <has_text text="region" /> + </assert_contents> + </output> + </test> + <test expect_num_outputs="21"> + <param name="nb_of_partitions" value="3"/> + <param name="coverage" value="0.8"/> + <param name="identity" value="0.8"/> + <param name="translation_table" value="1"/> + <param name="genomes" value="genbank/AP028611_984801_1194801.gb.gz,genbank/CP107038_1022972_1232972.gb.gz,genbank/CP113115_1290693_1440693.gb.gz,genbank/LN831051_1254175_1464175.gb.gz,genbank/NC_012467_959209_1169209.gb.gz" ftype="genbank"/> + <output name="genomes_statistics" > + <assert_contents> + <has_text text="#soft_core=" /> + <has_text text="#duplication_margin=" /> + </assert_contents> + </output> + <output name="regions_of_genomic_plasticity" > + <assert_contents> + <has_text text="region" /> + </assert_contents> + </output> + </test> + <test expect_failure="true"> + <param name="nb_of_partitions" value="3"/> + <param name="coverage" value="0.8"/> + <param name="identity" value="0.8"/> + <param name="translation_table" value="1"/> + <param name="genomes" value="genbank/AP028611_984801_1194801.gb.gz,genbank/CP107038_1022972_1232972.gb.gz,genbank/CP113115_1290693_1440693.gb.gz,fasta/AP028611_984801_1194801.fasta.gz,fasta/CP107038_1022972_1232972.fasta.gz,fasta/CP113115_1290693_1440693.fasta.gz" ftype="genbank"/> + </test> + </tests> + + <help><![CDATA[ + + PPanGGOLiN_ (Gautreau et al. 2020) is a software suite used to create and manipulate prokaryotic pangenomes from a set of either assembled + genomic DNA sequences or provided genome annotations. PPanGGOLiN builds pangenomes through a graphical model and a statistical method to partition gene + families in persistent, shell and cloud genomes. It integrates both information on protein-coding genes and their genomic neighborhood to build a graph + of gene families where each node is a gene family, and each edge is a relation of genetic contiguity. + + The `ppanggolin all` command generates a partitioned pangenome graph with predicted RGP, spots and modules. Please see the documentation_ on how parameters can be tuned for this command. + + .. _PPanGGOLiN: https://github.com/labgem/PPanGGOLiN + .. _documentation: https://ppanggolin.readthedocs.io/en/latest/user/QuickUsage/quickAnalyses.html + + ]]></help> + + <expand macro="citation"/> + +</tool>