changeset 11:5bec47dfe99a draft

planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/ncbi_egapx commit 544ef29cd524d03b10fdc60769d5d0f2a93d3a67
author richard-burhans
date Thu, 14 Nov 2024 15:28:04 +0000
parents 7fa335454224
children 649483462da3
files macros.xml ncbi_egapx.xml test-data/input.yaml
diffstat 3 files changed, 80 insertions(+), 64 deletions(-) [+]
line wrap: on
line diff
--- a/macros.xml	Mon Oct 14 19:01:46 2024 +0000
+++ b/macros.xml	Thu Nov 14 15:28:04 2024 +0000
@@ -1,11 +1,11 @@
 <macros>
     <xml name="requirements">
         <requirements>
-            <container type="docker">quay.io/richard-burhans/egapx:@TOOL_VERSION@</container>
+            <container type="docker">quay.io/galaxy/egapx:@TOOL_VERSION@</container>
         </requirements>
     </xml>
-    <token name="@TOOL_VERSION@">0.2-alpha</token>
-    <token name="@VERSION_SUFFIX@">4</token>
+    <token name="@TOOL_VERSION@">0.3.0-alpha</token>
+    <token name="@VERSION_SUFFIX@">0</token>
     <token name="@PROFILE@">22.05</token>
     <xml name="edam_ontology">
         <edam_operations>
@@ -14,7 +14,22 @@
     </xml>
     <xml name="citations">
         <citations>
-            <citation type="doi">10.1093/bioinformatics/bts573</citation>
+            <citation type="bibtex">
+                @misc{githubegapx,
+                    author = {NCBI},
+                    year = "2024",
+                    title = {Eukaryotic Genome Annotation Pipeline - External (EGAPx)},
+                    publisher = {GitHub},
+                    journal = {GitHub repository},
+                    url = {https://github.com/ncbi/egapx}
+                }
+            </citation>
         </citations>
     </xml>
+    <xml name="creators">
+        <creator>
+            <person givenName="Ross" familyName="Lazarus" url="https://orcid.org/0000-0003-3939-1961"/>
+            <person givenName="Richard" familyName="Burhans" url="https://science.psu.edu/bmb/people/rcb112"/>
+        </creator>
+    </xml>
 </macros>
--- a/ncbi_egapx.xml	Mon Oct 14 19:01:46 2024 +0000
+++ b/ncbi_egapx.xml	Thu Nov 14 15:28:04 2024 +0000
@@ -6,49 +6,51 @@
     <expand macro="edam_ontology"/>
     <expand macro="requirements"/>
     <command detect_errors="aggressive"><![CDATA[
-    #if str($cond_input_style.input_style) == "fillform":
-        #set yamlconfig = "egapx.yaml"
-        echo '# yaml generated by ncbi_egapx.xml' > '$yamlconfig' &&
-        #if str($reference_genome.genome_type_select) == "history"
-            echo 'genome: $reference_genome.genome' >> '$yamlconfig' &&
-        #elif str($reference_genome.genome_type_select) == "indexed":
-            echo 'genome: $reference_genome.genome.fields.path' >> '$yamlconfig' &&
-        #else:
-            echo 'genome: $reference_genome.uri' >> '$yamlconfig' &&
-        #end if
-        echo 'taxid: $taxid' >> '$yamlconfig' &&
-        echo 'reads:' >> '$yamlconfig' &&
-        #if str($condrnaseq.rna_type_select) == "list":
-            #set rs = $rnaseq.split()
-            #set rsplit = [x.strip() for x in $rs]
-            #for $r in $rsplit:
-                echo '  - $r'  >> '$yamlconfig' &&
-            #end for
-        #else:
-            #for $r in $rnaseq:
-                echo '  - $r'  >> '$yamlconfig' &&
-            #end for
-        #end if
-        #if $proteins:
-            echo 'proteins: $proteins' >> '$yamlconfig' &&
-        #end if
-        #if len($xtra.strip()) > 0:
-            #set lxtra = $xtra.split("\n")
-            #for row in $lxtra:
-                echo '$row' >> '$yamlconfig' &&
-            #end for
-        #end if
-        echo '' >> '$yamlconfig' &&
-        echo "Calculated contents of egapx yaml" &&
-        cat '$yamlconfig' &&
-    #else:
+    #if str($cond_input_style.input_style) == "fillform"
+        #set yamlconfig = $egapx_config
+    #else
         #set yamlconfig = $yamlin
     #end if
+    ## activate the following
+    ##   - nextflow conda environment
+    ##   - EGAPx python virtual environment
     source /galaxy/env.bash &&
-    echo \${PATH} &&
+    ## use the augmented container EGAPx config
     ln -s /galaxy/egapx/egapx_config &&
+    ## run EGAPx
     python3 /galaxy/egapx/ui/egapx.py '$yamlconfig' -e galaxy -o 'egapx_out'
     ]]></command>
+    <configfiles>
+        <configfile name="egapx_config"><![CDATA[
+#if str($cond_input_style.input_style) == "fillform"
+# yaml generated by ncbi_egapx.xml
+    #if str($reference_genome.genome_type_select) == "history"
+        #set genome_value = $reference_genome.genome
+    #elif str($reference_genome.genome_type_select) == "indexed"
+        #set genome_value = $reference_genome.genome.fields.path
+    #else
+        #set genome_value = $reference_genome.uri
+    #end if
+genome: $genome_value
+taxid: $taxid
+    #if str($condrnaseq.rna_type_select) == "list"
+        #set $reads_values = $rnaseq.split()
+    #else
+        #set $reads_values = $rnaseq
+    #end if
+reads:
+    #for r in [str(x).strip() for x in $reads_values]
+  - $r
+    #end for
+    #if str($proteins) != "None"
+proteins: $proteins
+    #end if
+    #for row in $xtra.strip().split("\n")
+$row
+    #end for
+#end if
+        ]]></configfile>
+    </configfiles>
     <inputs>
         <conditional name="cond_input_style">
             <param name="input_style" type="select" label="Fill in a tool form or use an existing yaml configuration from the current history?"
@@ -59,13 +61,13 @@
             <when value="fillform">
                 <conditional name="reference_genome">
                     <param name="genome_type_select" type="select" label="Reference genome source for mapping supplied RNA-seq reads"
-                        help="Select a built in, history or remote URI for the reference genome fasta">
-                        <option value="history" selected="True">Use a genome fasta file from the current history</option>
+                        help="Select a built in, history or remote URI for the reference genome FASTA">
+                        <option value="history" selected="True">Use a genome FASTA file from the current history</option>
                         <option value="indexed">Use a Galaxy server built-in genome</option>
-                        <option value="uri">Provide a remote web link URI ("https://...") pointing at the required genome reference fasta file</option>
+                        <option value="uri">Provide a remote web link URI ("https://...") pointing at the required genome reference FASTA file</option>
                     </param>
                     <when value="history">
-                        <param name="genome" type="data" format="fasta" label="Select the reference genome fasta from the current history"/>
+                        <param name="genome" type="data" format="fasta" label="Select the reference genome FASTA from the current history"/>
                     </when>
                     <when value="indexed">
                         <param name="genome" type="select" label="Select a built in reference genome or custom genome"
@@ -76,19 +78,19 @@
                         </param>
                     </when>
                     <when value="uri">
-                        <param name="uri" type="text" label="URI pointing to the reference genome fasta file"/>
+                        <param name="uri" type="text" label="URI pointing to the reference genome FASTA file"/>
                     </when>
                 </conditional>
-                <param name="taxid" type="text" label="NCBI Taxon ID" help="Used to identify the HMM model files needed"/>
+                <param name="taxid" type="integer" min="0" label="NCBI Taxon ID" help="Used to identify the HMM model files needed"/>
                 <conditional name="condrnaseq">
                     <param name="rna_type_select" type="select" label="RNA sequence data source"
                         help="Select RNAseq input data from history or input a list of SRA identifiers or remote URI">
-                        <option value="list" selected="True">Type in a list of SRA identifiers and/or remote RNA-seq fasta URI</option>
+                        <option value="list" selected="True">Type in a list of SRA identifiers and/or remote RNA-seq FASTA URI</option>
                         <option value="history">Select one or more RNA-seq fastq datasets from the current history</option>
                     </param>
                     <when value="list">
                         <param name="rnaseq" type="text" area="true" label="List all required individual RNA-seq URI or SRA identifiers, separated by spaces or newlines"
-                            help="Either a working URI for a RNA-seq fasta, or a bare SRA identifier will work - can be mixed">
+                            help="Either a working URI for a RNA-seq FASTA, or a bare SRA identifier will work - can be mixed">
                             <validator type="empty_field"/>
                         </param>
                     </when>
@@ -97,7 +99,7 @@
                             help="All selected rna-seq fastqsanger will be added to the yaml for egapx configuration"/>
                     </when>
                 </conditional>
-                <param name="proteins" type="data" format="fasta,tasta.gz" optional="true" label="Select a protein set"/>
+                <param name="proteins" type="data" format="fasta,fasta.gz" optional="true" label="Select a protein set"/>
                 <param name="xtra" type="text" area="true" label="Additional yaml to append to the egapx.yaml configuration"
                     help="Not normally needed but useful for testing additional configuration elements">
                     <sanitizer invalid_char="">
@@ -106,7 +108,7 @@
                 </param>
             </when>
             <when value="history">
-                <param name="yamlin" type="data" format="yaml,txt" label="egapx configuration yaml file to pass to Nextflow"/>
+                <param name="yamlin" type="data" format="yaml" label="egapx configuration yaml file to pass to Nextflow"/>
             </when>
         </conditional>
     </inputs>
@@ -122,8 +124,13 @@
     </outputs>
     <tests>
         <test expect_test_failure="true">
-            <param name="input_style" value="history"/>
-            <param name="yamlin" value="input.yaml"/>
+            <param name="input_style" value="fillform"/>
+            <param name="taxid" value="6954"/>
+            <param name="genome_type_select" value="uri"/>
+            <param name="uri" value="https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/020/809/275/GCF_020809275.1_ASM2080927v1/GCF_020809275.1_ASM2080927v1_genomic.fna.gz"/>
+            <param name="rna_type_select" value="list"/>
+            <param name="rnaseq" value="https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/sample_data/Dermatophagoides_farinae_small/SRR8506572.1 https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/sample_data/Dermatophagoides_farinae_small/SRR8506572.2 https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/sample_data/Dermatophagoides_farinae_small/SRR9005248.1 https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/sample_data/Dermatophagoides_farinae_small/SRR9005248.2"/>
+            <param name="xtra" value="hmm: https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/gnomon/hmm_parameters/6956.params&#10;tasks:&#10;  star_wnode:&#10;    star_wnode: -cpus-per-worker 4"/>
             <output name="output"><assert_contents><has_size min="1"/></assert_contents></output>
             <output_collection name="nextflow_stats" type="list">
                 <element name="nf_log"><assert_contents><has_size min="1"/></assert_contents></element>
@@ -134,13 +141,8 @@
             </output_collection>
         </test>
         <test expect_test_failure="true">
-            <param name="input_style" value="fillform"/>
-            <param name="taxid" value="6954"/>
-            <param name="genome_type_select" value="uri"/>
-            <param name="uri" value="https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/020/809/275/GCF_020809275.1_ASM2080927v1/GCF_020809275.1_ASM2080927v1_genomic.fna.gz"/>
-            <param name="rna_type_select" value="list"/>
-            <param name="rnaseq" value="https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/sample_data/Dermatophagoides_farinae_small/SRR8506572.1 https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/sample_data/Dermatophagoides_farinae_small/SRR8506572.2 https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/sample_data/Dermatophagoides_farinae_small/SRR9005248.1 https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/sample_data/Dermatophagoides_farinae_small/SRR9005248.2"/>
-            <param name="xtra" value="proteins: []&#10;hmm: https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/gnomon/hmm_parameters/6956.params&#10;tasks:&#10;  star_wnode:&#10;    star_wnode: -cpus-per-worker 4"/>
+            <param name="input_style" value="history"/>
+            <param name="yamlin" value="input.yaml"/>
             <output name="output"><assert_contents><has_size min="1"/></assert_contents></output>
             <output_collection name="nextflow_stats" type="list">
                 <element name="nf_log"><assert_contents><has_size min="1"/></assert_contents></element>
@@ -242,7 +244,7 @@
 
 EGAPx is the publicly accessible version of the updated NCBI [Eukaryotic Genome Annotation Pipeline](https://www.ncbi.nlm.nih.gov/genome/annotation_euk/process/).
 
-EGAPx takes an assembly fasta file, a taxid of the organism, and RNA-seq data. Based on the taxid, EGAPx will pick protein sets and HMM models. The pipeline runs `miniprot` to align protein sequences, and `STAR` to align RNA-seq to the assembly. Protein alignments and RNA-seq read alignments are then passed to `Gnomon` for gene prediction. In the first step of `Gnomon`, the short alignments are chained together into putative gene models.
+EGAPx takes an assembly FASTA file, a taxid of the organism, and RNA-seq data. Based on the taxid, EGAPx will pick protein sets and HMM models. The pipeline runs `miniprot` to align protein sequences, and `STAR` to align RNA-seq to the assembly. Protein alignments and RNA-seq read alignments are then passed to `Gnomon` for gene prediction. In the first step of `Gnomon`, the short alignments are chained together into putative gene models.
 In the second step, these predictions are further supplemented by *ab-initio* predictions based on HMM models. The final annotation for the input assembly is produced as a `gff` file.
 
 **Security Notice:**
@@ -310,4 +312,5 @@
  2024-03-27 11:20:24   17127134 aligns.paf
     ]]></help>
     <expand macro="citations"/>
+    <expand macro="creators"/>
 </tool>
--- a/test-data/input.yaml	Mon Oct 14 19:01:46 2024 +0000
+++ b/test-data/input.yaml	Thu Nov 14 15:28:04 2024 +0000
@@ -3,15 +3,13 @@
 # To limit the requirements you also need to use -e docker_minimal
 
 genome: https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/020/809/275/GCF_020809275.1_ASM2080927v1/GCF_020809275.1_ASM2080927v1_genomic.fna.gz
+taxid: 6954
 reads:
   - https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/sample_data/Dermatophagoides_farinae_small/SRR8506572.1
   - https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/sample_data/Dermatophagoides_farinae_small/SRR8506572.2
   - https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/sample_data/Dermatophagoides_farinae_small/SRR9005248.1
   - https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/sample_data/Dermatophagoides_farinae_small/SRR9005248.2
-taxid: 6954
-proteins: []
 hmm: https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/gnomon/hmm_parameters/6956.params
 tasks:
   star_wnode:
     star_wnode: -cpus-per-worker 4
-