# HG changeset patch
# User richard-burhans
# Date 1731598084 0
# Node ID 5bec47dfe99ac463405f972d28742e28eaecba3c
# Parent 7fa3354542242b96106c39e01abf19415118695e
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/ncbi_egapx commit 544ef29cd524d03b10fdc60769d5d0f2a93d3a67
diff -r 7fa335454224 -r 5bec47dfe99a macros.xml
--- a/macros.xml Mon Oct 14 19:01:46 2024 +0000
+++ b/macros.xml Thu Nov 14 15:28:04 2024 +0000
@@ -1,11 +1,11 @@
- quay.io/richard-burhans/egapx:@TOOL_VERSION@
+ quay.io/galaxy/egapx:@TOOL_VERSION@
- 0.2-alpha
- 4
+ 0.3.0-alpha
+ 022.05
@@ -14,7 +14,22 @@
- 10.1093/bioinformatics/bts573
+
+ @misc{githubegapx,
+ author = {NCBI},
+ year = "2024",
+ title = {Eukaryotic Genome Annotation Pipeline - External (EGAPx)},
+ publisher = {GitHub},
+ journal = {GitHub repository},
+ url = {https://github.com/ncbi/egapx}
+ }
+
+
+
+
+
+
+
diff -r 7fa335454224 -r 5bec47dfe99a ncbi_egapx.xml
--- a/ncbi_egapx.xml Mon Oct 14 19:01:46 2024 +0000
+++ b/ncbi_egapx.xml Thu Nov 14 15:28:04 2024 +0000
@@ -6,49 +6,51 @@
'$yamlconfig' &&
- #if str($reference_genome.genome_type_select) == "history"
- echo 'genome: $reference_genome.genome' >> '$yamlconfig' &&
- #elif str($reference_genome.genome_type_select) == "indexed":
- echo 'genome: $reference_genome.genome.fields.path' >> '$yamlconfig' &&
- #else:
- echo 'genome: $reference_genome.uri' >> '$yamlconfig' &&
- #end if
- echo 'taxid: $taxid' >> '$yamlconfig' &&
- echo 'reads:' >> '$yamlconfig' &&
- #if str($condrnaseq.rna_type_select) == "list":
- #set rs = $rnaseq.split()
- #set rsplit = [x.strip() for x in $rs]
- #for $r in $rsplit:
- echo ' - $r' >> '$yamlconfig' &&
- #end for
- #else:
- #for $r in $rnaseq:
- echo ' - $r' >> '$yamlconfig' &&
- #end for
- #end if
- #if $proteins:
- echo 'proteins: $proteins' >> '$yamlconfig' &&
- #end if
- #if len($xtra.strip()) > 0:
- #set lxtra = $xtra.split("\n")
- #for row in $lxtra:
- echo '$row' >> '$yamlconfig' &&
- #end for
- #end if
- echo '' >> '$yamlconfig' &&
- echo "Calculated contents of egapx yaml" &&
- cat '$yamlconfig' &&
- #else:
+ #if str($cond_input_style.input_style) == "fillform"
+ #set yamlconfig = $egapx_config
+ #else
#set yamlconfig = $yamlin
#end if
+ ## activate the following
+ ## - nextflow conda environment
+ ## - EGAPx python virtual environment
source /galaxy/env.bash &&
- echo \${PATH} &&
+ ## use the augmented container EGAPx config
ln -s /galaxy/egapx/egapx_config &&
+ ## run EGAPx
python3 /galaxy/egapx/ui/egapx.py '$yamlconfig' -e galaxy -o 'egapx_out'
]]>
+
+
+
-
+ help="Select a built in, history or remote URI for the reference genome FASTA">
+
-
+
-
+
-
+
-
+
-
+
+ help="Either a working URI for a RNA-seq FASTA, or a bare SRA identifier will work - can be mixed">
@@ -97,7 +99,7 @@
help="All selected rna-seq fastqsanger will be added to the yaml for egapx configuration"/>
-
+
@@ -106,7 +108,7 @@
-
+
@@ -122,8 +124,13 @@
-
-
+
+
+
+
+
+
+
@@ -134,13 +141,8 @@
-
-
-
-
-
-
-
+
+
@@ -242,7 +244,7 @@
EGAPx is the publicly accessible version of the updated NCBI [Eukaryotic Genome Annotation Pipeline](https://www.ncbi.nlm.nih.gov/genome/annotation_euk/process/).
-EGAPx takes an assembly fasta file, a taxid of the organism, and RNA-seq data. Based on the taxid, EGAPx will pick protein sets and HMM models. The pipeline runs `miniprot` to align protein sequences, and `STAR` to align RNA-seq to the assembly. Protein alignments and RNA-seq read alignments are then passed to `Gnomon` for gene prediction. In the first step of `Gnomon`, the short alignments are chained together into putative gene models.
+EGAPx takes an assembly FASTA file, a taxid of the organism, and RNA-seq data. Based on the taxid, EGAPx will pick protein sets and HMM models. The pipeline runs `miniprot` to align protein sequences, and `STAR` to align RNA-seq to the assembly. Protein alignments and RNA-seq read alignments are then passed to `Gnomon` for gene prediction. In the first step of `Gnomon`, the short alignments are chained together into putative gene models.
In the second step, these predictions are further supplemented by *ab-initio* predictions based on HMM models. The final annotation for the input assembly is produced as a `gff` file.
**Security Notice:**
@@ -310,4 +312,5 @@
2024-03-27 11:20:24 17127134 aligns.paf
]]>
+
diff -r 7fa335454224 -r 5bec47dfe99a test-data/input.yaml
--- a/test-data/input.yaml Mon Oct 14 19:01:46 2024 +0000
+++ b/test-data/input.yaml Thu Nov 14 15:28:04 2024 +0000
@@ -3,15 +3,13 @@
# To limit the requirements you also need to use -e docker_minimal
genome: https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/020/809/275/GCF_020809275.1_ASM2080927v1/GCF_020809275.1_ASM2080927v1_genomic.fna.gz
+taxid: 6954
reads:
- https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/sample_data/Dermatophagoides_farinae_small/SRR8506572.1
- https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/sample_data/Dermatophagoides_farinae_small/SRR8506572.2
- https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/sample_data/Dermatophagoides_farinae_small/SRR9005248.1
- https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/sample_data/Dermatophagoides_farinae_small/SRR9005248.2
-taxid: 6954
-proteins: []
hmm: https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/gnomon/hmm_parameters/6956.params
tasks:
star_wnode:
star_wnode: -cpus-per-worker 4
-