# HG changeset patch # User richard-burhans # Date 1731598084 0 # Node ID 5bec47dfe99ac463405f972d28742e28eaecba3c # Parent 7fa3354542242b96106c39e01abf19415118695e planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/ncbi_egapx commit 544ef29cd524d03b10fdc60769d5d0f2a93d3a67 diff -r 7fa335454224 -r 5bec47dfe99a macros.xml --- a/macros.xml Mon Oct 14 19:01:46 2024 +0000 +++ b/macros.xml Thu Nov 14 15:28:04 2024 +0000 @@ -1,11 +1,11 @@ - quay.io/richard-burhans/egapx:@TOOL_VERSION@ + quay.io/galaxy/egapx:@TOOL_VERSION@ - 0.2-alpha - 4 + 0.3.0-alpha + 0 22.05 @@ -14,7 +14,22 @@ - 10.1093/bioinformatics/bts573 + + @misc{githubegapx, + author = {NCBI}, + year = "2024", + title = {Eukaryotic Genome Annotation Pipeline - External (EGAPx)}, + publisher = {GitHub}, + journal = {GitHub repository}, + url = {https://github.com/ncbi/egapx} + } + + + + + + + diff -r 7fa335454224 -r 5bec47dfe99a ncbi_egapx.xml --- a/ncbi_egapx.xml Mon Oct 14 19:01:46 2024 +0000 +++ b/ncbi_egapx.xml Thu Nov 14 15:28:04 2024 +0000 @@ -6,49 +6,51 @@ '$yamlconfig' && - #if str($reference_genome.genome_type_select) == "history" - echo 'genome: $reference_genome.genome' >> '$yamlconfig' && - #elif str($reference_genome.genome_type_select) == "indexed": - echo 'genome: $reference_genome.genome.fields.path' >> '$yamlconfig' && - #else: - echo 'genome: $reference_genome.uri' >> '$yamlconfig' && - #end if - echo 'taxid: $taxid' >> '$yamlconfig' && - echo 'reads:' >> '$yamlconfig' && - #if str($condrnaseq.rna_type_select) == "list": - #set rs = $rnaseq.split() - #set rsplit = [x.strip() for x in $rs] - #for $r in $rsplit: - echo ' - $r' >> '$yamlconfig' && - #end for - #else: - #for $r in $rnaseq: - echo ' - $r' >> '$yamlconfig' && - #end for - #end if - #if $proteins: - echo 'proteins: $proteins' >> '$yamlconfig' && - #end if - #if len($xtra.strip()) > 0: - #set lxtra = $xtra.split("\n") - #for row in $lxtra: - echo '$row' >> '$yamlconfig' && - #end for - #end if - 
echo '' >> '$yamlconfig' && - echo "Calculated contents of egapx yaml" && - cat '$yamlconfig' && - #else: + #if str($cond_input_style.input_style) == "fillform" + #set yamlconfig = $egapx_config + #else #set yamlconfig = $yamlin #end if + ## activate the following + ## - nextflow conda environment + ## - EGAPx python virtual environment source /galaxy/env.bash && - echo \${PATH} && + ## use the augmented container EGAPx config ln -s /galaxy/egapx/egapx_config && + ## run EGAPx python3 /galaxy/egapx/ui/egapx.py '$yamlconfig' -e galaxy -o 'egapx_out' ]]> + + + - + help="Select a built in, history or remote URI for the reference genome FASTA"> + - + - + - + - + - + + help="Either a working URI for a RNA-seq FASTA, or a bare SRA identifier will work - can be mixed"> @@ -97,7 +99,7 @@ help="All selected rna-seq fastqsanger will be added to the yaml for egapx configuration"/> - + @@ -106,7 +108,7 @@ - + @@ -122,8 +124,13 @@ - - + + + + + + + @@ -134,13 +141,8 @@ - - - - - - - + + @@ -242,7 +244,7 @@ EGAPx is the publicly accessible version of the updated NCBI [Eukaryotic Genome Annotation Pipeline](https://www.ncbi.nlm.nih.gov/genome/annotation_euk/process/). -EGAPx takes an assembly fasta file, a taxid of the organism, and RNA-seq data. Based on the taxid, EGAPx will pick protein sets and HMM models. The pipeline runs `miniprot` to align protein sequences, and `STAR` to align RNA-seq to the assembly. Protein alignments and RNA-seq read alignments are then passed to `Gnomon` for gene prediction. In the first step of `Gnomon`, the short alignments are chained together into putative gene models. +EGAPx takes an assembly FASTA file, a taxid of the organism, and RNA-seq data. Based on the taxid, EGAPx will pick protein sets and HMM models. The pipeline runs `miniprot` to align protein sequences, and `STAR` to align RNA-seq to the assembly. Protein alignments and RNA-seq read alignments are then passed to `Gnomon` for gene prediction. 
In the first step of `Gnomon`, the short alignments are chained together into putative gene models. In the second step, these predictions are further supplemented by *ab-initio* predictions based on HMM models. The final annotation for the input assembly is produced as a `gff` file. **Security Notice:** @@ -310,4 +312,5 @@ 2024-03-27 11:20:24 17127134 aligns.paf ]]> + diff -r 7fa335454224 -r 5bec47dfe99a test-data/input.yaml --- a/test-data/input.yaml Mon Oct 14 19:01:46 2024 +0000 +++ b/test-data/input.yaml Thu Nov 14 15:28:04 2024 +0000 @@ -3,15 +3,13 @@ # To limit the requirements you also need to use -e docker_minimal genome: https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/020/809/275/GCF_020809275.1_ASM2080927v1/GCF_020809275.1_ASM2080927v1_genomic.fna.gz +taxid: 6954 reads: - https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/sample_data/Dermatophagoides_farinae_small/SRR8506572.1 - https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/sample_data/Dermatophagoides_farinae_small/SRR8506572.2 - https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/sample_data/Dermatophagoides_farinae_small/SRR9005248.1 - https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/sample_data/Dermatophagoides_farinae_small/SRR9005248.2 -taxid: 6954 -proteins: [] hmm: https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/gnomon/hmm_parameters/6956.params tasks: star_wnode: star_wnode: -cpus-per-worker 4 -