Mercurial > repos > richard-burhans > ncbi_egapx
changeset 11:5bec47dfe99a draft
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/ncbi_egapx commit 544ef29cd524d03b10fdc60769d5d0f2a93d3a67
author | richard-burhans |
---|---|
date | Thu, 14 Nov 2024 15:28:04 +0000 |
parents | 7fa335454224 |
children | 649483462da3 |
files | macros.xml ncbi_egapx.xml test-data/input.yaml |
diffstat | 3 files changed, 80 insertions(+), 64 deletions(-) [+] |
line wrap: on
line diff
--- a/macros.xml Mon Oct 14 19:01:46 2024 +0000 +++ b/macros.xml Thu Nov 14 15:28:04 2024 +0000 @@ -1,11 +1,11 @@ <macros> <xml name="requirements"> <requirements> - <container type="docker">quay.io/richard-burhans/egapx:@TOOL_VERSION@</container> + <container type="docker">quay.io/galaxy/egapx:@TOOL_VERSION@</container> </requirements> </xml> - <token name="@TOOL_VERSION@">0.2-alpha</token> - <token name="@VERSION_SUFFIX@">4</token> + <token name="@TOOL_VERSION@">0.3.0-alpha</token> + <token name="@VERSION_SUFFIX@">0</token> <token name="@PROFILE@">22.05</token> <xml name="edam_ontology"> <edam_operations> @@ -14,7 +14,22 @@ </xml> <xml name="citations"> <citations> - <citation type="doi">10.1093/bioinformatics/bts573</citation> + <citation type="bibtex"> + @misc{githubegapx, + author = {NCBI}, + year = "2024", + title = {Eukaryotic Genome Annotation Pipeline - External (EGAPx)}, + publisher = {GitHub}, + journal = {GitHub repository}, + url = {https://github.com/ncbi/egapx} + } + </citation> </citations> </xml> + <xml name="creators"> + <creator> + <person givenName="Ross" familyName="Lazarus" url="https://orcid.org/0000-0003-3939-1961"/> + <person givenName="Richard" familyName="Burhans" url="https://science.psu.edu/bmb/people/rcb112"/> + </creator> + </xml> </macros>
--- a/ncbi_egapx.xml Mon Oct 14 19:01:46 2024 +0000 +++ b/ncbi_egapx.xml Thu Nov 14 15:28:04 2024 +0000 @@ -6,49 +6,51 @@ <expand macro="edam_ontology"/> <expand macro="requirements"/> <command detect_errors="aggressive"><![CDATA[ - #if str($cond_input_style.input_style) == "fillform": - #set yamlconfig = "egapx.yaml" - echo '# yaml generated by ncbi_egapx.xml' > '$yamlconfig' && - #if str($reference_genome.genome_type_select) == "history" - echo 'genome: $reference_genome.genome' >> '$yamlconfig' && - #elif str($reference_genome.genome_type_select) == "indexed": - echo 'genome: $reference_genome.genome.fields.path' >> '$yamlconfig' && - #else: - echo 'genome: $reference_genome.uri' >> '$yamlconfig' && - #end if - echo 'taxid: $taxid' >> '$yamlconfig' && - echo 'reads:' >> '$yamlconfig' && - #if str($condrnaseq.rna_type_select) == "list": - #set rs = $rnaseq.split() - #set rsplit = [x.strip() for x in $rs] - #for $r in $rsplit: - echo ' - $r' >> '$yamlconfig' && - #end for - #else: - #for $r in $rnaseq: - echo ' - $r' >> '$yamlconfig' && - #end for - #end if - #if $proteins: - echo 'proteins: $proteins' >> '$yamlconfig' && - #end if - #if len($xtra.strip()) > 0: - #set lxtra = $xtra.split("\n") - #for row in $lxtra: - echo '$row' >> '$yamlconfig' && - #end for - #end if - echo '' >> '$yamlconfig' && - echo "Calculated contents of egapx yaml" && - cat '$yamlconfig' && - #else: + #if str($cond_input_style.input_style) == "fillform" + #set yamlconfig = $egapx_config + #else #set yamlconfig = $yamlin #end if + ## activate the following + ## - nextflow conda environment + ## - EGAPx python virtual environment source /galaxy/env.bash && - echo \${PATH} && + ## use the augmented container EGAPx config ln -s /galaxy/egapx/egapx_config && + ## run EGAPx python3 /galaxy/egapx/ui/egapx.py '$yamlconfig' -e galaxy -o 'egapx_out' ]]></command> + <configfiles> + <configfile name="egapx_config"><![CDATA[ +#if str($cond_input_style.input_style) == "fillform" +# yaml generated by 
ncbi_egapx.xml + #if str($reference_genome.genome_type_select) == "history" + #set genome_value = $reference_genome.genome + #elif str($reference_genome.genome_type_select) == "indexed" + #set genome_value = $reference_genome.genome.fields.path + #else + #set genome_value = $reference_genome.uri + #end if +genome: $genome_value +taxid: $taxid + #if str($condrnaseq.rna_type_select) == "list" + #set $reads_values = $rnaseq.split() + #else + #set $reads_values = $rnaseq + #end if +reads: + #for r in [x.strip() for x in $reads_values] + - $r + #end for + #if str($proteins) != "None" +proteins: $proteins + #end if + #for row in $xtra.strip().split("\n") +$row + #end for +#end if + ]]></configfile> + </configfiles> <inputs> <conditional name="cond_input_style"> <param name="input_style" type="select" label="Fill in a tool form or use an existing yaml configuration from the current history?" @@ -59,13 +61,13 @@ <when value="fillform"> <conditional name="reference_genome"> <param name="genome_type_select" type="select" label="Reference genome source for mapping supplied RNA-seq reads" - help="Select a built in, history or remote URI for the reference genome fasta"> - <option value="history" selected="True">Use a genome fasta file from the current history</option> + help="Select a built in, history or remote URI for the reference genome FASTA"> + <option value="history" selected="True">Use a genome FASTA file from the current history</option> <option value="indexed">Use a Galaxy server built-in genome</option> - <option value="uri">Provide a remote web link URI ("https://...") pointing at the required genome reference fasta file</option> + <option value="uri">Provide a remote web link URI ("https://...") pointing at the required genome reference FASTA file</option> </param> <when value="history"> - <param name="genome" type="data" format="fasta" label="Select the reference genome fasta from the current history"/> + <param name="genome" type="data" format="fasta" 
label="Select the reference genome FASTA from the current history"/> </when> <when value="indexed"> <param name="genome" type="select" label="Select a built in reference genome or custom genome" @@ -76,19 +78,19 @@ </param> </when> <when value="uri"> - <param name="uri" type="text" label="URI pointing to the reference genome fasta file"/> + <param name="uri" type="text" label="URI pointing to the reference genome FASTA file"/> </when> </conditional> - <param name="taxid" type="text" label="NCBI Taxon ID" help="Used to identify the HMM model files needed"/> + <param name="taxid" type="integer" min="0" label="NCBI Taxon ID" help="Used to identify the HMM model files needed"/> <conditional name="condrnaseq"> <param name="rna_type_select" type="select" label="RNA sequence data source" help="Select RNAseq input data from history or input a list of SRA identifiers or remote URI"> - <option value="list" selected="True">Type in a list of SRA identifiers and/or remote RNA-seq fasta URI</option> + <option value="list" selected="True">Type in a list of SRA identifiers and/or remote RNA-seq FASTA URI</option> <option value="history">Select one or more RNA-seq fastq datasets from the current history</option> </param> <when value="list"> <param name="rnaseq" type="text" area="true" label="List all required individual RNA-seq URI or SRA identifiers, separated by spaces or newlines" - help="Either a working URI for a RNA-seq fasta, or a bare SRA identifier will work - can be mixed"> + help="Either a working URI for a RNA-seq FASTA, or a bare SRA identifier will work - can be mixed"> <validator type="empty_field"/> </param> </when> @@ -97,7 +99,7 @@ help="All selected rna-seq fastqsanger will be added to the yaml for egapx configuration"/> </when> </conditional> - <param name="proteins" type="data" format="fasta,tasta.gz" optional="true" label="Select a protein set"/> + <param name="proteins" type="data" format="fasta,fasta.gz" optional="true" label="Select a protein set"/> <param 
name="xtra" type="text" area="true" label="Additional yaml to append to the egapx.yaml configuration" help="Not normally needed but useful for testing additional configuration elements"> <sanitizer invalid_char=""> @@ -106,7 +108,7 @@ </param> </when> <when value="history"> - <param name="yamlin" type="data" format="yaml,txt" label="egapx configuration yaml file to pass to Nextflow"/> + <param name="yamlin" type="data" format="yaml" label="egapx configuration yaml file to pass to Nextflow"/> </when> </conditional> </inputs> @@ -122,8 +124,13 @@ </outputs> <tests> <test expect_test_failure="true"> - <param name="input_style" value="history"/> - <param name="yamlin" value="input.yaml"/> + <param name="input_style" value="fillform"/> + <param name="taxid" value="6954"/> + <param name="genome_type_select" value="uri"/> + <param name="uri" value="https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/020/809/275/GCF_020809275.1_ASM2080927v1/GCF_020809275.1_ASM2080927v1_genomic.fna.gz"/> + <param name="rna_type_select" value="list"/> + <param name="rnaseq" value="https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/sample_data/Dermatophagoides_farinae_small/SRR8506572.1 https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/sample_data/Dermatophagoides_farinae_small/SRR8506572.2 https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/sample_data/Dermatophagoides_farinae_small/SRR9005248.1 https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/sample_data/Dermatophagoides_farinae_small/SRR9005248.2"/> + <param name="xtra" value="hmm: https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/gnomon/hmm_parameters/6956.params tasks: star_wnode: star_wnode: -cpus-per-worker 4"/> <output name="output"><assert_contents><has_size min="1"/></assert_contents></output> <output_collection name="nextflow_stats" type="list"> <element name="nf_log"><assert_contents><has_size min="1"/></assert_contents></element> @@ -134,13 +141,8 @@ </output_collection> </test> <test expect_test_failure="true"> - <param name="input_style" 
value="fillform"/> - <param name="taxid" value="6954"/> - <param name="genome_type_select" value="uri"/> - <param name="uri" value="https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/020/809/275/GCF_020809275.1_ASM2080927v1/GCF_020809275.1_ASM2080927v1_genomic.fna.gz"/> - <param name="rna_type_select" value="list"/> - <param name="rnaseq" value="https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/sample_data/Dermatophagoides_farinae_small/SRR8506572.1 https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/sample_data/Dermatophagoides_farinae_small/SRR8506572.2 https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/sample_data/Dermatophagoides_farinae_small/SRR9005248.1 https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/sample_data/Dermatophagoides_farinae_small/SRR9005248.2"/> - <param name="xtra" value="proteins: [] hmm: https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/gnomon/hmm_parameters/6956.params tasks: star_wnode: star_wnode: -cpus-per-worker 4"/> + <param name="input_style" value="history"/> + <param name="yamlin" value="input.yaml"/> <output name="output"><assert_contents><has_size min="1"/></assert_contents></output> <output_collection name="nextflow_stats" type="list"> <element name="nf_log"><assert_contents><has_size min="1"/></assert_contents></element> @@ -242,7 +244,7 @@ EGAPx is the publicly accessible version of the updated NCBI [Eukaryotic Genome Annotation Pipeline](https://www.ncbi.nlm.nih.gov/genome/annotation_euk/process/). -EGAPx takes an assembly fasta file, a taxid of the organism, and RNA-seq data. Based on the taxid, EGAPx will pick protein sets and HMM models. The pipeline runs `miniprot` to align protein sequences, and `STAR` to align RNA-seq to the assembly. Protein alignments and RNA-seq read alignments are then passed to `Gnomon` for gene prediction. In the first step of `Gnomon`, the short alignments are chained together into putative gene models. +EGAPx takes an assembly FASTA file, a taxid of the organism, and RNA-seq data. 
Based on the taxid, EGAPx will pick protein sets and HMM models. The pipeline runs `miniprot` to align protein sequences, and `STAR` to align RNA-seq to the assembly. Protein alignments and RNA-seq read alignments are then passed to `Gnomon` for gene prediction. In the first step of `Gnomon`, the short alignments are chained together into putative gene models. In the second step, these predictions are further supplemented by *ab-initio* predictions based on HMM models. The final annotation for the input assembly is produced as a `gff` file. **Security Notice:** @@ -310,4 +312,5 @@ 2024-03-27 11:20:24 17127134 aligns.paf ]]></help> <expand macro="citations"/> + <expand macro="creators"/> </tool>
--- a/test-data/input.yaml Mon Oct 14 19:01:46 2024 +0000 +++ b/test-data/input.yaml Thu Nov 14 15:28:04 2024 +0000 @@ -3,15 +3,13 @@ # To limit the requirements you also need to use -e docker_minimal genome: https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/020/809/275/GCF_020809275.1_ASM2080927v1/GCF_020809275.1_ASM2080927v1_genomic.fna.gz +taxid: 6954 reads: - https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/sample_data/Dermatophagoides_farinae_small/SRR8506572.1 - https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/sample_data/Dermatophagoides_farinae_small/SRR8506572.2 - https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/sample_data/Dermatophagoides_farinae_small/SRR9005248.1 - https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/sample_data/Dermatophagoides_farinae_small/SRR9005248.2 -taxid: 6954 -proteins: [] hmm: https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/gnomon/hmm_parameters/6956.params tasks: star_wnode: star_wnode: -cpus-per-worker 4 -