comparison ncbi_egapx.xml @ 11:5bec47dfe99a draft

planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/ncbi_egapx commit 544ef29cd524d03b10fdc60769d5d0f2a93d3a67
author richard-burhans
date Thu, 14 Nov 2024 15:28:04 +0000
parents 28ab345ebab3
children 649483462da3
comparison
equal deleted inserted replaced
10:7fa335454224 11:5bec47dfe99a
4 <import>macros.xml</import> 4 <import>macros.xml</import>
5 </macros> 5 </macros>
6 <expand macro="edam_ontology"/> 6 <expand macro="edam_ontology"/>
7 <expand macro="requirements"/> 7 <expand macro="requirements"/>
8 <command detect_errors="aggressive"><![CDATA[ 8 <command detect_errors="aggressive"><![CDATA[
9 #if str($cond_input_style.input_style) == "fillform": 9 #if str($cond_input_style.input_style) == "fillform"
10 #set yamlconfig = "egapx.yaml" 10 #set yamlconfig = $egapx_config
11 echo '# yaml generated by ncbi_egapx.xml' > '$yamlconfig' && 11 #else
12 #if str($reference_genome.genome_type_select) == "history"
13 echo 'genome: $reference_genome.genome' >> '$yamlconfig' &&
14 #elif str($reference_genome.genome_type_select) == "indexed":
15 echo 'genome: $reference_genome.genome.fields.path' >> '$yamlconfig' &&
16 #else:
17 echo 'genome: $reference_genome.uri' >> '$yamlconfig' &&
18 #end if
19 echo 'taxid: $taxid' >> '$yamlconfig' &&
20 echo 'reads:' >> '$yamlconfig' &&
21 #if str($condrnaseq.rna_type_select) == "list":
22 #set rs = $rnaseq.split()
23 #set rsplit = [x.strip() for x in $rs]
24 #for $r in $rsplit:
25 echo ' - $r' >> '$yamlconfig' &&
26 #end for
27 #else:
28 #for $r in $rnaseq:
29 echo ' - $r' >> '$yamlconfig' &&
30 #end for
31 #end if
32 #if $proteins:
33 echo 'proteins: $proteins' >> '$yamlconfig' &&
34 #end if
35 #if len($xtra.strip()) > 0:
36 #set lxtra = $xtra.split("\n")
37 #for row in $lxtra:
38 echo '$row' >> '$yamlconfig' &&
39 #end for
40 #end if
41 echo '' >> '$yamlconfig' &&
42 echo "Calculated contents of egapx yaml" &&
43 cat '$yamlconfig' &&
44 #else:
45 #set yamlconfig = $yamlin 12 #set yamlconfig = $yamlin
46 #end if 13 #end if
14 ## activate the following
15 ## - nextflow conda environment
16 ## - EGAPx python virtual environment
47 source /galaxy/env.bash && 17 source /galaxy/env.bash &&
48 echo \${PATH} && 18 ## use the augmented container EGAPx config
49 ln -s /galaxy/egapx/egapx_config && 19 ln -s /galaxy/egapx/egapx_config &&
20 ## run EGAPx
50 python3 /galaxy/egapx/ui/egapx.py '$yamlconfig' -e galaxy -o 'egapx_out' 21 python3 /galaxy/egapx/ui/egapx.py '$yamlconfig' -e galaxy -o 'egapx_out'
51 ]]></command> 22 ]]></command>
23 <configfiles>
24 <configfile name="egapx_config"><![CDATA[
25 #if str($cond_input_style.input_style) == "fillform"
26 # yaml generated by ncbi_egapx.xml
27 #if str($reference_genome.genome_type_select) == "history"
28 #set genome_value = $reference_genome.genome
29 #elif str($reference_genome.genome_type_select) == "indexed"
30 #set genome_value = $reference_genome.genome.fields.path
31 #else
32 #set genome_value = $reference_genome.uri
33 #end if
34 genome: $genome_value
35 taxid: $taxid
36 #if str($condrnaseq.rna_type_select) == "list"
37 #set $reads_values = $rnaseq.split()
38 #else
39 #set $reads_values = $rnaseq
40 #end if
41 reads:
42 #for r in [x.strip() for x in $reads_values]
43 - $r
44 #end for
45 #if str($proteins) != "None"
46 proteins: $proteins
47 #end if
48 #for row in $xtra.strip().split("\n")
49 $row
50 #end for
51 #end if
52 ]]></configfile>
53 </configfiles>
52 <inputs> 54 <inputs>
53 <conditional name="cond_input_style"> 55 <conditional name="cond_input_style">
54 <param name="input_style" type="select" label="Fill in a tool form or use an existing yaml configuration from the current history?" 56 <param name="input_style" type="select" label="Fill in a tool form or use an existing yaml configuration from the current history?"
55 help="Use the tool form to select inputs from the history, or use a pre-prepared yaml file."> 57 help="Use the tool form to select inputs from the history, or use a pre-prepared yaml file.">
56 <option value="fillform" selected="True">Provide configuration details for conversion into a configuration yaml</option> 58 <option value="fillform" selected="True">Provide configuration details for conversion into a configuration yaml</option>
57 <option value="history">Use a pre-prepared yaml egapx configuration</option> 59 <option value="history">Use a pre-prepared yaml egapx configuration</option>
58 </param> 60 </param>
59 <when value="fillform"> 61 <when value="fillform">
60 <conditional name="reference_genome"> 62 <conditional name="reference_genome">
61 <param name="genome_type_select" type="select" label="Reference genome source for mapping supplied RNA-seq reads" 63 <param name="genome_type_select" type="select" label="Reference genome source for mapping supplied RNA-seq reads"
62 help="Select a built in, history or remote URI for the reference genome fasta"> 64 help="Select a built in, history or remote URI for the reference genome FASTA">
63 <option value="history" selected="True">Use a genome fasta file from the current history</option> 65 <option value="history" selected="True">Use a genome FASTA file from the current history</option>
64 <option value="indexed">Use a Galaxy server built-in genome</option> 66 <option value="indexed">Use a Galaxy server built-in genome</option>
65 <option value="uri">Provide a remote web link URI ("https://...") pointing at the required genome reference fasta file</option> 67 <option value="uri">Provide a remote web link URI ("https://...") pointing at the required genome reference FASTA file</option>
66 </param> 68 </param>
67 <when value="history"> 69 <when value="history">
68 <param name="genome" type="data" format="fasta" label="Select the reference genome fasta from the current history"/> 70 <param name="genome" type="data" format="fasta" label="Select the reference genome FASTA from the current history"/>
69 </when> 71 </when>
70 <when value="indexed"> 72 <when value="indexed">
71 <param name="genome" type="select" label="Select a built in reference genome or custom genome" 73 <param name="genome" type="select" label="Select a built in reference genome or custom genome"
72 help="If not listed, add a custom genome or use a reference genome from the history"> 74 help="If not listed, add a custom genome or use a reference genome from the history">
73 <options from_data_table="all_fasta"> 75 <options from_data_table="all_fasta">
74 <validator message="No genomes are available " type="no_options"/> 76 <validator message="No genomes are available " type="no_options"/>
75 </options> 77 </options>
76 </param> 78 </param>
77 </when> 79 </when>
78 <when value="uri"> 80 <when value="uri">
79 <param name="uri" type="text" label="URI pointing to the reference genome fasta file"/> 81 <param name="uri" type="text" label="URI pointing to the reference genome FASTA file"/>
80 </when> 82 </when>
81 </conditional> 83 </conditional>
82 <param name="taxid" type="text" label="NCBI Taxon ID" help="Used to identify the HMM model files needed"/> 84 <param name="taxid" type="integer" min="0" label="NCBI Taxon ID" help="Used to identify the HMM model files needed"/>
83 <conditional name="condrnaseq"> 85 <conditional name="condrnaseq">
84 <param name="rna_type_select" type="select" label="RNA sequence data source" 86 <param name="rna_type_select" type="select" label="RNA sequence data source"
85 help="Select RNAseq input data from history or input a list of SRA identifiers or remote URI"> 87 help="Select RNAseq input data from history or input a list of SRA identifiers or remote URI">
86 <option value="list" selected="True">Type in a list of SRA identifiers and/or remote RNA-seq fasta URI</option> 88 <option value="list" selected="True">Type in a list of SRA identifiers and/or remote RNA-seq FASTA URI</option>
87 <option value="history">Select one or more RNA-seq fastq datasets from the current history</option> 89 <option value="history">Select one or more RNA-seq fastq datasets from the current history</option>
88 </param> 90 </param>
89 <when value="list"> 91 <when value="list">
90 <param name="rnaseq" type="text" area="true" label="List all required individual RNA-seq URI or SRA identifiers, separated by spaces or newlines" 92 <param name="rnaseq" type="text" area="true" label="List all required individual RNA-seq URI or SRA identifiers, separated by spaces or newlines"
91 help="Either a working URI for a RNA-seq fasta, or a bare SRA identifier will work - can be mixed"> 93 help="Either a working URI for a RNA-seq FASTA, or a bare SRA identifier will work - can be mixed">
92 <validator type="empty_field"/> 94 <validator type="empty_field"/>
93 </param> 95 </param>
94 </when> 96 </when>
95 <when value="history"> 97 <when value="history">
96 <param name="rnaseq" type="data" format="fastqsanger,fastqsanger.gz" multiple="true" label="Select multiple RNA-seq fastqsanger inputs from the current history" 98 <param name="rnaseq" type="data" format="fastqsanger,fastqsanger.gz" multiple="true" label="Select multiple RNA-seq fastqsanger inputs from the current history"
97 help="All selected rna-seq fastqsanger will be added to the yaml for egapx configuration"/> 99 help="All selected rna-seq fastqsanger will be added to the yaml for egapx configuration"/>
98 </when> 100 </when>
99 </conditional> 101 </conditional>
100 <param name="proteins" type="data" format="fasta,tasta.gz" optional="true" label="Select a protein set"/> 102 <param name="proteins" type="data" format="fasta,fasta.gz" optional="true" label="Select a protein set"/>
101 <param name="xtra" type="text" area="true" label="Additional yaml to append to the egapx.yaml configuration" 103 <param name="xtra" type="text" area="true" label="Additional yaml to append to the egapx.yaml configuration"
102 help="Not normally needed but useful for testing additional configuration elements"> 104 help="Not normally needed but useful for testing additional configuration elements">
103 <sanitizer invalid_char=""> 105 <sanitizer invalid_char="">
104 <valid initial="string.printable"/> 106 <valid initial="string.printable"/>
105 </sanitizer> 107 </sanitizer>
106 </param> 108 </param>
107 </when> 109 </when>
108 <when value="history"> 110 <when value="history">
109 <param name="yamlin" type="data" format="yaml,txt" label="egapx configuration yaml file to pass to Nextflow"/> 111 <param name="yamlin" type="data" format="yaml" label="egapx configuration yaml file to pass to Nextflow"/>
110 </when> 112 </when>
111 </conditional> 113 </conditional>
112 </inputs> 114 </inputs>
113 <outputs> 115 <outputs>
114 <data name="output" format="gff" label="EGAPx annotation for ${on_string}" from_work_dir="egapx_out/accept.gff"/> 116 <data name="output" format="gff" label="EGAPx annotation for ${on_string}" from_work_dir="egapx_out/accept.gff"/>
120 <data name="nf_params" format="yaml" label="Nextflow run parameters" from_work_dir="egapx_out/run_params.yaml"/> 122 <data name="nf_params" format="yaml" label="Nextflow run parameters" from_work_dir="egapx_out/run_params.yaml"/>
121 </collection> 123 </collection>
122 </outputs> 124 </outputs>
123 <tests> 125 <tests>
124 <test expect_test_failure="true"> 126 <test expect_test_failure="true">
125 <param name="input_style" value="history"/> 127 <param name="input_style" value="fillform"/>
126 <param name="yamlin" value="input.yaml"/> 128 <param name="taxid" value="6954"/>
129 <param name="genome_type_select" value="uri"/>
130 <param name="uri" value="https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/020/809/275/GCF_020809275.1_ASM2080927v1/GCF_020809275.1_ASM2080927v1_genomic.fna.gz"/>
131 <param name="rna_type_select" value="list"/>
132 <param name="rnaseq" value="https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/sample_data/Dermatophagoides_farinae_small/SRR8506572.1 https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/sample_data/Dermatophagoides_farinae_small/SRR8506572.2 https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/sample_data/Dermatophagoides_farinae_small/SRR9005248.1 https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/sample_data/Dermatophagoides_farinae_small/SRR9005248.2"/>
133 <param name="xtra" value="hmm: https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/gnomon/hmm_parameters/6956.params&#10;tasks:&#10; star_wnode:&#10; star_wnode: -cpus-per-worker 4"/>
127 <output name="output"><assert_contents><has_size min="1"/></assert_contents></output> 134 <output name="output"><assert_contents><has_size min="1"/></assert_contents></output>
128 <output_collection name="nextflow_stats" type="list"> 135 <output_collection name="nextflow_stats" type="list">
129 <element name="nf_log"><assert_contents><has_size min="1"/></assert_contents></element> 136 <element name="nf_log"><assert_contents><has_size min="1"/></assert_contents></element>
130 <element name="nf_report"><assert_contents><has_size min="1"/></assert_contents></element> 137 <element name="nf_report"><assert_contents><has_size min="1"/></assert_contents></element>
131 <element name="nf_trace"><assert_contents><has_size min="1"/></assert_contents></element> 138 <element name="nf_trace"><assert_contents><has_size min="1"/></assert_contents></element>
132 <element name="nf_timeline"><assert_contents><has_size min="1"/></assert_contents></element> 139 <element name="nf_timeline"><assert_contents><has_size min="1"/></assert_contents></element>
133 <element name="nf_params"><assert_contents><has_size min="1"/></assert_contents></element> 140 <element name="nf_params"><assert_contents><has_size min="1"/></assert_contents></element>
134 </output_collection> 141 </output_collection>
135 </test> 142 </test>
136 <test expect_test_failure="true"> 143 <test expect_test_failure="true">
137 <param name="input_style" value="fillform"/> 144 <param name="input_style" value="history"/>
138 <param name="taxid" value="6954"/> 145 <param name="yamlin" value="input.yaml"/>
139 <param name="genome_type_select" value="uri"/>
140 <param name="uri" value="https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/020/809/275/GCF_020809275.1_ASM2080927v1/GCF_020809275.1_ASM2080927v1_genomic.fna.gz"/>
141 <param name="rna_type_select" value="list"/>
142 <param name="rnaseq" value="https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/sample_data/Dermatophagoides_farinae_small/SRR8506572.1 https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/sample_data/Dermatophagoides_farinae_small/SRR8506572.2 https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/sample_data/Dermatophagoides_farinae_small/SRR9005248.1 https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/sample_data/Dermatophagoides_farinae_small/SRR9005248.2"/>
143 <param name="xtra" value="proteins: []&#10;hmm: https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/gnomon/hmm_parameters/6956.params&#10;tasks:&#10; star_wnode:&#10; star_wnode: -cpus-per-worker 4"/>
144 <output name="output"><assert_contents><has_size min="1"/></assert_contents></output> 146 <output name="output"><assert_contents><has_size min="1"/></assert_contents></output>
145 <output_collection name="nextflow_stats" type="list"> 147 <output_collection name="nextflow_stats" type="list">
146 <element name="nf_log"><assert_contents><has_size min="1"/></assert_contents></element> 148 <element name="nf_log"><assert_contents><has_size min="1"/></assert_contents></element>
147 <element name="nf_report"><assert_contents><has_size min="1"/></assert_contents></element> 149 <element name="nf_report"><assert_contents><has_size min="1"/></assert_contents></element>
148 <element name="nf_trace"><assert_contents><has_size min="1"/></assert_contents></element> 150 <element name="nf_trace"><assert_contents><has_size min="1"/></assert_contents></element>
240 **Warning:** 242 **Warning:**
241 The current version is an alpha release with limited features and organism scope to collect initial feedback on execution. Outputs are not yet complete and not intended for production use. Please open a GitHub [Issue](https://github.com/ncbi/egapx/issues) if you encounter any problems with EGAPx. You can also write to cgr@nlm.nih.gov to give us your feedback or if you have any questions. 243 The current version is an alpha release with limited features and organism scope to collect initial feedback on execution. Outputs are not yet complete and not intended for production use. Please open a GitHub [Issue](https://github.com/ncbi/egapx/issues) if you encounter any problems with EGAPx. You can also write to cgr@nlm.nih.gov to give us your feedback or if you have any questions.
242 244
243 EGAPx is the publicly accessible version of the updated NCBI [Eukaryotic Genome Annotation Pipeline](https://www.ncbi.nlm.nih.gov/genome/annotation_euk/process/). 245 EGAPx is the publicly accessible version of the updated NCBI [Eukaryotic Genome Annotation Pipeline](https://www.ncbi.nlm.nih.gov/genome/annotation_euk/process/).
244 246
245 EGAPx takes an assembly fasta file, a taxid of the organism, and RNA-seq data. Based on the taxid, EGAPx will pick protein sets and HMM models. The pipeline runs `miniprot` to align protein sequences, and `STAR` to align RNA-seq to the assembly. Protein alignments and RNA-seq read alignments are then passed to `Gnomon` for gene prediction. In the first step of `Gnomon`, the short alignments are chained together into putative gene models. 247 EGAPx takes an assembly FASTA file, a taxid of the organism, and RNA-seq data. Based on the taxid, EGAPx will pick protein sets and HMM models. The pipeline runs `miniprot` to align protein sequences, and `STAR` to align RNA-seq to the assembly. Protein alignments and RNA-seq read alignments are then passed to `Gnomon` for gene prediction. In the first step of `Gnomon`, the short alignments are chained together into putative gene models.
246 In the second step, these predictions are further supplemented by *ab-initio* predictions based on HMM models. The final annotation for the input assembly is produced as a `gff` file. 248 In the second step, these predictions are further supplemented by *ab-initio* predictions based on HMM models. The final annotation for the input assembly is produced as a `gff` file.
247 249
248 **Security Notice:** 250 **Security Notice:**
249 251
250 EGAPx has dependencies in and outside of its execution path that include several thousand files from the [NCBI C++ toolkit](https://www.ncbi.nlm.nih.gov/toolkit), and more than a million total lines of code. Static Application Security Testing has shown a small number of verified buffer overrun security vulnerabilities. Users should consult with their organizational security team on risk and if there is concern, consider mitigating options like running via VM or cloud instance. 252 EGAPx has dependencies in and outside of its execution path that include several thousand files from the [NCBI C++ toolkit](https://www.ncbi.nlm.nih.gov/toolkit), and more than a million total lines of code. Static Application Security Testing has shown a small number of verified buffer overrun security vulnerabilities. Users should consult with their organizational security team on risk and if there is concern, consider mitigating options like running via VM or cloud instance.
308 2024-03-27 11:20:25 1 .exitcode 310 2024-03-27 11:20:25 1 .exitcode
309 $ aws s3 ls s3://temp_datapath/D_farinae/96/621c4ba4e6e87a4d869c696fe50034/output/ 311 $ aws s3 ls s3://temp_datapath/D_farinae/96/621c4ba4e6e87a4d869c696fe50034/output/
310 2024-03-27 11:20:24 17127134 aligns.paf 312 2024-03-27 11:20:24 17127134 aligns.paf
311 ]]></help> 313 ]]></help>
312 <expand macro="citations"/> 314 <expand macro="citations"/>
315 <expand macro="creators"/>
313 </tool> 316 </tool>