Mercurial > repos > richard-burhans > ncbi_egapx
comparison ncbi_egapx.xml @ 11:5bec47dfe99a draft
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/ncbi_egapx commit 544ef29cd524d03b10fdc60769d5d0f2a93d3a67
author | richard-burhans |
---|---|
date | Thu, 14 Nov 2024 15:28:04 +0000 |
parents | 28ab345ebab3 |
children | 649483462da3 |
comparison
equal
deleted
inserted
replaced
10:7fa335454224 | 11:5bec47dfe99a |
---|---|
4 <import>macros.xml</import> | 4 <import>macros.xml</import> |
5 </macros> | 5 </macros> |
6 <expand macro="edam_ontology"/> | 6 <expand macro="edam_ontology"/> |
7 <expand macro="requirements"/> | 7 <expand macro="requirements"/> |
8 <command detect_errors="aggressive"><![CDATA[ | 8 <command detect_errors="aggressive"><![CDATA[ |
9 #if str($cond_input_style.input_style) == "fillform": | 9 #if str($cond_input_style.input_style) == "fillform" |
10 #set yamlconfig = "egapx.yaml" | 10 #set yamlconfig = $egapx_config |
11 echo '# yaml generated by ncbi_egapx.xml' > '$yamlconfig' && | 11 #else |
12 #if str($reference_genome.genome_type_select) == "history" | |
13 echo 'genome: $reference_genome.genome' >> '$yamlconfig' && | |
14 #elif str($reference_genome.genome_type_select) == "indexed": | |
15 echo 'genome: $reference_genome.genome.fields.path' >> '$yamlconfig' && | |
16 #else: | |
17 echo 'genome: $reference_genome.uri' >> '$yamlconfig' && | |
18 #end if | |
19 echo 'taxid: $taxid' >> '$yamlconfig' && | |
20 echo 'reads:' >> '$yamlconfig' && | |
21 #if str($condrnaseq.rna_type_select) == "list": | |
22 #set rs = $rnaseq.split() | |
23 #set rsplit = [x.strip() for x in $rs] | |
24 #for $r in $rsplit: | |
25 echo ' - $r' >> '$yamlconfig' && | |
26 #end for | |
27 #else: | |
28 #for $r in $rnaseq: | |
29 echo ' - $r' >> '$yamlconfig' && | |
30 #end for | |
31 #end if | |
32 #if $proteins: | |
33 echo 'proteins: $proteins' >> '$yamlconfig' && | |
34 #end if | |
35 #if len($xtra.strip()) > 0: | |
36 #set lxtra = $xtra.split("\n") | |
37 #for row in $lxtra: | |
38 echo '$row' >> '$yamlconfig' && | |
39 #end for | |
40 #end if | |
41 echo '' >> '$yamlconfig' && | |
42 echo "Calculated contents of egapx yaml" && | |
43 cat '$yamlconfig' && | |
44 #else: | |
45 #set yamlconfig = $yamlin | 12 #set yamlconfig = $yamlin |
46 #end if | 13 #end if |
14 ## activate the following | |
15 ## - nextflow conda environment | |
16 ## - EGAPx python virtual environment | |
47 source /galaxy/env.bash && | 17 source /galaxy/env.bash && |
48 echo \${PATH} && | 18 ## use the augmented container EGAPx config |
49 ln -s /galaxy/egapx/egapx_config && | 19 ln -s /galaxy/egapx/egapx_config && |
20 ## run EGAPx | |
50 python3 /galaxy/egapx/ui/egapx.py '$yamlconfig' -e galaxy -o 'egapx_out' | 21 python3 /galaxy/egapx/ui/egapx.py '$yamlconfig' -e galaxy -o 'egapx_out' |
51 ]]></command> | 22 ]]></command> |
23 <configfiles> | |
24 <configfile name="egapx_config"><![CDATA[ | |
25 #if str($cond_input_style.input_style) == "fillform" | |
26 # yaml generated by ncbi_egapx.xml | |
27 #if str($reference_genome.genome_type_select) == "history" | |
28 #set genome_value = $reference_genome.genome | |
29 #elif str($reference_genome.genome_type_select) == "indexed" | |
30 #set genome_value = $reference_genome.genome.fields.path | |
31 #else | |
32 #set genome_value = $reference_genome.uri | |
33 #end if | |
34 genome: $genome_value | |
35 taxid: $taxid | |
36 #if str($condrnaseq.rna_type_select) == "list" | |
37 #set $reads_values = $rnaseq.split() | |
38 #else | |
39 #set $reads_values = $rnaseq | |
40 #end if | |
41 reads: | |
42 #for r in [x.strip() for x in $reads_values] | |
43 - $r | |
44 #end for | |
45 #if str($proteins) != "None" | |
46 proteins: $proteins | |
47 #end if | |
48 #for row in $xtra.strip().split("\n") | |
49 $row | |
50 #end for | |
51 #end if | |
52 ]]></configfile> | |
53 </configfiles> | |
52 <inputs> | 54 <inputs> |
53 <conditional name="cond_input_style"> | 55 <conditional name="cond_input_style"> |
54 <param name="input_style" type="select" label="Fill in a tool form or use an existing yaml configuration from the current history?" | 56 <param name="input_style" type="select" label="Fill in a tool form or use an existing yaml configuration from the current history?" |
55 help="Use the tool form to select inputs from the history, or use a pre-prepared yaml file."> | 57 help="Use the tool form to select inputs from the history, or use a pre-prepared yaml file."> |
56 <option value="fillform" selected="True">Provide configuration details for conversion into a configuration yaml</option> | 58 <option value="fillform" selected="True">Provide configuration details for conversion into a configuration yaml</option> |
57 <option value="history">Use a pre-prepared yaml egapx configuration</option> | 59 <option value="history">Use a pre-prepared yaml egapx configuration</option> |
58 </param> | 60 </param> |
59 <when value="fillform"> | 61 <when value="fillform"> |
60 <conditional name="reference_genome"> | 62 <conditional name="reference_genome"> |
61 <param name="genome_type_select" type="select" label="Reference genome source for mapping supplied RNA-seq reads" | 63 <param name="genome_type_select" type="select" label="Reference genome source for mapping supplied RNA-seq reads" |
62 help="Select a built in, history or remote URI for the reference genome fasta"> | 64 help="Select a built in, history or remote URI for the reference genome FASTA"> |
63 <option value="history" selected="True">Use a genome fasta file from the current history</option> | 65 <option value="history" selected="True">Use a genome FASTA file from the current history</option> |
64 <option value="indexed">Use a Galaxy server built-in genome</option> | 66 <option value="indexed">Use a Galaxy server built-in genome</option> |
65 <option value="uri">Provide a remote web link URI ("https://...") pointing at the required genome reference fasta file</option> | 67 <option value="uri">Provide a remote web link URI ("https://...") pointing at the required genome reference FASTA file</option> |
66 </param> | 68 </param> |
67 <when value="history"> | 69 <when value="history"> |
68 <param name="genome" type="data" format="fasta" label="Select the reference genome fasta from the current history"/> | 70 <param name="genome" type="data" format="fasta" label="Select the reference genome FASTA from the current history"/> |
69 </when> | 71 </when> |
70 <when value="indexed"> | 72 <when value="indexed"> |
71 <param name="genome" type="select" label="Select a built in reference genome or custom genome" | 73 <param name="genome" type="select" label="Select a built in reference genome or custom genome" |
72 help="If not listed, add a custom genome or use a reference genome from the history"> | 74 help="If not listed, add a custom genome or use a reference genome from the history"> |
73 <options from_data_table="all_fasta"> | 75 <options from_data_table="all_fasta"> |
74 <validator message="No genomes are available " type="no_options"/> | 76 <validator message="No genomes are available " type="no_options"/> |
75 </options> | 77 </options> |
76 </param> | 78 </param> |
77 </when> | 79 </when> |
78 <when value="uri"> | 80 <when value="uri"> |
79 <param name="uri" type="text" label="URI pointing to the reference genome fasta file"/> | 81 <param name="uri" type="text" label="URI pointing to the reference genome FASTA file"/> |
80 </when> | 82 </when> |
81 </conditional> | 83 </conditional> |
82 <param name="taxid" type="text" label="NCBI Taxon ID" help="Used to identify the HMM model files needed"/> | 84 <param name="taxid" type="integer" min="0" label="NCBI Taxon ID" help="Used to identify the HMM model files needed"/> |
83 <conditional name="condrnaseq"> | 85 <conditional name="condrnaseq"> |
84 <param name="rna_type_select" type="select" label="RNA sequence data source" | 86 <param name="rna_type_select" type="select" label="RNA sequence data source" |
85 help="Select RNAseq input data from history or input a list of SRA identifiers or remote URI"> | 87 help="Select RNAseq input data from history or input a list of SRA identifiers or remote URI"> |
86 <option value="list" selected="True">Type in a list of SRA identifiers and/or remote RNA-seq fasta URI</option> | 88 <option value="list" selected="True">Type in a list of SRA identifiers and/or remote RNA-seq FASTA URI</option> |
87 <option value="history">Select one or more RNA-seq fastq datasets from the current history</option> | 89 <option value="history">Select one or more RNA-seq fastq datasets from the current history</option> |
88 </param> | 90 </param> |
89 <when value="list"> | 91 <when value="list"> |
90 <param name="rnaseq" type="text" area="true" label="List all required individual RNA-seq URI or SRA identifiers, separated by spaces or newlines" | 92 <param name="rnaseq" type="text" area="true" label="List all required individual RNA-seq URI or SRA identifiers, separated by spaces or newlines" |
91 help="Either a working URI for a RNA-seq fasta, or a bare SRA identifier will work - can be mixed"> | 93 help="Either a working URI for a RNA-seq FASTA, or a bare SRA identifier will work - can be mixed"> |
92 <validator type="empty_field"/> | 94 <validator type="empty_field"/> |
93 </param> | 95 </param> |
94 </when> | 96 </when> |
95 <when value="history"> | 97 <when value="history"> |
96 <param name="rnaseq" type="data" format="fastqsanger,fastqsanger.gz" multiple="true" label="Select multiple RNA-seq fastqsanger inputs from the current history" | 98 <param name="rnaseq" type="data" format="fastqsanger,fastqsanger.gz" multiple="true" label="Select multiple RNA-seq fastqsanger inputs from the current history" |
97 help="All selected rna-seq fastqsanger will be added to the yaml for egapx configuration"/> | 99 help="All selected rna-seq fastqsanger will be added to the yaml for egapx configuration"/> |
98 </when> | 100 </when> |
99 </conditional> | 101 </conditional> |
100 <param name="proteins" type="data" format="fasta,tasta.gz" optional="true" label="Select a protein set"/> | 102 <param name="proteins" type="data" format="fasta,fasta.gz" optional="true" label="Select a protein set"/> |
101 <param name="xtra" type="text" area="true" label="Additional yaml to append to the egapx.yaml configuration" | 103 <param name="xtra" type="text" area="true" label="Additional yaml to append to the egapx.yaml configuration" |
102 help="Not normally needed but useful for testing additional configuration elements"> | 104 help="Not normally needed but useful for testing additional configuration elements"> |
103 <sanitizer invalid_char=""> | 105 <sanitizer invalid_char=""> |
104 <valid initial="string.printable"/> | 106 <valid initial="string.printable"/> |
105 </sanitizer> | 107 </sanitizer> |
106 </param> | 108 </param> |
107 </when> | 109 </when> |
108 <when value="history"> | 110 <when value="history"> |
109 <param name="yamlin" type="data" format="yaml,txt" label="egapx configuration yaml file to pass to Nextflow"/> | 111 <param name="yamlin" type="data" format="yaml" label="egapx configuration yaml file to pass to Nextflow"/> |
110 </when> | 112 </when> |
111 </conditional> | 113 </conditional> |
112 </inputs> | 114 </inputs> |
113 <outputs> | 115 <outputs> |
114 <data name="output" format="gff" label="EGAPx annotation for ${on_string}" from_work_dir="egapx_out/accept.gff"/> | 116 <data name="output" format="gff" label="EGAPx annotation for ${on_string}" from_work_dir="egapx_out/accept.gff"/> |
120 <data name="nf_params" format="yaml" label="Nextflow run parameters" from_work_dir="egapx_out/run_params.yaml"/> | 122 <data name="nf_params" format="yaml" label="Nextflow run parameters" from_work_dir="egapx_out/run_params.yaml"/> |
121 </collection> | 123 </collection> |
122 </outputs> | 124 </outputs> |
123 <tests> | 125 <tests> |
124 <test expect_test_failure="true"> | 126 <test expect_test_failure="true"> |
125 <param name="input_style" value="history"/> | 127 <param name="input_style" value="fillform"/> |
126 <param name="yamlin" value="input.yaml"/> | 128 <param name="taxid" value="6954"/> |
129 <param name="genome_type_select" value="uri"/> | |
130 <param name="uri" value="https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/020/809/275/GCF_020809275.1_ASM2080927v1/GCF_020809275.1_ASM2080927v1_genomic.fna.gz"/> | |
131 <param name="rna_type_select" value="list"/> | |
132 <param name="rnaseq" value="https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/sample_data/Dermatophagoides_farinae_small/SRR8506572.1 https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/sample_data/Dermatophagoides_farinae_small/SRR8506572.2 https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/sample_data/Dermatophagoides_farinae_small/SRR9005248.1 https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/sample_data/Dermatophagoides_farinae_small/SRR9005248.2"/> | |
133 <param name="xtra" value="hmm: https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/gnomon/hmm_parameters/6956.params tasks: star_wnode: star_wnode: -cpus-per-worker 4"/> | |
127 <output name="output"><assert_contents><has_size min="1"/></assert_contents></output> | 134 <output name="output"><assert_contents><has_size min="1"/></assert_contents></output> |
128 <output_collection name="nextflow_stats" type="list"> | 135 <output_collection name="nextflow_stats" type="list"> |
129 <element name="nf_log"><assert_contents><has_size min="1"/></assert_contents></element> | 136 <element name="nf_log"><assert_contents><has_size min="1"/></assert_contents></element> |
130 <element name="nf_report"><assert_contents><has_size min="1"/></assert_contents></element> | 137 <element name="nf_report"><assert_contents><has_size min="1"/></assert_contents></element> |
131 <element name="nf_trace"><assert_contents><has_size min="1"/></assert_contents></element> | 138 <element name="nf_trace"><assert_contents><has_size min="1"/></assert_contents></element> |
132 <element name="nf_timeline"><assert_contents><has_size min="1"/></assert_contents></element> | 139 <element name="nf_timeline"><assert_contents><has_size min="1"/></assert_contents></element> |
133 <element name="nf_params"><assert_contents><has_size min="1"/></assert_contents></element> | 140 <element name="nf_params"><assert_contents><has_size min="1"/></assert_contents></element> |
134 </output_collection> | 141 </output_collection> |
135 </test> | 142 </test> |
136 <test expect_test_failure="true"> | 143 <test expect_test_failure="true"> |
137 <param name="input_style" value="fillform"/> | 144 <param name="input_style" value="history"/> |
138 <param name="taxid" value="6954"/> | 145 <param name="yamlin" value="input.yaml"/> |
139 <param name="genome_type_select" value="uri"/> | |
140 <param name="uri" value="https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/020/809/275/GCF_020809275.1_ASM2080927v1/GCF_020809275.1_ASM2080927v1_genomic.fna.gz"/> | |
141 <param name="rna_type_select" value="list"/> | |
142 <param name="rnaseq" value="https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/sample_data/Dermatophagoides_farinae_small/SRR8506572.1 https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/sample_data/Dermatophagoides_farinae_small/SRR8506572.2 https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/sample_data/Dermatophagoides_farinae_small/SRR9005248.1 https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/sample_data/Dermatophagoides_farinae_small/SRR9005248.2"/> | |
143 <param name="xtra" value="proteins: [] hmm: https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/gnomon/hmm_parameters/6956.params tasks: star_wnode: star_wnode: -cpus-per-worker 4"/> | |
144 <output name="output"><assert_contents><has_size min="1"/></assert_contents></output> | 146 <output name="output"><assert_contents><has_size min="1"/></assert_contents></output> |
145 <output_collection name="nextflow_stats" type="list"> | 147 <output_collection name="nextflow_stats" type="list"> |
146 <element name="nf_log"><assert_contents><has_size min="1"/></assert_contents></element> | 148 <element name="nf_log"><assert_contents><has_size min="1"/></assert_contents></element> |
147 <element name="nf_report"><assert_contents><has_size min="1"/></assert_contents></element> | 149 <element name="nf_report"><assert_contents><has_size min="1"/></assert_contents></element> |
148 <element name="nf_trace"><assert_contents><has_size min="1"/></assert_contents></element> | 150 <element name="nf_trace"><assert_contents><has_size min="1"/></assert_contents></element> |
240 **Warning:** | 242 **Warning:** |
241 The current version is an alpha release with limited features and organism scope to collect initial feedback on execution. Outputs are not yet complete and not intended for production use. Please open a GitHub [Issue](https://github.com/ncbi/egapx/issues) if you encounter any problems with EGAPx. You can also write to cgr@nlm.nih.gov to give us your feedback or if you have any questions. | 243 The current version is an alpha release with limited features and organism scope to collect initial feedback on execution. Outputs are not yet complete and not intended for production use. Please open a GitHub [Issue](https://github.com/ncbi/egapx/issues) if you encounter any problems with EGAPx. You can also write to cgr@nlm.nih.gov to give us your feedback or if you have any questions. |
242 | 244 |
243 EGAPx is the publicly accessible version of the updated NCBI [Eukaryotic Genome Annotation Pipeline](https://www.ncbi.nlm.nih.gov/genome/annotation_euk/process/). | 245 EGAPx is the publicly accessible version of the updated NCBI [Eukaryotic Genome Annotation Pipeline](https://www.ncbi.nlm.nih.gov/genome/annotation_euk/process/). |
244 | 246 |
245 EGAPx takes an assembly fasta file, a taxid of the organism, and RNA-seq data. Based on the taxid, EGAPx will pick protein sets and HMM models. The pipeline runs `miniprot` to align protein sequences, and `STAR` to align RNA-seq to the assembly. Protein alignments and RNA-seq read alignments are then passed to `Gnomon` for gene prediction. In the first step of `Gnomon`, the short alignments are chained together into putative gene models. | 247 EGAPx takes an assembly FASTA file, a taxid of the organism, and RNA-seq data. Based on the taxid, EGAPx will pick protein sets and HMM models. The pipeline runs `miniprot` to align protein sequences, and `STAR` to align RNA-seq to the assembly. Protein alignments and RNA-seq read alignments are then passed to `Gnomon` for gene prediction. In the first step of `Gnomon`, the short alignments are chained together into putative gene models. |
246 In the second step, these predictions are further supplemented by *ab-initio* predictions based on HMM models. The final annotation for the input assembly is produced as a `gff` file. | 248 In the second step, these predictions are further supplemented by *ab-initio* predictions based on HMM models. The final annotation for the input assembly is produced as a `gff` file. |
247 | 249 |
248 **Security Notice:** | 250 **Security Notice:** |
249 | 251 |
250 EGAPx has dependencies in and outside of its execution path that include several thousand files from the [NCBI C++ toolkit](https://www.ncbi.nlm.nih.gov/toolkit), and more than a million total lines of code. Static Application Security Testing has shown a small number of verified buffer overrun security vulnerabilities. Users should consult with their organizational security team on risk and if there is concern, consider mitigating options like running via VM or cloud instance. | 252 EGAPx has dependencies in and outside of its execution path that include several thousand files from the [NCBI C++ toolkit](https://www.ncbi.nlm.nih.gov/toolkit), and more than a million total lines of code. Static Application Security Testing has shown a small number of verified buffer overrun security vulnerabilities. Users should consult with their organizational security team on risk and if there is concern, consider mitigating options like running via VM or cloud instance. |
308 2024-03-27 11:20:25 1 .exitcode | 310 2024-03-27 11:20:25 1 .exitcode |
309 $ aws s3 ls s3://temp_datapath/D_farinae/96/621c4ba4e6e87a4d869c696fe50034/output/ | 311 $ aws s3 ls s3://temp_datapath/D_farinae/96/621c4ba4e6e87a4d869c696fe50034/output/ |
310 2024-03-27 11:20:24 17127134 aligns.paf | 312 2024-03-27 11:20:24 17127134 aligns.paf |
311 ]]></help> | 313 ]]></help> |
312 <expand macro="citations"/> | 314 <expand macro="citations"/> |
315 <expand macro="creators"/> | |
313 </tool> | 316 </tool> |