comparison ncbi_egapx.xml @ 3:4420dd857c41 draft

planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/ncbi_egapx commit 050f870384e004445d3dcfb56302b7894793bd23
author richard-burhans
date Mon, 09 Sep 2024 22:07:18 +0000
parents e7091c5a8495
children 539ea4dee35a
comparison
equal deleted inserted replaced
2:7c72d5c7e449 3:4420dd857c41
3 <macros> 3 <macros>
4 <import>macros.xml</import> 4 <import>macros.xml</import>
5 </macros> 5 </macros>
6 <expand macro="edam_ontology"/> 6 <expand macro="edam_ontology"/>
7 <expand macro="requirements"/> 7 <expand macro="requirements"/>
8 <command detect_errors="exit_code"><![CDATA[ 8 <command detect_errors="aggressive"><![CDATA[
9 #if str($cond_input_style.input_style) == "history":
10 #set yamlconfig = $yamlin
11 #else:
12 #set yamlconfig = 'egapx.yaml'
13 rm -rf 'egapx.yaml' &&
14 touch 'egapx.yaml' &&
15 echo '# yaml generated by ncbi_egapx.xml' >> egapx.yaml &&
16 echo 'taxid: $taxid' >> egapx.yaml &&
17 #if str($reference_genome.genome_type_select) == "indexed":
18 echo 'genome: $reference_genome.genome.fields.path' >> 'egapx.yaml' &&
19 #elif str($reference_genome.genome_type_select) == "history"
20 echo 'genome: $reference_genome.genome' >> 'egapx.yaml' &&
21 #else:
22 echo 'genome: $reference_genome.uri' >> 'egapx.yaml' &&
23 #end if
24 echo 'reads:' >> 'egapx.yaml' &&
25 #if str($condrnaseq.rna_type_select) == "history":
26 #for $r in $rnaseq:
27 echo ' - $r' >> 'egapx.yaml' &&
28 #end for
29 #else:
30 #set rs = $rnaseq.split()
31 #set rsplit = [x.strip() for x in $rs]
32 #for $r in $rsplit:
33 echo ' - $r' >> 'egapx.yaml' &&
34 #end for
35 #end if
36 #if len($xtra.strip()) > 0:
37 #set lxtra = $xtra.split('\n')
38 #for row in $lxtra:
39 echo '$row' >> 'egapx.yaml' &&
40 #end for
41 #end if
42 echo '' >> 'egapx.yaml' &&
43 echo "Calculated contents of egapx yaml" &&
44 cat 'egapx.yaml' &&
45 #end if
9 source /galaxy/env.bash && 46 source /galaxy/env.bash &&
10 echo \${PATH} && 47 echo \${PATH} &&
11 ln -s /galaxy/egapx/egapx_config && 48 ln -s /galaxy/egapx/egapx_config &&
12 python3 /galaxy/egapx/ui/egapx.py '$yamlconfig' -e galaxy -o 'egapx_out' 49 python3 /galaxy/egapx/ui/egapx.py '$yamlconfig' -e galaxy -o 'egapx_out'
13 ]]></command> 50 ]]></command>
14 <inputs> 51 <inputs>
15 <param name="yamlconfig" type="data" optional="false" label="egapx configuration yaml file to execute" help="" format="yaml,txt" multiple="false"/> 52 <conditional name="cond_input_style">
53 <param name="input_style" type="select" label="Fill in a tool form or use an existing yaml configuration from the current history?"
54 help="Use a pre-prepared yaml if available. Use the tool form if history files are needed as rna-seq or reference genome inputs for this job">
55 <option selected="True" value="history">Use a pre-prepared yaml egapx configuration</option>
56 <option value="fillform">Provide configuration details for conversion into a configuration yaml</option>
57 </param>
58 <when value="history">
59 <param name="yamlin" type="data" optional="false" label="egapx configuration yaml file to pass to Nextflow" help="" format="yaml,txt"/>
60 </when>
61 <when value="fillform">
62 <param name="taxid" type="text" optional="false" label="NCBI Taxon ID" help="Used to identify the HMM model files needed"/>
63 <conditional name="reference_genome">
64 <param name="genome_type_select" type="select" label="Reference genome source for mapping supplied RNA-seq reads"
65 help="Select a built in, history or remote URI for the reference genome fasta">
66 <option value="indexed">Use a Galaxy server built-in genome</option>
67 <option value="history" selected="True">Use a genome fasta file from the current history</option>
68 <option value="uri">Provide a remote web link URI ("https://...") pointing at the required genome reference fasta file</option>
69 </param>
70 <when value="indexed">
71 <param name="genome" type="select" optional="false" label="Select a built in reference genome or custom genome"
72 help="If not listed, add a custom genome or use a reference genome from the history">
73 <options from_data_table="all_fasta">
74 <validator message="No genomes are available " type="no_options"/>
75 </options>
76 </param>
77 </when>
78 <when value="history">
79 <param name="genome" type="data" format="fasta" optional="false" label="Select the reference genome fasta from the current history"/>
80 </when>
81 <when value="uri">
82 <param name="uri" type="text" label="URI pointing to the reference genome fasta file" help=""/>
83 </when>
84 </conditional>
85 <conditional name="condrnaseq">
86 <param name="rna_type_select" type="select" label="RNA sequence data source" help="Select RNAseq input data from history or input a list of SRA identifiers or remote URI">
87 <option selected="True" value="list">Type in a list of SRA identifiers and/or remote RNA-seq fasta URI</option>
88 <option value="history">Select one or more RNA-seq fastq datasets from the current history</option>
89 </param>
90 <when value="history">
91 <param name="rnaseq" type="data" format="fastqsanger, fastqsanger.gz" optional="false" multiple="true"
92 label="Select multiple RNA-seq fastqsanger inputs from the current history" help="All selected rna-seq fastqsanger will be added to the yaml for egapx configuration"/>
93 </when>
94 <when value="list">
95 <param name="rnaseq" type="text" area="true" optional="false" label="List all required individual RNA-seq URI or SRA identifiers, separated by spaces or newlines"
96 help="Either a working URI for a RNA-seq fasta, or a bare SRA identifier will work - can be mixed">
97 <validator type="empty_field"/>
98 </param>
99 </when>
100 </conditional>
101 <param name="xtra" type="text" area="true" optional="true" label="Additional yaml to append to the egapx.yaml configuration"
102 help="Not normally needed but useful for testing additional configuration elements">
103 <sanitizer invalid_char="">
104 <valid initial="string.printable">
105 </valid>
106 </sanitizer>
107 </param>
108 </when>
109 </conditional>
16 </inputs> 110 </inputs>
17 <outputs> 111 <outputs>
18 <collection name="egapx_out" type="list" label="Outputs from egapx"> 112 <collection name="egapx_out" type="list" label="Outputs from egapx">
19 <discover_datasets pattern="__name_and_ext__" directory="egapx_out" visible="false"/> 113 <discover_datasets pattern="__name_and_ext__" directory="egapx_out" visible="false"/>
20 </collection> 114 </collection>
21 </outputs> 115 </outputs>
22 <tests> 116 <tests>
23 <test expect_test_failure="true"> 117 <test expect_test_failure="true">
24 <param name="yamlconfig" value="input.yaml"/> 118 <param name="yamlin" value="input.yaml"/>
25 <output_collection name="egapx_out" type="list" count="8"/> 119 <output_collection name="egapx_out" type="list" count="8"/>
26 </test> 120 </test>
27 </tests> 121 </tests>
28 <help><![CDATA[ 122 <help><![CDATA[
29 Galaxy tool wrapping the Eukaryotic Genome Annotation Pipeline (EGAPx) 123 Galaxy tool wrapping the Eukaryotic Genome Annotation Pipeline (EGAPx)
30 ================================================================================================= 124 =================================================================================================
31 125
32 .. class:: warningmark 126 .. class:: warningmark
33 127
34 **Proof of concept: a quick hack to run a NF workflow inside a specialised Galaxy tool wrapper** 128 **Proof of concept: a hack to run a NF workflow inside a specialised Galaxy tool wrapper**
35 129
36 EGAPx is a big, complicated Nextflow workflow, challenging and costly to re-implement **properly**, requiring dozens of new tools and replicating a lot of 130 EGAPx is a big, complicated Nextflow workflow, challenging and costly to re-implement **properly**, requiring dozens of new tools and replicating a lot of
37 complicated *groovy* workflow logic. 131 complicated *groovy* workflow logic.
38 132
39 It is also very new and in rapid development. Investing developer effort and keeping it updated as EGAPx changes rapidly may be an *inefficient use of developer resources*. 133 It is also very new and in rapid development. Investing developer effort and keeping it updated as EGAPx changes rapidly may be an *inefficient use of developer resources*.
40 134
41 This wrapper is designed to allow measuring how *inefficient* it is in terms of computing resource utilisation, in comparison to the developer effort 135 This wrapper is designed to allow measuring how *inefficient* it is in terms of computing resource utilisation, in comparison to the developer effort
42 required to convert Nextflow DSL into tools and WF logic. Balancing these competing requirements is a fundamental Galaxy challenge. 136 required to convert Nextflow DSL into tools and WF logic. Balancing these competing requirements is a fundamental Galaxy challenge.
43 137
44 138
45 EGAPx requires very substantial resources to run with real data. *128GB and 32 cores* are the minimum requirement; *256GB and 64 cores* are recommended. 139 EGAPx requires very substantial resources to run with real data. *132GB and 32 cores* are the minimum requirement; *256GB and 64 cores* are recommended.
46 140
47 A special minimal example that can be run in 6GB with 4 cores is provided as a yaml configuration and is used for the tool test. 141 A special minimal example that can be run in 6GB with 4 cores is provided as a yaml configuration and is used for the tool test.
48 142
49 In this implementation, the user must supply a yaml configuration file as initial proof of concept. 143 In this implementation, the user must supply a yaml configuration file as initial proof of concept.
50 History inputs and even a yaml editor might be provided in future. 144 History inputs and even a yaml editor might be provided in future.