Mercurial > repos > richard-burhans > ncbi_egapx
changeset 3:4420dd857c41 draft
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/ncbi_egapx commit 050f870384e004445d3dcfb56302b7894793bd23
author | richard-burhans |
---|---|
date | Mon, 09 Sep 2024 22:07:18 +0000 |
parents | 7c72d5c7e449 |
children | 539ea4dee35a |
files | macros.xml ncbi_egapx.xml test-data/input.yaml |
diffstat | 3 files changed, 119 insertions(+), 9 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/macros.xml	Tue Aug 20 19:47:25 2024 +0000
+++ b/macros.xml	Mon Sep 09 22:07:18 2024 +0000
@@ -5,8 +5,8 @@
         </requirements>
     </xml>
     <token name="@TOOL_VERSION@">0.2-alpha</token>
-    <token name="@VERSION_SUFFIX@">0</token>
-    <token name="@PROFILE@">21.05</token>
+    <token name="@VERSION_SUFFIX@">1</token>
+    <token name="@PROFILE@">22.05</token>
     <xml name="edam_ontology">
         <edam_operations>
             <edam_operation>operation_0362</edam_operation>
```
--- a/ncbi_egapx.xml Tue Aug 20 19:47:25 2024 +0000 +++ b/ncbi_egapx.xml Mon Sep 09 22:07:18 2024 +0000 @@ -5,14 +5,108 @@ </macros> <expand macro="edam_ontology"/> <expand macro="requirements"/> - <command detect_errors="exit_code"><![CDATA[ + <command detect_errors="aggressive"><![CDATA[ + #if str($cond_input_style.input_style) == "history": + #set yamlconfig = $yamlin + #else: + #set yamlconfig = 'egapx.yaml' + rm -rf 'egapx.yaml' && + touch 'egapx.yaml' && + echo '# yaml generated by ncbi_egapx.xml' >> egapx.yaml && + echo 'taxid: $taxid' >> egapx.yaml && + #if str($reference_genome.genome_type_select) == "indexed": + echo 'genome: $reference_genome.genome.fields.path' >> 'egapx.yaml' && + #elif str($reference_genome.genome_type_select) == "history" + echo 'genome: $reference_genome.genome' >> 'egapx.yaml' && + #else: + echo 'genome: $reference_genome.uri' >> 'egapx.yaml' && + #end if + echo 'reads:' >> 'egapx.yaml' && + #if str($condrnaseq.rna_type_select) == "history": + #for $r in $rnaseq: + echo ' - $r' >> 'egapx.yaml' && + #end for + #else: + #set rs = $rnaseq.split() + #set rsplit = [x.strip() for x in $rs] + #for $r in $rsplit: + echo ' - $r' >> 'egapx.yaml' && + #end for + #end if + #if len($xtra.strip()) > 0: + #set lxtra = $xtra.split('\n') + #for row in $lxtra: + echo '$row' >> 'egapx.yaml' && + #end for + #end if + echo '' >> 'egapx.yaml' && + echo "Calculated contents of egapx yaml" && + cat 'egapx.yaml' && + #end if source /galaxy/env.bash && echo \${PATH} && ln -s /galaxy/egapx/egapx_config && - python3 /galaxy/egapx/ui/egapx.py '$yamlconfig' -e galaxy -o 'egapx_out' + python3 /galaxy/egapx/ui/egapx.py '$yamlconfig' -e galaxy -o 'egapx_out' ]]></command> <inputs> - <param name="yamlconfig" type="data" optional="false" label="egapx configuration yaml file to execute" help="" format="yaml,txt" multiple="false"/> + <conditional name="cond_input_style"> + <param name="input_style" type="select" label="Fill in a tool form or use an existing yaml 
configuration from the current history?" + help="Use a pre-prepared yaml if available. Use the tool form if history files are needed as rna-seq or reference genome inputs for this job"> + <option selected="True" value="history">Use a pre-prepared yaml egapx configuration</option> + <option value="fillform">Provide configuration details for conversion into a configuration yaml</option> + </param> + <when value="history"> + <param name="yamlin" type="data" optional="false" label="egapx configuration yaml file to pass to Nextflow" help="" format="yaml,txt"/> + </when> + <when value="fillform"> + <param name="taxid" type="text" optional="false" label="NCBI Taxon ID" help="Used to identify the HMM model files needed"/> + <conditional name="reference_genome"> + <param name="genome_type_select" type="select" label="Reference genome source for mapping supplied RNA-seq reads" + help="Select a built in, history or remote URI for the reference genome fasta"> + <option value="indexed">Use a Galaxy server built-in genome</option> + <option value="history" selected="True">Use a genome fasta file from the current history</option> + <option value="uri">Provide a remote web link URI ("https://...") pointing at the required genome reference fasta file</option> + </param> + <when value="indexed"> + <param name="genome" type="select" optional="false" label="Select a built in reference genome or custom genome" + help="If not listed, add a custom genome or use a reference genome from the history"> + <options from_data_table="all_fasta"> + <validator message="No genomes are available " type="no_options"/> + </options> + </param> + </when> + <when value="history"> + <param name="genome" type="data" format="fasta" optional="false" label="Select the reference genome fasta from the current history"/> + </when> + <when value="uri"> + <param name="uri" type="text" label="URI pointing to the reference genome fasta file" help=""/> + </when> + </conditional> + <conditional name="condrnaseq"> + 
<param name="rna_type_select" type="select" label="RNA sequence data source" help="Select RNAseq input data from history or input a list of SRA identifiers or remote URI"> + <option selected="True" value="list">Type in a list of SRA identifiers and/or remote RNA-seq fasta URI</option> + <option value="history">Select one or more RNA-seq fastq datasets from the current history</option> + </param> + <when value="history"> + <param name="rnaseq" type="data" format="fastqsanger, fastqsanger.gz" optional="false" multiple="true" + label="Select multiple RNA-seq fastqsanger inputs from the current history" help="All selected rna-seq fastqsanger will be added to the yaml for egapx configuration"/> + </when> + <when value="list"> + <param name="rnaseq" type="text" area="true" optional="false" label="List all required individual RNA-seq URI or SRA identifiers, separated by spaces or newlines" + help="Either a working URI for a RNA-seq fasta, or a bare SRA identifier will work - can be mixed"> + <validator type="empty_field"/> + </param> + </when> + </conditional> + <param name="xtra" type="text" area="true" optional="true" label="Additional yaml to append to the egapx.yaml configuration" + help="Not normally needed but useful for testing additional configuration elements"> + <sanitizer invalid_char=""> + <valid initial="string.printable"> + </valid> + </sanitizer> + </param> + </when> + </conditional> </inputs> <outputs> <collection name="egapx_out" type="list" label="Outputs from egapx"> @@ -21,7 +115,7 @@ </outputs> <tests> <test expect_test_failure="true"> - <param name="yamlconfig" value="input.yaml"/> + <param name="yamlin" value="input.yaml"/> <output_collection name="egapx_out" type="list" count="8"/> </test> </tests> @@ -31,7 +125,7 @@ .. 
class:: warningmark -**Proof of concept: a quick hack to run a NF workflow inside a specialised Galaxy tool wrapper** +**Proof of concept: a hack to run a NF workflow inside a specialised Galaxy tool wrapper** EGAPx is a big, complicated Nextflow workflow, challenging and costly to re-implement **properly**, requiring dozens of new tools and replicating a lot of complicated *groovy* workflow logic. @@ -42,7 +136,7 @@ required to convert Nextflow DDL into tools and WF logic. Balancing these competing requirements is a fundamental Galaxy challenge. -EGAPx requires very substantial resources to run with real data. *128GB and 32 cores* are the minimum requirement; *256GB and 64 cores* are recommended. +EGAPx requires very substantial resources to run with real data. *132GB and 32 cores* are the minimum requirement; *256GB and 64 cores* are recommended. A special minimal example that can be run in 6GB with 4 cores is provided as a yaml configuration and is used for the tool test.
```diff
--- a/test-data/input.yaml	Tue Aug 20 19:47:25 2024 +0000
+++ b/test-data/input.yaml	Mon Sep 09 22:07:18 2024 +0000
@@ -1,1 +1,17 @@
-#
+# This is a very minimal example of EGAPx, it fits into 4 CPU cores and 6GB of memory.
+# To be able to do this, we culled the input files and some stages of execution.
+# To limit the requirements you also need to use -e docker_minimal
+
+genome: https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/020/809/275/GCF_020809275.1_ASM2080927v1/GCF_020809275.1_ASM2080927v1_genomic.fna.gz
+reads:
+  - https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/sample_data/Dermatophagoides_farinae_small/SRR8506572.1
+  - https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/sample_data/Dermatophagoides_farinae_small/SRR8506572.2
+  - https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/sample_data/Dermatophagoides_farinae_small/SRR9005248.1
+  - https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/sample_data/Dermatophagoides_farinae_small/SRR9005248.2
+taxid: 6954
+proteins: []
+hmm: https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/gnomon/hmm_parameters/6956.params
+tasks:
+  star_wnode:
+    star_wnode: -cpus-per-worker 4
+
```