changeset 3:4420dd857c41 draft

planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/ncbi_egapx commit 050f870384e004445d3dcfb56302b7894793bd23
author richard-burhans
date Mon, 09 Sep 2024 22:07:18 +0000
parents 7c72d5c7e449
children 539ea4dee35a
files macros.xml ncbi_egapx.xml test-data/input.yaml
diffstat 3 files changed, 119 insertions(+), 9 deletions(-) [+]
line wrap: on
line diff
--- a/macros.xml	Tue Aug 20 19:47:25 2024 +0000
+++ b/macros.xml	Mon Sep 09 22:07:18 2024 +0000
@@ -5,8 +5,8 @@
         </requirements>
     </xml>
     <token name="@TOOL_VERSION@">0.2-alpha</token>
-    <token name="@VERSION_SUFFIX@">0</token>
-    <token name="@PROFILE@">21.05</token>
+    <token name="@VERSION_SUFFIX@">1</token>
+    <token name="@PROFILE@">22.05</token>
     <xml name="edam_ontology">
         <edam_operations>
             <edam_operation>operation_0362</edam_operation>
--- a/ncbi_egapx.xml	Tue Aug 20 19:47:25 2024 +0000
+++ b/ncbi_egapx.xml	Mon Sep 09 22:07:18 2024 +0000
@@ -5,14 +5,108 @@
     </macros>
     <expand macro="edam_ontology"/>
     <expand macro="requirements"/>
-    <command detect_errors="exit_code"><![CDATA[
+    <command detect_errors="aggressive"><![CDATA[
+    #if str($cond_input_style.input_style) == "history":
+      #set yamlconfig = $cond_input_style.yamlin
+    #else:
+      #set yamlconfig = 'egapx.yaml'
+      rm -rf 'egapx.yaml' &&
+      touch 'egapx.yaml' &&
+      echo '# yaml generated by ncbi_egapx.xml' >> 'egapx.yaml' &&
+      echo 'taxid:  $cond_input_style.taxid' >> 'egapx.yaml' &&
+      #if str($cond_input_style.reference_genome.genome_type_select) == "indexed":
+        echo 'genome:  $cond_input_style.reference_genome.genome.fields.path' >> 'egapx.yaml' &&
+      #elif str($cond_input_style.reference_genome.genome_type_select) == "history":
+        echo 'genome:  $cond_input_style.reference_genome.genome'  >> 'egapx.yaml' &&
+      #else:
+        echo 'genome:  $cond_input_style.reference_genome.uri' >> 'egapx.yaml' &&
+      #end if
+      echo 'reads:' >> 'egapx.yaml' &&
+      #if str($cond_input_style.condrnaseq.rna_type_select) == "history":
+        #for $r in $cond_input_style.condrnaseq.rnaseq:
+          echo '  - $r'  >> 'egapx.yaml' &&
+        #end for
+      #else:
+        ## The default text sanitizer encodes newline, CR and tab as __cn__, __cr__ and __tc__; turn them back into separators.
+        #set rs = str($cond_input_style.condrnaseq.rnaseq).replace('__cn__', ' ').replace('__cr__', ' ').replace('__tc__', ' ')
+        #for $r in $rs.split():
+          echo '  - $r'  >> 'egapx.yaml' &&
+        #end for
+      #end if
+      #if len(str($cond_input_style.xtra).strip()) > 0:
+        ## xtra accepts every printable character, so single quotes are escaped before each row is embedded in a quoted echo.
+        #for row in str($cond_input_style.xtra).replace("'", "'\\''").split('\n'):
+            echo '$row' >> 'egapx.yaml' &&
+        #end for
+      #end if
+      echo '' >> 'egapx.yaml' &&
+      echo "Calculated contents of egapx yaml" &&
+      cat 'egapx.yaml' &&
+    #end if
     source /galaxy/env.bash &&
     echo \${PATH} &&
     ln -s /galaxy/egapx/egapx_config &&
-    python3 /galaxy/egapx/ui/egapx.py '$yamlconfig' -e galaxy -o 'egapx_out'
+    python3 /galaxy/egapx/ui/egapx.py '$yamlconfig' -e galaxy  -o 'egapx_out'
     ]]></command>
     <inputs>
-      <param name="yamlconfig" type="data" optional="false" label="egapx configuration yaml file to execute" help="" format="yaml,txt" multiple="false"/>
+    <conditional name="cond_input_style">
+      <param name="input_style" type="select" label="Fill in a tool form or use an existing yaml configuration from the current history?"
+        help="Use a pre-prepared yaml if available. Use the tool form if history files are needed as rna-seq or reference genome inputs for this job">
+        <option selected="True" value="history">Use a pre-prepared yaml egapx configuration</option>
+        <option value="fillform">Provide configuration details for conversion into a configuration yaml</option>
+      </param>
+      <when value="history">
+        <param name="yamlin" type="data" optional="false" label="egapx configuration yaml file to pass to Nextflow" help="" format="yaml,txt"/>
+      </when>
+      <when value="fillform">
+          <param name="taxid" type="text" optional="false" label="NCBI Taxon ID" help="Used to identify the HMM model files needed"/>
+          <conditional name="reference_genome">
+            <param name="genome_type_select" type="select" label="Reference genome source for mapping supplied RNA-seq reads" 
+              help="Select a built in, history or remote URI for the reference genome fasta">
+                <option value="indexed">Use a Galaxy server built-in genome</option>
+                <option value="history" selected="True">Use a genome fasta file from the current history</option>
+                <option value="uri">Provide a remote web link URI ("https://...") pointing at the required genome reference fasta file</option>
+            </param>
+            <when value="indexed">
+                <param name="genome" type="select" optional="false" label="Select a built in reference genome or custom genome" 
+                  help="If not listed, add a custom genome or use a reference genome from the history">
+                    <options from_data_table="all_fasta">
+                        <validator message="No genomes are available " type="no_options"/>
+                    </options>
+                </param>
+            </when>
+            <when value="history">
+                <param name="genome" type="data" format="fasta" optional="false" label="Select the reference genome fasta from the current history"/>
+            </when>
+            <when value="uri">
+                <param name="uri" type="text" label="URI pointing to the reference genome fasta file" help=""/>
+            </when>
+          </conditional>
+          <conditional name="condrnaseq">
+            <param name="rna_type_select" type="select" label="RNA sequence data source" help="Select RNAseq input data from history or input a list of SRA identifiers or remote URI">
+                <option selected="True" value="list">Type in a list of SRA identifiers and/or remote RNA-seq fasta URI</option>
+                <option value="history">Select one or more RNA-seq fastq datasets from the current history</option>
+            </param>
+            <when value="history">
+              <param name="rnaseq" type="data" format="fastqsanger, fastqsanger.gz" optional="false" multiple="true" 
+                label="Select multiple RNA-seq fastqsanger inputs from the current history" help="All selected rna-seq fastqsanger will be added to the yaml for egapx configuration"/>
+            </when>
+            <when value="list">
+                <param name="rnaseq" type="text" area="true" optional="false" label="List all required individual RNA-seq URI or SRA identifiers, separated by spaces or newlines"
+                    help="Either a working URI for a RNA-seq fasta, or a bare SRA identifier will work - can be mixed">
+                    <validator type="empty_field"/>
+                </param>
+            </when>
+          </conditional>
+          <param name="xtra" type="text" area="true" optional="true" label="Additional yaml to append to the egapx.yaml configuration"
+                    help="Not normally needed but useful for testing additional configuration elements">
+            <sanitizer invalid_char="">
+                  <valid initial="string.printable">
+                </valid>
+            </sanitizer>
+          </param>
+      </when>
+    </conditional>
     </inputs>
     <outputs>
       <collection name="egapx_out" type="list" label="Outputs from egapx">
@@ -21,7 +115,7 @@
     </outputs>
     <tests>
       <test expect_test_failure="true">
-        <param name="yamlconfig" value="input.yaml"/>
+        <param name="yamlin" value="input.yaml"/>
         <output_collection name="egapx_out" type="list" count="8"/>
       </test>
     </tests>
@@ -31,7 +125,7 @@
 
 .. class:: warningmark
 
-**Proof of concept: a quick hack to run a NF workflow inside a specialised Galaxy tool wrapper**
+**Proof of concept: a hack to run a NF workflow inside a specialised Galaxy tool wrapper**
 
 EGAPx is a big, complicated Nextflow workflow, challenging and costly to re-implement **properly**, requiring dozens of new tools and replicating a lot of
 complicated *groovy* workflow logic.
@@ -42,7 +136,7 @@
 required to convert Nextflow DDL into tools and WF logic. Balancing these competing requirements is a fundamental Galaxy challenge.
 
 
-EGAPx requires very substantial resources to run with real data. *128GB and 32 cores* are the minimum requirement; *256GB and 64 cores* are recommended.
+EGAPx requires very substantial resources to run with real data. *132GB and 32 cores* are the minimum requirement; *256GB and 64 cores* are recommended.
 
 A special minimal example that can be run in 6GB with 4 cores is provided as a yaml configuration and is used for the tool test.
 
--- a/test-data/input.yaml	Tue Aug 20 19:47:25 2024 +0000
+++ b/test-data/input.yaml	Mon Sep 09 22:07:18 2024 +0000
@@ -1,1 +1,17 @@
-#
+# This is a very minimal example of EGAPx, it fits into 4 CPU cores and 6GB of memory.
+# To be able to do this, we culled the input files and some stages of execution.
+# To limit the requirements you also need to use -e docker_minimal
+
+genome: https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/020/809/275/GCF_020809275.1_ASM2080927v1/GCF_020809275.1_ASM2080927v1_genomic.fna.gz
+reads:
+  - https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/sample_data/Dermatophagoides_farinae_small/SRR8506572.1
+  - https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/sample_data/Dermatophagoides_farinae_small/SRR8506572.2
+  - https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/sample_data/Dermatophagoides_farinae_small/SRR9005248.1
+  - https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/sample_data/Dermatophagoides_farinae_small/SRR9005248.2
+taxid: 6954
+proteins: []
+hmm: https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/gnomon/hmm_parameters/6956.params
+tasks:
+  star_wnode:
+    star_wnode: -cpus-per-worker 4
+