changeset 18:83f9b1d86951 draft

planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/ncbi_egapx commit 8214876a80a4416d2614c7227b22a436489f59cf
author richard-burhans
date Fri, 20 Dec 2024 21:26:46 +0000
parents e0de8669b340
children 5b24ea81a6f8
files macros.xml ncbi_egapx.xml test-data/input.yaml
diffstat 3 files changed, 162 insertions(+), 65 deletions(-) [+]
line wrap: on
line diff
--- a/macros.xml	Mon Nov 18 17:24:04 2024 +0000
+++ b/macros.xml	Fri Dec 20 21:26:46 2024 +0000
@@ -4,9 +4,9 @@
             <container type="docker">quay.io/galaxy/egapx:@TOOL_VERSION@</container>
         </requirements>
     </xml>
-    <token name="@TOOL_VERSION@">0.2-alpha</token>
-    <token name="@VERSION_SUFFIX@">7</token>
-    <token name="@PROFILE@">22.05</token>
+    <token name="@TOOL_VERSION@">0.3.1-alpha</token>
+    <token name="@VERSION_SUFFIX@">0</token>
+    <token name="@PROFILE@">24.2</token>
     <xml name="edam_ontology">
         <edam_operations>
             <edam_operation>operation_0362</edam_operation>
--- a/ncbi_egapx.xml	Mon Nov 18 17:24:04 2024 +0000
+++ b/ncbi_egapx.xml	Fri Dec 20 21:26:46 2024 +0000
@@ -6,8 +6,43 @@
     <expand macro="edam_ontology"/>
     <expand macro="requirements"/>
     <command detect_errors="aggressive"><![CDATA[
-    #if str($cond_input_style.input_style) == "fillform"
+    export NXF_DEBUG=3 &&
+    echo \${PWD} &&
+    #if $cond_input_style.input_style == "fillform"
         #set yamlconfig = $egapx_config
+        #import re
+        ## The EGAPx pipeline code determines that a file is gzipped if it has a '.gz' extension, so inputs are symlinked under sanitized names that keep the datatype extension.
+        ## Parameters use their fully qualified path because 'genome' is declared in more than one 'when' branch. NOTE(review): confirm the all_fasta data table exposes 'element_identifier' and 'ext' columns for the indexed branch.
+        #if $cond_input_style.cond_genome_style.genome_style == "history"
+            #set $genome_pathname = "genome_" + re.sub('[^\w\-\s]', '_', str($cond_input_style.cond_genome_style.genome.element_identifier)) + "." + $cond_input_style.cond_genome_style.genome.ext
+            ln -s '${cond_input_style.cond_genome_style.genome}' '$genome_pathname' &&
+        #else if $cond_input_style.cond_genome_style.genome_style == "indexed"
+            #set $genome_pathname = "genome_" + re.sub('[^\w\-\s]', '_', str($cond_input_style.cond_genome_style.genome.fields.element_identifier)) + "." + $cond_input_style.cond_genome_style.genome.fields.ext
+            ln -s '${cond_input_style.cond_genome_style.genome.fields.path}' '$genome_pathname' &&
+        #end if
+        #if $cond_input_style.cond_rnaseq_style.rnaseq_style == "history"
+            mkdir -p reads &&
+            #for $idx, $read in enumerate($cond_input_style.cond_rnaseq_style.rnaseq)
+                #if $read
+                    #set $read_pathname = "reads/" + str($idx) + "_" + re.sub('[^\w\-\s]', '_', str($read.element_identifier)) + "." + $read.ext
+                    ln -s '$read' '$read_pathname' &&
+                #end if
+            #end for
+            #for $repeat_idx, $repeat_entry in enumerate($cond_input_style.cond_rnaseq_style.reads_lists)
+                #for $idx, $collection in enumerate($repeat_entry.rnaseq_single)
+                    #set $read_pathname = "reads/" + str($repeat_idx) + str($idx) + "_" + re.sub('[^\w\-\s]', '_', str($collection.element_identifier)) + "." + $collection.ext
+                    ln -s '$collection' '$read_pathname' &&
+                #end for
+            #end for
+            #for $repeat_idx, $repeat_entry in enumerate($cond_input_style.cond_rnaseq_style.reads_paired_lists)
+                #for $idx, $collection in enumerate($repeat_entry.rnaseq_paired)
+                    #set $read_pathname = "reads/" + str($repeat_idx) + str($idx) + "_" + re.sub('[^\w\-\s]', '_', str($collection.forward.element_identifier)) + "." + $collection.forward.ext
+                    ln -s '${collection.forward}' '$read_pathname' &&
+                    #set $read_pathname = "reads/" + str($repeat_idx) + str($idx) + "_" + re.sub('[^\w\-\s]', '_', str($collection.reverse.element_identifier)) + "." + $collection.reverse.ext
+                    ln -s '${collection.reverse}' '$read_pathname' &&
+                #end for
+            #end for
+        #end if
     #else
         #set yamlconfig = $yamlin
     #end if
@@ -21,32 +56,56 @@
     python3 /galaxy/egapx/ui/egapx.py '$yamlconfig' -e galaxy -o 'egapx_out'
     ]]></command>
     <configfiles>
+        <configfile name="reads_config"><![CDATA[
+        #if $cond_input_style.input_style == "fillform" and $cond_input_style.cond_rnaseq_style.rnaseq_style == "history"
+            #import re
+            #for $idx, $read in enumerate($cond_input_style.cond_rnaseq_style.rnaseq)
+                #if $read
+                    #set $read_pathname = "reads/" + str($idx) + "_" + re.sub('[^\w\-\s]', '_', str($read.element_identifier)) + "." + $read.ext
+${idx}_${read.name} $read_pathname
+                #end if
+            #end for
+            #for $repeat_idx, $repeat_entry in enumerate($cond_input_style.cond_rnaseq_style.reads_lists)
+                #for $idx, $collection in enumerate($repeat_entry.rnaseq_single)
+                    #set $read_pathname = "reads/" + str($repeat_idx) + str($idx) + "_" + re.sub('[^\w\-\s]', '_', str($collection.element_identifier)) + "." + $collection.ext
+${repeat_idx}${idx}_${collection.name} $read_pathname
+                #end for
+            #end for
+            #for $repeat_idx, $repeat_entry in enumerate($cond_input_style.cond_rnaseq_style.reads_paired_lists)
+                #for $idx, $collection in enumerate($repeat_entry.rnaseq_paired)
+                    #set $forward_read_pathname = "reads/" + str($repeat_idx) + str($idx) + "_" + re.sub('[^\w\-\s]', '_', str($collection.forward.element_identifier)) + "." + $collection.forward.ext
+                    #set $reverse_read_pathname = "reads/" + str($repeat_idx) + str($idx) + "_" + re.sub('[^\w\-\s]', '_', str($collection.reverse.element_identifier)) + "." + $collection.reverse.ext
+${repeat_idx}${idx}_${collection.name} $forward_read_pathname $reverse_read_pathname
+                #end for
+            #end for
+        #end if
+        ]]></configfile>
         <configfile name="egapx_config"><![CDATA[
-#if str($cond_input_style.input_style) == "fillform"
+#if $cond_input_style.input_style == "fillform"
+    #import re
+    #if $cond_input_style.cond_genome_style.genome_style == "history"
+        #set genome_value = "genome_" + re.sub('[^\w\-\s]', '_', str($cond_input_style.cond_genome_style.genome.element_identifier)) + "." + $cond_input_style.cond_genome_style.genome.ext
+    #else if $cond_input_style.cond_genome_style.genome_style == "indexed"
+        #set genome_value = "genome_" + re.sub('[^\w\-\s]', '_', str($cond_input_style.cond_genome_style.genome.fields.element_identifier)) + "." + $cond_input_style.cond_genome_style.genome.fields.ext
+    #else
+        #set genome_value = $cond_input_style.cond_genome_style.uri
+    #end if
 # yaml generated by ncbi_egapx.xml
-    #if str($cond_input_style.cond_genome_style.genome_style) == "history"
-        #set genome_value = $cond_input_style.cond_genome_style.genome
-    #elif str($cond_input_style.cond_genome_style.genome_style) == "indexed"
-        #set genome_value = $cond_input_style.cond_genome_style.genome.fields.path
-    #else
-        #set genome_value = $cond_input_style.cond_genome_style.uri
-    #end if
 genome: $genome_value
-taxid: $cond_input_style.taxid
-    #if str($cond_input_style.cond_rnaseq_style.rnaseq_style) == "history"
-        #set reads_values = $cond_input_style.cond_rnaseq_style.rnaseq
+taxid: $cond_input_style.taxid
+    #if $cond_input_style.cond_rnaseq_style.rnaseq_style == "history"
+reads: $reads_config
     #else
-        #set reads_values = $cond_input_style.cond_rnaseq_style.rnaseq.split()
+reads:
+        #for $read in [str(rv).strip() for rv in $cond_input_style.cond_rnaseq_style.rnaseq.split()]
+  - $read
+        #end for
     #end if
-reads:
-    #for r in [str(rv).strip() for rv in $reads_values]
-  - $r
-    #end for
     #if str($cond_input_style.proteins) != "None"
-proteins: $cond_input_styleproteins
+proteins: $cond_input_style.proteins
     #end if
-    #if str($cond_input_style.xtra) != "None"
-        #for row in str($cond_input_style.xtra).strip().split("\n")
+    #if str($cond_input_style.extra) != "None"
+        #for row in str($cond_input_style.extra).strip().split("\n")
 $row
         #end for
     #end if
@@ -55,15 +114,13 @@
     </configfiles>
     <inputs>
         <conditional name="cond_input_style">
-            <param name="input_style" type="select" label="Fill in a tool form or use an existing yaml configuration from the current history?"
-                help="Use the tool form to select inputs from the history, or use a pre-prepared yaml file.">
+            <param name="input_style" type="select" label="Fill in a tool form or use an existing yaml configuration from the current history?" help="Use the tool form to select inputs from the history, or use a pre-prepared yaml file.">
                 <option value="fillform" selected="True">Provide configuration details for conversion into a configuration yaml</option>
                 <option value="history">Use a pre-prepared yaml egapx configuration</option>
-            </param> 
+            </param>
             <when value="fillform">
                 <conditional name="cond_genome_style">
-                    <param name="genome_style" type="select" label="Reference genome source for mapping supplied RNA-seq reads"
-                        help="Select a built in, history or remote URI for the reference genome FASTA">
+                    <param name="genome_style" type="select" label="Reference genome source for mapping supplied RNA-seq reads" help="Select a built in, history or remote URI for the reference genome FASTA">
                         <option value="history" selected="True">Use a genome FASTA file from the current history</option>
                         <option value="indexed">Use a Galaxy server built-in genome</option>
                         <option value="uri">Provide a remote web link URI ("https://...") pointing at the required genome reference FASTA file</option>
@@ -72,8 +129,7 @@
                         <param name="genome" type="data" format="fasta" label="Select the reference genome FASTA from the current history"/>
                     </when>
                     <when value="indexed">
-                        <param name="genome" type="select" label="Select a built in reference genome or custom genome"
-                            help="If not listed, add a custom genome or use a reference genome from the history">
+                        <param name="genome" type="select" label="Select a built in reference genome or custom genome" help="If not listed, add a custom genome or use a reference genome from the history">
                             <options from_data_table="all_fasta">
                                 <validator message="No genomes are available " type="no_options"/>
                             </options>
@@ -83,30 +139,31 @@
                         <param name="uri" type="text" label="URI pointing to the reference genome FASTA file"/>
                     </when>
                 </conditional>
-
                 <param name="taxid" type="text" label="NCBI Taxon ID" help="Used to identify the HMM model files needed">
                     <validator type="regex" message="Numeric">^[0-9]+$</validator>
                 </param>
-
                 <conditional name="cond_rnaseq_style">
-                    <param name="rnaseq_style" type="select" label="RNA sequence data source"
-                        help="Select RNAseq input data from history or input a list of SRA identifiers or remote URI">
+                    <param name="rnaseq_style" type="select" label="RNA sequence data source" help="Select RNAseq input data from history or input a list of SRA identifiers or remote URI">
                         <option value="history" selected="True">Select one or more RNA-seq fastq datasets from the current history</option>
                         <option value="list">Type in a list of SRA identifiers and/or remote RNA-seq FASTA URI</option>
                     </param>
                     <when value="history">
-                        <param name="rnaseq" type="data" format="fastqsanger,fastqsanger.gz" multiple="true" label="Select multiple RNA-seq fastqsanger inputs from the current history"
-                            help="All selected rna-seq fastqsanger will be added to the yaml for egapx configuration"/>
+                        <param name="rnaseq" type="data" format="fastqsanger,fastqsanger.gz" multiple="true" optional="true" label="Select multiple RNA-seq fastqsanger inputs from the current history" help="All selected rna-seq fastqsanger will be added to the yaml for egapx configuration"/>
+                        <repeat name="reads_lists" title="Single-end reads" min="0">
+                            <param name="rnaseq_single" type="data_collection" format="fastqsanger,fastqsanger.gz" collection_type="list" label="Select multiple RNA-seq fastqsanger inputs from the current history" help="All selected rna-seq fastqsanger will be added to the yaml for egapx configuration"/>
+                        </repeat>
+                        <repeat name="reads_paired_lists" title="Paired-end reads" min="0">
+                            <param name="rnaseq_paired" type="data_collection" format="fastqsanger,fastqsanger.gz" collection_type="list:paired" label="Select multiple RNA-seq fastqsanger inputs from the current history" help="All selected rna-seq fastqsanger will be added to the yaml for egapx configuration"/>
+                        </repeat>
                     </when>
                     <when value="list">
-                        <param name="rnaseq" type="text" area="true" label="List all required individual RNA-seq URI or SRA identifiers, separated by spaces or newlines"
-                            help="Either a working URI for a RNA-seq FASTA, or a bare SRA identifier will work - can be mixed">
+                        <param name="rnaseq" type="text" area="true" label="List all required individual RNA-seq URI or SRA identifiers, separated by spaces or newlines" help="Either a working URI for a RNA-seq FASTA, or a bare SRA identifier will work - can be mixed">
                             <validator type="empty_field"/>
                         </param>
                     </when>
                 </conditional>
                 <param name="proteins" type="data" format="fasta,fasta.gz" optional="true" label="Select a protein set"/>
-                <param name="xtra" type="text" area="true" optional="true" label="Additional yaml to append to the egapx.yaml configuration"
+                <param name="extra" type="text" area="true" optional="true" label="Additional yaml to append to the egapx.yaml configuration"
                     help="Not normally needed but useful for testing additional configuration elements">
                     <sanitizer invalid_char="">
                         <valid initial="string.printable"/>
@@ -136,26 +193,74 @@
             <param name="uri" value="https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/020/809/275/GCF_020809275.1_ASM2080927v1/GCF_020809275.1_ASM2080927v1_genomic.fna.gz"/>
             <param name="rnaseq_style" value="list"/>
             <param name="rnaseq" value="https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/sample_data/Dermatophagoides_farinae_small/SRR8506572.1 https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/sample_data/Dermatophagoides_farinae_small/SRR8506572.2 https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/sample_data/Dermatophagoides_farinae_small/SRR9005248.1 https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/sample_data/Dermatophagoides_farinae_small/SRR9005248.2"/>
-            <param name="xtra" value="hmm: https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/gnomon/hmm_parameters/6956.params&#10;tasks:&#10;  star_wnode:&#10;    star_wnode: -cpus-per-worker 4"/>
-            <output name="output"><assert_contents><has_size min="1"/></assert_contents></output>
+            <param name="extra" value="hmm: https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/gnomon/hmm_parameters/6956.params&#10;tasks:&#10;  star_wnode:&#10;    star_wnode: -cpus-per-worker 4"/>
+            <output name="output">
+                <assert_contents>
+                    <has_size min="1"/>
+                </assert_contents>
+            </output>
             <output_collection name="nextflow_stats" type="list">
-                <element name="nf_log"><assert_contents><has_size min="1"/></assert_contents></element>
-                <element name="nf_report"><assert_contents><has_size min="1"/></assert_contents></element>
-                <element name="nf_trace"><assert_contents><has_size min="1"/></assert_contents></element>
-                <element name="nf_timeline"><assert_contents><has_size min="1"/></assert_contents></element>
-                <element name="nf_params"><assert_contents><has_size min="1"/></assert_contents></element>
+                <element name="nf_log">
+                    <assert_contents>
+                        <has_size min="1"/>
+                    </assert_contents>
+                </element>
+                <element name="nf_report">
+                    <assert_contents>
+                        <has_size min="1"/>
+                    </assert_contents>
+                </element>
+                <element name="nf_trace">
+                    <assert_contents>
+                        <has_size min="1"/>
+                    </assert_contents>
+                </element>
+                <element name="nf_timeline">
+                    <assert_contents>
+                        <has_size min="1"/>
+                    </assert_contents>
+                </element>
+                <element name="nf_params">
+                    <assert_contents>
+                        <has_size min="1"/>
+                    </assert_contents>
+                </element>
             </output_collection>
         </test>
         <test expect_test_failure="true">
             <param name="input_style" value="history"/>
             <param name="yamlin" value="input.yaml"/>
-            <output name="output"><assert_contents><has_size min="1"/></assert_contents></output>
+            <output name="output">
+                <assert_contents>
+                    <has_size min="1"/>
+                </assert_contents>
+            </output>
             <output_collection name="nextflow_stats" type="list">
-                <element name="nf_log"><assert_contents><has_size min="1"/></assert_contents></element>
-                <element name="nf_report"><assert_contents><has_size min="1"/></assert_contents></element>
-                <element name="nf_trace"><assert_contents><has_size min="1"/></assert_contents></element>
-                <element name="nf_timeline"><assert_contents><has_size min="1"/></assert_contents></element>
-                <element name="nf_params"><assert_contents><has_size min="1"/></assert_contents></element>
+                <element name="nf_log">
+                    <assert_contents>
+                        <has_size min="1"/>
+                    </assert_contents>
+                </element>
+                <element name="nf_report">
+                    <assert_contents>
+                        <has_size min="1"/>
+                    </assert_contents>
+                </element>
+                <element name="nf_trace">
+                    <assert_contents>
+                        <has_size min="1"/>
+                    </assert_contents>
+                </element>
+                <element name="nf_timeline">
+                    <assert_contents>
+                        <has_size min="1"/>
+                    </assert_contents>
+                </element>
+                <element name="nf_params">
+                    <assert_contents>
+                        <has_size min="1"/>
+                    </assert_contents>
+                </element>
             </output_collection>
         </test>
     </tests>
--- a/test-data/input.yaml	Mon Nov 18 17:24:04 2024 +0000
+++ b/test-data/input.yaml	Fri Dec 20 21:26:46 2024 +0000
@@ -1,15 +1,7 @@
-# This is a very minimal example of EGAPx, it fits into 4 CPU cores and 6GB of memory.
-# To be able to do this, we culled the input files and some stages of execution.
-# To limit the requirements you also need to use -e docker_minimal
-
-genome: https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/020/809/275/GCF_020809275.1_ASM2080927v1/GCF_020809275.1_ASM2080927v1_genomic.fna.gz
+genome: https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/020/809/275/GCA_020809275.1_ASM2080927v1/GCA_020809275.1_ASM2080927v1_genomic.fna.gz
 taxid: 6954
 reads:
-  - https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/sample_data/Dermatophagoides_farinae_small/SRR8506572.1
-  - https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/sample_data/Dermatophagoides_farinae_small/SRR8506572.2
-  - https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/sample_data/Dermatophagoides_farinae_small/SRR9005248.1
-  - https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/sample_data/Dermatophagoides_farinae_small/SRR9005248.2
-hmm: https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/gnomon/hmm_parameters/6956.params
-tasks:
-  star_wnode:
-    star_wnode: -cpus-per-worker 4
+  - https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/sample_data/Dermatophagoides_farinae_small/SRR8506572_1.gz
+  - https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/sample_data/Dermatophagoides_farinae_small/SRR8506572_2.gz
+  - https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/sample_data/Dermatophagoides_farinae_small/SRR9005248_1.gz
+  - https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/sample_data/Dermatophagoides_farinae_small/SRR9005248_2.gz