diff nf/subworkflows/ncbi/utilities.nf @ 0:d9c5c5b87fec draft

planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
author fubar
date Sat, 03 Aug 2024 11:16:53 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/nf/subworkflows/ncbi/utilities.nf	Sat Aug 03 11:16:53 2024 +0000
@@ -0,0 +1,162 @@
+#!/usr/bin/env nextflow
+nextflow.enable.dsl=2
+
+
+// Analog of shlex.split
+def List<String> shellSplit(CharSequence s) {
+    List<String> tokens = []
+    boolean escaping = false
+    char quoteChar = ' '
+    boolean quoting = false
+    int lastCloseQuoteIndex = Integer.MIN_VALUE
+    StringBuilder current = new StringBuilder()
+    
+    s.eachWithIndex { c, i ->
+        if (escaping) {
+            current.append(c)
+            escaping = false
+        // } else if (c == '\\' && !(quoting && quoteChar == '\'')) {
+        } else if (c == '\\' && !quoting) {
+            escaping = true
+        } else if (quoting && c == quoteChar) {
+            quoting = false
+            lastCloseQuoteIndex = i
+        } else if (!quoting && (c == '\'' || c == '"')) {
+            quoting = true
+            quoteChar = c
+        } else if (!quoting && c.isAllWhitespace()) {
+            if (current.size() > 0 || lastCloseQuoteIndex == (i - 1)) {
+                tokens.add(current.toString())
+                current = new StringBuilder()
+            }
+        } else {
+            current.append(c)
+        }
+    }
+    if (current.size() > 0 || lastCloseQuoteIndex == (s.length() - 1)) {
+        tokens.add(current.toString())
+    }
+
+    return tokens
+}
+
+
+// Convert a parameter list into a map
+def Map<String, String> to_map(List<String> list )
+{
+    def map = [:]
+    def s = list.size()
+    def i = 0
+    while (i < s)
+    {
+        def elem = list.get(i)
+        i = i + 1
+        if (elem.size() > 0 && elem[0] == '-')
+        {
+            if (i < s) {
+                def val = list.get(i)
+                if ( val.size() > 0 && (val[0] != '-' || val.contains(' ')) )
+                {
+                    map[elem] = val
+                    i = i + 1
+                } else {
+                    map[elem] = ""
+                }
+            } else {
+                map[elem] = ""
+            }
+        } else {
+            println("Error: parameter string not well formed, map ${map}, elem ${elem}, i ${i}, s ${s}")
+            return map
+        }
+    }
+    return map
+}
+
+
+def quote(String s)
+{
+    if (s.size() > 0 && !(s =~ /[^\w@%+=:,.\/-]/)) {
+        return s
+    }
+    return "'" + s + "'"
+}
+
+
+// Read a section of the parameters and merge them into the default parameters
+// Parameters:
+//   default_params: the default parameters, string
+//   parameters: the parameters as a map from string to string
+//   section_name: the name of the section in the parameters map to use
+// Return: the merged parameters
+def merge_params(default_params, parameters, section_name)
+{
+    def section = parameters.get(section_name, "")
+    def update_map = to_map(shellSplit(section))
+    def default_params_map = to_map(shellSplit(default_params))
+    default_params_map.putAll(update_map)
+    def l = []
+    default_params_map.each { parameter, value  ->
+        l << quote(parameter)
+        if (value.size() > 0) {
+            l << quote(value)
+        }
+    }
+
+    return l.join(" ")
+}
+
+
+
+process clean_fasta_ids {
+    input:
+        path fasta_in
+    output:
+        path "fasta_out", emit: 'fasta_out'
+    script:
+    """
+    ## turns Fasta inputs formatted with multi-part IDs into
+    ## single-part IDs, like
+    ## >gi|1234|ref|NW_1234.1 Some Defline For This Org
+    ## >gi|1234 Some Defline For This Org
+    ## LDS chokes on the multi-part IDs.
+    # the base64 nonsense is because I couldnt get it to not complain about the regex as syntax errors in some way.
+    # its just this:
+    # import re,sys;
+    # for l in sys.stdin:
+    #     <I had to delete the regex here because even in a comment nextflow lost it>
+    echo 'aW1wb3J0IHJlLHN5czsKZm9yIGwgaW4gc3lzLnN0ZGluOgogICAgcHJpbnQocmUuc3ViKHIiXig+' > reol.b64
+    echo 'Z2lcfFxkKylcfD8oW2Etel0rXHxbQS1aX10rW1xkXC5dK1x8KSguKikiLCAiXGc8MT5cZzwzPiIs' >> reol.b64
+    echo 'IGwuc3RyaXAoKSkpCg==' >> reol.b64
+    base64 -d ./reol.b64 > ./reol.py
+    cat ${fasta_in} | python reol.py > ./fasta_out
+    """
+    stub:
+    """
+    touch ./fasta.out
+    """
+}
+
+
+
+process multireader {
+    input:
+        path fasta_file
+        val parameters
+    output:
+        path ('output/fasta_file.asnt')  , emit: 'multireader_file'
+    script:
+    """
+    mkdir -p output
+    if [ -n "$fasta_file" ]; then
+        multireader $parameters -out-format asn_text  -input $fasta_file  -output output/fasta_file.asnt
+    else
+         touch output/fasta_file.asnt
+    fi
+    """
+    stub:
+    """
+        mkdir -p output
+        touch output/fasta_file.asnt
+    """
+}