Mercurial > repos > greg > vsnp_sample_names

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml	Sun Jan 03 15:54:36 2021 +0000
@@ -0,0 +1,24 @@
+<?xml version='1.0' encoding='UTF-8'?>
+<macros>
+    <token name="@WRAPPER_VERSION@">1.0</token> <!-- base wrapper version; vsnp_sample_names.xml appends ".1" to it for the tool version -->
+    <token name="@PROFILE@">19.09</token> <!-- substituted into the profile attribute of the tool element -->
+    <xml name="param_reference_source">
+        <param name="reference_source" type="select" label="Choose the source for the reference genome">
+            <option value="cached" selected="true">locally cached</option>
+            <option value="history">from history</option>
+        </param>
+    </xml> <!-- param_reference_source: reference genome source selector; not expanded by vsnp_sample_names.xml in this changeset -->
+    <xml name="citations">
+        <citations>
+            <citation type="bibtex">
+                @misc{None,
+                journal = {None},
+                author = {1. Stuber T},
+                title = {Manuscript in preparation},
+                year = {None},
+                url = {https://github.com/USDA-VS/vSNP},}
+            </citation>
+        </citations>
+    </xml> <!-- citations: expanded at the end of vsnp_sample_names.xml in place of the inline citations block -->
+</macros>
+
--- a/vsnp_sample_names.xml	Tue Oct 27 18:26:58 2020 +0000
+++ b/vsnp_sample_names.xml	Sun Jan 03 15:54:36 2021 +0000
@@ -1,91 +1,100 @@
-<tool id="vsnp_sample_names" name="vSNP: sample names" version="1.0.0">
+<tool id="vsnp_sample_names" name="vSNP: sample names" version="@WRAPPER_VERSION@.1" profile="@PROFILE@">
     <description></description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
     <command detect_errors="exit_code"><![CDATA[
 #import os
 #import re
-#set output_dir = 'output'
-mkdir -p $output_dir
-#if str($input_type_cond.input_type) == "single":
-    ## We may have a single read or a pair, but in
-    ## either case we want the same base file name.
-    #set sample_name = $os.path.basename($input_type_cond.read.element_identifier)
-    #if $sample_name.find(".") > 0:
-        #set sample_name = $sample_name.split(".")[0]
-    #end if
-    #if $sample_name.find("_") > 0:
-        #set sample_name = $sample_name.split("_")[0]
+
+## Sample name = read1 dataset name with every character other than whitespace, word characters and '-' replaced by '_'.
+## read1 is bound in both branches because the suffix handling below calls read1.is_of_type().
+#if str($input_type_cond.input_type) in ['single', 'pair']:
+    #set read1 = $input_type_cond.read1
+    #set sample_name = re.sub('[^\s\w\-]', '_', str($read1.element_identifier))
+#else:
+    #set read1 = $input_type_cond.reads_collection['forward']
+    #set sample_name = re.sub('[^\s\w\-]', '_', str($read1.name))
+#end if
+
+#if $sample_name.find('_R1') > 0:
+    ## Something like CMC_20E1_R1.fastq.gz
+    #set sample_name = $sample_name.split('_R1')[0]
+#else if $sample_name.find(".") > 0:
+    #if $read1.is_of_type('fastqsanger.gz'):
+        ## Something like my_sample.fastq.gz
+        #set sample_name = '.'.join($sample_name.split('.')[0:-2])
+    #else:
+        ## Something like my_sample.fastq
+        #set sample_name = $os.path.splitext($sample_name)[0]
     #end if
-    && echo '$sample_name' > '$output'
-#else:
-    #for $i in $input_type_cond.reads_collection:
-        #set sample_name = $os.path.basename($i.element_identifier)
-        #if $sample_name.find(".") > 0:
-            #set sample_name = $sample_name.split(".")[0]
-        #end if
-        #set output_file = $os.path.join($output_dir, $sample_name)
-        && echo '$sample_name' > '$output_file'
-    #end for
+#else if $sample_name.find("_") > 0:
+    #if $read1.is_of_type('fastqsanger.gz'):
+        ## Something like my_sample_fastq_gz
+        #set sample_name = '_'.join($sample_name.split('_')[0:-2])
+    #else:
+        ## Something like my_sample_fastq
+        #set sample_name = "_".join($sample_name.split("_")[0:-1])
+    #end if
 #end if
+echo '$sample_name' > '$output'
 ]]></command>
     <inputs>
         <conditional name="input_type_cond">
             <param name="input_type" type="select" label="Choose the category of the files to be analyzed">
-                <option value="single" selected="true">Single files</option>
-                <option value="collection">Collections of files</option>
+                <option value="single" selected="true">Single dataset</option>
+                <option value="pair">Dataset pair</option>
+                <option value="paired">List of dataset pairs</option>
             </param>
             <when value="single">
-                <param name="read" type="data" format="fastqsanger.gz,fastqsanger" label="Sample file"/>
+                <param name="read1" type="data" format="fastqsanger.gz,fastqsanger" label="Read1 fastq file"/>
             </when>
-            <when value="collection">
-                <param name="reads_collection" type="data_collection" format="fastqsanger,fastqsanger.gz" collection_type="list" label="Collection of sample files"/>
+            <when value="paired"> <!-- paired collection; the command template reads only its 'forward' element -->
+                <param name="reads_collection" type="data_collection" format="fastqsanger,fastqsanger.gz" collection_type="paired" label="Collection of fastqsanger paired read files"/>
+            </when>
+            <when value="pair"> <!-- two separate datasets; the sample name is derived from read1 -->
+                <param name="read1" type="data" format="fastqsanger.gz,fastqsanger" label="Read1 fastq file"/>
+                <param name="read2" type="data" format="fastqsanger.gz,fastqsanger" label="Read2 fastq file"/> <!-- accepted as input but not referenced by the command template -->
             </when>
         </conditional>
     </inputs>
     <outputs>
-        <data name="output" format="txt">
-            <filter>input_type_cond['input_type'] == 'single'</filter>
-        </data>
-        <collection name="output__collection" type="list">
-            <discover_datasets pattern="__name__" directory="output" format="txt" />
-            <filter>input_type_cond['input_type'] == 'collection'</filter>
-        </collection>
+        <data name="output" format="txt"/> <!-- single unfiltered output for every input type; receives the sample name echoed by the command -->
     </outputs>
     <tests>
+        <!-- Single dataset: only read1 is supplied -->
         <test>
-            <param name="input_type" value="collection"/>
+            <param name="input_type" value="single"/>
+            <param name="read1" value="CMC_20E1_R1.fastq.gz" dbkey="89"/>
+            <output name="output" file="sample_names.txt" ftype="txt"/>
+        </test>
+        <!-- One paired collection with forward and reverse elements -->
+        <test>
+            <param name="input_type" value="paired"/>
             <param name="reads_collection">
-                <collection type="list">
-                    <element name="BCG_Danish_Human_UK_SRR9596061.fastq" value="BCG_Danish_Human_UK_SRR9596061.fastq" dbkey="89"/>
-                    <element name="Dassie_Dassie_ZA_SRR3745455.fastq" value="Dassie_Dassie_ZA_SRR3745455.fastq" dbkey="89"/>
-                    <element name="Mbov_Cattle_NI_SRR10993937.fastq" value="Mbov_Cattle_NI_SRR10993937.fastq" dbkey="89"/>
+                <collection type="paired">
+                    <element name="forward" value="CMC_20E1_R1.fastq.gz"/>
+                    <element name="reverse" value="CMC_20E1_R2.fastq.gz"/>
                 </collection>
             </param>
-            <output_collection name="output__collection" type="list">
-                <element name="BCG_Danish_Human_UK_SRR9596061" file="BCG_Danish_Human_UK_SRR9596061" ftype="txt"/>
-                <element name="Dassie_Dassie_ZA_SRR3745455" file="Dassie_Dassie_ZA_SRR3745455" ftype="txt"/>
-                <element name="Mbov_Cattle_NI_SRR10993937" file="Mbov_Cattle_NI_SRR10993937" ftype="txt"/>
-            </output_collection>
+            <output name="output" file="sample_names.txt" ftype="txt"/>
+        </test>
+        <!-- Paired reads supplied as two separate datasets (read1 and read2) -->
+        <test>
+            <param name="input_type" value="pair"/>
+            <param name="read1" value="CMC_20E1_R1.fastq.gz" dbkey="89"/>
+            <param name="read2" value="CMC_20E1_R2.fastq.gz" dbkey="89"/>
+            <output name="output" file="sample_names.txt" ftype="txt"/>
         </test>
     </tests>
     <help>
 **What it does**

-Accepts one or more sample files and extracts a unique portion of the file name as the content of the output file(s).  These
-text files are then used as workflow parameter values for the Read Group Identifier parameter in the bwa-mem tool.
-
-**Required Options**
-
- * **Choose the category of the files to be analyzed** - select "Single files" or "Collections of files", then select the appropriate history items (single or paired fastqsanger reads or collections of fastqsanger reads) based on the selected option.
+Accepts fastqsanger sample files, extracts a unique portion of the file name as the sample name, and writes it to
+the output.  The output text file can be consumed by the **Parse parameter value** expression tool to provide workflow
+parameter values to the **Read group identifier (ID)** and the **Sample name identifier (SM)** parameters in the
+**Map with BWA-MEM** tool.
     </help>
-    <citations>
-        <citation type="bibtex">
-            @misc{None,
-            journal = {None},
-            author = {1. Stuber T},
-            title = {Manuscript in preparation},
-            year = {None},
-            url = {https://github.com/USDA-VS/vSNP},}
-        </citation>
-    </citations>
+    <expand macro="citations"/>
 </tool>