Mercurial > repos > greg > vsnp_sample_names

Binary file test-data/SRR14085881_forward has changed
Binary file test-data/SRR14085881_reverse has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sample_names2.txt	Fri Sep 03 17:21:52 2021 +0000
@@ -0,0 +1,1 @@
+SRR14085881
--- a/vsnp_sample_names.xml	Wed Aug 04 12:46:56 2021 +0000
+++ b/vsnp_sample_names.xml	Fri Sep 03 17:21:52 2021 +0000
@@ -4,16 +4,24 @@
         <import>macros.xml</import>
     </macros>
     <command detect_errors="exit_code"><![CDATA[
+#import difflib
 #import os
 #import re

-#set sample_name_read2 = None
-
-#if $input_type_cond.input_type in ['single', 'pair']:
+#if $input_type_cond.input_type == 'single':
+    #set read1 = $input_type_cond.read1
+    #set sample_name = re.sub('[^\s\w\-\\.]', '_', str($read1.element_identifier))
+#else if $input_type_cond.input_type == 'pair':
     #set read1 = $input_type_cond.read1
-    #set sample_name = re.sub('[^\s\w\-]', '_', str($read1.element_identifier))
+    #set read1_identifier = re.sub('[^\s\w\-]', '_', str($read1.element_identifier))
+    #set read2 = $input_type_cond.read2
+    #set read2_identifier = re.sub('[^\s\w\-]', '_', str($read2.element_identifier))
+    #set matches = difflib.SequenceMatcher(None, read1_identifier, read2_identifier).get_matching_blocks()
+    #set match = $matches[0]
+    #set sample_name = re.sub('[^\s\w\-]', '_', str($read1_identifier[match.a:match.a + match.size]))
 #else:
-    #set read1_filename = $input_type_cond.reads_collection['forward'].name
+    #set read1 = $input_type_cond.reads_collection['forward']
+    #set read1_filename = $read1.name
     #set sample_name = re.sub('[^\s\w\-]', '_', str($read1_filename))
 #end if

@@ -21,7 +29,7 @@
     ## Something like CMC_20E1_R1.fastq.gz
     #set sample_name = $sample_name.split('_R1')[0]
 #else if $sample_name.find(".") > 0:
-    #if $read1.is_of_type('fastqsanger.gz'):
+    #if $read1.is_of_type('fastqsanger.gz') and $sample_name.endswith('gz'):
         ## Something like my_sample.fastq.gz
         #set sample_name = '.'.join($sample_name.split('.')[0:-2])
     #else:
@@ -29,7 +37,7 @@
         #set sample_name = $os.path.splitext($sample_name)[0]
     #end if
 #else if $sample_name.find("_") > 0:
-    #if $read1.is_of_type('fastqsanger.gz'):
+    #if $read1.is_of_type('fastqsanger.gz') and $sample_name.endswith('gz'):
         ## Something like my_sample_fastq_gz
         #set sample_name = '_'.join($sample_name.split('_')[0:-2])
     #else:
@@ -37,6 +45,7 @@
         #set sample_name = "_".join($sample_name.split("_")[0:-1])
     #end if
 #end if
+
 echo '$sample_name' > '$output'
 ]]></command>
     <inputs>
@@ -49,13 +58,13 @@
             <when value="single">
                 <param name="read1" type="data" format="fastqsanger.gz,fastqsanger" label="Read1 fastq file"/>
             </when>
-            <when value="paired">
-                <param name="reads_collection" type="data_collection" format="fastqsanger,fastqsanger.gz" collection_type="paired" label="Collection of fastqsanger paired read files"/>
-            </when>
             <when value="pair">
                 <param name="read1" type="data" format="fastqsanger.gz,fastqsanger" label="Read1 fastq file"/>
                 <param name="read2" type="data" format="fastqsanger.gz,fastqsanger" label="Read2 fastq file"/>
             </when>
+            <when value="paired">
+                <param name="reads_collection" type="data_collection" format="fastqsanger,fastqsanger.gz" collection_type="paired" label="Collection of fastqsanger paired read files"/>
+            </when>
         </conditional>
     </inputs>
     <outputs>
@@ -68,7 +77,14 @@
             <param name="read1" value="CMC_20E1_R1.fastq.gz" dbkey="89"/>
             <output name="output" file="sample_names.txt" ftype="txt"/>
         </test>
-        <!-- Paired reads -->
+        <!-- Paired reads in separate datasets -->
+        <test>
+            <param name="input_type" value="pair"/>
+            <param name="read1" value="CMC_20E1_R1.fastq.gz" dbkey="89"/>
+            <param name="read2" value="CMC_20E1_R2.fastq.gz" dbkey="89"/>
+            <output name="output" file="sample_names.txt" ftype="txt"/>
+        </test>
+        <!-- Collection of Paired reads -->
         <test>
             <param name="input_type" value="paired"/>
             <param name="reads_collection">
@@ -79,12 +95,16 @@
             </param>
             <output name="output" file="sample_names.txt" ftype="txt"/>
         </test>
-        <!-- Paired reads in separate datasets -->
+        <!-- Collection of paired reads with extension-less file names (SRR14085881_forward / SRR14085881_reverse) -->
         <test>
-            <param name="input_type" value="pair"/>
-            <param name="read1" value="CMC_20E1_R1.fastq.gz" dbkey="89"/>
-            <param name="read2" value="CMC_20E1_R2.fastq.gz" dbkey="89"/>
-            <output name="output" file="sample_names.txt" ftype="txt"/>
+            <param name="input_type" value="paired"/>
+            <param name="reads_collection">
+                <collection type="paired">
+                    <element name="forward" value="SRR14085881_forward"/>
+                    <element name="reverse" value="SRR14085881_reverse"/>
+                </collection>
+            </param>
+            <output name="output" file="sample_names2.txt" ftype="txt"/>
         </test>
     </tests>
     <help>