diff umi-tools_extract.xml @ 5:f77bc14eba31 draft

planemo upload commit 57e3e460a740aa7aad217c8365527c49e88c9a30
author iuc
date Tue, 05 Jun 2018 19:44:38 -0400
parents e73a22ff585c
children 3cfd8e1073d7
line wrap: on
line diff
--- a/umi-tools_extract.xml	Mon Apr 16 16:38:40 2018 -0400
+++ b/umi-tools_extract.xml	Tue Jun 05 19:44:38 2018 -0400
@@ -8,13 +8,15 @@
     @COMMAND_LINK@
 
     umi_tools extract
+            --extract-method='$extract_method.value'
             --bc-pattern='$bc_pattern'
+
             #if $input_type.type == 'single':
                 #if $gz:
                     --stdin=input_single.gz
                     --stdout out.gz
                 #else
-                    --stdin='$input_type.input_single'
+                    --stdin=input_single.txt
                     --stdout '$out'
                 #end if
             #else:
@@ -24,8 +26,8 @@
                     --stdout out1.gz
                     --read2-out=out2.gz
                 #else:
-                    --stdin='$input_type.input_read1'
-                    --read2-in='$input_type.input_read2'
+                    --stdin=input_read1.txt
+                    --read2-in=input_read2.txt
                     --stdout '$out1'
                     --read2-out='$out2'
                 #end if
@@ -34,6 +36,13 @@
                     --bc-pattern2='$input_type.barcode.bc_pattern2'
                 #end if
             #end if
+
+            #if $barcodes.use_barcodes.value == 'yes':
+                --filter-cell-barcode
+                --whitelist='$barcodes.filter_barcode_file'
+                $barcodes.filter_correct ## unquoted on purpose: expands to the truevalue flag or to nothing
+            #end if
+
             #if not $prime3:
                 --3prime
             #end if
@@ -57,12 +66,49 @@
     ]]></command>
     <inputs>
         <expand macro="input_types" />
+
+        <conditional name="barcodes" >
+            <param name="use_barcodes" argument="--filter-cell-barcode" type="select" label="Use Known Barcodes?" >
+                <option value="yes">Yes</option>
+                <option value="no" selected="true" >No</option>
+            </param>
+            <when value="no" /> <!-- no extra inputs when barcode filtering is off -->
+            <when value="yes" >
+                <param name="filter_barcode_file" argument="--whitelist" type="data" format="tsv,tabular" label="Barcode File" /> <!-- tabular is accepted too, so barcode lists not typed as tsv can be selected -->
+                <param name="filter_correct" argument="--error-correct-cell" type="boolean" truevalue="--error-correct-cell" falsevalue="" checked="false" label="Apply correction to cell barcodes?" help="This only applies if your barcode file has two columns output from the umi_tools whitelist command." />
+            </when>
+        </conditional>
+
+        <param name="extract_method" argument="--extract-method" type="select" label="Method to extract barcodes" help="String patterns mark each position with a single letter (e.g. NNNXXX); regular expressions use named groups (e.g. ^(?P&lt;cell_1&gt;.{6})(?P&lt;umi_1&gt;.{10}))." >
+            <option value="regex">Regular Expressions</option> <!-- pattern is interpreted as a regular expression -->
+            <option value="string" selected="true">String</option> <!-- default -->
+        </param>
+            
         <param name="bc_pattern" argument="--bc-pattern" type="text" label="Barcode pattern for first read"
             help="Use this option to specify the format of the UMI/barcode. Use Ns to
                     represent the random positions and Xs to indicate the bc positions.
                     Bases with Ns will be extracted and added to the read name. Remaining
                     bases, marked with an X will be reattached to the read.">
+            <sanitizer invalid_char=""> <!-- characters outside the list below are replaced by the empty string -->
+                <valid initial="string.letters,string.digits">
+                    <add value="!="/>
+                    <add value="-"/>
+                    <add value="_"/>
+                    <add value="."/>
+                    <add value="?"/>
+                    <add value="&lt;"/> <!-- left angle bracket -->
+                    <add value="&gt;"/> <!-- right angle bracket -->
+                    <add value="&#91;"/> <!-- left square bracket -->
+                    <add value="&#93;"/> <!-- right square bracket -->
+                    <add value="&#94;"/> <!-- caret -->
+                    <add value="&#123;"/> <!-- left curly brace -->
+                    <add value="&#125;"/> <!-- right curly brace -->
+                    <add value="&#40;"/> <!-- left parenthesis -->
+                    <add value="&#41;"/> <!-- right parenthesis -->
+                </valid>
+            </sanitizer>
         </param>
+
         <param name="prime3" argument="--3prime" type="boolean" label="Is the barcode at the 5' end?"
             truevalue="1" falsevalue="0" checked="true"
             help="By default the barcode is assumed to be on the 5' end of the read, but
@@ -126,17 +172,37 @@
             <output name="out_log" file="out_paired.log" lines_diff="16"/>
         </test>
         <test>
-            <param name="type" value="paired_collection" />
+            <param name="type" value="paired_collection" /> <!-- same as the preceding test, but with uncompressed FASTQ inputs -->
             <param name="input_readpair" >
                 <collection type="paired">
-                    <element name="forward" ftype="fastq.gz" value="t_R1.fastq.gz" />
-                    <element name="reverse" ftype="fastq.gz" value="t_R2.fastq.gz" />
+                    <element name="forward" ftype="fastq" value="t_R1.fastq" />
+                    <element name="reverse" ftype="fastq" value="t_R2.fastq" />
                 </collection>
             </param>
             <param name="bc_pattern" value="NNNXXX" />
             <output name="out1" file="out_R1.fastq.gz" decompress="true" lines_diff="2" />
             <output name="out2" file="out_R2.fastq.gz" decompress="true" lines_diff="2" />
-            <output name="out_log" file="out_paired.log" lines_diff="16"/>
+            <output name="out_log" file="out_paired.log" lines_diff="25" />
+        </test>
+        <test> <!-- paired scrb_seq test reads, string barcode pattern, filtered against a known barcode file -->
+            <param name="type" value="paired" />
+            <param name="input_read1" value="scrb_seq_fastq.1.gz" ftype="fastq.gz" />
+            <param name="input_read2" value="scrb_seq_fastq.2.gz" ftype="fastq.gz" />
+            <param name="extract_method" value="string" />
+            <param name="bc_pattern" value="CCCCCCNNNNNNNNNN" />
+            <param name="use_barcodes" value="yes" />
+            <param name="filter_barcode_file" value="scrb_seq_barcodes" />
+            <output name="out2" file="scrb_extract.fastq.gz" decompress="true" /> <!-- only the second read output is compared -->
+        </test>
+        <test><!-- same inputs and expected output as the string-pattern test, but the barcode pattern is given as a regex -->
+            <param name="type" value="paired" />
+            <param name="input_read1" value="scrb_seq_fastq.1.gz" ftype="fastq.gz" />
+            <param name="input_read2" value="scrb_seq_fastq.2.gz" ftype="fastq.gz" />
+            <param name="extract_method" value="regex" />
+            <param name="bc_pattern" value="^(?P&lt;cell_1&gt;.{6})(?P&lt;umi_1&gt;.{10})" />
+            <param name="use_barcodes" value="yes" />
+            <param name="filter_barcode_file" value="scrb_seq_barcodes" />
+            <output name="out2" file="scrb_extract.fastq.gz" decompress="true" /> <!-- only the second read output is compared -->
         </test>
     </tests>
     <help><![CDATA[