changeset 3:b557acca0b56 draft

planemo upload commit a7a086ce7d7d84f53d4a022fa1da25ef7b9a5b9a
author iuc
date Fri, 20 Jul 2018 03:50:03 -0400 (2018-07-20)
parents 2cf36d9ea571
children 70cb5527defb
files test-data/fc.ENSDARG00000019692.counts.name test-data/fc.ENSDARG00000019692.counts.test umi-tools_counts.xml
diffstat 3 files changed, 86 insertions(+), 31 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/fc.ENSDARG00000019692.counts.name	Fri Jul 20 03:50:03 2018 -0400
@@ -0,0 +1,2 @@
+gene	fc_ENSDARG00000019692_ACCAGA	fc_ENSDARG00000019692_ACGTTG	fc_ENSDARG00000019692_ACTCTG	fc_ENSDARG00000019692_AGACAG	fc_ENSDARG00000019692_AGTGTC	fc_ENSDARG00000019692_ATGTCG	fc_ENSDARG00000019692_CTAGGA	fc_ENSDARG00000019692_GAAGAC	fc_ENSDARG00000019692_GGTAAC	fc_ENSDARG00000019692_TGGTGA
+ENSDARG00000019692	2	1	1	1	1	1	1	1	2	1
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/fc.ENSDARG00000019692.counts.test	Fri Jul 20 03:50:03 2018 -0400
@@ -0,0 +1,2 @@
+gene	test_ACCAGA	test_ACGTTG	test_ACTCTG	test_AGACAG	test_AGTGTC	test_ATGTCG	test_CTAGGA	test_GAAGAC	test_GGTAAC	test_TGGTGA
+ENSDARG00000019692	2	1	1	1	1	1	1	1	2	1
--- a/umi-tools_counts.xml	Mon Jul 16 17:35:28 2018 -0400
+++ b/umi-tools_counts.xml	Fri Jul 20 03:50:03 2018 -0400
@@ -1,4 +1,4 @@
-<tool id="umi_tools_count" name="UMI-tools count" version="@VERSION@.1">
+<tool id="umi_tools_count" name="UMI-tools count" version="@VERSION@.2">
     <description>performs quantification of UMIs from BAM files</description>
     <macros>
         <import>macros.xml</import>
@@ -10,36 +10,50 @@
     </macros>
     <expand macro="requirements" />
     <command detect_errors="exit_code"><![CDATA[
-    ln -s '${input_bam}' 'input.bam' &&
-    ln -s '${input_bam.metadata.bam_index}' 'input.bam.bai' &&
+#import re
+
+ln -s '${input_bam}' 'input.bam' &&
+ln -s '${input_bam.metadata.bam_index}' 'input.bam.bai' &&
 
-    umi_tools count
-            -I input.bam
-            '$paired'
-            --extract-umi-method='$barcodes.extract_umi_method.value'
-            #if str($barcodes.extract_umi_method) == 'read_id':
-            --umi-separator='$barcodes.umi_separator.value'
-            #else if str($barcodes.extract_umi_method) == 'tag':
-            --umi-tag='$barcodes.umi_tag.value'
-            --cell-tag='$barcodes.cell_tag.value'
-            #end if
-            --method='$method.value'
-            --edit-distance-threshold='$edit_distance_threshold'
-            --mapping-quality='$advanced.mapping_quality'
-            --per-gene
-            '$wide_format_cell_counts'
-            '$advanced.per_contig'
-            '$advanced.per_cell'
-            #if str($advanced.gene_tag) != "":
-            --gene-tag='$advanced.gene_tag.value'
-            #end if
-            #if str($advanced.skip_tags_regex) != "":
-            --skip-tags-regex='$advanced.skip_tags_regex.value'
-            #end if
-            #if '$advanced.random_seed' != 0:
-            --random-seed='$advanced.random_seed'
-            #end if
-            -S '$out_counts'
+umi_tools count
+    -I input.bam
+    '$paired'
+    --extract-umi-method='$barcodes.extract_umi_method.value'
+#if str($barcodes.extract_umi_method) == 'read_id':
+    --umi-separator='$barcodes.umi_separator.value'
+#else if str($barcodes.extract_umi_method) == 'tag':
+    --umi-tag='$barcodes.umi_tag.value'
+    --cell-tag='$barcodes.cell_tag.value'
+#end if
+    --method='$method.value'
+    --edit-distance-threshold='$edit_distance_threshold'
+    --mapping-quality='$advanced.mapping_quality'
+    --per-gene
+    '$wide_format_cell_counts'
+    '$advanced.per_contig'
+    '$advanced.per_cell'
+## Optional arguments: each one is emitted only when set (non-empty text, non-zero seed).
+#if str($advanced.gene_tag) != "":
+    --gene-tag='$advanced.gene_tag.value'
+#end if
+#if str($advanced.skip_tags_regex) != "":
+    --skip-tags-regex='$advanced.skip_tags_regex.value'
+#end if
+#if str($advanced.random_seed) != '0':
+    --random-seed='$advanced.random_seed'
+#end if
+    -S '$out_counts'
+## Optional post-processing: prefix the barcode columns of the header row with a label, either the
+## custom text or the dataset name (text after the last dot dropped, non-word runs replaced by an underscore).
+#if str($cond_extra.prepender) != "none":
+#set $replacer = re.sub('[^\w\_]+', '_', str($input_bam.element_identifier.rsplit('.',1)[0]))
+    #if str($cond_extra.prepender) == "string":
+#set $replacer = str($cond_extra.custom_label)
+    #end if
+## Rewrite line 1 of the output in place; only whole-word runs of A/C/G/T are treated as barcodes.
+&& sed -i -r '1s|\b([ACGT]+)\b|'"$replacer"'_\1|g' '$out_counts'
+#end if
+
     ]]></command>
     <inputs>
         <param name="input_bam" type="data" format="bam" label="Sorted BAM file" help="Please use the samtools sort tool to ensure a correct BAM input" />
@@ -84,7 +98,7 @@
             <!-- <param argument="-\-per-gene" name="per_gene" type="text" label="Group reads together if they have the same gene" help="Reads will be grouped together if they have the same gene. This is useful if your library
 prep generates PCR duplicates with non-identical alignment positions such as CEL-Seq. Note this option is hardcoded to be on with the count command. I.e counting is always performed per-gene. Must be combined with either
 -\-gene-tag or -\-per-contig option" /> -->
-            <param argument="--gene-tag" name="gene_tag" type="text" label="Deduplicate per gene." help="The gene information is encoded in the bam read tag." value="XT" >
+            <param argument="--gene-tag" name="gene_tag" type="text" label="Deduplicate per gene." value="XT" help="The gene information is encoded in the bam read tag." >
                 <expand macro="sanitize_tag" />
             </param>
             <param argument="--skip-tags-regex" name="skip_tags_regex" type="text" label="Skip any reads where the gene matches this tag" value="" >
@@ -111,6 +125,26 @@
             <param argument="--per-cell" name="per_cell" type="boolean" truevalue="--per-cell" falsevalue="" checked="true" label="Group reads only if they have the same cell barcode." />
             <param argument="--random-seed" name="random_seed" type="integer" min="0" value="0" label="Random Seed" />
         </section>
+        <conditional name="cond_extra"><!-- optional label prefixed to the barcode column headers of the output table -->
+            <param name="prepender" type="select" label="Prepend a label to all column headers" help="This preserves uniqueness when merging with other files with the same headers. Note: filename must not contain a '.' character">
+                <option value="none" selected="true">No modifications</option>
+                <option value="string">Custom Label</option>
+                <option value="dataset name">Dataset Name</option>
+            </param>
+            <when value="none" />
+            <when value="dataset name" />
+            <when value="string">
+                <param name="custom_label" type="text" label="Label to Prepend">
+                    <sanitizer invalid_char=""><!-- whitelist: letters, digits, '-', '_' and '.'; invalid_char is empty -->
+                        <valid initial="string.letters,string.digits">
+                            <add value="-" />
+                            <add value="_" />
+                            <add value="." />
+                        </valid>
+                    </sanitizer>
+                </param>
+            </when>
+        </conditional>
     </inputs>
     <outputs>
         <data name="out_counts" format="tabular" />
@@ -154,6 +188,23 @@
             <param name="method" value="unique" />
             <output name="out_counts" value="fc.ENSDARG00000019692.counts" />
         </test>
+        <test><!-- count ENSDARG00000019692; header barcodes are prefixed with the custom label "test" -->
+            <param name="input_bam" value="fc.ENSDARG00000019692.bam" />
+            <param name="method" value="unique" />
+            <conditional name="cond_extra" >
+                <param name="prepender" value="string" />
+                <param name="custom_label" value="test" />
+            </conditional>
+            <output name="out_counts" value="fc.ENSDARG00000019692.counts.test" />
+        </test>
+        <test><!-- count ENSDARG00000019692; header barcodes are prefixed with the label derived from the input dataset name -->
+            <param name="input_bam" value="fc.ENSDARG00000019692.bam" />
+            <param name="method" value="unique" />
+            <conditional name="cond_extra" >
+                <param name="prepender" value="dataset name" />
+            </conditional>
+            <output name="out_counts" value="fc.ENSDARG00000019692.counts.name" />
+        </test>
     </tests>
     <help><![CDATA[