Repository 'umi_tools_dedup'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/umi_tools_dedup

Changeset 11:7fa28eb10fed (2021-02-10)
Previous changeset 10:c6567483aa1e (2019-12-05) Next changeset 12:4098ab380097 (2021-09-13)
Commit message:
"planemo upload commit 2da1197aac6a18df9252e5da096645d2ecaece88"
modified:
umi-tools_dedup.xml
added:
test-data/stats_outputs_edit_distance.tsv
test-data/stats_outputs_per_umi.tsv
test-data/stats_outputs_per_umi_per_position.tsv
b
diff -r c6567483aa1e -r 7fa28eb10fed test-data/stats_outputs_edit_distance.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/stats_outputs_edit_distance.tsv Wed Feb 10 19:30:35 2021 +0000
b
@@ -0,0 +1,3 @@
+unique unique_null directional directional_null edit_distance
+491 491 491 491 Single_UMI
+0 0 0 0 0
b
diff -r c6567483aa1e -r 7fa28eb10fed test-data/stats_outputs_per_umi.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/stats_outputs_per_umi.tsv Wed Feb 10 19:30:35 2021 +0000
b
@@ -0,0 +1,17 @@
+UMI median_counts_pre times_observed_pre total_counts_pre median_counts_post times_observed_post total_counts_post
+AA 1 34 35 1 34 35
+AC 1 35 35 1 35 35
+AG 1 25 25 1 25 25
+AT 1 50 50 1 50 50
+CA 1 27 27 1 27 27
+CC 1 21 21 1 21 21
+CG 1 11 11 1 11 11
+CT 1 23 24 1 23 24
+GA 1 24 24 1 24 24
+GC 1 23 23 1 23 23
+GG 1 31 32 1 31 32
+GT 1 27 28 1 27 28
+TA 1 61 63 1 61 63
+TC 1 24 24 1 24 24
+TG 1 27 27 1 27 27
+TT 1 48 48 1 48 48
b
diff -r c6567483aa1e -r 7fa28eb10fed test-data/stats_outputs_per_umi_per_position.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/stats_outputs_per_umi_per_position.tsv Wed Feb 10 19:30:35 2021 +0000
b
@@ -0,0 +1,4 @@
+counts instances_pre instances_post
+1 486 486
+2 4 4
+3 1 1
b
diff -r c6567483aa1e -r 7fa28eb10fed umi-tools_dedup.xml
--- a/umi-tools_dedup.xml Thu Dec 05 01:32:18 2019 -0500
+++ b/umi-tools_dedup.xml Wed Feb 10 19:30:35 2021 +0000
[
@@ -1,4 +1,4 @@
-<tool id="umi_tools_dedup" name="UMI-tools deduplicate" version="@VERSION@.0">
+<tool id="umi_tools_dedup" name="UMI-tools deduplicate" version="@VERSION@+galaxy1">
     <description>Extract UMI from fastq files</description>
     <macros>
         <import>macros.xml</import>
@@ -16,6 +16,7 @@
         #end if
 
         umi_tools dedup
+            $output_stats_bool
             --random-seed 0
             --extract-umi-method $extract_umi_method
             #if str($extract_umi_method) != 'read_id':
@@ -63,51 +64,71 @@
         <param name="per_gene" argument="--per-gene" type="boolean" truevalue="--per-gene" falsevalue="" label="Deduplicate per gene" help="As above except with this option you can align to a reference transcriptome with more than one transcript per gene. You need to also provide a map of genes to transcripts. This will also add a metacontig ('MC') tag to the output BAM file." />
         <param name="gene_transcript_map" argument="--gene-transcript-map" type="data" format="tabular" optional="True" label="Tabular file mapping genes to transripts" />
         <param name="gene_tag" argument="--gene-tag" type="text" optional="True" label="Deduplicate by this gene tag" help="As --per-gene except here the gene information is encoded in the bam read tag specified so you do not need to supply the mapping file." />
+        <param name="output_stats_bool" argument="--output-stats" type="boolean" truevalue="--output-stats=stats_outputs" falsevalue="" checked="false" label="Output UMI related statistics files?" help="When enabled, three tabular files are collected: edit distance, per-UMI counts and per-UMI-per-position counts." />
     </inputs>
     <outputs>
         <data format="bam" name="output" />
+        <collection name="output_stats" type="list" label="UMI_tools dedup stats">
+            <filter>output_stats_bool</filter>
+            <data name="edit_distance" format="tabular" from_work_dir="stats_outputs_edit_distance.tsv"/>
+            <data name="per_umi" format="tabular" from_work_dir="stats_outputs_per_umi.tsv"/>
+            <data name="per_umi_per_position" format="tabular" from_work_dir="stats_outputs_per_umi_per_position.tsv"/>
+        </collection>
     </outputs>
     <tests>
-        <test>
+        <test expect_num_outputs="1">
             <param name="input" value="group_in1.sam" ftype="sam" />
             <param name="extract_umi_method" value="read_id" />
             <param name="method" value="unique" />
             <output name="output" file="dedup_out1.bam" ftype="bam" sort="True"/>
         </test>
-        <test>
+        <test expect_num_outputs="1">
             <param name="input" value="group_in2.bam" ftype="bam" />
             <param name="extract_umi_method" value="read_id" />
             <param name="paired" value="True" />
             <param name="method" value="unique" />
             <output name="output" file="dedup_out2.bam" ftype="bam" sort="True" />
         </test>
-        <test>
+        <test expect_num_outputs="1">
             <param name="input" value="group_in3.bam" ftype="bam" />
             <param name="extract_umi_method" value="read_id" />
             <param name="method" value="unique" />
             <output name="output" file="dedup_out3.bam" ftype="bam" sort="True" />
         </test>
-        <test>
+        <test expect_num_outputs="1">
             <param name="input" value="group_in4.bam" ftype="bam" />
             <param name="extract_umi_method" value="tag" />
             <param name="umi_tag" value="BX" />
             <param name="method" value="unique" />
             <output name="output" file="dedup_out4.bam" ftype="bam" sort="True" />
         </test>
-        <test>
+        <test expect_num_outputs="1">
             <param name="input" value="group_in5.bam" ftype="bam" />
             <param name="extract_umi_method" value="read_id" />
             <param name="umi_tag" value="BX" />
             <param name="method" value="cluster" />
             <output name="output" file="dedup_out5.bam" ftype="bam" sort="True" />
         </test>
-        <test>
+        <test expect_num_outputs="1">
             <param name="input" value="group_in6.bam" ftype="bam" />
             <param name="extract_umi_method" value="read_id" />
             <param name="umi_tag" value="BX" />
             <param name="method" value="directional" />
             <output name="output" file="dedup_out6.bam" ftype="bam" sort="True" />
         </test>
+        <test expect_num_outputs="5">
+            <param name="input" value="group_in6.bam" ftype="bam" />
+            <param name="extract_umi_method" value="read_id" />
+            <param name="umi_tag" value="BX" />
+            <param name="method" value="directional" />
+            <param name="output_stats_bool" value="true"/>
+            <output name="output" file="dedup_out6.bam" ftype="bam" sort="True" />
+            <output_collection name="output_stats">
+                <element name="edit_distance" file="stats_outputs_edit_distance.tsv" />
+                <element name="per_umi" file="stats_outputs_per_umi.tsv" />
+                <element name="per_umi_per_position" file="stats_outputs_per_umi_per_position.tsv" />
+            </output_collection>
+        </test>
     </tests>
     <help><![CDATA[
 umi_tools dedup - Deduplicate reads based on their UMI