Mercurial > repos > iuc > umi_tools_dedup

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/stats_outputs_edit_distance.tsv	Wed Feb 10 19:30:35 2021 +0000
@@ -0,0 +1,3 @@
+unique	unique_null	directional	directional_null	edit_distance
+491	491	491	491	Single_UMI
+0	0	0	0	0
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/stats_outputs_per_umi.tsv	Wed Feb 10 19:30:35 2021 +0000
@@ -0,0 +1,17 @@
+UMI	median_counts_pre	times_observed_pre	total_counts_pre	median_counts_post	times_observed_post	total_counts_post
+AA	1	34	35	1	34	35
+AC	1	35	35	1	35	35
+AG	1	25	25	1	25	25
+AT	1	50	50	1	50	50
+CA	1	27	27	1	27	27
+CC	1	21	21	1	21	21
+CG	1	11	11	1	11	11
+CT	1	23	24	1	23	24
+GA	1	24	24	1	24	24
+GC	1	23	23	1	23	23
+GG	1	31	32	1	31	32
+GT	1	27	28	1	27	28
+TA	1	61	63	1	61	63
+TC	1	24	24	1	24	24
+TG	1	27	27	1	27	27
+TT	1	48	48	1	48	48
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/stats_outputs_per_umi_per_position.tsv	Wed Feb 10 19:30:35 2021 +0000
@@ -0,0 +1,4 @@
+counts	instances_pre	instances_post
+1	486	486
+2	4	4
+3	1	1
--- a/umi-tools_dedup.xml	Thu Dec 05 01:32:18 2019 -0500
+++ b/umi-tools_dedup.xml	Wed Feb 10 19:30:35 2021 +0000
@@ -1,4 +1,4 @@
-<tool id="umi_tools_dedup" name="UMI-tools deduplicate" version="@VERSION@.0">
+<tool id="umi_tools_dedup" name="UMI-tools deduplicate" version="@VERSION@+galaxy1">
     <description>Extract UMI from fastq files</description>
     <macros>
         <import>macros.xml</import>
@@ -16,6 +16,7 @@
         #end if

         umi_tools dedup
+            $output_stats_bool
             --random-seed 0
             --extract-umi-method $extract_umi_method
             #if str($extract_umi_method) != 'read_id':
@@ -63,51 +64,71 @@
         <param name="per_gene" argument="--per-gene" type="boolean" truevalue="--per-gene" falsevalue="" label="Deduplicate per gene" help="As above except with this option you can align to a reference transcriptome with more than one transcript per gene. You need to also provide a map of genes to transcripts. This will also add a metacontig ('MC') tag to the output BAM file." />
         <param name="gene_transcript_map" argument="--gene-transcript-map" type="data" format="tabular" optional="True" label="Tabular file mapping genes to transripts" />
         <param name="gene_tag" argument="--gene-tag" type="text" optional="True" label="Deduplicate by this gene tag" help="As --per-gene except here the gene information is encoded in the bam read tag specified so you do not need to supply the mapping file." />
+        <param name="output_stats_bool" argument="--output-stats" type="boolean" truevalue="--output-stats=stats_outputs" falsevalue="" checked="false" label="Output UMI related statistics files?" help="Writes the edit distance, per-UMI and per-UMI-per-position statistics tables and returns them as a list collection." />
     </inputs>
     <outputs>
         <data format="bam" name="output" />
+        <collection name="output_stats" type="list" label="UMI_tools dedup stats"> <!-- list collection bundling the three tabular statistics files declared below -->
+            <filter>output_stats_bool</filter> <!-- gated on the output_stats_bool input, so the collection is only expected when that option is enabled -->
+            <data name="edit_distance" format="tabular" from_work_dir="stats_outputs_edit_distance.tsv"/> <!-- file names share the "stats_outputs" prefix set in the truevalue of output_stats_bool -->
+            <data name="per_umi" format="tabular" from_work_dir="stats_outputs_per_umi.tsv"/>
+            <data name="per_umi_per_position" format="tabular" from_work_dir="stats_outputs_per_umi_per_position.tsv"/>
+        </collection>
     </outputs>
     <tests>
-        <test>
+        <test expect_num_outputs="1">
             <param name="input" value="group_in1.sam" ftype="sam" />
             <param name="extract_umi_method" value="read_id" />
             <param name="method" value="unique" />
             <output name="output" file="dedup_out1.bam" ftype="bam" sort="True"/>
         </test>
-        <test>
+        <test expect_num_outputs="1">
             <param name="input" value="group_in2.bam" ftype="bam" />
             <param name="extract_umi_method" value="read_id" />
             <param name="paired" value="True" />
             <param name="method" value="unique" />
             <output name="output" file="dedup_out2.bam" ftype="bam" sort="True" />
         </test>
-        <test>
+        <test expect_num_outputs="1">
             <param name="input" value="group_in3.bam" ftype="bam" />
             <param name="extract_umi_method" value="read_id" />
             <param name="method" value="unique" />
             <output name="output" file="dedup_out3.bam" ftype="bam" sort="True" />
         </test>
-        <test>
+        <test expect_num_outputs="1">
             <param name="input" value="group_in4.bam" ftype="bam" />
             <param name="extract_umi_method" value="tag" />
             <param name="umi_tag" value="BX" />
             <param name="method" value="unique" />
             <output name="output" file="dedup_out4.bam" ftype="bam" sort="True" />
         </test>
-        <test>
+        <test expect_num_outputs="1">
             <param name="input" value="group_in5.bam" ftype="bam" />
             <param name="extract_umi_method" value="read_id" />
             <param name="umi_tag" value="BX" />
             <param name="method" value="cluster" />
             <output name="output" file="dedup_out5.bam" ftype="bam" sort="True" />
         </test>
-        <test>
+        <test expect_num_outputs="1">
             <param name="input" value="group_in6.bam" ftype="bam" />
             <param name="extract_umi_method" value="read_id" />
             <param name="umi_tag" value="BX" />
             <param name="method" value="directional" />
             <output name="output" file="dedup_out6.bam" ftype="bam" sort="True" />
         </test>
+        <test expect_num_outputs="5"> <!-- same inputs as the preceding directional test, with the statistics option switched on; NOTE(review): 5 presumably counts the BAM, the collection and its three elements - confirm against Galaxy's counting -->
+            <param name="input" value="group_in6.bam" ftype="bam" />
+            <param name="extract_umi_method" value="read_id" />
+            <param name="umi_tag" value="BX" />
+            <param name="method" value="directional" />
+            <param name="output_stats_bool" value="true"/> <!-- enables the output_stats collection checked below -->
+            <output name="output" file="dedup_out6.bam" ftype="bam" sort="True" /> <!-- compared against the same reference BAM as the preceding test without statistics -->
+            <output_collection name="output_stats"> <!-- each element is compared against the fixture of the same name under test-data/ -->
+                <element name="edit_distance" file="stats_outputs_edit_distance.tsv" />
+                <element name="per_umi" file="stats_outputs_per_umi.tsv" />
+                <element name="per_umi_per_position" file="stats_outputs_per_umi_per_position.tsv" />
+            </output_collection>
+        </test>
     </tests>
     <help><![CDATA[
 umi_tools dedup - Deduplicate reads based on their UMI