diff umi-tools_group.xml @ 1:f73f13641bb6 draft

planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
author iuc
date Wed, 10 Jan 2018 19:09:28 -0500
parents 860bc357b678
children a24f5b991320
line wrap: on
line diff
--- a/umi-tools_group.xml	Tue Aug 29 17:37:21 2017 -0400
+++ b/umi-tools_group.xml	Wed Jan 10 19:09:28 2018 -0500
@@ -4,7 +4,7 @@
         <import>macros.xml</import>
     </macros>
     <expand macro="requirements">
-        <requirement type="package" version="1.5">samtools</requirement>
+        <requirement type="package" version="1.6">samtools</requirement>
     </expand>
     <command detect_errors="exit_code"><![CDATA[
         #if $input.is_of_type("sam"):
@@ -15,9 +15,11 @@
             #set $input_file = 'input.bam'
         #end if
 
-        umi_tools group --extract-umi-method $extract_umi_method
+        umi_tools group
+            --random-seed 0
+            --extract-umi-method $extract_umi_method
             #if str($extract_umi_method) != 'read_id':
-                --umi-separator '$umi_separator' --umi-tag $umi_tag
+                --umi-separator '$umi_separator' --umi-tag '$umi_tag'
             #end if
             --method $method --edit-distance-threshold $edit_distance_threshold
             $paired $spliced_is_unique --soft-clip-threshold $soft_clip_threshold
@@ -50,7 +52,7 @@
         <param argument="--method" type="select" label="Method used to identify PCR duplicates within reads." help="All methods start by identifying the reads with the same mapping position">
             <option value="unique">Reads group share the exact same UMI</option>
             <option value="cluster">Identify clusters based on hamming distance</option>
-            <option value="directional">Identify clusters based on distance and counts</option>
+            <option value="directional">Identify clusters based on distance and counts, restrict network expansion by threshold</option>
         </param>
         <param name="edit_distance_threshold" argument="--edit-distance-threshold" type="integer" value="1" label="Edit distance threshold" help="For the adjacency and cluster methods the threshold for the edit distance to connect two UMIs in the network can be increased. The default value of 1 works best unless the UMI is very long (&gt;14bp)" />
         <param argument="--paired" type="boolean" truevalue="--paired" falsevalue="" label="BAM is paired end" help="This will also force the use of the template length to determine reads with the same mapping coordinates." />
@@ -58,7 +60,7 @@
         <param name="soft_clip_threshold" argument="--soft-clip-threshold" type="integer" value="4" label="Soft clip threshold" help="Mappers that soft clip, will sometimes do so rather than mapping a spliced read if there is only a small overhang over the exon junction. By setting this option, you can treat reads with at least this many bases soft-clipped at the 3' end as spliced." />
         <param name="read_length" argument="--read-length" type="boolean" truevalue="--read-length" falsevalue="" label="Use the read length as as a criterion when deduping" />
         <param name="whole_contig" argument="--whole-contig" type="boolean" truevalue="--whole-contig" falsevalue="" label="Consider all alignments to a single contig together" help="This is useful if you have aligned to a transcriptome multi-fasta" />
-        <param argument="--subset" type="float" min="0" max="1" value="1" label="Only consider a random selection of the reads" />
+        <param argument="--subset" type="float" min="0.0" max="1.0" value="1.0" label="Only consider a random fraction of the reads" help="Fraction (0.0-1.0) of the reads to use, chosen at random. The default of 1.0 uses all reads." />
         <param argument="--chrom" type="boolean" truevalue="--chrom" falsevalue="" label="Only consider a single chromosome" />
         <param name="per_contig" argument="--per-contig" type="boolean" truevalue="--per-contig" falsevalue="" label="Deduplicate per contig" help="Field 3 in BAM; RNAME. All reads with the same contig will be considered to have the same alignment position. This is useful if your library prep generates PCR duplicates with non identical alignment positions such as CEL-Seq. In this case, you would align to a reference transcriptome with one transcript per gene" />
         <param name="per_gene" argument="--per-gene" type="boolean" truevalue="--per-gene" falsevalue="" label="Deduplicate per gene" help="As above except with this option you can align to a reference transcriptome with more than one transcript per gene. You need to also provide a map of genes to transcripts. This will also add a metacontig ('MC') tag to the output BAM file." />
@@ -73,17 +75,11 @@
     </outputs>
     <tests>
         <test>
-            <param name="input" value="group_in1.sam" ftype="sam" />
-            <param name="extract_umi_method" value="read_id" />
-            <param name="method" value="unique" />
-            <output name="output" file="group_out1.bam" />
-        </test>
-        <test>
             <param name="input" value="group_in2.bam" ftype="bam" />
             <param name="extract_umi_method" value="read_id" />
             <param name="paired" value="True" />
             <param name="method" value="unique" />
-            <output name="output" file="group_out2.bam" />
+            <output name="output" file="group_out2.bam" ftype="bam" sort="True" />
         </test>
         <test>
             <param name="input" value="group_in3.bam" ftype="bam" />
@@ -91,7 +87,7 @@
             <param name="group_output" value="True" />
             <param name="method" value="unique" />
             <output name="group_out" file="group_out3.tab" />
-            <output name="output" file="group_out3.bam" />
+            <output name="output" file="group_out3.bam" ftype="bam" sort="True" />
         </test>
         <test>
             <param name="input" value="group_in4.bam" ftype="bam" />
@@ -99,21 +95,21 @@
             <param name="umi_tag" value="BX" />
             <param name="method" value="unique" />
             <output name="group_out" file="group_out4.tab" />
-            <output name="output" file="group_out4.bam" />
+            <output name="output" file="group_out4.bam" ftype="bam" sort="True" />
         </test>
         <test>
             <param name="input" value="group_in5.bam" ftype="bam" />
             <param name="extract_umi_method" value="read_id" />
             <param name="umi_tag" value="BX" />
             <param name="method" value="cluster" />
-            <output name="output" file="group_out5.bam" />
+            <output name="output" file="group_out5.bam" ftype="bam" sort="True" />
         </test>
         <test>
             <param name="input" value="group_in6.bam" ftype="bam" />
             <param name="extract_umi_method" value="read_id" />
             <param name="umi_tag" value="BX" />
             <param name="method" value="directional" />
-            <output name="output" file="group_out6.bam" />
+            <output name="output" file="group_out6.bam" ftype="bam" sort="True" />
         </test>
     </tests>
     <help><![CDATA[