Mercurial > repos > jjohnson > fgbio_group_reads_by_umi

--- a/fgbio_group_reads_by_umi.xml	Sun Feb 21 23:40:34 2021 +0000
+++ b/fgbio_group_reads_by_umi.xml	Wed Feb 24 13:02:09 2021 +0000
@@ -51,10 +51,10 @@
                 </help>
             </param>
             <param argument="--min-map-q" type="integer" value="" optional="true" label="Minimum mapping quality" help="Default: 30"/>
-            <param argument="--raw-tag" type="text" value="" label="The tag containing the raw UMI" help="Default: RX">
+            <param argument="--raw-tag" type="text" value="" optional="true" label="The tag containing the raw UMI" help="Default: RX">
                 <expand macro="sam_tag_validator"/>
             </param>
-            <param argument="--assign-tag" type="text" value="" label="The output tag for UMI grouping" help="Default: MI">
+            <param argument="--assign-tag" type="text" value="" optional="true" label="The output tag for UMI grouping" help="Default: MI">
                 <expand macro="sam_tag_validator"/>
             </param>
             <param argument="--include-non-pf-reads" type="select" value="true" optional="true" label="Include non-PF reads">
@@ -68,8 +68,14 @@
     <outputs>
         <data name="family_size_histogram" format="tabular" >
             <filter>output_counts == True</filter>
+            <actions>
+                <action name="comment_lines" type="metadata" default="1" />
+                <action name="column_names" type="metadata" default="family_size,count,fraction,fraction_gt_or_eq_family_size" />
+            </actions>
         </data>
-        <data name="output" format="bam" />
+        <data name="output" format="unsorted.bam" >
+            <expand macro="sort_order_change_format" />
+        </data>
     </outputs>
     <help><![CDATA[
 **fgbio GroupReadsByUmi**
--- a/macros.xml	Sun Feb 21 23:40:34 2021 +0000
+++ b/macros.xml	Wed Feb 24 13:02:09 2021 +0000
@@ -17,22 +17,33 @@
     </xml>
     <xml name="sam_sort_order">
         <param argument="--sort-order" type="select" optional="true" label="Sort BAM by">
+            <option value="TemplateCoordinate">TemplateCoordinate</option>
             <option value="Coordinate">Coordinate</option>
             <option value="Queryname">Queryname</option>
             <option value="Random">Random</option>
             <option value="RandomQuery">RandomQuery</option>
         </param>
     </xml>
+
+    <xml name="sort_order_change_format">
+        <change_format> <!-- each "value" must match a sam_sort_order option value exactly (e.g. Queryname, not QueryName) -->
+            <when input="sort_order" value="Coordinate" format="bam" />
+            <when input="sort_order" value="TemplateCoordinate" format="bam" />
+            <when input="sort_order" value="Queryname" format="unsorted.bam" />
+            <when input="sort_order" value="Random" format="unsorted.bam" />
+            <when input="sort_order" value="RandomQuery" format="unsorted.bam" />
+        </change_format>
+    </xml>

     <token name="@READ_STRUCTURES_HELP@"><![CDATA[
 **Read Structures**

 Read structures are made up of <number><operator> pairs much like the CIGAR string in BAM files. Four kinds of operators are recognized:

-    T identifies a template read
-    B identifies a sample barcode read
-    M identifies a unique molecular index read
-    S identifies a set of bases that should be skipped or ignored
+ -  T identifies a template read
+ -  B identifies a sample barcode read
+ -  M identifies a unique molecular index read
+ -  S identifies a set of bases that should be skipped or ignored

 The last <number><operator> pair may be specified using a + sign instead of number to denote “all remaining bases”. This is useful if, e.g., fastqs have been trimmed and contain reads of varying length. For example to convert a paired-end run with an index read and where the first 5 bases of R1 are a UMI and the second five bases are monotemplate you might specify:
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml.bak	Wed Feb 24 13:02:09 2021 +0000
@@ -0,0 +1,56 @@
+<macros>
+    <token name="@TOOL_VERSION@">1.3.0</token>
+    <token name="@VERSION_SUFFIX@">0</token>
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="@TOOL_VERSION@">fgbio</requirement>
+            <yield/>
+        </requirements>
+    </xml>
+    <token name="@READ_STRUCTURE_PATTERN@">(([1-9][0-9]*[TBMS])*([+]|[1-9][0-9]*)[TBMS])</token>
+    <token name="@READ_STRUCTURES_PATTERN@">@READ_STRUCTURE_PATTERN@(\s@READ_STRUCTURE_PATTERN@)*</token>
+    <xml name="read_structures_validator">
+            <validator type="regex" message="">^@READ_STRUCTURES_PATTERN@$</validator>
+    </xml>
+    <xml name="sam_tag_validator">
+            <validator type="regex" message="">^[A-Za-z][A-Za-z]$</validator>
+    </xml>
+    <xml name="sam_sort_order">
+        <param argument="--sort-order" type="select" optional="true" label="Sort BAM by">
+            <option value="Coordinate">Coordinate</option>
+            <option value="Queryname">Queryname</option>
+            <option value="Random">Random</option>
+            <option value="RandomQuery">RandomQuery</option>
+        </param>
+    </xml>
+
+    <token name="@READ_STRUCTURES_HELP@"><![CDATA[
+**Read Structures**
+
+Read structures are made up of <number><operator> pairs much like the CIGAR string in BAM files. Four kinds of operators are recognized:
+
+    - T identifies a template read
+    - B identifies a sample barcode read
+    - M identifies a unique molecular index read
+    - S identifies a set of bases that should be skipped or ignored
+
+The last <number><operator> pair may be specified using a + sign instead of number to denote “all remaining bases”. This is useful if, e.g., fastqs have been trimmed and contain reads of varying length. For example to convert a paired-end run with an index read and where the first 5 bases of R1 are a UMI and the second five bases are monotemplate you might specify:
+
+::
+
+    --input r1.fq r2.fq i1.fq --read-structures 5M5S+T +T +B
+
+Alternatively, if you know your reads are of fixed length, you could specify:
+
+::
+
+    --input r1.fq r2.fq i1.fq --read-structures 5M5S65T 75T 8B
+
+
+]]></token>
+    <xml name="citations">
+        <citations>
+            <yield />
+        </citations>
+    </xml>
+</macros>