Mercurial > repos > jjohnson > fgbio_fastq_to_bam

--- a/fgbio_fastq_to_bam.xml	Sun Feb 21 23:40:09 2021 +0000
+++ b/fgbio_fastq_to_bam.xml	Wed Feb 24 13:01:53 2021 +0000
@@ -76,7 +76,9 @@
         </section>
     </inputs>
     <outputs>
-        <data name="output" format="unsorted.bam" />
+        <data name="output" format="bam" > <!-- format="bam" is the fallback datatype; the change_format expanded from the macro below overrides it when one of its conditions matches -->
+            <expand macro="sort_order_change_format" />
+        </data> <!-- NOTE(review): with no sort order selected the output now stays "bam" (was "unsorted.bam"); confirm that is intended and that this tool exposes a sort_order input -->
     </outputs>
     <help><![CDATA[
 **fgbio FastqToBam**
--- a/macros.xml	Sun Feb 21 23:40:09 2021 +0000
+++ b/macros.xml	Wed Feb 24 13:01:53 2021 +0000
@@ -17,22 +17,33 @@
     </xml>
     <xml name="sam_sort_order">
         <param argument="--sort-order" type="select" optional="true" label="Sort BAM by">
+            <option value="TemplateCoordinate">TemplateCoordinate</option>
             <option value="Coordinate">Coordinate</option>
             <option value="Queryname">Queryname</option>
             <option value="Random">Random</option>
             <option value="RandomQuery">RandomQuery</option>
         </param>
     </xml>
+
+    <xml name="sort_order_change_format"> <!-- Output-datatype switch keyed on the sort_order select; each value must match a sam_sort_order option value exactly (comparison is case-sensitive) -->
+        <change_format>
+            <when input="sort_order" value="Coordinate" format="bam" />
+            <when input="sort_order" value="TemplateCoordinate" format="bam" />
+            <when input="sort_order" value="Queryname" format="unsorted.bam" /> <!-- spelled "Queryname" to match the option value; "QueryName" never matched -->
+            <when input="sort_order" value="Random" format="unsorted.bam" />
+            <when input="sort_order" value="RandomQuery" format="unsorted.bam" />
+        </change_format>
+    </xml>

     <token name="@READ_STRUCTURES_HELP@"><![CDATA[
 **Read Structures**

 Read structures are made up of <number><operator> pairs much like the CIGAR string in BAM files. Four kinds of operators are recognized:

-    T identifies a template read
-    B identifies a sample barcode read
-    M identifies a unique molecular index read
-    S identifies a set of bases that should be skipped or ignored
+ -  T identifies a template read
+ -  B identifies a sample barcode read
+ -  M identifies a unique molecular index read
+ -  S identifies a set of bases that should be skipped or ignored

 The last <number><operator> pair may be specified using a + sign instead of number to denote “all remaining bases”. This is useful if, e.g., fastqs have been trimmed and contain reads of varying length. For example to convert a paired-end run with an index read and where the first 5 bases of R1 are a UMI and the second five bases are monotemplate you might specify:
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml.bak	Wed Feb 24 13:01:53 2021 +0000
@@ -0,0 +1,56 @@
+<macros>
+    <token name="@TOOL_VERSION@">1.3.0</token>
+    <token name="@VERSION_SUFFIX@">0</token>
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="@TOOL_VERSION@">fgbio</requirement>
+            <yield/>
+        </requirements>
+    </xml>
+    <token name="@READ_STRUCTURE_PATTERN@">(([1-9][0-9]*[TBMS])*([+]|[1-9][0-9]*)[TBMS])</token>
+    <token name="@READ_STRUCTURES_PATTERN@">@READ_STRUCTURE_PATTERN@(\s@READ_STRUCTURE_PATTERN@)*</token>
+    <xml name="read_structures_validator">
+            <validator type="regex" message="">^@READ_STRUCTURES_PATTERN@$</validator>
+    </xml>
+    <xml name="sam_tag_validator">
+            <validator type="regex" message="">^[A-Za-z][A-Za-z]$</validator>
+    </xml>
+    <xml name="sam_sort_order">
+        <param argument="--sort-order" type="select" optional="true" label="Sort BAM by">
+            <option value="Coordinate">Coordinate</option>
+            <option value="Queryname">Queryname</option>
+            <option value="Random">Random</option>
+            <option value="RandomQuery">RandomQuery</option>
+        </param>
+    </xml>
+
+    <token name="@READ_STRUCTURES_HELP@"><![CDATA[
+**Read Structures**
+
+Read structures are made up of <number><operator> pairs much like the CIGAR string in BAM files. Four kinds of operators are recognized:
+
+    - T identifies a template read
+    - B identifies a sample barcode read
+    - M identifies a unique molecular index read
+    - S identifies a set of bases that should be skipped or ignored
+
+The last <number><operator> pair may be specified using a + sign instead of number to denote “all remaining bases”. This is useful if, e.g., fastqs have been trimmed and contain reads of varying length. For example to convert a paired-end run with an index read and where the first 5 bases of R1 are a UMI and the second five bases are monotemplate you might specify:
+
+::
+
+    --input r1.fq r2.fq i1.fq --read-structures 5M5S+T +T +B
+
+Alternatively, if you know your reads are of fixed length, you could specify:
+
+::
+
+    --input r1.fq r2.fq i1.fq --read-structures 5M5S65T 75T 8B
+
+
+]]></token>
+    <xml name="citations">
+        <citations>
+            <yield />
+        </citations>
+    </xml>
+</macros>