# HG changeset patch
# User jjohnson
# Date 1614171729 0
# Node ID 5688163088599d459427d89cbe46a17670792dd5
# Parent 900cd28657689b05f8acb36034967fd34da42943
"planemo upload commit 692ea558cbbefee93859dc2b005fab5ac4970eb8"
diff -r 900cd2865768 -r 568816308859 fgbio_group_reads_by_umi.xml
--- a/fgbio_group_reads_by_umi.xml Sun Feb 21 23:40:34 2021 +0000
+++ b/fgbio_group_reads_by_umi.xml Wed Feb 24 13:02:09 2021 +0000
@@ -51,10 +51,10 @@
-
+
-
+
@@ -68,8 +68,14 @@
output_counts == True
+
+
+
+
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+ pairs much like the CIGAR string in BAM files. Four kinds of operators are recognized:
- T identifies a template read
- B identifies a sample barcode read
- M identifies a unique molecular index read
- S identifies a set of bases that should be skipped or ignored
+ - T identifies a template read
+ - B identifies a sample barcode read
+ - M identifies a unique molecular index read
+ - S identifies a set of bases that should be skipped or ignored
The last pair may be specified using a + sign instead of number to denote “all remaining bases”. This is useful if, e.g., fastqs have been trimmed and contain reads of varying length. For example to convert a paired-end run with an index read and where the first 5 bases of R1 are a UMI and the second five bases are monotemplate you might specify:
diff -r 900cd2865768 -r 568816308859 macros.xml.bak
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml.bak Wed Feb 24 13:02:09 2021 +0000
@@ -0,0 +1,56 @@
+
+ 1.3.0
+ 0
+
+
+ fgbio
+
+
+
+ (([1-9][0-9]*[TBMS])*([+]|[1-9][0-9]*)[TBMS])
+ @READ_STRUCTURE_PATTERN@(\s@READ_STRUCTURE_PATTERN@)*
+
+ ^@READ_STRUCTURES_PATTERN@$
+
+
+ ^[A-Za-z][A-Za-z]$
+
+
+
+
+
+
+
+
+
+
+Read structures are made up of <number><operator> pairs much like the CIGAR string in BAM files. Four kinds of operators are recognized:
+
+ - T identifies a template read
+ - B identifies a sample barcode read
+ - M identifies a unique molecular index read
+ - S identifies a set of bases that should be skipped or ignored
+
+The last pair may be specified using a + sign instead of a number to denote “all remaining bases”. This is useful if, e.g., fastqs have been trimmed and contain reads of varying length. For example, to convert a paired-end run with an index read, where the first 5 bases of R1 are a UMI and the next five bases are monotemplate, you might specify:
+
+::
+
+ --input r1.fq r2.fq i1.fq --read-structures 5M5S+T +T +B
+
+Alternatively, if you know your reads are of fixed length, you could specify:
+
+::
+
+ --input r1.fq r2.fq i1.fq --read-structures 5M5S65T 75T 8B
+
+
+]]>
+
+
+
+
+
+