# HG changeset patch
# User iuc
# Date 1539539089 14400
# Node ID 740ce0a18f0d3ed40299939055f946fe3bb34e35
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/samtools/samtools_merge commit b3426aed6615742d96dfb8f7346a9e0d4e391a99
diff -r 000000000000 -r 740ce0a18f0d macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Sun Oct 14 13:44:49 2018 -0400
@@ -0,0 +1,172 @@
+
+
+
+ samtools
+
+
+
+ 1.9
+ #set $flags = sum(map(int, str($filter).split(',')))
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ^[^\s'\":]+(:\d+(-\d+){0,1}){0,1}$
+
+
+
+
+
+
+
+
+
+
+
+
+ @misc{SAM_def,
+ title={Definition of SAM/BAM format},
+ url = {https://samtools.github.io/hts-specs/},}
+
+ 10.1093/bioinformatics/btp352
+ 10.1093/bioinformatics/btr076
+ 10.1093/bioinformatics/btr509
+
+ @misc{Danecek_et_al,
+ Author={Danecek, P., Schiffels, S., Durbin, R.},
+ title={Multiallelic calling model in bcftools (-m)},
+ url = {http://samtools.github.io/bcftools/call-m.pdf},}
+
+
+ @misc{Durbin_VCQC,
+ Author={Durbin, R.},
+ title={Segregation based metric for variant call QC},
+ url = {http://samtools.github.io/bcftools/rd-SegBias.pdf},}
+
+
+ @misc{Li_SamMath,
+ Author={Li, H.},
+ title={Mathematical Notes on SAMtools Algorithms},
+ url = {http://www.broadinstitute.org/gatk/media/docs/Samtools.pdf},}
+
+
+ @misc{SamTools_github,
+ title={SAMTools GitHub page},
+ url = {https://github.com/samtools/samtools},}
+
+
+
+
+ &1 | grep Version]]>
+
+
+
+
+
+
+
diff -r 000000000000 -r 740ce0a18f0d samtools_merge.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/samtools_merge.xml Sun Oct 14 13:44:49 2018 -0400
@@ -0,0 +1,129 @@
+
+ merge multiple sorted alignment files
+
+ macros.xml
+
+
+
+
+ &2 echo "inconsistently sorted input" &&
+ exit 1 &&
+ #end if
+#end for
+
+samtools merge
+-@ \$addthreads
+-s $seed
+## TODO force overwrite seems necessary (but I do not understand why ...)
+-f
+## Galaxy provides only default compression
+## #if $compression == 'levelone'
+## -1
+## #else if $compression == 'uncompressed'
+## -u
+## #end if
+#if str($headerbam) != 'None'
+ -h '$headerbam'
+#end if
+#if $sortby=='name'
+ -n
+#end if
+## TODO since galaxy can't represent this as data type at the moment this option is unsupported
+## -t TAG The input alignments have been sorted by the value of TAG, then by either position or name (if -n is given).
+#if str($region) != ''
+ -R '$region'
+#end if
+## Attach an RG tag to each alignment. The tag value is inferred from file names.
+## -r
+## TODO -r makes no sense with the link names, is there some data set metadata (tags,...) that could be used?
+$idrg
+$idpg
+$output
+#for $i, $bam in enumerate( $bamfiles ):
+ ${i}.sam
+#end for
+ ]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+**What it does**
+Merge multiple sorted alignment files, producing a single sorted output file that contains all the input records and maintains the existing sort order.
+
+If a file to take @headers from is specified, the @SQ headers of input files will be merged into the specified header; otherwise they will be merged into a composite header created from the input headers. If in the process of merging @SQ lines for coordinate sorted input files, a conflict arises as to the order (for example input1.bam has @SQ for a,b,c and input2.bam has b,a,c) then the resulting output file will need to be re-sorted back into coordinate order.
+
+Unless the @PG/@RG headers are made unique when merging @RG and @PG records into the output header, any IDs found to be duplicates of existing IDs in the output header will have a suffix appended to differentiate them from similar header records from other files, and the read records will be updated to reflect this.
+
+
+
diff -r 000000000000 -r 740ce0a18f0d test-data/2.merge.expected-samin.bam
Binary file test-data/2.merge.expected-samin.bam has changed
diff -r 000000000000 -r 740ce0a18f0d test-data/2.merge.expected.bam
Binary file test-data/2.merge.expected.bam has changed
diff -r 000000000000 -r 740ce0a18f0d test-data/2.merge.expected.sam
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/2.merge.expected.sam Sun Oct 14 13:44:49 2018 -0400
@@ -0,0 +1,67 @@
+@HD VN:1.4
+@SQ SN:insert LN:599
+@SQ SN:ref1 LN:45
+@SQ SN:ref2 LN:40
+@SQ SN:ref3 LN:4
+@RG ID:fish PG:donkey
+@RG ID:cow PU:13_&^&&*(:332
+@RG PU:*9u8jkjjkjd: ID:colt
+@RG ID:fish-55424A4 PG:llama
+@RG ID:cow-3A2CCEF5 PU:13_&^&&*(:332 PG:donkey-4861F4EF
+@RG PU:*9u8jkjjkjd: ID:colt-6ADB4A65
+@RG ID:fish-39E5EF
+@RG ID:cow-1802EEEC PU:13_&^&&*(:332
+@RG PU:*9u8jkjjkjd: ID:colt-7EC68B3F
+@PG ID:bull PP:donkey
+@PG ID:donkey
+@PG ID:moose
+@PG PP:moose ID:cow
+@PG ID:llama
+@PG ID:bull-2B019719 PP:donkey-4861F4EF
+@PG ID:donkey-4861F4EF
+@PG ID:bull-60104A41 PP:donkey-2EE20DF8
+@PG ID:donkey-2EE20DF8
+@CO
+@CO Do you know?
+@CO Do you know?
+@CO Another comment from test_input_1_c
+r000 99 insert 50 30 10M = 80 30 ATTTAGCTAC AAAAAAAAAA RG:Z:cow PG:Z:bull
+r000 211 insert 80 30 10M = 50 -30 CCCAATCATT AAAAAAAAAA RG:Z:cow PG:Z:bull
+r001 163 ref1 7 30 8M4I4M1D3M = 37 39 TTAGATAAAGAGGATACTG * XX:B:S,12561,2,20,112 YY:i:100 RG:Z:fish
+r005 163 ref1 7 30 8M4I4M1D3M = 37 39 TTAGATAAAGAGGATACTG * XX:B:S,12561,2,20,112 YY:i:100 RG:Z:colt-6ADB4A65 PG:Z:donkey-4861F4EF
+r008 163 ref1 7 30 8M4I4M1D3M = 37 39 TTAGATAAAGAGGATACTG * XX:B:S,12561,2,20,112 YY:i:100 RG:Z:colt-7EC68B3F PG:Z:donkey-2EE20DF8
+r002 0 ref1 9 30 1S2I6M1P1I1P1I4M2I * 0 0 AAAAGATAAGGGATAAA * XA:Z:abc XB:i:-10
+r003 0 ref1 9 30 5H6M * 0 0 AGCTAA * RG:Z:cow
+r006 0 ref1 9 30 1S2I6M1P1I1P1I4M2I * 0 0 AAAAGATAAGGGATAAA * XA:Z:abc XB:i:-10 RG:Z:colt-6ADB4A65 PG:Z:donkey-4861F4EF
+r007 0 ref1 9 30 5H6M * 0 0 AGCTAA * RG:Z:colt-6ADB4A65 PG:Z:donkey-4861F4EF
+r009 0 ref1 9 30 1S2I6M1P1I1P1I4M2I * 0 0 AAAAGATAAGGGATAAA * XA:Z:abc XB:i:-10 RG:Z:colt-7EC68B3F PG:Z:donkey-2EE20DF8
+r010 0 ref1 9 30 5H6M * 0 0 AGCTAA * RG:Z:colt-7EC68B3F PG:Z:donkey-2EE20DF8
+r004 0 ref1 16 30 6M14N1I5M * 0 0 ATAGCTCTCAGC * RG:Z:colt
+r007 0 ref1 16 30 6M14N1I5M * 0 0 ATAGCTCTCAGC * RG:Z:colt-6ADB4A65 PG:Z:donkey-4861F4EF
+r010 0 ref1 16 30 6M14N1I5M * 0 0 ATAGCTCTCAGC * RG:Z:colt-7EC68B3F PG:Z:donkey-2EE20DF8
+r003 16 ref1 29 30 6H5M * 0 0 TAGGC * RG:Z:cow
+r006 16 ref1 29 30 6H5M * 0 0 TAGGC * RG:Z:colt-6ADB4A65 PG:Z:donkey-4861F4EF
+r009 16 ref1 29 30 6H5M * 0 0 TAGGC * RG:Z:colt-7EC68B3F PG:Z:donkey-2EE20DF8
+r001 83 ref1 37 30 9M = 7 -39 CAGCGCCAT * RG:Z:fish
+r005 83 ref1 37 30 9M = 7 -39 CAGCGCCAT * RG:Z:colt-6ADB4A65 PG:Z:donkey-4861F4EF
+r008 83 ref1 37 30 9M = 7 -39 CAGCGCCAT * RG:Z:colt-7EC68B3F PG:Z:donkey-2EE20DF8
+x1 0 ref2 1 30 20M * 0 0 AGGTTTTATAAAACAAATAA * RG:Z:colt PG:Z:bull
+x7 0 ref2 1 30 20M * 0 0 AGGTTTTATAAAACAAATAA * RG:Z:cow-3A2CCEF5 PG:Z:bull-2B019719
+x10 0 ref2 1 30 20M * 0 0 AGGTTTTATAAAACAAATAA * RG:Z:cow-1802EEEC PG:Z:bull-60104A41
+x2 0 ref2 2 30 21M * 0 0 GGTTTTATAAAACAAATAATT ????????????????????? RG:Z:colt PG:Z:bull
+x8 0 ref2 2 30 21M * 0 0 GGTTTTATAAAACAAATAATT ????????????????????? RG:Z:cow-3A2CCEF5 PG:Z:bull-2B019719
+x11 0 ref2 2 30 21M * 0 0 GGTTTTATAAAACAAATAATT ????????????????????? RG:Z:cow-1802EEEC PG:Z:bull-60104A41
+x3 0 ref2 6 30 9M4I13M * 0 0 TTATAAAACAAATAATTAAGTCTACA ?????????????????????????? RG:Z:fish PG:Z:bull
+x9 0 ref2 6 30 9M4I13M * 0 0 TTATAAAACAAATAATTAAGTCTACA ?????????????????????????? RG:Z:cow-3A2CCEF5 PG:Z:bull-2B019719
+x12 0 ref2 6 30 9M4I13M * 0 0 TTATAAAACAAATAATTAAGTCTACA ?????????????????????????? RG:Z:cow-1802EEEC PG:Z:bull-60104A41
+x4 0 ref2 10 30 25M * 0 0 CAAATAATTAAGTCTACAGAGCAAC ????????????????????????? RG:Z:fish PG:Z:bull
+x10 0 ref2 10 30 25M * 0 0 CAAATAATTAAGTCTACAGAGCAAC ????????????????????????? RG:Z:cow-3A2CCEF5 PG:Z:bull-2B019719
+x13 0 ref2 10 30 25M * 0 0 CAAATAATTAAGTCTACAGAGCAAC ????????????????????????? RG:Z:cow-1802EEEC PG:Z:bull-60104A41
+x5 0 ref2 12 30 24M * 0 0 AATAATTAAGTCTACAGAGCAACT ???????????????????????? RG:Z:fish PG:Z:bull
+x11 0 ref2 12 30 24M * 0 0 AATAATTAAGTCTACAGAGCAACT ???????????????????????? RG:Z:cow-3A2CCEF5 PG:Z:bull-2B019719
+x14 0 ref2 12 30 24M * 0 0 AATAATTAAGTCTACAGAGCAACT ???????????????????????? RG:Z:cow-1802EEEC PG:Z:bull-60104A41
+x6 0 ref2 14 30 23M * 0 0 TAATTAAGTCTACAGAGCAACTA ??????????????????????? RG:Z:cow
+x12 0 ref2 14 30 23M * 0 0 TAATTAAGTCTACAGAGCAACTA ??????????????????????? RG:Z:cow-3A2CCEF5 PG:Z:bull-2B019719
+x15 0 ref2 14 30 23M * 0 0 TAATTAAGTCTACAGAGCAACTA ??????????????????????? RG:Z:cow-1802EEEC PG:Z:bull-60104A41
+u1 4 * 0 30 23M * 0 0 TAATTAAGTCTACAGAAAAAAAA ???????????????????????
+u2 4 * 0 30 * * 0 0 TAATTAAGTCTACAGAAAAAAAA ???????????????????????
diff -r 000000000000 -r 740ce0a18f0d test-data/4.merge.expected.bam
Binary file test-data/4.merge.expected.bam has changed
diff -r 000000000000 -r 740ce0a18f0d test-data/6.merge.expected.bam
Binary file test-data/6.merge.expected.bam has changed
diff -r 000000000000 -r 740ce0a18f0d test-data/7.merge.expected.bam
Binary file test-data/7.merge.expected.bam has changed
diff -r 000000000000 -r 740ce0a18f0d test-data/test_input_1_a.bam
Binary file test-data/test_input_1_a.bam has changed
diff -r 000000000000 -r 740ce0a18f0d test-data/test_input_1_a.sam
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_input_1_a.sam Sun Oct 14 13:44:49 2018 -0400
@@ -0,0 +1,28 @@
+@HD VN:1.4
+@SQ SN:insert LN:599
+@SQ SN:ref1 LN:45
+@SQ SN:ref2 LN:40
+@SQ SN:ref3 LN:4
+@RG ID:fish PG:donkey
+@RG ID:cow PU:13_&^&&*(:332
+@RG PU:*9u8jkjjkjd: ID:colt
+@PG ID:bull PP:donkey
+@PG ID:donkey
+@PG ID:moose
+@PG PP:moose ID:cow
+@CO
+r000 99 insert 50 30 10M = 80 30 ATTTAGCTAC AAAAAAAAAA RG:Z:cow PG:Z:bull
+r000 211 insert 80 30 10M = 50 -30 CCCAATCATT AAAAAAAAAA RG:Z:cow PG:Z:bull
+r001 163 ref1 7 30 8M4I4M1D3M = 37 39 TTAGATAAAGAGGATACTG * XX:B:S,12561,2,20,112 YY:i:100 RG:Z:fish PG:Z:donkey
+r002 0 ref1 9 30 1S2I6M1P1I1P1I4M2I * 0 0 AAAAGATAAGGGATAAA * XA:Z:abc XB:i:-10 PG:Z:donkey
+r003 0 ref1 9 30 5H6M * 0 0 AGCTAA * RG:Z:cow
+r004 0 ref1 16 30 6M14N1I5M * 0 0 ATAGCTCTCAGC * RG:Z:colt PG:Z:donkey
+r003 16 ref1 29 30 6H5M * 0 0 TAGGC * RG:Z:cow PG:Z:donkey
+r001 83 ref1 37 30 9M = 7 -39 CAGCGCCAT * RG:Z:fish PG:Z:donkey
+x1 0 ref2 1 30 20M * 0 0 AGGTTTTATAAAACAAATAA * RG:Z:colt PG:Z:bull
+x2 0 ref2 2 30 21M * 0 0 GGTTTTATAAAACAAATAATT ????????????????????? RG:Z:colt PG:Z:bull
+x3 0 ref2 6 30 9M4I13M * 0 0 TTATAAAACAAATAATTAAGTCTACA ?????????????????????????? RG:Z:fish PG:Z:bull
+x4 0 ref2 10 30 25M * 0 0 CAAATAATTAAGTCTACAGAGCAAC ????????????????????????? RG:Z:fish PG:Z:bull
+x5 0 ref2 12 30 24M * 0 0 AATAATTAAGTCTACAGAGCAACT ???????????????????????? RG:Z:fish PG:Z:bull
+x6 0 ref2 14 30 23M * 0 0 TAATTAAGTCTACAGAGCAACTA ??????????????????????? RG:Z:cow
+u1 4 * 0 30 23M * 0 0 TAATTAAGTCTACAGAAAAAAAA ???????????????????????
diff -r 000000000000 -r 740ce0a18f0d test-data/test_input_1_a_regex.sam
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_input_1_a_regex.sam Sun Oct 14 13:44:49 2018 -0400
@@ -0,0 +1,28 @@
+@HD VN:1.4
+@SQ SN:insert LN:599
+@SQ SN:ref1|this=that LN:45
+@SQ SN:ref2*HLA:1a:2:b LN:40
+@SQ SN:ref3 LN:4
+@RG ID:fish PG:donkey
+@RG ID:cow PU:13_&^&&*(:332
+@RG PU:*9u8jkjjkjd: ID:colt
+@PG ID:bull PP:donkey
+@PG ID:donkey
+@PG ID:moose
+@PG PP:moose ID:cow
+@CO
+r000 99 insert 50 30 10M = 80 30 ATTTAGCTAC AAAAAAAAAA RG:Z:cow PG:Z:bull
+r000 211 insert 80 30 10M = 50 -30 CCCAATCATT AAAAAAAAAA RG:Z:cow PG:Z:bull
+r001 163 ref1|this=that 7 30 8M4I4M1D3M = 37 39 TTAGATAAAGAGGATACTG * XX:B:S,12561,2,20,112 YY:i:100 RG:Z:fish PG:Z:colt
+r002 0 ref1|this=that 9 30 1S2I6M1P1I1P1I4M2I * 0 0 AAAAGATAAGGGATAAA * XA:Z:abc XB:i:-10 PG:Z:colt
+r003 0 ref1|this=that 9 30 5H6M * 0 0 AGCTAA * RG:Z:cow
+r004 0 ref1|this=that 16 30 6M14N1I5M * 0 0 ATAGCTCTCAGC * RG:Z:colt PG:Z:colt
+r003 16 ref1|this=that 29 30 6H5M * 0 0 TAGGC * RG:Z:cow PG:Z:colt
+r001 83 ref1|this=that 37 30 9M = 7 -39 CAGCGCCAT * RG:Z:fish PG:Z:colt
+x1 0 ref2*HLA:1a:2:b 1 30 20M * 0 0 AGGTTTTATAAAACAAATAA * RG:Z:colt PG:Z:bull
+x2 0 ref2*HLA:1a:2:b 2 30 21M * 0 0 GGTTTTATAAAACAAATAATT ????????????????????? RG:Z:colt PG:Z:bull
+x3 0 ref2*HLA:1a:2:b 6 30 9M4I13M * 0 0 TTATAAAACAAATAATTAAGTCTACA ?????????????????????????? RG:Z:fish PG:Z:bull
+x4 0 ref2*HLA:1a:2:b 10 30 25M * 0 0 CAAATAATTAAGTCTACAGAGCAAC ????????????????????????? RG:Z:fish PG:Z:bull
+x5 0 ref2*HLA:1a:2:b 12 30 24M * 0 0 AATAATTAAGTCTACAGAGCAACT ???????????????????????? RG:Z:fish PG:Z:bull
+x6 0 ref2*HLA:1a:2:b 14 30 23M * 0 0 TAATTAAGTCTACAGAGCAACTA ??????????????????????? RG:Z:cow
+u1 4 * 0 30 23M * 0 0 TAATTAAGTCTACAGAAAAAAAA ???????????????????????
diff -r 000000000000 -r 740ce0a18f0d test-data/test_input_1_b.bam
Binary file test-data/test_input_1_b.bam has changed
diff -r 000000000000 -r 740ce0a18f0d test-data/test_input_1_b.sam
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_input_1_b.sam Sun Oct 14 13:44:49 2018 -0400
@@ -0,0 +1,24 @@
+@HD VN:1.4
+@SQ SN:insert LN:599
+@SQ SN:ref1 LN:45
+@SQ SN:ref2 LN:40
+@SQ SN:ref3 LN:4
+@PG ID:llama
+@RG ID:fish PG:llama
+@RG ID:cow PU:13_&^&&*(:332 PG:donkey
+@RG PU:*9u8jkjjkjd: ID:colt
+@PG ID:bull PP:donkey
+@PG ID:donkey
+@CO Do you know?
+r005 163 ref1 7 30 8M4I4M1D3M = 37 39 TTAGATAAAGAGGATACTG * XX:B:S,12561,2,20,112 YY:i:100 RG:Z:colt PG:Z:donkey
+r006 0 ref1 9 30 1S2I6M1P1I1P1I4M2I * 0 0 AAAAGATAAGGGATAAA * XA:Z:abc XB:i:-10 RG:Z:colt PG:Z:donkey
+r007 0 ref1 9 30 5H6M * 0 0 AGCTAA * RG:Z:colt PG:Z:donkey
+r007 0 ref1 16 30 6M14N1I5M * 0 0 ATAGCTCTCAGC * RG:Z:colt PG:Z:donkey
+r006 16 ref1 29 30 6H5M * 0 0 TAGGC * RG:Z:colt PG:Z:donkey
+r005 83 ref1 37 30 9M = 7 -39 CAGCGCCAT * RG:Z:colt PG:Z:donkey
+x7 0 ref2 1 30 20M * 0 0 AGGTTTTATAAAACAAATAA * RG:Z:cow PG:Z:bull
+x8 0 ref2 2 30 21M * 0 0 GGTTTTATAAAACAAATAATT ????????????????????? RG:Z:cow PG:Z:bull
+x9 0 ref2 6 30 9M4I13M * 0 0 TTATAAAACAAATAATTAAGTCTACA ?????????????????????????? RG:Z:cow PG:Z:bull
+x10 0 ref2 10 30 25M * 0 0 CAAATAATTAAGTCTACAGAGCAAC ????????????????????????? RG:Z:cow PG:Z:bull
+x11 0 ref2 12 30 24M * 0 0 AATAATTAAGTCTACAGAGCAACT ???????????????????????? RG:Z:cow PG:Z:bull
+x12 0 ref2 14 30 23M * 0 0 TAATTAAGTCTACAGAGCAACTA ??????????????????????? RG:Z:cow PG:Z:bull
diff -r 000000000000 -r 740ce0a18f0d test-data/test_input_1_b_regex.sam
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_input_1_b_regex.sam Sun Oct 14 13:44:49 2018 -0400
@@ -0,0 +1,24 @@
+@HD VN:1.4
+@SQ SN:insert LN:599
+@SQ SN:ref2*HLA:1a:2:b LN:40
+@SQ SN:ref3 LN:4
+@SQ SN:ref1 LN:45
+@PG ID:llama_{a}
+@RG ID:fish-[1] PG:llama_{a}
+@RG ID:cow-[2] PU:13_&^&&*(:332 PG:donkey
+@RG PU:*9u8jkjjkjd: ID:colt
+@PG ID:bull PP:donkey
+@PG ID:donkey
+@CO Do you know?
+x7 0 ref2*HLA:1a:2:b 1 30 20M * 0 0 AGGTTTTATAAAACAAATAA * RG:Z:cow-[2] PG:Z:bull
+x8 0 ref2*HLA:1a:2:b 2 30 21M * 0 0 GGTTTTATAAAACAAATAATT ????????????????????? RG:Z:cow-[2] PG:Z:bull
+x9 0 ref2*HLA:1a:2:b 6 30 9M4I13M * 0 0 TTATAAAACAAATAATTAAGTCTACA ?????????????????????????? RG:Z:cow-[2] PG:Z:bull
+x10 0 ref2*HLA:1a:2:b 10 30 25M * 0 0 CAAATAATTAAGTCTACAGAGCAAC ????????????????????????? RG:Z:cow-[2] PG:Z:bull
+x11 0 ref2*HLA:1a:2:b 12 30 24M * 0 0 AATAATTAAGTCTACAGAGCAACT ???????????????????????? RG:Z:cow-[2] PG:Z:bull
+x12 0 ref2*HLA:1a:2:b 14 30 23M * 0 0 TAATTAAGTCTACAGAGCAACTA ??????????????????????? RG:Z:cow-[2] PG:Z:bull
+r005 163 ref1 7 30 8M4I4M1D3M = 37 39 TTAGATAAAGAGGATACTG * XX:B:S,12561,2,20,112 YY:i:100 RG:Z:colt PG:Z:donkey
+r006 0 ref1 9 30 1S2I6M1P1I1P1I4M2I * 0 0 AAAAGATAAGGGATAAA * XA:Z:abc XB:i:-10 RG:Z:colt PG:Z:donkey
+r007 0 ref1 9 30 5H6M * 0 0 AGCTAA * RG:Z:colt PG:Z:donkey
+r007 0 ref1 16 30 6M14N1I5M * 0 0 ATAGCTCTCAGC * RG:Z:colt PG:Z:donkey
+r006 16 ref1 29 30 6H5M * 0 0 TAGGC * RG:Z:colt PG:Z:donkey
+r005 83 ref1 37 30 9M = 7 -39 CAGCGCCAT * RG:Z:colt PG:Z:donkey
diff -r 000000000000 -r 740ce0a18f0d test-data/test_input_1_c.bam
Binary file test-data/test_input_1_c.bam has changed
diff -r 000000000000 -r 740ce0a18f0d test-data/test_input_1_c.sam
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_input_1_c.sam Sun Oct 14 13:44:49 2018 -0400
@@ -0,0 +1,23 @@
+@HD VN:1.4
+@SQ SN:ref1 LN:45
+@SQ SN:ref2 LN:40
+@RG ID:fish
+@RG ID:cow PU:13_&^&&*(:332
+@RG PU:*9u8jkjjkjd: ID:colt
+@PG ID:bull PP:donkey
+@PG ID:donkey
+@CO Do you know?
+@CO Another comment from test_input_1_c
+r008 163 ref1 7 30 8M4I4M1D3M = 37 39 TTAGATAAAGAGGATACTG * XX:B:S,12561,2,20,112 YY:i:100 RG:Z:colt PG:Z:donkey
+r009 0 ref1 9 30 1S2I6M1P1I1P1I4M2I * 0 0 AAAAGATAAGGGATAAA * XA:Z:abc XB:i:-10 RG:Z:colt PG:Z:donkey
+r010 0 ref1 9 30 5H6M * 0 0 AGCTAA * RG:Z:colt PG:Z:donkey
+r010 0 ref1 16 30 6M14N1I5M * 0 0 ATAGCTCTCAGC * RG:Z:colt PG:Z:donkey
+r009 16 ref1 29 30 6H5M * 0 0 TAGGC * RG:Z:colt PG:Z:donkey
+r008 83 ref1 37 30 9M = 7 -39 CAGCGCCAT * RG:Z:colt PG:Z:donkey
+x10 0 ref2 1 30 20M * 0 0 AGGTTTTATAAAACAAATAA * RG:Z:cow PG:Z:bull
+x11 0 ref2 2 30 21M * 0 0 GGTTTTATAAAACAAATAATT ????????????????????? RG:Z:cow PG:Z:bull
+x12 0 ref2 6 30 9M4I13M * 0 0 TTATAAAACAAATAATTAAGTCTACA ?????????????????????????? RG:Z:cow PG:Z:bull
+x13 0 ref2 10 30 25M * 0 0 CAAATAATTAAGTCTACAGAGCAAC ????????????????????????? RG:Z:cow PG:Z:bull
+x14 0 ref2 12 30 24M * 0 0 AATAATTAAGTCTACAGAGCAACT ???????????????????????? RG:Z:cow PG:Z:bull
+x15 0 ref2 14 30 23M * 0 0 TAATTAAGTCTACAGAGCAACTA ??????????????????????? RG:Z:cow PG:Z:bull
+u2 4 * 0 30 * * 0 0 TAATTAAGTCTACAGAAAAAAAA ???????????????????????