Repository 'sam2interval'
hg clone https://toolshed.g2.bx.psu.edu/repos/devteam/sam2interval

Changeset 0:8c737b8ddc45 (2013-08-26)
Next changeset 1:75557c0908a9 (2020-02-05)
Commit message:
Uploaded tool tarball.
added:
sam2interval.py
sam2interval.xml
test-data/sam2interval-test3.sam
test-data/sam2interval_noprintAll.dat
test-data/sam2interval_printAll.dat
test-data/sam2interval_with_unmapped_reads_noprintAll.dat
test-data/sam_bioinf_example.sam
b
diff -r 000000000000 -r 8c737b8ddc45 sam2interval.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/sam2interval.py Mon Aug 26 15:12:38 2013 -0400
[
@@ -0,0 +1,96 @@
+#!/usr/bin/env python
+
+import sys
+import optparse
+import re
+
+def stop_err( msg ):
+    sys.stderr.write( msg )
+    sys.exit()
+
+def main():
+    usage = """%prog [options]
+    
+options (listed below) default to 'None' if omitted
+    """
+    parser = optparse.OptionParser(usage=usage)
+
+    parser.add_option(
+        '-f','--input_sam_file',
+        metavar="INPUT_SAM_FILE",
+        dest='input_sam',
+        default = False,
+        help='Name of the SAM file to be filtered. STDIN is default')
+            
+    parser.add_option(
+        '-c','--flag_column',
+        dest='flag_col',
+        default = '2',
+        help='Column containing SAM bitwise flag. 1-based')
+        
+    parser.add_option(
+        '-s','--start_column',
+        dest='start_col',
+        default = '4',
+        help='Column containing position. 1-based')
+
+    parser.add_option(
+        '-g','--cigar_column',
+        dest='cigar_col',
+        default = '6',
+        help='Column containing CIGAR or extended CIGAR string')
+
+    parser.add_option(
+        '-r','--ref_column',
+        dest='ref_col',
+        default = '3',
+        help='Column containing name of the reference sequence coordinate. 1-based')
+        
+    parser.add_option(
+        '-e','--read_column',
+        dest='read_col',
+        default = '1',
+        help='Column containing read name. 1-based')
+
+    parser.add_option(
+        '-p','--print_all',
+        dest='prt_all',
+        action='store_true',
+        default = False,
+        help='Print coordinates and original SAM?')
+    
+    options, args = parser.parse_args()
+
+    if options.input_sam:
+        infile = open ( options.input_sam, 'r')
+    else:
+        infile = sys.stdin
+
+    cigar = re.compile( '\d+M|\d+N|\d+D|\d+P' )
+
+    print '#chrom\tstart\tend\tstrand\tread_name' # provide a (partial) header so that strand is automatically set in metadata
+
+    for line in infile:
+        line = line.rstrip( '\r\n' )
+        if line and not line.startswith( '#' ) and not line.startswith( '@' ) :
+            fields = line.split( '\t' )
+            start = int( fields[ int( options.start_col ) - 1 ] ) - 1
+            end = 0
+            for op in cigar.findall( fields[ int( options.cigar_col) - 1 ] ):
+                end += int( op[ 0:len( op ) - 1 ] )
+                
+            strand = '+' 
+            if bool( int( fields[ int( options.flag_col ) - 1 ] ) & 0x0010 ):
+                strand = '-'
+            read_name = fields[ int( options.read_col ) - 1 ]
+            ref_name  = fields[ int( options.ref_col ) - 1 ]
+            
+            if ref_name != '*':
+                # Do not print lines with unmapped reads that contain '*' instead of chromosome name        
+                if options.prt_all: 
+                    print '%s\t%s\t%s\t%s\t%s' % (ref_name, str(start), str(end+start), strand, line)
+                else:
+                    print '%s\t%s\t%s\t%s\t%s' % (ref_name, str(start), str(end+start), strand, read_name)
+
+if __name__ == "__main__": main()
+
b
diff -r 000000000000 -r 8c737b8ddc45 sam2interval.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/sam2interval.xml Mon Aug 26 15:12:38 2013 -0400
b
@@ -0,0 +1,72 @@
+<tool id="sam2interval" name="Convert SAM" version="1.0.1">
+  <description>to interval</description>
+  <command interpreter="python">sam2interval.py --input_sam_file=$input1 $print_all > $out_file1
+  </command>
+  <inputs>
+    <param format="sam" name="input1" type="data" label="Select dataset to convert"/>
+    <param name="print_all" type="select" label="Print all?" help="Do you want to retain original SAM fields? See example below.">
+        <option value="-p">Yes</option>
+        <option value="">No</option>
+    </param>
+  </inputs>
+ <outputs>
+    <data format="interval" name="out_file1" label="Converted Interval" />
+  </outputs>
+<tests>
+    <test>          
+        <param name="input1" value="sam_bioinf_example.sam" ftype="sam"/>
+        <param name="print_all" value="Yes"/>
+        <output name="out_file1" file="sam2interval_printAll.dat" ftype="interval"/>
+    </test>
+    <test>          
+        <param name="input1" value="sam_bioinf_example.sam" ftype="sam"/>
+        <param name="print_all" value="No"/>
+        <output name="out_file1" file="sam2interval_noprintAll.dat" ftype="interval"/>
+    </test>
+    <test>
+        <param name="input1" value="sam2interval-test3.sam" ftype="sam"/>
+        <param name="print_all" value="No"/>
+        <output name="out_file1" file="sam2interval_with_unmapped_reads_noprintAll.dat" ftype="interval"/>
+    </test>
+
+</tests>
+  <help>
+
+**What it does**
+
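+Converts positional information from a SAM dataset into interval format with a 0-based start and a 1-based end (i.e. half-open intervals). The CIGAR string of each SAM record is used to compute the end coordinate. Reads whose reference name is ``*`` (unmapped reads) are skipped.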
+
+-----
+
+**Example**
+
+Converting the following dataset::
+
+ r001 163 ref  7 30 8M2I4M1D3M = 37  39 TTAGATAAAGGATACTA *
+ r002   0 ref  9 30 3S6M1P1I4M *  0   0 AAAAGATAAGGATA    *
+ r003   0 ref  9 30       5H6M *  0   0 AGCTAA            * NM:i:1
+ r004   0 ref 16 30    6M14N5M *  0   0 ATAGCTTCAGC       *
+ r003  16 ref 29 30       6H5M *  0   0 TAGGC             * NM:i:0
+ r001  83 ref 37 30         9M =  7 -39 CAGCGCCAT         *
+
+into Interval format will produce the following if *Print all?* is set to **Yes**::
+
+ ref  6 22 + r001 163 ref  7 30 8M2I4M1D3M = 37  39 TTAGATAAAGGATACTA *
+ ref  8 19 + r002   0 ref  9 30 3S6M1P1I4M *  0   0 AAAAGATAAGGATA    *
+ ref  8 14 + r003   0 ref  9 30 5H6M       *  0   0 AGCTAA            * NM:i:1
+ ref 15 40 + r004   0 ref 16 30 6M14N5M    *  0   0 ATAGCTTCAGC       *
+ ref 28 33 - r003  16 ref 29 30 6H5M       *  0   0 TAGGC             * NM:i:0
+ ref 36 45 - r001  83 ref 37 30 9M         =  7 -39 CAGCGCCAT         *

+Setting  *Print all?* to **No** will generate the following::
+
+ ref  6 22 + r001
+ ref  8 19 + r002
+ ref  8 14 + r003
+ ref 15 40 + r004
+ ref 28 33 - r003
+ ref 36 45 - r001
+
+
+  </help>
+</tool>
b
diff -r 000000000000 -r 8c737b8ddc45 test-data/sam2interval-test3.sam
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sam2interval-test3.sam Mon Aug 26 15:12:38 2013 -0400
b
b'@@ -0,0 +1,1000 @@\n+@HD\tVN:1.0\tSO:unsorted\n+@SQ\tSN:chr10\tLN:135534747\n+@SQ\tSN:chr11\tLN:135006516\n+@SQ\tSN:chr12\tLN:133851895\n+@SQ\tSN:chr13\tLN:115169878\n+@SQ\tSN:chr14\tLN:107349540\n+@SQ\tSN:chr15\tLN:102531392\n+@SQ\tSN:chr16\tLN:90354753\n+@SQ\tSN:chr17\tLN:81195210\n+@SQ\tSN:chr18\tLN:78077248\n+@SQ\tSN:chr19\tLN:59128983\n+@SQ\tSN:chr1\tLN:249250621\n+@SQ\tSN:chr20\tLN:63025520\n+@SQ\tSN:chr21\tLN:48129895\n+@SQ\tSN:chr22\tLN:51304566\n+@SQ\tSN:chr2\tLN:243199373\n+@SQ\tSN:chr3\tLN:198022430\n+@SQ\tSN:chr4\tLN:191154276\n+@SQ\tSN:chr5\tLN:180915260\n+@SQ\tSN:chr6\tLN:171115067\n+@SQ\tSN:chr7\tLN:159138663\n+@SQ\tSN:chr8\tLN:146364022\n+@SQ\tSN:chr9\tLN:141213431\n+@SQ\tSN:chrM\tLN:16571\n+@SQ\tSN:chrX\tLN:155270560\n+@SQ\tSN:chrY\tLN:59373566\n+@PG\tID:Bowtie\tVN:0.12.7\tCL:"bowtie -q -p 4 -S --phred33-quals /galaxy/data/hg19/hg19canon/bowtie_index/hg19canon /galaxy/test_database/files/000/235/dataset_235440.dat"\n+GA5:3:1:24:1745#0/1\t16\tchrM\t6566\t255\t76M\t*\t0\t0\tACCCCGCCGGAGGAGGAGACCCCATTCTATACCAACACCTATTCTGATTTTTCGGTCACCCTGAAGTTTATATTCT\t(??<B?@A?@A?B>>>?A:A@@BB@?B@BA9BAA?B8BB?BAACBABBBBB@BBABB?BBBABBCBCBBCCCCBCB\tXA:i:0\tMD:Z:76\tNM:i:0\n+GA5:3:1:23:263#0/1\t16\tchrM\t4097\t255\t76M\t*\t0\t0\tCTACTTCTAACCTCCCTGTTCTTATGAATTCGAACAGCATACCCCCGATTCCGCTGCGACCAACTCATACACCTCC\t4845/324818.//78777686<;48;3667882557@;8/?@@<B==@=@A>@@@A@=:@B@BABBB=B>BB@BB\tXA:i:1\tMD:Z:55A20\tNM:i:1\n+GA5:3:1:24:1658#0/1\t16\tchrM\t14760\t255\t76M\t*\t0\t0\tCGCAAAACTAACCCCCTAATAAAATTAATTAACCACTCATTCATCGACCTCCCCACCCCATCCAACATCTCCGCAT\t8::@A?6@8A<?A@@B?BA;BBB@9>BBA@A=AB8?<BB@ABAABB@BBABBCC>BBACC@CACBCCBCBABCCCB\tXA:i:0\tMD:Z:7T68\tNM:i:1\n+GA5:3:1:24:1108#0/1\t16\tchrM\t1245\t255\t76M\t*\t0\t0\tCCTTCCTCAGCCTATATACCGCCATCTTCAGCAAACCCTGATGAAGGCTACAAAGTAAGCGCAAGTACCCACGTAA\t################66<==:?;<???@@<@@@8:AA@@A??BA??AA@AABB@@BB?AA>ABAB>BBB@CBBCB\tXA:i:0\tMD:Z:0T3G71\tNM:i:2\n+GA5:3:1:24:1272#0/1\t0\tchrM\t5318\t255\t76M\t*\t0\t0\tCCACCATCACCCTCCTTAACCTCTACTTCTACCT
ACGCCTAATCTACTCCACCTCAATCACACTACTCCCCATATC\t>?:<<(BB(A@BCBCC@+0@BBCC@BCBCCCCBCB?A<CC?ACCB@BCBCBBCBBCACCCCBCCBCBCBABBBBB?\tXA:i:0\tMD:Z:76\tNM:i:0\n+GA5:3:1:25:1989#0/1\t16\tchrM\t10935\t255\t76M\t*\t0\t0\tGACCCCCTAACAACCCCCCTCCTAATACTAACTACCTGACTCCTACCCCTCACAATCATGGCAAGCCAACGCCACT\t(2:?A@958=A=3?BAAA>0=@=BB><A>A>B>4@B?A=@;@BA:BCCB@BACBBBCCBCBBCCCCCCBCCCCBCB\tXA:i:0\tMD:Z:76\tNM:i:0\n+GA5:3:1:25:1877#0/1\t16\tchr1\t567535\t255\t76M\t*\t0\t0\tCCATCACAAGACCTCGTCCAACACGACACGTACTACGTTGTAGCCCCCTTCCACTATGTCCTATCAATAGGAGCTG\t###########################################<>4\'>==?B>8>AABABBBBBCBBBCCB=@4<B\tXA:i:0\tMD:Z:0T6T4A4A1T26A29\tNM:i:6\n+GA5:3:1:25:1203#0/1\t16\tchrM\t16408\t255\t76M\t*\t0\t0\tACCCCCGTGAAATCAATATCCCGCACAAGAGTGCTACTCTCCTCGCTCCGGGCCCATAACACTTGGGGGTAGCTAA\t.2/*3:?=?6:<6?<?<:7;:>=@7?=@??<?@A@8=@?;A@;A?A??AA?AAAAA?AAB=BAABBBBBBBB@BCB\tXA:i:0\tMD:Z:0T2T72\tNM:i:2\n+GA5:3:1:24:1720#0/1\t0\tchrM\t8685\t255\t76M\t*\t0\t0\tCCTCAAAACAAATGATAACCATACACAACACTAAAGGACGAACCTGATCTCTTATACTAGTATCCTTAATCATTTT\tBBBBCBCCCBCCCCCCCBBBCCCBCCCBCCBCBBBBBBBCCCBBBB@BBC@BBBBABBBB8BB@@BBB@BAAC@AB\tXA:i:1\tMD:Z:17G58\tNM:i:1\n+GA5:3:1:25:74#0/1\t16\tchrM\t4953\t255\t76M\t*\t0\t0\tCATCATAGCAGGCAGTTGAGGTGGATTAAACCAAACCCAGCTACGCAAAATCTTAGCATACTCCTCAATTACCCAC\t984;:8=7?<86?=7=;7=77529;5>@A>?A?@8AAB?<A??@=B@ABB@A@@B@BAA?B@BB?BBBBB?BBBAB\tXA:i:0\tMD:Z:76\tNM:i:0\n+GA5:3:1:26:129#0/1\t16\tchr1\t567928\t255\t76M\t*\t0\t0\tCTGGAGTGACTATATGGATGCCCCCCACCCTACCACACATTCGAAGAACCCGTATACATAAAATCTAGACAAAAAA\t/6557751501345777455::5::@8;?>65<=9>4=?>@@>A@>@<B@=@B@AAABABBBB@BABA@BBBBBBA\tXA:i:0\tMD:Z:76\tNM:i:0\n+GA5:3:1:27:1860#0/1\t16\tchrM\t6831\t255\t76M\t*\t0\t0\tCATAATCATCGCTATCCCCACCGGCGTCAAAGTATTTAGCTGACTCGCCACACTCCACGGAAGCAATATGAAATGA\t:72=724\'3=5/=84@<@>7B??A?7;;AAA<:AA>77<A=8:<@AAAB>B:B@BBBBBBBB@BBCCCBBCCC<CB\tXA:i:0\tMD:Z:76\tNM:i:0\n+GA5:3:1:26:303#0/1\t0\tchrM\t7433\t255\t76M\t*\t0\t0\tACATAAAATCTAGACAAAAAAGGAAGGAATCGAACCCCCCAAAGCTGGGTTCAAGCCAACCCCATGGCCTCCATGT\t2@@ACCCCC@=@C@6B6:8>86@3855@:>
>*0>299?3:2;/6-\'3>&\'-8679;2:##################\tXA:i:0\tMD:Z:48T26A0\tNM:i:2\n+GA5:3:1:27:509#0/1\t16\tchrM\t5343\t255\t76M\t*\t0\t0\tCTTCTACCTACGCCTAATCTACTCCACCTCAATCACACTACTCCCCATATCTAACAACGTAAAAATAAAATGACAG\t,<;A:;@C@AB:?BAAAAB@AB@CA@BCBCCCBCBCBABACBCCBCCBCBBBCCCCCACCCCCCCB'..b'69\t255\t76M\t*\t0\t0\tACCTCAGAAGTTTTTTTCTTCGCAGGATTTTTCTGAGCCTTTTACCACTCCAGCCTAGCCCCTACCCCCCAATTAG\tBBBCBCCB@BABBBBBBBCBBBABBBABBBBAA@BB@>@BBAAB@@A>?B<@59@?>>@:?>>>>>>>7@9.:=7=\tXA:i:0\tMD:Z:72C3\tNM:i:1\n+GA5:3:1:434:1170#0/1\t16\tchrM\t10363\t255\t76M\t*\t0\t0\tCTGGCCTATGAGTGACTACAAAAAGGATTAGACTGAACCGAATTGGTATATAGTTTAAACAAAACGAATGATTTCG\t77/89==?>=@;A?:<<=B@BBB@<@A?>B@?AA<B?AA?AB@B>BBBBBBBABBCBCBCBCCBCCCCCBCBCBCB\tXA:i:0\tMD:Z:36G39\tNM:i:1\n+GA5:3:1:436:428#0/1\t0\tchrM\t3139\t255\t76M\t*\t0\t0\tAGAAATAAGGCCTACTTCACAAAGCGCCTTCCCCCGTAAATGATATCATCTCAACTTAGTATTATACCCACACCCA\tBCACBCCBBBBBBBBCCBBBBBBBABBABABB@AABA@@@BB@BAA??@AA@>8;B@?B5@@@A@@>2<@@?>5/%\tXA:i:0\tMD:Z:76\tNM:i:0\n+GA5:3:1:435:1711#0/1\t16\tchrM\t4321\t255\t76M\t*\t0\t0\tCCCTTATTTCTAGGACTATGAGAATCGAACCCATCCCTGAGAATCCAAAATTCTCCGTGCCACCTATCACACCCCA\t>8>/5>6???6:7>7:>@>=@>:@>@>?:@@B@:AABA@?BAB=ABAABB>?B:AB@A@AB?BBBBAB@C@ABACB\tXA:i:0\tMD:Z:76\tNM:i:0\n+GA5:3:1:428:297#0/1\t4\t*\t0\t0\t*\t*\t0\t0\tTTAAGTGCTGTGGCCAGAAGCGGGGGGAGGGGGGGTTTGGTGGAAATTTTTTGTTATGATGTCTGTGTGGAAAGCG\tBCCBC@CBCBBCBBBBBABBBCCBBBA-@BBBBBB.>AA@/@B5.,8??@AB@?;2<:6;;2366244=4+//0)-\tXM:i:0\n+GA5:3:1:436:1244#0/1\t16\tchr1\t567522\t255\t76M\t*\t0\t0\tTTTATTAGCNAACTCAACACTAGACATCGTACTACACGACACGTACTACGTTGTAGCCCACTTCCACTATGTCCTA\t#################################7?8;5.=::0>/95(<(>=368*B=@(B@>BB>A?BB@@CBBB\tXA:i:0\tMD:Z:1G7A6T59\tNM:i:3\n+GA5:3:1:437:1704#0/1\t0\tchrM\t8469\t255\t76M\t*\t0\t0\tCTACCTCCCTCACCAAAGCCCATAAAAATAAAAAATTATAACAAACCCTGAGAACCAAAATGAACGAAAATCTGTT\t<>2@BCBCBBCCBCBBBB?CCCBBBCC@BCABCB8CCCCCBCBBCBBBCBBBAB@BBA4ABAB>=B6?B;B>AA@6\tXA:i:0\tMD:Z:76\tNM:i:0\n+GA5:3:1:437:1581#0/1\t16\tchrM\t3892\t255\t76M\t*\t0\t0\tAACCCCCTTCGACCTTGCCGAAGGGGAGTCCGAACTA
GTCTCAGGCTTCAACATCGAATACGCCGCAGGCCCCTTC\t++76<<651556::545:<?54;?;?;???::<:@<@?<>5;=>:;??AB?BB?BABB??BAABABBBBBA?BABB\tXA:i:0\tMD:Z:76\tNM:i:0\n+GA5:3:1:438:1005#0/1\t16\tchrM\t13658\t255\t76M\t*\t0\t0\tACTAACATTAACGAAAATAACCCCACCCTACTAAACCCCATTAAACGCCTGGCAGCCGGAAGCCTATTCGCAGGAT\t17-65/9877797@<@?3<)>BAA7ABB:6:;BB6?AA8=77BB;=-?A?>:BABB>@BBCBBBBABBBBBBBCCB\tXA:i:0\tMD:Z:76\tNM:i:0\n+GA5:3:1:439:1607#0/1\t16\tchrM\t9923\t255\t76M\t*\t0\t0\tCCTGATACTGGCATTTTGTAGATGTGGTTTGACTATTTCTGTATGTCTCCATCTATTGATGAGGGTCTTACTCTTT\t1:8?>>8@?6@@?>???@@?@@A@@AAAAB@9ABB@<@BBA@AA@@B?BB@@B@BBBBBBBBABBBBCB?CBBBCB\tXA:i:0\tMD:Z:76\tNM:i:0\n+GA5:3:1:438:1116#0/1\t0\tchrM\t15866\t255\t76M\t*\t0\t0\tAAACAAAATACTCAAATGGGCCTGTCCTTGTAGTATAAACTAATACACCAGTCTTGTAAACCGGAGATGAAAACCT\tBACBCCBBBCCCCCBCCC@BCBCCBCBCCCACCBBBCBBBBBBBABBB:BB:ABBB<BAAB?>9:A@AB:<A>A.@\tXA:i:0\tMD:Z:67C8\tNM:i:1\n+GA5:3:1:437:120#0/1\t16\tchrM\t5354\t255\t76M\t*\t0\t0\tGCCTAATCTACTCCACCTCAATCACACTACTCCCCATATCTAACAACGTAAAAATAAAATGACAGTTTGAACATNC\t(64/696866<67=9=<9==<3:9?9@>:@:=@AAA?A=B?@>BA?B@AABBBB@BABB@@??BAAABBB@BB:%<\tXA:i:1\tMD:Z:74A1\tNM:i:1\n+GA5:3:1:440:1464#0/1\t0\tchrM\t8745\t255\t76M\t*\t0\t0\tTATCCTTAATCATTTTTATTGCCACAACTAACCTCCTCGGACTCCTGCCTCACTCATTTACACCAACCACCCAACT\tBABBBBCBBCBBBBBCCBBBBA@AABBABBA@@BA@AAA?4?B@@AA@?A@@?A@@@>@@@@>>?;9?@>@8<00=\tXA:i:0\tMD:Z:76\tNM:i:0\n+GA5:3:1:440:140#0/1\t0\tchrM\t5340\t255\t76M\t*\t0\t0\tCTACTTCTACCTACGCCTAATCTACTCCACCTCAATCACACTACTCCCCATATCTAACAACGTAAAAATAAAATGA\tBC@BCCCCCCCBBBCABC@@CAC@BBBBBBBBB@BBABBBBBBBBBAA@BBAB@BBB@@A@B?>:@A@@B?@@@72\tXA:i:0\tMD:Z:76\tNM:i:0\n+GA5:3:1:440:823#0/1\t0\tchrM\t3735\t255\t76M\t*\t0\t0\tAAGTCACCCTAGCCATCATTCTACTATCAACATTACTAATAAGTGGCTCCTTTAACCTCTCCACCCTTATCACAAC\t>A<@BBBAABBABBBB@?CA@ABABBB@@BBABAA@BA@B@@@=A=A@A??@A5:6?99??@9<?>;<>8?>:>><\tXA:i:0\tMD:Z:76\tNM:i:0\n+GA5:3:1:441:633#0/1\t0\tchrM\t4253\t255\t76M\t*\t0\t0\tCCTCAAACCTAAGAAATATGTCTGATAAAAGAGTTACTTTGATAGAGTAAATAATAGGAGCTTAAACCCCCTTATT\tBCCCCBBBBCCCBBCBCBCBBBCCBBBBBCBAB?BB?BBBBABBBAA=AA
AAAA==<><=:>@A?@?==79<@?@?\tXA:i:0\tMD:Z:76\tNM:i:0\n+GA5:3:1:442:1663#0/1\t0\tchrM\t13503\t255\t76M\t*\t0\t0\tTCTACTCCAAAGACCACATCATCGAAACCGCAAACATATCATACACAAACGCCTGAGCCCTATCTATTACTCTCAT\tBBCBBBBBBBBBBBBBBBBBBBBBBBBB@BBBBBABBBB@ABBBA?A@A??==?<3<3>6>;@@B@@<>??=@>>4\tXA:i:0\tMD:Z:76\tNM:i:0\n+GA5:3:1:443:1999#0/1\t0\tchrM\t16347\t255\t76M\t*\t0\t0\tAGTCAAATCCCTTCTCGTCCCCATGGATGACCCCCCTCAGATAGGGGTCCCTTGACCACCATCCTCCGTGAAATCA\tBBBBBB9CABBBCBBB@>AABABBB?AAA@??@@6A??B??8;;?@?6:6:8=@9?=?86=:447<8=.=552891\tXA:i:0\tMD:Z:76\tNM:i:0\n'
b
diff -r 000000000000 -r 8c737b8ddc45 test-data/sam2interval_noprintAll.dat
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sam2interval_noprintAll.dat Mon Aug 26 15:12:38 2013 -0400
b
@@ -0,0 +1,7 @@
+#chrom start end strand read_name
+ref 6 22 + r001
+ref 8 19 + r002
+ref 8 14 + r003
+ref 15 40 + r004
+ref 28 33 - r003
+ref 36 45 - r001
b
diff -r 000000000000 -r 8c737b8ddc45 test-data/sam2interval_printAll.dat
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sam2interval_printAll.dat Mon Aug 26 15:12:38 2013 -0400
b
@@ -0,0 +1,7 @@
+#chrom start end strand read_name
+ref 6 22 + r001 163 ref 7 30 8M2I4M1D3M = 37 39 TTAGATAAAGGATACTA *
+ref 8 19 + r002 0 ref 9 30 3S6M1P1I4M * 0 0 AAAAGATAAGGATA *
+ref 8 14 + r003 0 ref 9 30 5H6M * 0 0 AGCTAA * NM:i:1
+ref 15 40 + r004 0 ref 16 30 6M14N5M * 0 0 ATAGCTTCAGC *
+ref 28 33 - r003 16 ref 29 30 6H5M * 0 0 TAGGC * NM:i:0
+ref 36 45 - r001 83 ref 37 30 9M = 7 -39 CAGCGCCAT *
b
diff -r 000000000000 -r 8c737b8ddc45 test-data/sam2interval_with_unmapped_reads_noprintAll.dat
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sam2interval_with_unmapped_reads_noprintAll.dat Mon Aug 26 15:12:38 2013 -0400
b
b'@@ -0,0 +1,927 @@\n+#chrom\tstart\tend\tstrand\tread_name\n+chrM\t6565\t6641\t-\tGA5:3:1:24:1745#0/1\n+chrM\t4096\t4172\t-\tGA5:3:1:23:263#0/1\n+chrM\t14759\t14835\t-\tGA5:3:1:24:1658#0/1\n+chrM\t1244\t1320\t-\tGA5:3:1:24:1108#0/1\n+chrM\t5317\t5393\t+\tGA5:3:1:24:1272#0/1\n+chrM\t10934\t11010\t-\tGA5:3:1:25:1989#0/1\n+chr1\t567534\t567610\t-\tGA5:3:1:25:1877#0/1\n+chrM\t16407\t16483\t-\tGA5:3:1:25:1203#0/1\n+chrM\t8684\t8760\t+\tGA5:3:1:24:1720#0/1\n+chrM\t4952\t5028\t-\tGA5:3:1:25:74#0/1\n+chr1\t567927\t568003\t-\tGA5:3:1:26:129#0/1\n+chrM\t6830\t6906\t-\tGA5:3:1:27:1860#0/1\n+chrM\t7432\t7508\t+\tGA5:3:1:26:303#0/1\n+chrM\t5342\t5418\t-\tGA5:3:1:27:509#0/1\n+chrM\t3199\t3275\t-\tGA5:3:1:27:1876#0/1\n+chrM\t1392\t1468\t+\tGA5:3:1:27:462#0/1\n+chrM\t7909\t7985\t+\tGA5:3:1:27:1826#0/1\n+chr2\t49456941\t49457017\t-\tGA5:3:1:28:1961#0/1\n+chrM\t16239\t16315\t+\tGA5:3:1:25:1902#0/1\n+chrM\t11515\t11591\t-\tGA5:3:1:28:704#0/1\n+chrM\t7343\t7419\t-\tGA5:3:1:29:78#0/1\n+chrM\t5628\t5704\t+\tGA5:3:1:29:65#0/1\n+chrM\t9238\t9314\t+\tGA5:3:1:30:571#0/1\n+chrM\t202\t278\t-\tGA5:3:1:30:1418#0/1\n+chrM\t10065\t10141\t+\tGA5:3:1:31:1998#0/1\n+chrM\t9875\t9951\t+\tGA5:3:1:32:1159#0/1\n+chrM\t9031\t9107\t-\tGA5:3:1:32:1723#0/1\n+chr1\t564778\t564854\t-\tGA5:3:1:32:1365#0/1\n+chrM\t5333\t5409\t-\tGA5:3:1:33:300#0/1\n+chrM\t435\t511\t+\tGA5:3:1:33:625#0/1\n+chrM\t4066\t4142\t+\tGA5:3:1:33:1597#0/1\n+chrM\t6214\t6290\t+\tGA5:3:1:32:1051#0/1\n+chrM\t7217\t7293\t-\tGA5:3:1:33:1476#0/1\n+chrM\t11261\t11337\t-\tGA5:3:1:34:1493#0/1\n+chrM\t14521\t14597\t-\tGA5:3:1:35:718#0/1\n+chrM\t3593\t3669\t-\tGA5:3:1:35:1074#0/1\n+chr17\t22023720\t22023796\t-\tGA5:3:1:35:1946#0/1\n+chrM\t12846\t12922\t-\tGA5:3:1:36:143#0/1\n+chrM\t7047\t7123\t-\tGA5:3:1:38:1282#0/1\n+chrM\t2693\t2769\t+\tGA5:3:1:36:258#0/1\n+chrM\t8062\t8138\t-\tGA5:3:1:38:1622#0/1\n+chrM\t6159\t6235\t+\tGA5:3:1:39:651#0/1\n+chr1\t566417\t566493\t-\tGA5:3:1:40:1756#0/1\n+chrM\t3031\t3107\t-\tGA5:3:1:41:277#0/1\n+chrM\t6276\t6352\t+
\tGA5:3:1:41:24#0/1\n+chrM\t14786\t14862\t-\tGA5:3:1:42:1739#0/1\n+chrM\t10589\t10665\t+\tGA5:3:1:44:1997#0/1\n+chrM\t16248\t16324\t-\tGA5:3:1:42:449#0/1\n+chrM\t7319\t7395\t+\tGA5:3:1:44:1202#0/1\n+chrM\t3527\t3603\t+\tGA5:3:1:44:1689#0/1\n+chrM\t13998\t14074\t+\tGA5:3:1:45:1004#0/1\n+chrM\t13229\t13305\t+\tGA5:3:1:46:1996#0/1\n+chrM\t7456\t7532\t-\tGA5:3:1:46:1624#0/1\n+chrM\t5109\t5185\t-\tGA5:3:1:47:148#0/1\n+chrM\t2909\t2985\t+\tGA5:3:1:46:1928#0/1\n+chr13\t110076579\t110076655\t+\tGA5:3:1:48:345#0/1\n+chr1\t566432\t566508\t-\tGA5:3:1:49:208#0/1\n+chr1\t565743\t565819\t-\tGA5:3:1:48:1620#0/1\n+chrM\t3155\t3231\t-\tGA5:3:1:48:1536#0/1\n+chrM\t4657\t4733\t+\tGA5:3:1:49:1377#0/1\n+chrM\t7368\t7444\t+\tGA5:3:1:51:1072#0/1\n+chrM\t5329\t5405\t-\tGA5:3:1:51:78#0/1\n+chrM\t2171\t2247\t-\tGA5:3:1:51:851#0/1\n+chr1\t568282\t568358\t+\tGA5:3:1:52:518#0/1\n+chrM\t4376\t4452\t+\tGA5:3:1:52:796#0/1\n+chrM\t14645\t14721\t-\tGA5:3:1:52:192#0/1\n+chrM\t4694\t4770\t-\tGA5:3:1:55:1412#0/1\n+chrM\t2412\t2488\t+\tGA5:3:1:53:1458#0/1\n+chrM\t9057\t9133\t-\tGA5:3:1:55:1305#0/1\n+chrM\t10527\t10603\t+\tGA5:3:1:55:855#0/1\n+chr1\t564493\t564569\t-\tGA5:3:1:55:451#0/1\n+chrM\t7102\t7178\t+\tGA5:3:1:55:299#0/1\n+chrM\t10896\t10972\t-\tGA5:3:1:55:1194#0/1\n+chrM\t13626\t13702\t+\tGA5:3:1:56:1499#0/1\n+chrM\t5609\t5685\t+\tGA5:3:1:57:959#0/1\n+chrM\t7666\t7742\t-\tGA5:3:1:56:473#0/1\n+chrM\t14730\t14806\t+\tGA5:3:1:56:636#0/1\n+chrM\t10682\t10758\t+\tGA5:3:1:57:1408#0/1\n+chrM\t5069\t5145\t+\tGA5:3:1:58:763#0/1\n+chr1\t566474\t566550\t-\tGA5:3:1:59:688#0/1\n+chrM\t9140\t9216\t+\tGA5:3:1:59:791#0/1\n+chrM\t16340\t16416\t-\tGA5:3:1:59:503#0/1\n+chr1\t565718\t565794\t-\tGA5:3:1:61:1190#0/1\n+chrM\t6732\t6808\t+\tGA5:3:1:58:1920#0/1\n+chrM\t16239\t16315\t+\tGA5:3:1:57:1111#0/1\n+chrM\t3180\t3256\t-\tGA5:3:1:61:1838#0/1\n+chrM\t13789\t13865\t+\tGA5:3:1:61:722#0/1\n+chrM\t10350\t10426\t+\tGA5:3:1:62:1796#0/1\n+chrM\t10519\t10595\t-\tGA5:3:1:62:1469#0/1\n+chrM\t5391\t5467\t+\tGA5:3:1:63:1214#0/1
\n+chrM\t6420\t6496\t-\tGA5:3:1:61:594#0/1\n+chrM\t745\t821\t-\tGA5:3:1:62:1922#0/1\n+chrM\t1023\t1099\t+\tGA5:3:1:63:762#0/1\n+chr1\t569720\t569796\t+\tGA5:3:1:61:900#0/1\n+chr1\t569272\t569348\t+\tGA5:3:1:63:462#0/1\n+chrM\t10091\t10167\t+\tGA5:3:1:63:709#0/1\n+chrM\t10937\t11013\t-\tGA5:3:1:63:1686#0/1\n+chrM\t6460\t6536\t-\tGA5:3:1:64:1185#0/1\n+chrM\t12342\t12418\t+\tGA5:3:1:64:1804#0/1\n+chrM\t8694\t8770\t-\tGA5:3:1:67:480#0/1\n+chrM\t2104\t2180\t+\tGA5:3:1:65:500#0/1\n+chrM\t16386\t16462\t-\tGA5:3:1:68:3'..b'856\t-\tGA5:3:1:394:1561#0/1\n+chrM\t3734\t3810\t+\tGA5:3:1:395:207#0/1\n+chr11\t10530095\t10530171\t+\tGA5:3:1:390:112#0/1\n+chrM\t1385\t1461\t+\tGA5:3:1:397:1357#0/1\n+chrM\t9675\t9751\t+\tGA5:3:1:398:1207#0/1\n+chrM\t11084\t11160\t+\tGA5:3:1:398:12#0/1\n+chrM\t8583\t8659\t+\tGA5:3:1:399:941#0/1\n+chrM\t3305\t3381\t+\tGA5:3:1:399:1072#0/1\n+chrM\t8512\t8588\t-\tGA5:3:1:399:1299#0/1\n+chrM\t11287\t11363\t-\tGA5:3:1:400:488#0/1\n+chrM\t4790\t4866\t+\tGA5:3:1:399:542#0/1\n+chrM\t1435\t1511\t-\tGA5:3:1:400:748#0/1\n+chr1\t567255\t567331\t-\tGA5:3:1:401:673#0/1\n+chrM\t566\t642\t-\tGA5:3:1:402:1934#0/1\n+chrM\t13143\t13219\t-\tGA5:3:1:402:1807#0/1\n+chrM\t12171\t12247\t-\tGA5:3:1:402:283#0/1\n+chrM\t11264\t11340\t+\tGA5:3:1:399:1484#0/1\n+chrM\t9209\t9285\t-\tGA5:3:1:402:1896#0/1\n+chrM\t10940\t11016\t-\tGA5:3:1:403:1595#0/1\n+chrM\t11515\t11591\t-\tGA5:3:1:404:1551#0/1\n+chrM\t6685\t6761\t-\tGA5:3:1:405:1600#0/1\n+chrM\t184\t260\t-\tGA5:3:1:403:1870#0/1\n+chrM\t31\t107\t+\tGA5:3:1:404:716#0/1\n+chrM\t11170\t11246\t+\tGA5:3:1:405:1184#0/1\n+chrM\t15089\t15165\t-\tGA5:3:1:406:336#0/1\n+chrM\t8123\t8199\t-\tGA5:3:1:405:1223#0/1\n+chrM\t12144\t12220\t-\tGA5:3:1:406:283#0/1\n+chrM\t5813\t5889\t-\tGA5:3:1:406:97#0/1\n+chrM\t15487\t15563\t+\tGA5:3:1:407:686#0/1\n+chrM\t7975\t8051\t-\tGA5:3:1:406:381#0/1\n+chrM\t14369\t14445\t+\tGA5:3:1:407:315#0/1\n+chrM\t4772\t4848\t+\tGA5:3:1:407:846#0/1\n+chrM\t16483\t16559\t-\tGA5:3:1:407:692#0/1\n+chrM\t8096\t8172\t-\tGA5:3:1
:408:761#0/1\n+chrM\t34\t110\t-\tGA5:3:1:408:87#0/1\n+chrM\t10347\t10423\t+\tGA5:3:1:408:1553#0/1\n+chrM\t14112\t14188\t+\tGA5:3:1:409:1433#0/1\n+chrM\t6575\t6651\t+\tGA5:3:1:409:571#0/1\n+chrM\t696\t772\t+\tGA5:3:1:412:395#0/1\n+chrM\t11790\t11866\t-\tGA5:3:1:410:1990#0/1\n+chrM\t16268\t16344\t-\tGA5:3:1:411:1317#0/1\n+chr13\t110076567\t110076643\t+\tGA5:3:1:412:346#0/1\n+chrM\t1687\t1763\t+\tGA5:3:1:412:1157#0/1\n+chrM\t10936\t11012\t-\tGA5:3:1:414:1810#0/1\n+chrM\t3994\t4070\t+\tGA5:3:1:414:1062#0/1\n+chr1\t566609\t566685\t+\tGA5:3:1:414:1864#0/1\n+chrM\t3842\t3918\t+\tGA5:3:1:414:976#0/1\n+chrM\t9306\t9382\t-\tGA5:3:1:413:1773#0/1\n+chrM\t16224\t16300\t-\tGA5:3:1:415:897#0/1\n+chrM\t3903\t3979\t-\tGA5:3:1:416:214#0/1\n+chrM\t2943\t3019\t-\tGA5:3:1:415:549#0/1\n+chrM\t14533\t14609\t-\tGA5:3:1:417:1892#0/1\n+chrM\t11515\t11591\t-\tGA5:3:1:417:1023#0/1\n+chrM\t8482\t8558\t-\tGA5:3:1:418:1783#0/1\n+chrM\t11515\t11591\t-\tGA5:3:1:419:1023#0/1\n+chr1\t565758\t565834\t+\tGA5:3:1:419:1785#0/1\n+chrM\t8485\t8561\t-\tGA5:3:1:419:892#0/1\n+chrM\t3314\t3390\t-\tGA5:3:1:420:168#0/1\n+chrM\t11316\t11392\t-\tGA5:3:1:420:617#0/1\n+chrM\t2370\t2446\t-\tGA5:3:1:421:271#0/1\n+chrM\t14762\t14838\t+\tGA5:3:1:417:1426#0/1\n+chr1\t568483\t568559\t-\tGA5:3:1:423:731#0/1\n+chrM\t3019\t3095\t+\tGA5:3:1:424:54#0/1\n+chr1\t564926\t565002\t-\tGA5:3:1:424:1669#0/1\n+chrM\t12573\t12649\t-\tGA5:3:1:424:594#0/1\n+chrM\t2816\t2892\t+\tGA5:3:1:425:382#0/1\n+chrM\t14111\t14187\t-\tGA5:3:1:425:932#0/1\n+chrM\t2462\t2538\t-\tGA5:3:1:425:1589#0/1\n+chrM\t8469\t8545\t+\tGA5:3:1:421:1967#0/1\n+chrM\t11092\t11168\t-\tGA5:3:1:425:162#0/1\n+chrM\t6473\t6549\t+\tGA5:3:1:426:1489#0/1\n+chrM\t16220\t16296\t-\tGA5:3:1:427:1426#0/1\n+chrM\t4832\t4908\t+\tGA5:3:1:427:1780#0/1\n+chr17\t22023725\t22023801\t-\tGA5:3:1:427:1140#0/1\n+chr1\t567374\t567450\t-\tGA5:3:1:427:420#0/1\n+chrM\t11821\t11897\t+\tGA5:3:1:429:1694#0/1\n+chrM\t8061\t8137\t-\tGA5:3:1:430:378#0/1\n+chr5\t134260199\t134260275\t-\tGA5:3:1:429:1507#
0/1\n+chrM\t9877\t9953\t-\tGA5:3:1:431:248#0/1\n+chrM\t11833\t11909\t+\tGA5:3:1:428:1466#0/1\n+chrM\t3658\t3734\t-\tGA5:3:1:432:1011#0/1\n+chrM\t861\t937\t-\tGA5:3:1:432:381#0/1\n+chrM\t5607\t5683\t+\tGA5:3:1:432:886#0/1\n+chrM\t8622\t8698\t-\tGA5:3:1:433:1453#0/1\n+chrM\t9468\t9544\t+\tGA5:3:1:433:1188#0/1\n+chrM\t10362\t10438\t-\tGA5:3:1:434:1170#0/1\n+chrM\t3138\t3214\t+\tGA5:3:1:436:428#0/1\n+chrM\t4320\t4396\t-\tGA5:3:1:435:1711#0/1\n+chr1\t567521\t567597\t-\tGA5:3:1:436:1244#0/1\n+chrM\t8468\t8544\t+\tGA5:3:1:437:1704#0/1\n+chrM\t3891\t3967\t-\tGA5:3:1:437:1581#0/1\n+chrM\t13657\t13733\t-\tGA5:3:1:438:1005#0/1\n+chrM\t9922\t9998\t-\tGA5:3:1:439:1607#0/1\n+chrM\t15865\t15941\t+\tGA5:3:1:438:1116#0/1\n+chrM\t5353\t5429\t-\tGA5:3:1:437:120#0/1\n+chrM\t8744\t8820\t+\tGA5:3:1:440:1464#0/1\n+chrM\t5339\t5415\t+\tGA5:3:1:440:140#0/1\n+chrM\t3734\t3810\t+\tGA5:3:1:440:823#0/1\n+chrM\t4252\t4328\t+\tGA5:3:1:441:633#0/1\n+chrM\t13502\t13578\t+\tGA5:3:1:442:1663#0/1\n+chrM\t16346\t16422\t+\tGA5:3:1:443:1999#0/1\n'
b
diff -r 000000000000 -r 8c737b8ddc45 test-data/sam_bioinf_example.sam
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sam_bioinf_example.sam Mon Aug 26 15:12:38 2013 -0400
b
@@ -0,0 +1,6 @@
+r001 163 ref 7 30 8M2I4M1D3M = 37 39 TTAGATAAAGGATACTA *
+r002 0 ref 9 30 3S6M1P1I4M * 0 0 AAAAGATAAGGATA *
+r003 0 ref 9 30 5H6M * 0 0 AGCTAA * NM:i:1
+r004 0 ref 16 30 6M14N5M * 0 0 ATAGCTTCAGC *
+r003 16 ref 29 30 6H5M * 0 0 TAGGC * NM:i:0
+r001 83 ref 37 30 9M = 7 -39 CAGCGCCAT *