# HG changeset patch # User Jan Kanis # Date 1403093520 -7200 # Node ID 0c4ac210068b45eac1724244da7d4a14b2aaced8 # Parent fa8a93bdefd7928c3333a9574791c520b38a09d8 handle reverse matches diff -r fa8a93bdefd7 -r 0c4ac210068b blast2html.py --- a/blast2html.py Wed Jun 18 12:25:37 2014 +0200 +++ b/blast2html.py Wed Jun 18 14:12:00 2014 +0200 @@ -87,29 +87,37 @@ step = 60 - def split(txt): - return [txt[i:i+step] for i in range(0, len(txt), step)] - qfrom = int(hsp['Hsp_query-from']) qto = int(hsp['Hsp_query-to']) + qframe = int(hsp['Hsp_query-frame']) hfrom = int(hsp['Hsp_hit-from']) hto = int(hsp['Hsp_hit-to']) + hframe = int(hsp['Hsp_hit-frame']) qseq = hsp.Hsp_qseq.text midline = hsp.Hsp_midline.text hseq = hsp.Hsp_hseq.text + + if not qframe in [1, -1]: + warnings.warn("Error in BlastXML input: Hsp node {} has a Hsp_query-frame of {}".format(nodeid(hsp), qframe)) + qframe = -1 if qframe < 0 else 1 + if not hframe in [1, -1]: + warnings.warn("Error in BlastXML input: Hsp node {} has a Hsp_hit-frame of {}".format(nodeid(hsp), hframe)) + hframe = -1 if hframe < 0 else 1 - offset = 0 + def split(txt): + return [txt[i:i+step] for i in range(0, len(txt), step)] + for qs, mid, hs, offset in zip(split(qseq), split(midline), split(hseq), range(0, len(qseq), step)): yield ( - "Query {:>7} {} {}\n".format(qfrom+offset, qs, qfrom+offset+len(qs)-1) + + "Query {:>7} {} {}\n".format(qfrom+offset*qframe, qs, qfrom+(offset+len(qs)-1)*qframe) + " {:7} {}\n".format('', mid) + - "Subject{:>7} {} {}".format(hfrom+offset, hs, hfrom+offset+len(hs)-1) + "Subject{:>7} {} {}".format(hfrom+offset*hframe, hs, hfrom+(offset+len(hs)-1)*hframe) ) - if qfrom+len(qseq)-1 != qto: + if qfrom+(len(qseq)-1)*qframe != qto: warnings.warn("Error in BlastXML input: Hsp node {} qseq length mismatch: from {} to {} length {}".format( nodeid(hsp), qfrom, qto, len(qseq))) - if hfrom+len(hseq)-1 != hto: + if hfrom+(len(hseq)-1)*hframe != hto: warnings.warn("Error in BlastXML input: Hsp node {} hseq length mismatch: from {} to {} length {}".format( nodeid(hsp), hfrom, hto, len(hseq))) diff -r fa8a93bdefd7 -r 0c4ac210068b test-data/blast xml example1.html --- a/test-data/blast xml example1.html Wed Jun 18 12:25:37 2014 +0200 +++ b/test-data/blast xml example1.html Wed Jun 18 14:12:00 2014 +0200 @@ -6667,7 +6667,7 @@
Query        2  GTCCGTCG  9
                 ||||||||
-Subject    177  GTCCGTCG  184
+Subject 177 GTCCGTCG 170

@@ -6715,7 +6715,7 @@

Query        2  GTCCGTC  8
                 |||||||
-Subject   2048  GTCCGTC  2054
+Subject 2048 GTCCGTC 2042

@@ -6853,7 +6853,7 @@

Query        8  CGTGAAGA  15
                 ||||||||
-Subject   1634  CGTGAAGA  1641
+Subject 1634 CGTGAAGA 1627

@@ -6994,7 +6994,7 @@

Query        8  CGTGAAGA  15
                 ||||||||
-Subject   1634  CGTGAAGA  1641
+Subject 1634 CGTGAAGA 1627

@@ -7111,7 +7111,7 @@

Query        7  TCGTGAAGA  15
                 |||||||||
-Subject    298  TCGTGAAGA  306
+Subject 298 TCGTGAAGA 290

@@ -7672,7 +7672,7 @@

Query        7  TCGTGAAGA  15
                 |||||||||
-Subject    305  TCGTGAAGA  313
+Subject 305 TCGTGAAGA 297

@@ -7813,7 +7813,7 @@

Query       10  TGAAGAG  16
                 |||||||
-Subject   1569  TGAAGAG  1575
+Subject 1569 TGAAGAG 1563
@@ -7924,7 +7924,7 @@
Query       10  TGAAGAG  16
                 |||||||
-Subject   1569  TGAAGAG  1575
+Subject 1569 TGAAGAG 1563 @@ -8392,7 +8392,7 @@
Query        8  CGTGAAGA  15
                 ||||||||
-Subject   1634  CGTGAAGA  1641
+Subject 1634 CGTGAAGA 1627

@@ -8551,7 +8551,7 @@

Query        8  CGTGAAGA  15
                 ||||||||
-Subject   1634  CGTGAAGA  1641
+Subject 1634 CGTGAAGA 1627

@@ -8716,7 +8716,7 @@

Query       10  TGAAGAG  16
                 |||||||
-Subject   1569  TGAAGAG  1575
+Subject 1569 TGAAGAG 1563
@@ -8809,7 +8809,7 @@
Query       10  TGAAGAG  16
                 |||||||
-Subject   1569  TGAAGAG  1575
+Subject 1569 TGAAGAG 1563 @@ -8878,7 +8878,7 @@
Query        8  CGTGAAGA  15
                 ||||||||
-Subject   1634  CGTGAAGA  1641
+Subject 1634 CGTGAAGA 1627

@@ -9043,7 +9043,7 @@

Query       10  TGAAGAG  16
                 |||||||
-Subject   1569  TGAAGAG  1575
+Subject 1569 TGAAGAG 1563
@@ -9088,7 +9088,7 @@
Query        4  CCGTCGTGA  12
                 |||||||||
-Subject     19  CCGTCGTGA  27
+Subject 19 CCGTCGTGA 11 @@ -9178,7 +9178,7 @@
Query        4  CCGTCGTGA  12
                 |||||||||
-Subject     19  CCGTCGTGA  27
+Subject 19 CCGTCGTGA 11 @@ -9268,7 +9268,7 @@
Query        7  TCGTGAAGA  15
                 |||||||||
-Subject    305  TCGTGAAGA  313
+Subject 305 TCGTGAAGA 297

@@ -9385,7 +9385,7 @@

Query        8  CGTGAAGA  15
                 ||||||||
-Subject   1587  CGTGAAGA  1594
+Subject 1587 CGTGAAGA 1580

@@ -9550,7 +9550,7 @@

Query       10  TGAAGAG  16
                 |||||||
-Subject   1533  TGAAGAG  1539
+Subject 1533 TGAAGAG 1527
@@ -9814,7 +9814,7 @@
Query        7  TCGTGAAGA  15
                 |||||||||
-Subject    305  TCGTGAAGA  313
+Subject 305 TCGTGAAGA 297

@@ -9907,7 +9907,7 @@

Query        7  TCGTGAAGA  15
                 |||||||||
-Subject    305  TCGTGAAGA  313
+Subject 305 TCGTGAAGA 297

@@ -10000,7 +10000,7 @@

Query        7  TCGTGAAGA  15
                 |||||||||
-Subject    305  TCGTGAAGA  313
+Subject 305 TCGTGAAGA 297

@@ -10093,7 +10093,7 @@

Query        7  TCGTGAAGA  15
                 |||||||||
-Subject    305  TCGTGAAGA  313
+Subject 305 TCGTGAAGA 297

@@ -10186,7 +10186,7 @@

Query        7  TCGTGAAGA  15
                 |||||||||
-Subject    305  TCGTGAAGA  313
+Subject 305 TCGTGAAGA 297

@@ -10279,7 +10279,7 @@

Query        7  TCGTGAAGA  15
                 |||||||||
-Subject    305  TCGTGAAGA  313
+Subject 305 TCGTGAAGA 297

@@ -10348,7 +10348,7 @@

Query        7  TCGTGAAGA  15
                 |||||||||
-Subject    305  TCGTGAAGA  313
+Subject 305 TCGTGAAGA 297

@@ -10441,7 +10441,7 @@

Query        7  TCGTGAAGA  15
                 |||||||||
-Subject    305  TCGTGAAGA  313
+Subject 305 TCGTGAAGA 297

@@ -10534,7 +10534,7 @@

Query        7  TCGTGAAGA  15
                 |||||||||
-Subject    305  TCGTGAAGA  313
+Subject 305 TCGTGAAGA 297

@@ -10627,7 +10627,7 @@

Query        7  TCGTGAAGA  15
                 |||||||||
-Subject    305  TCGTGAAGA  313
+Subject 305 TCGTGAAGA 297

@@ -10768,7 +10768,7 @@

Query        7  TCGTGAAGA  15
                 |||||||||
-Subject    305  TCGTGAAGA  313
+Subject 305 TCGTGAAGA 297

@@ -11263,7 +11263,7 @@

Query        7  TCGTGAAGA  15
                 |||||||||
-Subject    305  TCGTGAAGA  313
+Subject 305 TCGTGAAGA 297

@@ -11380,7 +11380,7 @@

Query        7  TCGTGAAGA  15
                 |||||||||
-Subject    305  TCGTGAAGA  313
+Subject 305 TCGTGAAGA 297

@@ -11473,7 +11473,7 @@

Query        7  TCGTGAAGA  15
                 |||||||||
-Subject    305  TCGTGAAGA  313
+Subject 305 TCGTGAAGA 297

@@ -12961,7 +12961,7 @@

Query        5  CGTCGTGA  12
                 ||||||||
-Subject     52  CGTCGTGA  59
+Subject 52 CGTCGTGA 45