0
|
1 #!/usr/bin/python
|
|
2
|
|
3 import os.path
|
|
4 import sys
|
|
5
|
|
class bed:
    """Sequential reader for a tab-separated BED target file.

    Records are read one at a time with getRecord(); parseBed() advances
    through the file until a requested reference sequence / position is
    reached.  Fatal problems (missing file, malformed record) are reported
    on stderr and terminate the program with exit status 1.

    Attributes set by this class:
        numberTargets          number of records read so far
        referenceSequences     dict used as a set of reference names seen
        referenceSequenceList  reference names in order of first appearance
        filehandle             open file object (set by openBed)
        record                 raw text of the most recent line read
        referenceSequence      reference name of the current record
        start, end             1-based, inclusive interval of the current record
    """

    def __init__(self):
        self.numberTargets = 0
        self.referenceSequences = {}
        self.referenceSequenceList = []

        # Initialise the per-record state so that parseBed() can be called
        # before any record has been read without raising AttributeError.
        self.record = ""
        self.ref = ""
        self.referenceSequence = ""
        self.start = 0
        self.end = 0

    def openBed(self, filename):
        """Open `filename` for reading; the literal name "stdin" selects sys.stdin.

        Exits with status 1 if the file cannot be opened.
        """
        if filename == "stdin":
            self.filehandle = sys.stdin
            return

        try:
            self.filehandle = open(filename, "r")
        except IOError:
            sys.stderr.write("Failed to find file:  %s\n" % filename)
            sys.exit(1)

    # Get a bed record.
    def getRecord(self):
        """Read the next record and populate referenceSequence/start/end.

        Returns False at end of file, True otherwise.  Exits with status 1
        if the record has fewer than three columns, has non-integer
        coordinates, or describes an empty/negative interval.
        """
        self.record = self.filehandle.readline()
        if not self.record:
            return False

        self.numberTargets += 1
        self.ref = ""
        self.start = 0
        self.end = 0

        entries = self.record.rstrip("\n").split("\t")

        # A blank or truncated line has no start/end columns.  Report it
        # explicitly instead of failing with an uncaught IndexError.
        if len(entries) < 3:
            text = "bed record has fewer than three tab-separated columns"
            self.generalError(text, "record", self.record.rstrip("\n"))

        self.referenceSequence = entries[0]

        # Add the reference sequence to the dictionary.  If it didn't
        # previously exist, append it to the end of the list as well.  This
        # preserves the order in which the reference sequences first
        # appeared in the file.
        if self.referenceSequence not in self.referenceSequences:
            self.referenceSequences[self.referenceSequence] = True
            self.referenceSequenceList.append(self.referenceSequence)

        # bed file should be 0-based, half-open, so the start coordinate
        # must be that in the bed file plus one.
        try:
            self.start = int(entries[1]) + 1
        except ValueError:
            text = "start position is not an integer"
            self.generalError(text, "start", entries[1])

        try:
            self.end = int(entries[2])
        except ValueError:
            text = "end position is not an integer"
            self.generalError(text, "end", entries[2])

        # Check that the record is a valid interval.
        if self.end - self.start < 0:
            sys.stderr.write("Invalid target interval:\n\t" + self.record + "\n")
            sys.exit(1)

        return True

    # Parse through the bed file until the correct reference sequence is
    # encountered and the end position is greater than or equal to that
    # requested.
    def parseBed(self, referenceSequence, position):
        """Advance to the first record on `referenceSequence` ending at or after `position`.

        Reading continues from the current record (records already passed
        are not revisited).  Returns True if the file still had records
        when the search stopped, False if end of file was reached.
        """
        success = True

        # Skip records until the requested reference sequence is reached.
        while self.referenceSequence != referenceSequence and success:
            success = self.getRecord()

        # Skip records on that reference that end before the position.
        while (self.referenceSequence == referenceSequence
               and self.end < position and success):
            success = self.getRecord()

        return success

    # Close the bed file.
    def closeBed(self, filename=None):
        """Close the open file handle.

        `filename` is unused; it is accepted (and now optional) only for
        compatibility with existing callers.
        """
        self.filehandle.close()

    # Define error messages for different handled errors.
    def generalError(self, text, field, fieldValue):
        """Write an error report to stderr and exit with status 1.

        `text` is the description of the problem; `field`/`fieldValue`
        name the offending field and its value (omitted when `field` is
        the empty string).
        """
        sys.stderr.write("\nError encountered when attempting to read:\n")
        if field != "":
            sys.stderr.write("\t%s :  %s\n" % (field, fieldValue))
        sys.stderr.write("\n%s\n" % text)
        sys.exit(1)
|