annotate mytools/bedClean.py @ 9:87eb5c5ddfe9

Uploaded
author xuebing
date Fri, 09 Mar 2012 20:01:43 -0500
parents f0dc65e7f6c0
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
7
f0dc65e7f6c0 Uploaded
xuebing
parents:
diff changeset
1 import sys
f0dc65e7f6c0 Uploaded
xuebing
parents:
diff changeset
2
f0dc65e7f6c0 Uploaded
xuebing
parents:
diff changeset
def readChrSize(filename):
    """Load chromosome lengths from a whitespace-delimited table.

    Each data line must start with a chromosome name followed by its
    integer size.  Any further columns (for example the file-name column
    of a UCSC chromInfo.txt) are ignored, as are blank lines and lines
    whose first field starts with '#'.

    Args:
        filename: Path of the chromosome-size file.

    Returns:
        dict mapping chromosome name (str) to size (int).  When a name
        occurs more than once, the last entry wins.

    Raises:
        ValueError: If a data line has fewer than two columns or its
            size column is not an integer.
    """
    chrSize = {}
    # 'with' guarantees the handle is closed even if a bad line raises.
    with open(filename) as f:
        for line in f:
            fields = line.split()
            if not fields or fields[0].startswith('#'):
                continue
            if len(fields) < 2:
                raise ValueError(
                    'chromosome size line needs two columns: %r' % line)
            # Only the first two columns are meaningful; extras are ignored.
            chrSize[fields[0]] = int(fields[1])
    return chrSize
f0dc65e7f6c0 Uploaded
xuebing
parents:
diff changeset
11
f0dc65e7f6c0 Uploaded
xuebing
parents:
diff changeset
def cleanFile(filename,chrSize,outfile):
    """Copy the valid intervals of a tab-delimited BED file to outfile.

    A line is written to outfile only when it has at least three
    tab-separated fields, its chromosome is a key of chrSize, both
    coordinates are integers, neither coordinate is negative, and the end
    does not exceed the chromosome size.  If start is greater than end the
    two fields are swapped before the checks and before writing.  Every
    rejected line is reported on standard output together with its
    1-based line number and the reason.

    Args:
        filename: Path of the input BED file.
        chrSize: dict mapping chromosome name to its size (int).
        outfile: Path of the cleaned file to create (overwritten).

    Returns:
        None.
    """
    # Messages are built as one string and passed to print(...) so the
    # output is the same whether print is a statement or a function.
    with open(filename) as src, open(outfile, 'w') as out:
        for i, line in enumerate(src, 1):
            flds = line.strip().split('\t')
            if len(flds) < 3:
                print('line %d incomplete line:\n%s' % (i, line))
                continue

            chrom = flds[0]
            if chrom not in chrSize:
                print('line %d chromosome %s not found!\n%s'
                      % (i, chrom, line))
                continue

            # Parse once; a malformed coordinate is reported like any
            # other bad line instead of aborting the whole run.
            try:
                start, end = int(flds[1]), int(flds[2])
            except ValueError:
                print('line %d non-integer coordinates:\n%s' % (i, line))
                continue

            if start > end:
                # Swap the original text fields so the written values keep
                # their input formatting.
                flds[1], flds[2] = flds[2], flds[1]
                start, end = end, start

            # start <= end here, so a negative end implies a negative start.
            if start < 0:
                print('line %d negative coordinates:\n%s' % (i, line))
            elif end > chrSize[chrom]:
                print('line %d end larger than chr size:\n%s' % (i, line))
            else:
                out.write('\t'.join(flds) + '\n')
f0dc65e7f6c0 Uploaded
xuebing
parents:
diff changeset
36
f0dc65e7f6c0 Uploaded
xuebing
parents:
diff changeset
# Script entry point.
# Usage: bedClean.py <input.bed> <chrom.sizes> <output.bed>
input_bed = sys.argv[1]
chrom_sizes = readChrSize(sys.argv[2])
output_bed = sys.argv[3]
cleanFile(input_bed, chrom_sizes, output_bed)