annotate tools/validation/fix_errors.py @ 0:9071e359b9a3

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:37:19 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
1 #!/usr/bin/env python
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
2
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
3 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
4 Fix errors in a dataset.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
5 For now, only removing erroneous lines is supported.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
6
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
7 usage: %prog input errorsfile output
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
8 -x, --ext: dataset extension (type)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
9 -m, --methods=N: comma separated list of repair methods
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
10 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
11
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
12 import pkg_resources; pkg_resources.require( "bx-python" )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
13 from bx.cookbook import doc_optparse
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
14
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
15 from galaxy import util
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
16
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
def main():
    """Copy the input dataset to the output file, applying repair methods.

    Command-line arguments are parsed from the module usage string:
    ``input errorsfile output`` plus the ``--ext`` and ``--methods`` options.
    ``--ext`` is accepted but not currently used by any repair method.

    The only repair method implemented is ``lines``: every input line whose
    1-based line number has at least one recorded error is dropped.  With no
    (or unrecognised) methods the input is copied through unchanged.

    Raises:
        IndexError: if fewer than three positional arguments are supplied.
        IOError/OSError: if any of the three files cannot be opened.
    """
    options, args = doc_optparse.parse(__doc__)

    # Fail fast (before any file is touched) when a positional arg is missing.
    in_path, errors_path, out_path = args[0], args[1], args[2]

    # --methods is optional.  Stay best-effort when it is absent or not a
    # string, but no longer swallow unrelated exceptions such as
    # KeyboardInterrupt the way the previous bare ``except`` did.
    methods = []
    try:
        if options.methods:
            methods = options.methods.split(",")
    except AttributeError:
        pass
    drop_error_lines = "lines" in methods

    # Decode the error list *before* opening the output for writing, so that
    # an unreadable or corrupt errors file does not leave a truncated output.
    # NOTE(review): galaxy.util.string_to_object presumably deserialises
    # arbitrary objects from this text -- confirm the errors file is trusted.
    with open(errors_path, "r") as error_file:
        error_list = util.string_to_object(error_file.read())

    # Only membership of a line number is needed; errors with a falsy
    # linenum (None / 0) are not tied to a specific line and are skipped.
    error_linenums = set(
        error.linenum for error in error_list if error.linenum
    )

    with open(in_path, "r") as in_file:
        with open(out_path, "w") as out_file:
            for linenum, line in enumerate(in_file, 1):
                if drop_error_lines and linenum in error_linenums:
                    continue
                # other processing here?
                out_file.write(line)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
58
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
# Script entry point: run the repair only when this file is executed
# directly, so that importing the module has no side effects.
if __name__ == "__main__":
    main()