diff tools/validation/fix_errors.py @ 0:9071e359b9a3

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:37:19 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/validation/fix_errors.py	Fri Mar 09 19:37:19 2012 -0500
@@ -0,0 +1,60 @@
+#!/usr/bin/env python
+
+"""
+Fix errors in a dataset.
+For now, only removing erroneous lines is supported.
+
+usage: %prog input errorsfile output
+    -x, --ext: dataset extension (type)
+    -m, --methods=N: comma separated list of repair methods
+"""
+
+import pkg_resources; pkg_resources.require( "bx-python" )
+from bx.cookbook import doc_optparse
+
+from galaxy import util
+
+def main():
+    options, args = doc_optparse.parse( __doc__ )
+    methods = []
+    try:
+        if options.methods: methods = options.methods.split(",")
+    except:
+        pass
+    
+    ext = options.ext
+
+    in_file = open(args[0], "r")
+    error_file = open(args[1], "r")
+    out_file = open(args[2], "w")
+
+    # string_to_object errors
+    error_list = util.string_to_object(error_file.read())
+    # index by error type and then by line number
+    error_lines = {}
+    error_types = {}
+    for error in error_list:
+        if error.linenum:
+            if error.linenum in error_lines:
+                error_lines[error.linenum].append(error)
+            else:
+                error_lines[error.linenum] = [error]
+        error_type = error.__class__.__name__
+        if error_type in error_types:
+            error_types[error_type].append(error)
+        else:
+            error_types[error_type] = [error]
+
+    linenum = 0
+    for line in in_file:
+        linenum += 1
+        # write unless
+        if "lines" in methods:
+            if linenum in error_lines:
+                line = None
+            # other processing here?
+        if line:
+            out_file.write(line)
+    
+if __name__ == "__main__":
+    main()