annotate tools/stats/dna_filtering.py @ 0:9071e359b9a3

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:37:19 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
1 #!/usr/bin/env python
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
2
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
3 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
4 This tool takes a tab-delimited text file as input and creates filters on columns based on certain properties. The tool will skip over invalid lines within the file, informing the user about the number of lines skipped.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
5
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
6 usage: %prog [options]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
7 -i, --input=i: tabular input file
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
8 -o, --output=o: filtered output file
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
9 -c, --cond=c: conditions to filter on
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
10 -n, --n_handling=n: how to handle N and X
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
11 -l, --columns=l: columns
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
12 -t, --col_types=t: column types
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
13
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
14 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
15
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
16 #from __future__ import division
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
17 import os.path, re, string, string, sys
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
18 from galaxy import eggs
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
19 import pkg_resources; pkg_resources.require( "bx-python" )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
20 from bx.cookbook import doc_optparse
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
21
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
22 # Older py compatibility
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
# Fall back to the legacy sets module only when the set builtin is missing.
# Catching NameError specifically keeps unrelated errors (for example
# KeyboardInterrupt) from being silently swallowed here.
try:
    set()
except NameError:
    from sets import Set as set
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
27
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
28 #assert sys.version_info[:2] >= ( 2, 4 )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
29
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
def get_operands( filter_condition ):
    """
    Return the set of operand words found in a filter condition.

    The comparison operators '==' and '!=' and the space-delimited keywords
    ' and ' and ' or ' are each replaced by a single space, then the text is
    split on whitespace.

    Splitting on any run of whitespace (rather than on a single space) keeps
    tab- or newline-separated words from being glued into one operand, and
    avoids producing empty-string operands for consecutive spaces.
    """
    # The items are removed in the order listed
    items_to_strip = [ '==', '!=', ' and ', ' or ' ]
    for item in items_to_strip:
        filter_condition = filter_condition.replace( item, ' ' )
    return set( filter_condition.split() )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
38
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
def stop_err( msg ):
    """
    Write msg to standard error exactly as given (no newline is appended)
    and then terminate the program by raising SystemExit with no exit code.
    """
    error_stream = sys.stderr
    error_stream.write( msg )
    raise SystemExit()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
42
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
def _replace_matches( pat, text, replacements ):
    """
    Return text with every match of the compiled regex pat replaced by
    replacements[ <matched text> ], in a single left-to-right pass.

    A single pass is used instead of repeated str.replace() calls so that a
    condition which happens to be a substring of another one (for example
    "c1==c2" inside "c1==c21") cannot corrupt the longer condition.
    """
    def _lookup( found ):
        return replacements[ found.group( 0 ) ]
    return pat.sub( _lookup, text )

def __main__():
    """
    Filter a tab-delimited file on DNA-code aware column conditions.

    Reads the options input, output, cond, n_handling, columns and col_types
    (parsed from the module docstring by doc_optparse), rewrites each
    'cN == value' / 'cN != value' condition into a membership test against a
    table of nucleotide codes, and writes every input line satisfying the
    rewritten condition to the output file. Blank lines, lines starting with
    '#', and lines that raise while being split, cast or tested are counted
    as skipped. A summary is printed to standard output; invalid options or
    conditions are reported through stop_err(), which terminates the program.

    SECURITY: the condition text is supplied by the caller and is executed
    with exec. The regular-expression and operand checks below are
    best-effort filtering, not a sandbox.
    """
    # NOTE: every local name bound before the dir() call further down is part
    # of the list of words rejected in a condition, so those names (including
    # the unused 'args' and the builtin-shadowing 'input') are kept as they are.
    #Parse Command Line
    options, args = doc_optparse.parse( __doc__ )
    input = options.input
    output = options.output
    cond = options.cond
    n_handling = options.n_handling
    columns = options.columns
    col_types = options.col_types

    try:
        in_columns = int( columns )
        # Explicit check instead of assert, which is stripped under -O
        if not col_types:
            raise ValueError( 'No column types were supplied' )
        in_column_types = col_types.split( ',' )
    except ( TypeError, ValueError, AttributeError ):
        stop_err( "Data does not appear to be tabular. This tool can only be used with tab-delimited data." )

    # Unescape if input has been escaped
    cond_text = cond.replace( '__eq__', '==' ).replace( '__ne__', '!=' ).replace( '__sq__', "'" )
    orig_cond_text = cond_text
    # Expand to allow for DNA codes
    # dot_letters holds every uppercase letter that is not a nucleotide code, plus '.'
    dot_letters = [ letter for letter in string.ascii_uppercase if letter not in \
                    [ 'A', 'C', 'G', 'T', 'U', 'B', 'D', 'H', 'K', 'M', 'N', 'R', 'S', 'V', 'W', 'X', 'Y' ] ]
    dot_letters.append( '.' )
    # For each code, the list of codes it is considered compatible with
    codes = {'A': [ 'A', 'D', 'H', 'M', 'R', 'V', 'W' ],
             'C': [ 'C', 'B', 'H', 'M', 'S', 'V', 'Y' ],
             'G': [ 'G', 'B', 'D', 'K', 'R', 'S', 'V' ],
             'T': [ 'T', 'U', 'B', 'D', 'H', 'K', 'W', 'Y' ],
             'U': [ 'T', 'U', 'B', 'D', 'H', 'K', 'W', 'Y' ],
             'K': [ 'G', 'T', 'U', 'B', 'D', 'H', 'K', 'R', 'S', 'V', 'W', 'Y' ],
             'M': [ 'A', 'C', 'B', 'D', 'H', 'M', 'R', 'S', 'V', 'W', 'Y' ],
             'R': [ 'A', 'G', 'B', 'D', 'H', 'K', 'M', 'R', 'S', 'V', 'W' ],
             'Y': [ 'C', 'T', 'U', 'B', 'D', 'H', 'K', 'M', 'S', 'V', 'W', 'Y' ],
             'S': [ 'C', 'G', 'B', 'D', 'H', 'K', 'M', 'R', 'S', 'V', 'Y' ],
             'W': [ 'A', 'T', 'U', 'B', 'D', 'H', 'K', 'M', 'R', 'V', 'W', 'Y' ],
             'B': [ 'C', 'G', 'T', 'U', 'B', 'D', 'H', 'K', 'M', 'R', 'S', 'V', 'W', 'Y' ],
             'V': [ 'A', 'C', 'G', 'B', 'D', 'H', 'K', 'M', 'R', 'S', 'V', 'W' ],
             'H': [ 'A', 'C', 'T', 'U', 'B', 'D', 'H', 'K', 'M', 'R', 'S', 'V', 'W', 'Y' ],
             'D': [ 'A', 'G', 'T', 'U', 'B', 'D', 'H', 'K', 'M', 'R', 'S', 'V', 'W', 'Y' ],
             '.': dot_letters,
             '-': [ '-' ]}
    # Add handling for N and X
    if n_handling == "all":
        codes[ 'N' ] = [ 'A', 'C', 'G', 'T', 'U', 'B', 'D', 'H', 'K', 'M', 'N', 'R', 'S', 'V', 'W', 'X', 'Y' ]
        codes[ 'X' ] = [ 'A', 'C', 'G', 'T', 'U', 'B', 'D', 'H', 'K', 'M', 'N', 'R', 'S', 'V', 'W', 'X', 'Y' ]
        for code in codes.keys():
            if code != '.' and code != '-':
                codes[code].append( 'N' )
                codes[code].append( 'X' )
    else:
        # 'N', 'X' and '.' deliberately share the single dot_letters list, so
        # the extend() below is visible through all three keys
        codes[ 'N' ] = dot_letters
        codes[ 'X' ] = dot_letters
        codes[ '.' ].extend( [ 'N', 'X' ] )
    # Expand conditions to allow for DNA codes
    try:
        match_replace = {}
        # NOTE(review): '+-.' inside the character class is a range and so also
        # admits ','; the pattern is kept as it was to avoid changing what matches
        pat = re.compile( r'c\d+\s*[!=]=\s*[\w\d"\'+-.]+' )
        matches = pat.findall( cond_text )
        for match in matches:
            if match.find( 'chr' ) >= 0 or match.find( 'scaffold' ) >= 0 or match.find( '+' ) >= 0:
                # Chromosome, scaffold and strand comparisons are kept as plain comparisons
                if match.find( '==' ) >= 0:
                    match_parts = match.split( '==' )
                elif match.find( '!=' ) >= 0:
                    match_parts = match.split( '!=' )
                else:
                    raise Exception( "The operators '==' and '!=' were not found." )
                left = match_parts[0].strip()
                right = match_parts[1].strip()
                new_match = "(%s)" % ( match )
            elif match.find( '==' ) > 0:
                match_parts = match.split( '==' )
                left = match_parts[0].strip()
                right = match_parts[1].strip()
                new_match = '(%s in codes[%s] and %s in codes[%s])' % ( left, right, right, left )
            elif match.find( '!=' ) > 0 :
                match_parts = match.split( '!=' )
                left = match_parts[0].strip()
                right = match_parts[1].strip()
                new_match = '(%s not in codes[%s] or %s not in codes[%s])' % ( left, right, right, left )
            else:
                raise Exception( "The operators '==' and '!=' were not found." )
            # Explicit raises instead of asserts so validation survives python -O
            if not left.startswith( 'c' ):
                raise Exception( 'The column names should start with c (lowercase)' )
            if right.find( "'" ) >= 0 or right.find( '"' ) >= 0:
                test = right.replace( "'", '' ).replace( '"', '' )
                if not ( test in string.ascii_uppercase or test.find( '+' ) >= 0 or test.find( '.' ) >= 0 or test.find( '-' ) >= 0
                         or test.startswith( 'chr' ) or test.startswith( 'scaffold' ) ):
                    raise Exception( 'The value to search for should be a valid base, code, plus sign, chromosome (like "chr1") or scaffold (like "scaffold5"). '
                                     'Use the general filter tool to filter on anything else first' )
            else:
                if not right.startswith( 'c' ):
                    raise Exception( 'The column names should start with c (lowercase)' )
            match_replace[match] = new_match
        if not match_replace:
            raise Exception( 'There do not appear to be any valid conditions' )
        cond_text = _replace_matches( pat, cond_text, match_replace )
    except Exception:
        # sys.exc_info() is used so this parses on both old and new interpreters
        e = sys.exc_info()[1]
        stop_err( "At least one of your conditions is invalid. Make sure to use only '!=' or '==', valid column numbers, and valid base values.\n" + str(e) )

    # Attempt to determine if the condition includes executable stuff and, if so, exit
    secured = dir()
    operands = get_operands( cond_text )
    for operand in operands:
        try:
            int( operand )
        except ValueError:
            if operand in secured:
                stop_err( "Illegal value '%s' in condition '%s'" % ( operand, cond_text ) )

    # Prepare the column variable names and wrappers for column data types
    cols, type_casts = [], []
    for col in range( 1, in_columns + 1 ):
        col_name = "c%d" % col
        cols.append( col_name )
        col_type = in_column_types[ col - 1 ]
        type_cast = "%s(%s)" % ( col_type, col_name )
        type_casts.append( type_cast )

    col_str = ', '.join( cols )    # 'c1, c2, c3, c4'
    type_cast_str = ', '.join( type_casts )  # 'str(c1), int(c2), int(c3), str(c4)'
    out = open( output, 'wt' )
    # Read and filter input file, skipping invalid lines.
    # 'invalid_line is None' (not truthiness) is tested so that a blank first
    # invalid line is still remembered as the first one.
    code = '''
for i, line in enumerate( in_file ):
    total_lines += 1
    line = line.rstrip( '\\r\\n' )
    if not line or line.startswith( '#' ):
        skipped_lines += 1
        if invalid_line is None:
            first_invalid_line = i + 1
            invalid_line = line
        continue
    try:
        %s = line.split( '\\t' )
        %s = %s
        if %s:
            lines_kept += 1
            out.write( line + '\\n' )
    except Exception:
        skipped_lines += 1
        if invalid_line is None:
            first_invalid_line = i + 1
            invalid_line = line
''' % ( col_str, col_str, type_cast_str, cond_text )

    # The generated loop runs in its own namespace: it sees only these names
    # (plus builtins) and leaves its counters here to be read back afterwards
    namespace = { 'codes': codes,
                  'out': out,
                  'skipped_lines': 0,
                  'first_invalid_line': 0,
                  'invalid_line': None,
                  'lines_kept': 0,
                  'total_lines': 0 }

    valid_filter = True
    in_file = None
    try:
        try:
            in_file = open( input )
            namespace[ 'in_file' ] = in_file
            exec( code, namespace )
        finally:
            # Both handles are released whether or not the filter succeeded
            if in_file is not None:
                in_file.close()
            out.close()
    except Exception:
        e = sys.exc_info()[1]
        if str( e ).startswith( 'invalid syntax' ):
            valid_filter = False
            stop_err( 'Filter condition "%s" likely invalid. See tool tips, syntax and examples.' % orig_cond_text + ' '+str(e))
        else:
            stop_err( str( e ) )

    if valid_filter:
        total_lines = namespace[ 'total_lines' ]
        skipped_lines = namespace[ 'skipped_lines' ]
        lines_kept = namespace[ 'lines_kept' ]
        first_invalid_line = namespace[ 'first_invalid_line' ]
        invalid_line = namespace[ 'invalid_line' ]
        valid_lines = total_lines - skipped_lines
        print( 'Filtering with %s, ' % orig_cond_text )
        if valid_lines > 0:
            print( 'kept %4.2f%% of %d lines.' % ( 100.0*lines_kept/valid_lines, total_lines ) )
        else:
            print( 'Possible invalid filter condition "%s" or non-existent column referenced. See tool tips, syntax and examples.' % orig_cond_text )
        if skipped_lines > 0:
            print( 'Skipped %d invalid lines starting at line #%d: "%s"' % ( skipped_lines, first_invalid_line, invalid_line ) )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
215
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
# Run the tool only when this file is executed as a script, not when imported
if __name__ == "__main__":
    __main__()