comparison filter_kw_val.py @ 4:2080e2a4f209 draft

planemo upload commit ef71f7a32bb76c79052b535be1d0beceff6e03a5-dirty
author proteore
date Tue, 05 Feb 2019 08:22:47 -0500
parents 52a7afd01c6d
children 33ca9ba2495a
comparison
equal deleted inserted replaced
3:68cee865018e 4:2080e2a4f209
41 41
42 #Check if a variable is a float or an integer 42 #Check if a variable is a float or an integer
# Patterns are compiled once at import time instead of on every call.
# Plain integer: optional minus sign followed by one or more digits.
_INT_FORMAT = re.compile(r"^-?[0-9]+$")
# Plain decimal: digits with an optional ".digits" fraction. The dot is
# escaped so that only a real decimal point is accepted as separator.
_FLOAT_FORMAT = re.compile(r"^-?[0-9]+(?:\.[0-9]+)?$")
# Scientific notation: mantissa, E/e, then a mandatory (signed) exponent.
_SCIENTIFIC_FORMAT = re.compile(r"^[-+]?[0-9]+\.?[0-9]*[Ee][-+]?[0-9]+$")


def is_number(number_format, n):
    """Tell whether the string *n* is written as a number of the given kind.

    number_format -- "int" to accept integers only, "float" to accept
                     decimal numbers (with or without a fractional part)
                     and scientific notation; any other value yields False.
    n             -- the text to check; the decimal separator must be ".".

    Returns True when *n* matches the requested format, False otherwise.
    """
    if number_format == "int":
        return _INT_FORMAT.match(n) is not None

    if number_format == "float":
        # Try the plain decimal form first, then fall back to scientific
        # notation (e.g. "1.2e-5").
        if _FLOAT_FORMAT.match(n) is not None:
            return True
        return _SCIENTIFIC_FORMAT.match(n) is not None

    # Unknown format name: nothing can match.
    return False
53 58
54 #Filter the document 59 #Filter the document
55 def filters(args): 60 def filters(args):
56 filename = args.input.split(",")[0] 61 filename = args.input.split(",")[0]
57 header = str_to_bool(args.input.split(",")[1]) 62 header = str_to_bool(args.input.split(",")[1])
73 78
74 if args.value: 79 if args.value:
75 for v in args.value: 80 for v in args.value:
76 v[0] = v[0].replace(",",".") 81 v[0] = v[0].replace(",",".")
77 if is_number("float", v[0]): 82 if is_number("float", v[0]):
78 csv_file = comma_number_to_float(csv_file,v[1],header) 83 csv_file = comma_number_to_float(csv_file,column_from_txt(v[1]),header)
79 results_dict = filter_value(csv_file, header, results_dict, v[0], v[1], v[2]) 84 results_dict = filter_value(csv_file, header, results_dict, v[0], v[1], v[2])
80 else: 85 else:
81 raise ValueError("Please enter a number in filter by value") 86 raise ValueError("Please enter a number in filter by value")
82 87
83 if args.values_range: 88 if args.values_range:
84 for vr in args.values_range: 89 for vr in args.values_range:
85 vr[:2] = [value.replace(",",".") for value in vr[:2]] 90 vr[:2] = [value.replace(",",".") for value in vr[:2]]
86 csv_file = comma_number_to_float(csv_file,vr[2],header) 91 csv_file = comma_number_to_float(csv_file,column_from_txt(vr[2]),header)
87 if (is_number("float", vr[0]) or is_number("int", vr[0])) and (is_number("float",vr[1]) or is_number("int",vr[1])): 92 if (is_number("float", vr[0]) or is_number("int", vr[0])) and (is_number("float",vr[1]) or is_number("int",vr[1])):
88 results_dict = filter_values_range(csv_file, header, results_dict, vr[0], vr[1], vr[2], vr[3]) 93 results_dict = filter_values_range(csv_file, header, results_dict, vr[0], vr[1], vr[2], vr[3])
89 94
90 remaining_lines=[] 95 remaining_lines=[]
91 filtered_lines=[] 96 filtered_lines=[]
141 unsortable_lines = [i for i,line in enumerate(tab) if (line[sort_col]=='' or line[sort_col] == 'NA')] 146 unsortable_lines = [i for i,line in enumerate(tab) if (line[sort_col]=='' or line[sort_col] == 'NA')]
142 unsorted_tab=[ tab[i] for i in unsortable_lines] 147 unsorted_tab=[ tab[i] for i in unsortable_lines]
143 tab= [line for i,line in enumerate(tab) if i not in unsortable_lines] 148 tab= [line for i,line in enumerate(tab) if i not in unsortable_lines]
144 149
145 if only_number(tab,sort_col) and any_float(tab,sort_col) : 150 if only_number(tab,sort_col) and any_float(tab,sort_col) :
151 tab = comma_number_to_float(tab,sort_col,False)
146 tab = sorted(tab, key=lambda row: float(row[sort_col]), reverse=reverse) 152 tab = sorted(tab, key=lambda row: float(row[sort_col]), reverse=reverse)
147 elif only_number(tab,sort_col): 153 elif only_number(tab,sort_col):
148 tab = sorted(tab, key=lambda row: int(row[sort_col]), reverse=reverse) 154 tab = sorted(tab, key=lambda row: int(row[sort_col]), reverse=reverse)
149 else : 155 else :
150 tab = sorted(tab, key=lambda row: row[sort_col], reverse=reverse) 156 tab = sorted(tab, key=lambda row: row[sort_col], reverse=reverse)
165 171
166 return tmp 172 return tmp
167 173
168 #turn into float a column 174 #turn into float a column
169 def comma_number_to_float(csv_file,ncol,header) : 175 def comma_number_to_float(csv_file,ncol,header) :
170 ncol = int(ncol.replace("c","")) - 1
171 if header : 176 if header :
172 tmp=[csv_file[0]] 177 tmp=[csv_file[0]]
173 csv_file=csv_file[1:] 178 csv_file=csv_file[1:]
174 else : 179 else :
175 tmp=[] 180 tmp=[]
188 return True 193 return True
189 194
190 return False 195 return False
191 196
def only_number(tab,col) :
    """Return True when every row of *tab* holds a number in column *col*.

    tab -- iterable of rows; each row is indexed with *col* and the cell
           found there must be a string.
    col -- index of the column to inspect.

    A comma in a cell is read as a decimal separator. An empty *tab*
    yields True.
    """
    def _cell_is_numeric(cell):
        # Normalise the decimal separator once, then accept the cell if
        # it reads as a float or, failing that, as an integer.
        normalised = cell.replace(",", ".")
        return is_number("float", normalised) or is_number("int", normalised)

    return all(_cell_is_numeric(row[col]) for row in tab)
198 202