comparison vsnp_build_tables.py @ 1:b60858c3eb91 draft

Uploaded
author greg
date Thu, 30 Apr 2020 15:31:55 -0400
parents 38a38babcb31
children abfb861df879
comparison
equal deleted inserted replaced
0:38a38babcb31 1:b60858c3eb91
13 INPUT_JSON_DIR = 'input_json_dir' 13 INPUT_JSON_DIR = 'input_json_dir'
14 INPUT_NEWICK_DIR = 'input_newick_dir' 14 INPUT_NEWICK_DIR = 'input_newick_dir'
15 # Maximum columns allowed in a LibreOffice 15 # Maximum columns allowed in a LibreOffice
16 # spreadsheet is 1024. Excel allows for 16 # spreadsheet is 1024. Excel allows for
17 # 16,384 columns, but we'll set the lower 17 # 16,384 columns, but we'll set the lower
18 # number as the maximum since Galaxy is 18 # number as the maximum. Some browsers
19 # mostly run on Linux. 19 # (e.g., Firefox on Linux) are configured
20 MAXCOLS = 10000 20 # to use LibreOffice for Excel spreadsheets.
21 MAXCOLS = 1024
21 OUTPUT_EXCEL_DIR = 'output_excel_dir' 22 OUTPUT_EXCEL_DIR = 'output_excel_dir'
22 23
23 24
24 def annotate_table(table_df, group, annotation_dict): 25 def annotate_table(table_df, group, annotation_dict):
25 for gbk_chrome, pro in list(annotation_dict.items()): 26 for gbk_chrome, pro in list(annotation_dict.items()):
243 snps_df = pandas.read_json(json_file, orient='split') 244 snps_df = pandas.read_json(json_file, orient='split')
244 with open(newick_file, 'r') as fh: 245 with open(newick_file, 'r') as fh:
245 for line in fh: 246 for line in fh:
246 line = re.sub('[:,]', '\n', line) 247 line = re.sub('[:,]', '\n', line)
247 line = re.sub('[)(]', '', line) 248 line = re.sub('[)(]', '', line)
248 line = re.sub('[0-9].*\.[0-9].*\n', '', line) 249 line = re.sub(r'[0-9].*\.[0-9].*\n', '', line)
249 line = re.sub('root\n', '', line) 250 line = re.sub('root\n', '', line)
250 sample_order = line.split('\n') 251 sample_order = line.split('\n')
251 sample_order = list([_f for _f in sample_order if _f]) 252 sample_order = list([_f for _f in sample_order if _f])
252 sample_order.insert(0, 'root') 253 sample_order.insert(0, 'root')
253 tree_order = snps_df.loc[sample_order] 254 tree_order = snps_df.loc[sample_order]