Mercurial > repos > greg > vsnp_build_tables
comparison vsnp_build_tables.py @ 1:b60858c3eb91 draft
Uploaded
author | greg |
---|---|
date | Thu, 30 Apr 2020 15:31:55 -0400 |
parents | 38a38babcb31 |
children | abfb861df879 |
comparison
equal
deleted
inserted
replaced
0:38a38babcb31 | 1:b60858c3eb91 |
---|---|
13 INPUT_JSON_DIR = 'input_json_dir' | 13 INPUT_JSON_DIR = 'input_json_dir' |
14 INPUT_NEWICK_DIR = 'input_newick_dir' | 14 INPUT_NEWICK_DIR = 'input_newick_dir' |
15 # Maximum columns allowed in a LibreOffice | 15 # Maximum columns allowed in a LibreOffice |
16 # spreadsheet is 1024. Excel allows for | 16 # spreadsheet is 1024. Excel allows for |
17 # 16,384 columns, but we'll set the lower | 17 # 16,384 columns, but we'll set the lower |
18 # number as the maximum since Galaxy is | 18 # number as the maximum. Some browsers |
19 # mostly run on Linux. | 19 # (e.g., Firefox on Linux) are configured |
20 MAXCOLS = 10000 | 20 # to use LibreOffice for Excel spreadsheets. |
 | 21 MAXCOLS = 1024 |
21 OUTPUT_EXCEL_DIR = 'output_excel_dir' | 22 OUTPUT_EXCEL_DIR = 'output_excel_dir' |
22 | 23 |
23 | 24 |
24 def annotate_table(table_df, group, annotation_dict): | 25 def annotate_table(table_df, group, annotation_dict): |
25 for gbk_chrome, pro in list(annotation_dict.items()): | 26 for gbk_chrome, pro in list(annotation_dict.items()): |
243 snps_df = pandas.read_json(json_file, orient='split') | 244 snps_df = pandas.read_json(json_file, orient='split') |
244 with open(newick_file, 'r') as fh: | 245 with open(newick_file, 'r') as fh: |
245 for line in fh: | 246 for line in fh: |
246 line = re.sub('[:,]', '\n', line) | 247 line = re.sub('[:,]', '\n', line) |
247 line = re.sub('[)(]', '', line) | 248 line = re.sub('[)(]', '', line) |
248 line = re.sub('[0-9].*\.[0-9].*\n', '', line) | 249 line = re.sub(r'[0-9].*\.[0-9].*\n', '', line) |
249 line = re.sub('root\n', '', line) | 250 line = re.sub('root\n', '', line) |
250 sample_order = line.split('\n') | 251 sample_order = line.split('\n') |
251 sample_order = list([_f for _f in sample_order if _f]) | 252 sample_order = list([_f for _f in sample_order if _f]) |
252 sample_order.insert(0, 'root') | 253 sample_order.insert(0, 'root') |
253 tree_order = snps_df.loc[sample_order] | 254 tree_order = snps_df.loc[sample_order] |