Mercurial > repos > greg > vsnp_build_tables
comparison vsnp_build_tables.py @ 1:b60858c3eb91 draft
Uploaded
| author | greg |
|---|---|
| date | Thu, 30 Apr 2020 15:31:55 -0400 |
| parents | 38a38babcb31 |
| children | abfb861df879 |
comparison
equal
deleted
inserted
replaced
| 0:38a38babcb31 | 1:b60858c3eb91 |
|---|---|
| 13 INPUT_JSON_DIR = 'input_json_dir' | 13 INPUT_JSON_DIR = 'input_json_dir' |
| 14 INPUT_NEWICK_DIR = 'input_newick_dir' | 14 INPUT_NEWICK_DIR = 'input_newick_dir' |
| 15 # Maximum columns allowed in a LibreOffice | 15 # Maximum columns allowed in a LibreOffice |
| 16 # spreadsheet is 1024. Excel allows for | 16 # spreadsheet is 1024. Excel allows for |
| 17 # 16,384 columns, but we'll set the lower | 17 # 16,384 columns, but we'll set the lower |
| 18 # number as the maximum since Galaxy is | 18 # number as the maximum. Some browsers |
| 19 # mostly run on Linux. | 19 # (e.g., Firefox on Linux) are configured |
| 20 MAXCOLS = 10000 | 20 # to use LibreOffice for Excel spreadsheets. |
| | 21 MAXCOLS = 1024 |
| 21 OUTPUT_EXCEL_DIR = 'output_excel_dir' | 22 OUTPUT_EXCEL_DIR = 'output_excel_dir' |
| 22 | 23 |
| 23 | 24 |
| 24 def annotate_table(table_df, group, annotation_dict): | 25 def annotate_table(table_df, group, annotation_dict): |
| 25 for gbk_chrome, pro in list(annotation_dict.items()): | 26 for gbk_chrome, pro in list(annotation_dict.items()): |
| 243 snps_df = pandas.read_json(json_file, orient='split') | 244 snps_df = pandas.read_json(json_file, orient='split') |
| 244 with open(newick_file, 'r') as fh: | 245 with open(newick_file, 'r') as fh: |
| 245 for line in fh: | 246 for line in fh: |
| 246 line = re.sub('[:,]', '\n', line) | 247 line = re.sub('[:,]', '\n', line) |
| 247 line = re.sub('[)(]', '', line) | 248 line = re.sub('[)(]', '', line) |
| 248 line = re.sub('[0-9].*\.[0-9].*\n', '', line) | 249 line = re.sub(r'[0-9].*\.[0-9].*\n', '', line) |
| 249 line = re.sub('root\n', '', line) | 250 line = re.sub('root\n', '', line) |
| 250 sample_order = line.split('\n') | 251 sample_order = line.split('\n') |
| 251 sample_order = list([_f for _f in sample_order if _f]) | 252 sample_order = list([_f for _f in sample_order if _f]) |
| 252 sample_order.insert(0, 'root') | 253 sample_order.insert(0, 'root') |
| 253 tree_order = snps_df.loc[sample_order] | 254 tree_order = snps_df.loc[sample_order] |
