annotate vsnp_build_tables.py @ 0:12f2b14549f6 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
author iuc
date Wed, 02 Dec 2020 09:11:24 +0000
parents
children b03e88e7bb1d
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
1 #!/usr/bin/env python
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
2
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
3 import argparse
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
4 import multiprocessing
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
5 import os
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
6 import queue
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
7 import re
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
8
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
9 import pandas
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
10 import pandas.io.formats.excel
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
11 from Bio import SeqIO
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
12
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
13 INPUT_JSON_AVG_MQ_DIR = 'input_json_avg_mq_dir'
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
14 INPUT_JSON_DIR = 'input_json_dir'
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
15 INPUT_NEWICK_DIR = 'input_newick_dir'
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
16 # Maximum columns allowed in a LibreOffice
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
17 # spreadsheet is 1024. Excel allows for
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
18 # 16,384 columns, but we'll set the lower
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
19 # number as the maximum. Some browsers
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
20 # (e.g., Firefox on Linux) are configured
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
21 # to use LibreOffice for Excel spreadsheets.
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
22 MAXCOLS = 1024
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
23 OUTPUT_EXCEL_DIR = 'output_excel_dir'
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
24
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
25
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
def annotate_table(table_df, group, annotation_dict):
    """Return ``table_df`` with an extra ``annotations`` row.

    Each column label of ``table_df`` is split on ``':'`` into a reference
    name and a position.  For every reference present in
    ``annotation_dict`` the position is looked up in that reference's
    feature frame and an annotation string is produced for the column.

    Parameters:
        table_df: data frame whose column labels look like
            ``"<reference>:<position>"``.
        group: group name.  Kept for interface compatibility; it used to
            name an on-disk scratch file, which is no longer written.
        annotation_dict: mapping of reference name to a feature data frame
            that has ``chrom``, ``locus``, ``product`` and ``gene`` columns
            and an index supporting ``get_loc(<int position>)``
            (presumably an IntervalIndex of feature start/stop -- TODO
            confirm against the code that builds the dictionary).

    Returns:
        A data frame with the same orientation as ``table_df``, restricted
        to the columns for which an annotation line was produced (the merge
        is an inner join), with a final row labelled ``annotations``.

    Raises:
        Whatever ``pandas.read_csv`` raises for empty input when no
        annotation line was produced at all (expected to be
        ``pandas.errors.EmptyDataError``).
    """
    # Local import: only this function needs an in-memory text buffer.
    import io

    feature_columns = ['chrom', 'locus', 'product', 'gene']
    # The column labels do not depend on the chromosome being processed,
    # so split them into reference/position once instead of per iteration.
    ref_series = pandas.Series(list(table_df))
    ref_df = pandas.DataFrame(ref_series.str.split(':', expand=True).values, columns=['reference', 'position'])
    # Annotation lines are collected in memory rather than appended to a
    # file in the working directory: a stale file left by an aborted run
    # can no longer leak into the result, and nothing needs to be removed.
    buffer = io.StringIO()
    for gbk_chrome, pro in annotation_dict.items():
        positions = ref_df.loc[ref_df['reference'] == gbk_chrome, 'position']
        for pos in positions:
            try:
                hits = pro.iloc[pro.index.get_loc(int(pos))][feature_columns].values
            except KeyError:
                # The position is not covered by any feature.
                print("{}:{}\tNo annotated product".format(gbk_chrome, pos), file=buffer)
                continue
            # get_loc may select several features (2-D values) or exactly
            # one (1-D values).  Decide by shape: unpacking values[0] and
            # waiting for ValueError silently mis-parses the single-match
            # case when the chromosome name is exactly four characters
            # long, because the name itself unpacks into four letters.
            if hits.ndim > 1:
                hits = hits[0]
            chrom, locus, product, gene = hits
            # The annotation text is "<product>, <gene>, <locus>".
            print("{}:{}\t{}, {}, {}".format(chrom, pos, product, gene, locus), file=buffer)
    # Parse the collected lines exactly as the former scratch file was parsed.
    buffer.seek(0)
    annotations_df = pandas.read_csv(buffer, sep='\t', header=None, names=['index', 'annotations'], index_col='index')
    # Transpose so that column labels become the index, join the
    # annotations on that index, and transpose back.
    table_df_transposed = table_df.T
    table_df_transposed.index = table_df_transposed.index.rename('index')
    table_df_transposed = table_df_transposed.merge(annotations_df, left_index=True, right_index=True)
    return table_df_transposed.T
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
62
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
63
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
def excel_formatter(json_file_name, excel_file_name, group, annotation_dict):
    """Write the table stored in ``json_file_name`` as a colour-coded workbook.

    Parameters:
        json_file_name: path of a JSON file read with ``orient='split'``.
        excel_file_name: path of the ``.xlsx`` file to create (written with
            the ``xlsxwriter`` engine).
        group: group name, forwarded to ``annotate_table``.
        annotation_dict: annotation mapping forwarded to ``annotate_table``;
            when ``None`` an empty row labelled ``no annotations`` is
            appended instead.
    """
    # NOTE(review): presumably meant to switch off pandas' default header
    # styling -- confirm it still has an effect with the installed pandas.
    pandas.io.formats.excel.header_style = None
    table_df = pandas.read_json(json_file_name, orient='split')
    if annotation_dict is not None:
        table_df = annotate_table(table_df, group, annotation_dict)
    else:
        # DataFrame.append is gone from current pandas; concatenating an
        # empty one-row frame adds the same all-missing placeholder row.
        placeholder = pandas.DataFrame(index=['no annotations'], columns=table_df.columns)
        table_df = pandas.concat([table_df, placeholder])
    # The context manager saves and closes the workbook on exit (also when
    # an error occurs), replacing the removed ExcelWriter.save() call.
    with pandas.ExcelWriter(excel_file_name, engine='xlsxwriter') as writer:
        table_df.to_excel(writer, sheet_name='Sheet1')
        writer_book = writer.book
        ws = writer.sheets['Sheet1']
        format_a = writer_book.add_format({'bg_color': '#58FA82'})
        format_g = writer_book.add_format({'bg_color': '#F7FE2E'})
        format_c = writer_book.add_format({'bg_color': '#0000FF'})
        format_t = writer_book.add_format({'bg_color': '#FF0000'})
        format_normal = writer_book.add_format({'bg_color': '#FDFEFE'})
        formatlowqual = writer_book.add_format({'font_color': '#C70039', 'bg_color': '#E2CFDD'})
        format_ambigous = writer_book.add_format({'font_color': '#C70039', 'bg_color': '#E2CFDD'})
        format_n = writer_book.add_format({'bg_color': '#E2CFDD'})
        format_annotation = writer_book.add_format({'font_color': '#0A028C', 'rotation': '-90', 'align': 'top'})
        rows, cols = table_df.shape
        # Wide first column for the row labels, narrow columns for the data.
        ws.set_column(0, 0, 30)
        ws.set_column(1, cols, 2.1)
        ws.freeze_panes(2, 1)
        # Kept from the original: the row after the last one gets the
        # annotation format with a height of cols + 1.
        ws.set_row(rows + 1, cols + 1, format_annotation)
        # Make sure that row/column locations don't overlap.
        ws.conditional_format(rows - 2, 1, rows - 1, cols, {'type': 'cell', 'criteria': '<', 'value': 55, 'format': formatlowqual})
        ws.conditional_format(2, 1, rows - 2, cols, {'type': 'cell', 'criteria': '==', 'value': 'B$2', 'format': format_normal})
        # Text rules for the data cells, added in the original order.
        text_rules = [
            ('A', format_a),
            ('G', format_g),
            ('C', format_c),
            ('T', format_t),
            ('S', format_ambigous),
            ('Y', format_ambigous),
            ('R', format_ambigous),
            ('W', format_ambigous),
            ('K', format_ambigous),
            ('M', format_ambigous),
            ('N', format_n),
            ('-', format_n),
        ]
        for text, cell_format in text_rules:
            ws.conditional_format(2, 1, rows - 2, cols, {'type': 'text', 'criteria': 'containing', 'value': text, 'format': cell_format})
        # Rewrite the header cells with rotated text.
        format_rotation = writer_book.add_format({})
        format_rotation.set_rotation(90)
        for column_num, column_name in enumerate(table_df.columns):
            ws.write(0, column_num + 1, column_name, format_rotation)
        # Set last row.
        ws.set_row(rows, 400, format_annotation)
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
113
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
114
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
115 def get_annotation_dict(gbk_file):
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
116 gbk_dict = SeqIO.to_dict(SeqIO.parse(gbk_file, "genbank"))
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
117 annotation_dict = {}
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
118 tmp_file = "features.csv"
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
119 # Create a file of chromosomes and features.
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
120 for chromosome in list(gbk_dict.keys()):
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
121 with open(tmp_file, 'w+') as fh:
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
122 for feature in gbk_dict[chromosome].features:
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
123 if "CDS" in feature.type or "rRNA" in feature.type:
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
124 try:
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
125 product = feature.qualifiers['product'][0]
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
126 except KeyError:
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
127 product = None
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
128 try:
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
129 locus = feature.qualifiers['locus_tag'][0]
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
130 except KeyError:
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
131 locus = None
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
132 try:
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
133 gene = feature.qualifiers['gene'][0]
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
134 except KeyError:
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
135 gene = None
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
136 fh.write("%s\t%d\t%d\t%s\t%s\t%s\n" % (chromosome, int(feature.location.start), int(feature.location.end), locus, product, gene))
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
137 # Read the chromosomes and features file into a data frame.
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
138 df = pandas.read_csv(tmp_file, sep='\t', names=["chrom", "start", "stop", "locus", "product", "gene"])
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
139 # Process the data.
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
140 df = df.sort_values(['start', 'gene'], ascending=[True, False])
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
141 df = df.drop_duplicates('start')
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
142 pro = df.reset_index(drop=True)
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
143 pro.index = pandas.IntervalIndex.from_arrays(pro['start'], pro['stop'], closed='both')
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
144 annotation_dict[chromosome] = pro
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
145 return annotation_dict
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
146
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
147
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
def get_base_file_name(file_path):
    """Return the name of the file at file_path with its trailing extension removed.

    The directory part of file_path is discarded first.  A "." found after
    the first character marks a regular extension, which is stripped.  When
    there is no such dot but a "_" is found after the first character, the
    text following the last "_" is dropped instead.  Otherwise the bare file
    name is returned unchanged.
    """
    name = os.path.basename(file_path)
    # find() > 0 deliberately ignores a separator in the very first position.
    if name.find(".") > 0:
        # Eliminate the extension.
        stem, _extension = os.path.splitext(name)
        return stem
    if name.find("_") > 0:
        # The dot extension was likely changed to the "_" character,
        # so drop everything from the last underscore onward.
        return name.rsplit("_", 1)[0]
    return name
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
160
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
161
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
def output_cascade_table(cascade_order, mqdf, group, annotation_dict):
    """Append the map-quality frame to the cascade-ordered frame and output it.

    cascade_order and mqdf are concatenated row-wise with an inner join, so
    only the columns present in both frames are kept.  The result is handed
    to output_table with the type string "cascade".
    """
    frames = [cascade_order, mqdf]
    combined = pandas.concat(frames, join='inner')
    output_table(combined, "cascade", group, annotation_dict)
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
165
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
166
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
def output_excel(df, type_str, group, annotation_dict, count=None):
    """Write df to a temporary JSON file and have excel_formatter build the Excel file.

    File names are derived from type_str, optionally prefixed with group
    (when it is not None) and optionally suffixed with count (when it is
    not None).  The JSON file is written relative to the current working
    directory; the Excel file is placed in OUTPUT_EXCEL_DIR.
    """
    has_group = group is not None
    has_count = count is not None
    if has_group and has_count:
        json_file_name = "%s_%s_order_mq_%d.json" % (group, type_str, count)
        excel_base_name = "%s_%s_table_%d.xlsx" % (group, type_str, count)
    elif has_count:
        json_file_name = "%s_order_mq_%d.json" % (type_str, count)
        excel_base_name = "%s_table_%d.xlsx" % (type_str, count)
    elif has_group:
        json_file_name = "%s_%s_order_mq.json" % (group, type_str)
        excel_base_name = "%s_%s_table.xlsx" % (group, type_str)
    else:
        json_file_name = "%s_order_mq.json" % type_str
        excel_base_name = "%s_table.xlsx" % type_str
    excel_file_name = os.path.join(OUTPUT_EXCEL_DIR, excel_base_name)
    # Output the temporary json file that is used by the excel_formatter.
    df.to_json(json_file_name, orient='split')
    # Output the Excel file.
    excel_formatter(json_file_name, excel_file_name, group, annotation_dict)
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
187
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
188
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
def output_sort_table(cascade_order, mqdf, group, annotation_dict):
    """Reorder the columns of cascade_order by position and output the table.

    The column labels of cascade_order are split on ":" into a 'chrom' and a
    'pos' part; the columns are then ordered by the integer value of 'pos'.
    The map-quality frame mqdf is appended with an inner join and the result
    is handed to output_table with the type string "sort".
    """
    # Transpose so that every column label becomes a row label that can be
    # parsed and sorted on.
    by_label = cascade_order.T
    by_label['abs_value'] = by_label.index
    by_label[['chrom', 'pos']] = by_label['abs_value'].str.split(':', expand=True)
    by_label = by_label.drop(['abs_value', 'chrom'], axis=1)
    by_label['pos'] = by_label['pos'].astype(int)
    # NOTE(review): 'chrom' is discarded before sorting, so the ordering uses
    # the numeric position only - confirm this is intended for references
    # with more than one chromosome.
    position_ordered = by_label.sort_values(by=['pos']).drop(['pos'], axis=1).T
    sort_order_mq = pandas.concat([position_ordered, mqdf], join='inner')
    output_table(sort_order_mq, "sort", group, annotation_dict)
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
200
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
201
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
def output_table(df, type_str, group, annotation_dict):
    """Output df as one Excel table, or as several when it has too many columns.

    When df has fewer than MAXCOLS columns a single, un-numbered table is
    produced.  Otherwise df is cut into consecutive slices of at most
    MAXCOLS columns and each slice is passed to output_excel with a 1-based
    count.  A slice is only emitted when it contains at least one column, so
    a column count that is an exact multiple of MAXCOLS no longer produces
    an empty trailing table.

    df -- the data frame to output
    type_str -- table type string forwarded to output_excel
    group -- group name; a string starting with "dataset" is replaced by None
    annotation_dict -- forwarded unchanged to output_excel
    """
    if isinstance(group, str) and group.startswith("dataset"):
        # Inputs are single files, not collections,
        # so input file names are not useful for naming
        # output files.
        group_str = None
    else:
        group_str = group
    column_count = df.shape[1]
    if column_count < MAXCOLS:
        output_excel(df, type_str, group_str, annotation_dict)
        return
    # Here the number of columns has reached MAXCOLS (per the original
    # comments, the maximum allowed by Excel), so multiple outputs
    # will be produced.
    # Stepping through the chunk start offsets guarantees every chunk holds
    # at least one column.
    for count, chunk_start in enumerate(range(0, column_count, MAXCOLS), start=1):
        df_of_type = df.iloc[:, chunk_start:chunk_start + MAXCOLS]
        output_excel(df_of_type, type_str, group_str, annotation_dict, count=count)
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
230
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
231
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
232 def preprocess_tables(task_queue, annotation_dict, timeout):
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
233 while True:
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
234 try:
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
235 tup = task_queue.get(block=True, timeout=timeout)
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
236 except queue.Empty:
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
237 break
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
238 newick_file, json_file, json_avg_mq_file = tup
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
239 avg_mq_series = pandas.read_json(json_avg_mq_file, typ='series', orient='split')
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
240 # Map quality to dataframe.
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
241 mqdf = avg_mq_series.to_frame(name='MQ')
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
242 mqdf = mqdf.T
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
243 # Get the group.
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
244 group = get_base_file_name(newick_file)
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
245 snps_df = pandas.read_json(json_file, orient='split')
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
246 with open(newick_file, 'r') as fh:
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
247 for line in fh:
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
248 line = re.sub('[:,]', '\n', line)
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
249 line = re.sub('[)(]', '', line)
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
250 line = re.sub(r'[0-9].*\.[0-9].*\n', '', line)
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
251 line = re.sub('root\n', '', line)
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
252 sample_order = line.split('\n')
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
253 sample_order = list([_f for _f in sample_order if _f])
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
254 sample_order.insert(0, 'root')
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
255 tree_order = snps_df.loc[sample_order]
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
256 # Count number of SNPs in each column.
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
257 snp_per_column = []
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
258 for column_header in tree_order:
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
259 count = 0
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
260 column = tree_order[column_header]
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
261 for element in column:
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
262 if element != column[0]:
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
263 count = count + 1
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
264 snp_per_column.append(count)
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
265 row1 = pandas.Series(snp_per_column, tree_order.columns, name="snp_per_column")
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
266 # Count number of SNPS from the
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
267 # top of each column in the table.
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
268 snp_from_top = []
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
269 for column_header in tree_order:
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
270 count = 0
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
271 column = tree_order[column_header]
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
272 # for each element in the column
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
273 # skip the first element
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
274 for element in column[1:]:
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
275 if element == column[0]:
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
276 count = count + 1
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
277 else:
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
278 break
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
279 snp_from_top.append(count)
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
280 row2 = pandas.Series(snp_from_top, tree_order.columns, name="snp_from_top")
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
281 tree_order = tree_order.append([row1])
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
282 tree_order = tree_order.append([row2])
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
283 # In pandas=0.18.1 even this does not work:
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
284 # abc = row1.to_frame()
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
285 # abc = abc.T --> tree_order.shape (5, 18), abc.shape (1, 18)
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
286 # tree_order.append(abc)
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
287 # Continue to get error: "*** ValueError: all the input arrays must have same number of dimensions"
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
288 tree_order = tree_order.T
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
289 tree_order = tree_order.sort_values(['snp_from_top', 'snp_per_column'], ascending=[True, False])
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
290 tree_order = tree_order.T
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
291 # Remove snp_per_column and snp_from_top rows.
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
292 cascade_order = tree_order[:-2]
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
293 # Output the cascade table.
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
294 output_cascade_table(cascade_order, mqdf, group, annotation_dict)
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
295 # Output the sorted table.
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
296 output_sort_table(cascade_order, mqdf, group, annotation_dict)
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
297 task_queue.task_done()
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
298
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
299
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
def set_num_cpus(num_files, processes):
    """Return the number of worker processes to start.

    The result is bounded by the number of files to process, the
    user-selected number of processes and the CPUs on this machine.

    Args:
        num_files: Number of input files to process.
        processes: User-selected number of processes.  None means no
            explicit value was given, in which case the CPU count of
            this machine is used as the requested value.

    Returns:
        The number of processes to use.  This is at least 1 whenever
        num_files and processes are both at least 1.
    """
    num_cpus = multiprocessing.cpu_count()
    if processes is None:
        # No explicit request: treat the CPU count as the requested value
        # instead of failing on a None comparison below.
        processes = num_cpus
    if num_files < num_cpus and num_files < processes:
        # Fewer files than either limit: one process per file is enough.
        return num_files
    if num_cpus < processes:
        # More processes requested than CPUs available: fall back to half
        # of the CPUs, but never to zero -- on a single-CPU machine a
        # result of zero would mean no worker is started at all.
        half_cpus = max(1, num_cpus // 2)
        if num_files < half_cpus:
            return num_files
        return half_cpus
    return processes
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
310
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
311
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
312 if __name__ == '__main__':
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
313 parser = argparse.ArgumentParser()
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
314
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
315 parser.add_argument('--input_avg_mq_json', action='store', dest='input_avg_mq_json', required=False, default=None, help='Average MQ json file')
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
316 parser.add_argument('--input_newick', action='store', dest='input_newick', required=False, default=None, help='Newick file')
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
317 parser.add_argument('--input_snps_json', action='store', dest='input_snps_json', required=False, default=None, help='SNPs json file')
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
318 parser.add_argument('--gbk_file', action='store', dest='gbk_file', required=False, default=None, help='Optional gbk file'),
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
319 parser.add_argument('--processes', action='store', dest='processes', type=int, help='User-selected number of processes to use for job splitting')
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
320
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
321 args = parser.parse_args()
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
322
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
323 if args.gbk_file is not None:
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
324 # Create the annotation_dict for annotating
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
325 # the Excel tables.
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
326 annotation_dict = get_annotation_dict(args.gbk_file)
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
327 else:
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
328 annotation_dict = None
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
329
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
330 # The assumption here is that the list of files
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
331 # in both INPUT_NEWICK_DIR and INPUT_JSON_DIR are
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
332 # named such that they are properly matched if
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
333 # the directories contain more than 1 file (i.e.,
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
334 # hopefully the newick file names and json file names
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
335 # will be something like Mbovis-01D6_* so they can be
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
336 # sorted and properly associated with each other).
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
337 if args.input_newick is not None:
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
338 newick_files = [args.input_newick]
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
339 else:
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
340 newick_files = []
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
341 for file_name in sorted(os.listdir(INPUT_NEWICK_DIR)):
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
342 file_path = os.path.abspath(os.path.join(INPUT_NEWICK_DIR, file_name))
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
343 newick_files.append(file_path)
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
344 if args.input_snps_json is not None:
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
345 json_files = [args.input_snps_json]
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
346 else:
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
347 json_files = []
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
348 for file_name in sorted(os.listdir(INPUT_JSON_DIR)):
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
349 file_path = os.path.abspath(os.path.join(INPUT_JSON_DIR, file_name))
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
350 json_files.append(file_path)
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
351 if args.input_avg_mq_json is not None:
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
352 json_avg_mq_files = [args.input_avg_mq_json]
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
353 else:
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
354 json_avg_mq_files = []
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
355 for file_name in sorted(os.listdir(INPUT_JSON_AVG_MQ_DIR)):
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
356 file_path = os.path.abspath(os.path.join(INPUT_JSON_AVG_MQ_DIR, file_name))
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
357 json_avg_mq_files.append(file_path)
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
358
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
359 multiprocessing.set_start_method('spawn')
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
360 queue1 = multiprocessing.JoinableQueue()
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
361 queue2 = multiprocessing.JoinableQueue()
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
362 num_files = len(newick_files)
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
363 cpus = set_num_cpus(num_files, args.processes)
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
364 # Set a timeout for get()s in the queue.
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
365 timeout = 0.05
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
366
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
367 for i, newick_file in enumerate(newick_files):
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
368 json_file = json_files[i]
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
369 json_avg_mq_file = json_avg_mq_files[i]
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
370 queue1.put((newick_file, json_file, json_avg_mq_file))
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
371
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
372 # Complete the preprocess_tables task.
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
373 processes = [multiprocessing.Process(target=preprocess_tables, args=(queue1, annotation_dict, timeout, )) for _ in range(cpus)]
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
374 for p in processes:
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
375 p.start()
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
376 for p in processes:
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
377 p.join()
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
378 queue1.join()
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
379
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
380 if queue1.empty():
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
381 queue1.close()
12f2b14549f6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff changeset
382 queue1.join_thread()