Mercurial > repos > iuc > vsnp_get_snps
annotate vsnp_determine_ref_from_data.py @ 0:ec6e02f4eab7 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
| author | iuc | 
|---|---|
| date | Tue, 16 Nov 2021 08:26:58 +0000 | 
| parents | |
| children | 
| rev | line source | 
|---|---|
| 
0
 
ec6e02f4eab7
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
 
iuc 
parents:  
diff
changeset
 | 
1 #!/usr/bin/env python | 
| 
 
ec6e02f4eab7
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
 
iuc 
parents:  
diff
changeset
 | 
2 | 
| 
 
ec6e02f4eab7
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
 
iuc 
parents:  
diff
changeset
 | 
3 import argparse | 
| 
 
ec6e02f4eab7
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
 
iuc 
parents:  
diff
changeset
 | 
4 import gzip | 
| 
 
ec6e02f4eab7
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
 
iuc 
parents:  
diff
changeset
 | 
5 import os | 
| 
 
ec6e02f4eab7
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
 
iuc 
parents:  
diff
changeset
 | 
6 from collections import OrderedDict | 
| 
 
ec6e02f4eab7
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
 
iuc 
parents:  
diff
changeset
 | 
7 | 
| 
 
ec6e02f4eab7
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
 
iuc 
parents:  
diff
changeset
 | 
8 import yaml | 
| 
 
ec6e02f4eab7
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
 
iuc 
parents:  
diff
changeset
 | 
9 from Bio.SeqIO.QualityIO import FastqGeneralIterator | 
| 
 
ec6e02f4eab7
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
 
iuc 
parents:  
diff
changeset
 | 
10 | 
| 
 
ec6e02f4eab7
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
 
iuc 
parents:  
diff
changeset
 | 
11 OUTPUT_DBKEY_DIR = 'output_dbkey' | 
| 
 
ec6e02f4eab7
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
 
iuc 
parents:  
diff
changeset
 | 
12 OUTPUT_METRICS_DIR = 'output_metrics' | 
| 
 
ec6e02f4eab7
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
 
iuc 
parents:  
diff
changeset
 | 
13 | 
| 
 
ec6e02f4eab7
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
 
iuc 
parents:  
diff
changeset
 | 
14 | 
| 
 
ec6e02f4eab7
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
 
iuc 
parents:  
diff
changeset
 | 
def get_sample_name(file_path):
    """Return the sample name derived from *file_path*.

    The directory part is dropped.  If the remaining file name contains a
    "." anywhere after its first character, the last extension is removed
    (only one: ``sample.fastq.gz`` becomes ``sample.fastq``).  Names that
    start with "." or contain no "." are returned unchanged.
    """
    base_file_name = os.path.basename(file_path)
    # find() > 0 deliberately excludes names whose first dot is the leading
    # character, so such names are returned untouched.
    if base_file_name.find(".") > 0:
        # Eliminate the extension.
        return os.path.splitext(base_file_name)[0]
    return base_file_name
| 
 
ec6e02f4eab7
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
 
iuc 
parents:  
diff
changeset
 | 
21 | 
| 
 
ec6e02f4eab7
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
 
iuc 
parents:  
diff
changeset
 | 
22 | 
| 
 
ec6e02f4eab7
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
 
iuc 
parents:  
diff
changeset
 | 
def get_dbkey(dnaprints_dict, key, s):
    """Return the data table value under *key* whose list contains *s*.

    dnaprints_dict looks something like this:
    {'brucella': {'NC_002945v4': ['11001110', '11011110', '11001100']},
     'bovis': {'NC_006895': ['11111110', '00010010', '01111011']}}

    The inner dictionary for *key* is scanned in iteration order and the
    first data table value whose list contains *s* is returned.  An empty
    string is returned when *key* is absent or no list contains *s*.
    """
    candidates = dnaprints_dict.get(key, {})
    for data_table_value, v_list in candidates.items():
        if s in v_list:
            return data_table_value
    return ""
| 
 
ec6e02f4eab7
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
 
iuc 
parents:  
diff
changeset
 | 
32 | 
| 
 
ec6e02f4eab7
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
 
iuc 
parents:  
diff
changeset
 | 
33 | 
| 
 
ec6e02f4eab7
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
 
iuc 
parents:  
diff
changeset
 | 
def get_dnaprints_dict(dnaprint_fields):
    """Build a nested dictionary of DNA prints from data table entries.

    A dnaprint_fields entry looks something like this:
    [['AF2122', '/galaxy/tool-data/vsnp/AF2122/dnaprints/NC_002945v4.yml']]

    Each item is a 2-element list of data table components: value and
    path.  The YAML file at path maps a group name to a list, e.g.:
        brucella:
          - 0111111111111111

    The returned dictionary looks something like this:
    {'brucella': {'NC_002945v4': ['11001110', '11011110', '11001100']},
     'bovis': {'NC_006895': ['11111110', '00010010', '01111011']}}

    Lists found for the same group and the same data table value (across
    several files) are concatenated in the order they are read.
    """
    dnaprints_dict = {}
    for item in dnaprint_fields:
        value = item[0]
        path = item[1].strip()
        with open(path, "rt") as fh:
            # NOTE(review): yaml.Loader can construct arbitrary Python
            # objects.  That is acceptable only if these files are trusted;
            # consider yaml.safe_load if they can ever come from users.
            # An empty document loads as None, so fall back to an empty dict.
            print_dict = yaml.load(fh, Loader=yaml.Loader) or {}
        for print_dict_k, print_dict_v in print_dict.items():
            # A group key with no entries loads as None; a lone scalar is
            # wrapped so every stored value is a list that supports "in".
            if print_dict_v is None:
                print_dict_v = []
            elif not isinstance(print_dict_v, list):
                print_dict_v = [print_dict_v]
            # setdefault registers the per-group dictionary on first sight;
            # concatenation always produces a new list, so the list loaded
            # from YAML is never mutated.
            dnaprints_v_dict = dnaprints_dict.setdefault(print_dict_k, {})
            dnaprints_v_dict[value] = dnaprints_v_dict.get(value, []) + print_dict_v
    return dnaprints_dict
| 
 
ec6e02f4eab7
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
 
iuc 
parents:  
diff
changeset
 | 
67 | 
| 
 
ec6e02f4eab7
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
 
iuc 
parents:  
diff
changeset
 | 
68 | 
| 
 
ec6e02f4eab7
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
 
iuc 
parents:  
diff
changeset
 | 
def get_group_and_dbkey(dnaprints_dict, brucella_string, brucella_sum, bovis_string, bovis_sum, para_string, para_sum):
    """Choose the group and dbkey from the per-group sums.

    The groups are tested in a fixed order and the first match wins:
      - brucella_sum > 3  -> group "Brucella", lookup key "brucella"
      - bovis_sum > 3     -> group "TB",       lookup key "bovis"
      - para_sum >= 1     -> group "paraTB",   lookup key "para"

    For the matching group the dbkey is looked up with get_dbkey() using
    the corresponding *_string argument.  Returns a (group, dbkey) tuple;
    both are empty strings when no threshold is met, and dbkey is whatever
    get_dbkey() returns otherwise.
    """
    if brucella_sum > 3:
        return "Brucella", get_dbkey(dnaprints_dict, "brucella", brucella_string)
    if bovis_sum > 3:
        return "TB", get_dbkey(dnaprints_dict, "bovis", bovis_string)
    if para_sum >= 1:
        return "paraTB", get_dbkey(dnaprints_dict, "para", para_string)
    return "", ""
| 
 
ec6e02f4eab7
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
 
iuc 
parents:  
diff
changeset
 | 
83 | 
| 
 
ec6e02f4eab7
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
 
iuc 
parents:  
diff
changeset
 | 
84 | 
| 
 
ec6e02f4eab7
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
 
iuc 
parents:  
diff
changeset
 | 
def get_oligo_dict():
    """Return a new dictionary mapping oligo identifiers to sequences.

    Keys carry a two-digit numeric prefix ("01_" .. "27_") and are inserted
    in that order; values are the nucleotide strings searched for in the
    reads.  A fresh dictionary is built on every call, so callers may
    modify the result freely.
    """
    return {
        "01_ab1": "AATTGTCGGATAGCCTGGCGATAACGACGC",
        "02_ab3": "CACACGCGGGCCGGAACTGCCGCAAATGAC",
        "03_ab5": "GCTGAAGCGGCAGACCGGCAGAACGAATAT",
        "04_mel": "TGTCGCGCGTCAAGCGGCGTGAAATCTCTG",
        "05_suis1": "TGCGTTGCCGTGAAGCTTAATTCGGCTGAT",
        "06_suis2": "GGCAATCATGCGCAGGGCTTTGCATTCGTC",
        "07_suis3": "CAAGGCAGATGCACATAATCCGGCGACCCG",
        "08_ceti1": "GTGAATATAGGGTGAATTGATCTTCAGCCG",
        "09_ceti2": "TTACAAGCAGGCCTATGAGCGCGGCGTGAA",
        "10_canis4": "CTGCTACATAAAGCACCCGGCGACCGAGTT",
        "11_canis": "ATCGTTTTGCGGCATATCGCTGACCACAGC",
        "12_ovis": "CACTCAATCTTCTCTACGGGCGTGGTATCC",
        "13_ether2": "CGAAATCGTGGTGAAGGACGGGACCGAACC",
        "14_63B1": "CCTGTTTAAAAGAATCGTCGGAACCGCTCT",
        "15_16M0": "TCCCGCCGCCATGCCGCCGAAAGTCGCCGT",
        "16_mel1b": "TCTGTCCAAACCCCGTGACCGAACAATAGA",
        "17_tb157": "CTCTTCGTATACCGTTCCGTCGTCACCATGGTCCT",
        "18_tb7": "TCACGCAGCCAACGATATTCGTGTACCGCGACGGT",
        "19_tbbov": "CTGGGCGACCCGGCCGACCTGCACACCGCGCATCA",
        "20_tb5": "CCGTGGTGGCGTATCGGGCCCCTGGATCGCGCCCT",
        "21_tb2": "ATGTCTGCGTAAAGAAGTTCCATGTCCGGGAAGTA",
        "22_tb3": "GAAGACCTTGATGCCGATCTGGGTGTCGATCTTGA",
        "23_tb4": "CGGTGTTGAAGGGTCCCCCGTTCCAGAAGCCGGTG",
        "24_tb6": "ACGGTGATTCGGGTGGTCGACACCGATGGTTCAGA",
        "25_para": "CCTTTCTTGAAGGGTGTTCG",
        "26_para_sheep": "CGTGGTGGCGACGGCGGCGGGCCTGTCTAT",
        "27_para_cattle": "TCTCCTCGGTCGGTGATTCGGGGGCGCGGT",
    }
| 
 
ec6e02f4eab7
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
 
iuc 
parents:  
diff
changeset
 | 
115 | 
| 
 
ec6e02f4eab7
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
 
iuc 
parents:  
diff
changeset
 | 
116 | 
| 
 
ec6e02f4eab7
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
 
iuc 
parents:  
diff
changeset
 | 
def get_seq_counts(value, fastq_list, gzipped):
    """Count occurrences of *value* in the reads of the given FASTQ files.

    value      -- the sequence string to search for.
    fastq_list -- iterable of FASTQ file paths.
    gzipped    -- truthy if the files must be opened with gzip.

    Returns a (value, count) tuple where count is the sum, over every read
    of every file, of str.count(value) on the read's sequence (that is,
    non-overlapping occurrences within each read).
    """
    # Both kinds of file are read in text mode; only the opener differs.
    opener = gzip.open if gzipped else open
    count = 0
    for fastq_file in fastq_list:
        with opener(fastq_file, 'rt') as fh:
            count += sum(seq.count(value) for _title, seq, _qual in FastqGeneralIterator(fh))
    return value, count
| 
 
ec6e02f4eab7
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
 
iuc 
parents:  
diff
changeset
 | 
129 | 
| 
 
ec6e02f4eab7
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
 
iuc 
parents:  
diff
changeset
 | 
130 | 
| 
 
ec6e02f4eab7
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
 
iuc 
parents:  
diff
changeset
 | 
def get_species_counts(fastq_list, gzipped):
    """Count how often each oligo sequence occurs in the FASTQ input.

    Args:
        fastq_list: FASTQ file paths, passed through to get_seq_counts().
        gzipped: flag passed through to get_seq_counts().

    Returns:
        A 5-tuple ``(count_summary, count_list, brucella_sum, bovis_sum,
        para_sum)``.  ``count_summary`` maps every key of get_oligo_dict()
        to the count of its sequence, ``count_list`` holds the same counts
        in that key order, and the three sums total positions 0-15, 16-23
        and 24 onwards of ``count_list`` respectively.
    """
    oligo_dict = get_oligo_dict()
    count_summary = {}
    # Record the count directly against the key being processed.  The
    # previous implementation searched the whole dictionary again to find
    # the key(s) by value, which was quadratic and, if two keys ever shared
    # a sequence, reordered count_summary so that the positional slices
    # below no longer lined up with the oligo dictionary.
    for key, oligo in oligo_dict.items():
        # get_seq_counts() hands back a (sequence, count) pair; only the
        # count is needed here.
        _, count = get_seq_counts(oligo, fastq_list, gzipped)
        count_summary[key] = count
    count_list = list(count_summary.values())
    # The species groups are positional: first 16, next 8, remainder.
    brucella_sum = sum(count_list[:16])
    bovis_sum = sum(count_list[16:24])
    para_sum = sum(count_list[24:])
    return count_summary, count_list, brucella_sum, bovis_sum, para_sum
| 
 
ec6e02f4eab7
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
 
iuc 
parents:  
diff
changeset
 | 
146 | 
| 
 
ec6e02f4eab7
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
 
iuc 
parents:  
diff
changeset
 | 
147 | 
| 
 
ec6e02f4eab7
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
 
iuc 
parents:  
diff
changeset
 | 
def get_species_strings(count_summary):
    """Turn oligo counts into presence/absence bit strings per group.

    Args:
        count_summary: mapping of oligo key to its integer count.

    Returns:
        A 3-tuple ``(brucella_string, bovis_string, para_string)`` of
        '0'/'1' characters.  The bits are ordered by sorted key; the first
        16 form the first string, the next 8 the second and any remaining
        bits the third.  Missing positions simply yield shorter (possibly
        empty) strings.
    """
    # A count must be strictly greater than 1 to be flagged as present.
    # Keys are unique, so sorting the items orders them by key alone.
    bits = ''.join(
        '1' if count > 1 else '0'
        for _, count in sorted(count_summary.items())
    )
    brucella_string = bits[:16]
    bovis_string = bits[16:24]
    para_string = bits[24:]
    return brucella_string, bovis_string, para_string
| 
 
ec6e02f4eab7
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
 
iuc 
parents:  
diff
changeset
 | 
166 | 
| 
 
ec6e02f4eab7
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
 
iuc 
parents:  
diff
changeset
 | 
167 | 
| 
 
ec6e02f4eab7
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
 
iuc 
parents:  
diff
changeset
 | 
def output_dbkey(file_name, dbkey, output_file):
    """Write the dbkey to output_file, replacing any existing content.

    No trailing newline is written.  The file_name argument is accepted
    but is not used by this function.
    """
    dbkey_text = "%s" % dbkey
    with open(output_file, "w") as dbkey_fh:
        dbkey_fh.write(dbkey_text)
| 
 
ec6e02f4eab7
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
 
iuc 
parents:  
diff
changeset
 | 
172 | 
| 
 
ec6e02f4eab7
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
 
iuc 
parents:  
diff
changeset
 | 
173 | 
| 
 
ec6e02f4eab7
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
 
iuc 
parents:  
diff
changeset
 | 
def output_files(fastq_file, count_list, group, dbkey, dbkey_file, metrics_file):
    """Write both outputs (dbkey file and metrics file) for one sample.

    The sample name is derived from fastq_file via get_sample_name() and
    handed to both writers.
    """
    sample_name = get_sample_name(fastq_file)
    output_dbkey(file_name=sample_name, dbkey=dbkey, output_file=dbkey_file)
    output_metrics(
        file_name=sample_name,
        count_list=count_list,
        group=group,
        dbkey=dbkey,
        output_file=metrics_file,
    )
| 
 
ec6e02f4eab7
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
 
iuc 
parents:  
diff
changeset
 | 
178 | 
| 
 
ec6e02f4eab7
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
 
iuc 
parents:  
diff
changeset
 | 
179 | 
| 
 
ec6e02f4eab7
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
 
iuc 
parents:  
diff
changeset
 | 
def output_metrics(file_name, count_list, group, dbkey, output_file):
    """Write the per-sample metrics report to output_file.

    The report lists the sample name, three comma-terminated runs of
    counts (positions 0-15, 16-23 and 24 onwards of count_list), the
    group and the dbkey.
    """
    # Heading text paired with the slice of count_list printed under it.
    sections = (
        ("Brucella counts: ", slice(None, 16)),
        ("\nTB counts: ", slice(16, 24)),
        ("\nPara counts: ", slice(24, None)),
    )
    with open(output_file, "w") as metrics_fh:
        metrics_fh.write("Sample: %s\n" % file_name)
        for heading, span in sections:
            metrics_fh.write(heading)
            # Every count, including the last, is followed by a comma.
            for count in count_list[span]:
                metrics_fh.write("%d," % count)
        metrics_fh.write("\nGroup: %s" % group)
        metrics_fh.write("\ndbkey: %s\n" % dbkey)
| 
 
ec6e02f4eab7
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
 
iuc 
parents:  
diff
changeset
 | 
195 | 
| 
 
ec6e02f4eab7
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
 
iuc 
parents:  
diff
changeset
 | 
196 | 
| 
 
ec6e02f4eab7
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
 
iuc 
parents:  
diff
changeset
 | 
if __name__ == '__main__':
    # Command-line entry point: count oligo hits in the supplied read(s),
    # derive the group and dbkey, then write the dbkey and metrics files.
    parser = argparse.ArgumentParser()

    parser.add_argument('--dnaprint_fields', action='append', dest='dnaprint_fields', nargs=2, help="List of dnaprints data table value, name and path fields")
    # The help text documents --read1 as required, so let argparse enforce
    # it and report a clear usage error instead of failing later when the
    # missing path is opened.
    parser.add_argument('--read1', action='store', dest='read1', required=True, help='Required: single read')
    parser.add_argument('--read2', action='store', dest='read2', required=False, default=None, help='Optional: paired read')
    parser.add_argument('--gzipped', action='store_true', dest='gzipped', help='Input files are gzipped')
    parser.add_argument('--output_dbkey', action='store', dest='output_dbkey', help='Output reference file')
    parser.add_argument('--output_metrics', action='store', dest='output_metrics', help='Output metrics file')

    args = parser.parse_args()

    fastq_list = [args.read1]
    if args.read2 is not None:
        fastq_list.append(args.read2)

    # The value of dnaprint_fields is a list of lists, one per occurrence
    # of --dnaprint_fields, holding components of the vsnp_dnaprints data
    # table.
    # NOTE(review): nargs=2 yields two-element lists, whereas the help text
    # and the earlier wording of this comment name three fields
    # (value, name, path) - confirm which two fields the caller passes.
    # The data_manager_vsnp_dnaprints tool assigns the dbkey column from the
    # all_fasta data table to the value column in the vsnp_dnaprints data
    # table to ensure a proper mapping for discovering the dbkey.
    dnaprints_dict = get_dnaprints_dict(args.dnaprint_fields)

    # Here fastq_list consists of either a single read
    # or a set of paired reads, producing single outputs.
    count_summary, count_list, brucella_sum, bovis_sum, para_sum = get_species_counts(fastq_list, args.gzipped)
    brucella_string, bovis_string, para_string = get_species_strings(count_summary)
    group, dbkey = get_group_and_dbkey(dnaprints_dict, brucella_string, brucella_sum, bovis_string, bovis_sum, para_string, para_sum)
    # Output names are derived from the first read only.
    output_files(args.read1, count_list, group, dbkey, dbkey_file=args.output_dbkey, metrics_file=args.output_metrics)
