annotate vsnp_determine_ref_from_data.py @ 3:ae7b1b97a2a0 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 9e9e1db5b9ca53d690ed795aab1e7437f0a1c864"
author iuc
date Sat, 09 Apr 2022 09:01:11 +0000
parents ec6e02f4eab7
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
1 #!/usr/bin/env python
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
2
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
3 import argparse
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
4 import gzip
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
5 import os
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
6 from collections import OrderedDict
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
7
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
8 import yaml
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
9 from Bio.SeqIO.QualityIO import FastqGeneralIterator
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
10
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
11 OUTPUT_DBKEY_DIR = 'output_dbkey'
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
12 OUTPUT_METRICS_DIR = 'output_metrics'
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
13
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
14
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
15 def get_sample_name(file_path):
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
16 base_file_name = os.path.basename(file_path)
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
17 if base_file_name.find(".") > 0:
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
18 # Eliminate the extension.
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
19 return os.path.splitext(base_file_name)[0]
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
20 return base_file_name
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
21
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
22
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
23 def get_dbkey(dnaprints_dict, key, s):
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
24 # dnaprints_dict looks something like this:
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
25 # {'brucella': {'NC_002945v4': ['11001110', '11011110', '11001100']}
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
26 # {'bovis': {'NC_006895': ['11111110', '00010010', '01111011']}}
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
27 d = dnaprints_dict.get(key, {})
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
28 for data_table_value, v_list in d.items():
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
29 if s in v_list:
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
30 return data_table_value
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
31 return ""
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
32
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
33
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
34 def get_dnaprints_dict(dnaprint_fields):
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
35 # A dndprint_fields entry looks something liek this.
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
36 # [['AF2122', '/galaxy/tool-data/vsnp/AF2122/dnaprints/NC_002945v4.yml']]
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
37 dnaprints_dict = {}
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
38 for item in dnaprint_fields:
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
39 # Here item is a 2-element list of data
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
40 # table components, # value and path.
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
41 value = item[0]
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
42 path = item[1].strip()
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
43 with open(path, "rt") as fh:
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
44 # The format of all dnaprints yaml
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
45 # files is something like this:
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
46 # brucella:
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
47 # - 0111111111111111
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
48 print_dict = yaml.load(fh, Loader=yaml.Loader)
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
49 for print_dict_k, print_dict_v in print_dict.items():
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
50 dnaprints_v_dict = dnaprints_dict.get(print_dict_k, {})
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
51 if len(dnaprints_v_dict) > 0:
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
52 # dnaprints_dict already contains k (e.g., 'brucella',
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
53 # and dnaprints_v_dict will be a dictionary # that
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
54 # looks something like this:
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
55 # {'NC_002945v4': ['11001110', '11011110', '11001100']}
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
56 value_list = dnaprints_v_dict.get(value, [])
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
57 value_list = value_list + print_dict_v
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
58 dnaprints_v_dict[value] = value_list
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
59 else:
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
60 # dnaprints_v_dict is an empty dictionary.
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
61 dnaprints_v_dict[value] = print_dict_v
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
62 dnaprints_dict[print_dict_k] = dnaprints_v_dict
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
63 # dnaprints_dict looks something like this:
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
64 # {'brucella': {'NC_002945v4': ['11001110', '11011110', '11001100']}
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
65 # {'bovis': {'NC_006895': ['11111110', '00010010', '01111011']}}
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
66 return dnaprints_dict
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
67
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
68
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
69 def get_group_and_dbkey(dnaprints_dict, brucella_string, brucella_sum, bovis_string, bovis_sum, para_string, para_sum):
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
70 if brucella_sum > 3:
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
71 group = "Brucella"
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
72 dbkey = get_dbkey(dnaprints_dict, "brucella", brucella_string)
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
73 elif bovis_sum > 3:
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
74 group = "TB"
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
75 dbkey = get_dbkey(dnaprints_dict, "bovis", bovis_string)
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
76 elif para_sum >= 1:
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
77 group = "paraTB"
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
78 dbkey = get_dbkey(dnaprints_dict, "para", para_string)
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
79 else:
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
80 group = ""
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
81 dbkey = ""
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
82 return group, dbkey
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
83
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
84
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
85 def get_oligo_dict():
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
86 oligo_dict = {}
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
87 oligo_dict["01_ab1"] = "AATTGTCGGATAGCCTGGCGATAACGACGC"
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
88 oligo_dict["02_ab3"] = "CACACGCGGGCCGGAACTGCCGCAAATGAC"
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
89 oligo_dict["03_ab5"] = "GCTGAAGCGGCAGACCGGCAGAACGAATAT"
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
90 oligo_dict["04_mel"] = "TGTCGCGCGTCAAGCGGCGTGAAATCTCTG"
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
91 oligo_dict["05_suis1"] = "TGCGTTGCCGTGAAGCTTAATTCGGCTGAT"
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
92 oligo_dict["06_suis2"] = "GGCAATCATGCGCAGGGCTTTGCATTCGTC"
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
93 oligo_dict["07_suis3"] = "CAAGGCAGATGCACATAATCCGGCGACCCG"
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
94 oligo_dict["08_ceti1"] = "GTGAATATAGGGTGAATTGATCTTCAGCCG"
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
95 oligo_dict["09_ceti2"] = "TTACAAGCAGGCCTATGAGCGCGGCGTGAA"
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
96 oligo_dict["10_canis4"] = "CTGCTACATAAAGCACCCGGCGACCGAGTT"
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
97 oligo_dict["11_canis"] = "ATCGTTTTGCGGCATATCGCTGACCACAGC"
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
98 oligo_dict["12_ovis"] = "CACTCAATCTTCTCTACGGGCGTGGTATCC"
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
99 oligo_dict["13_ether2"] = "CGAAATCGTGGTGAAGGACGGGACCGAACC"
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
100 oligo_dict["14_63B1"] = "CCTGTTTAAAAGAATCGTCGGAACCGCTCT"
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
101 oligo_dict["15_16M0"] = "TCCCGCCGCCATGCCGCCGAAAGTCGCCGT"
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
102 oligo_dict["16_mel1b"] = "TCTGTCCAAACCCCGTGACCGAACAATAGA"
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
103 oligo_dict["17_tb157"] = "CTCTTCGTATACCGTTCCGTCGTCACCATGGTCCT"
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
104 oligo_dict["18_tb7"] = "TCACGCAGCCAACGATATTCGTGTACCGCGACGGT"
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
105 oligo_dict["19_tbbov"] = "CTGGGCGACCCGGCCGACCTGCACACCGCGCATCA"
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
106 oligo_dict["20_tb5"] = "CCGTGGTGGCGTATCGGGCCCCTGGATCGCGCCCT"
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
107 oligo_dict["21_tb2"] = "ATGTCTGCGTAAAGAAGTTCCATGTCCGGGAAGTA"
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
108 oligo_dict["22_tb3"] = "GAAGACCTTGATGCCGATCTGGGTGTCGATCTTGA"
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
109 oligo_dict["23_tb4"] = "CGGTGTTGAAGGGTCCCCCGTTCCAGAAGCCGGTG"
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
110 oligo_dict["24_tb6"] = "ACGGTGATTCGGGTGGTCGACACCGATGGTTCAGA"
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
111 oligo_dict["25_para"] = "CCTTTCTTGAAGGGTGTTCG"
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
112 oligo_dict["26_para_sheep"] = "CGTGGTGGCGACGGCGGCGGGCCTGTCTAT"
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
113 oligo_dict["27_para_cattle"] = "TCTCCTCGGTCGGTGATTCGGGGGCGCGGT"
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
114 return oligo_dict
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
115
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
116
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
117 def get_seq_counts(value, fastq_list, gzipped):
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
118 count = 0
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
119 for fastq_file in fastq_list:
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
120 if gzipped:
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
121 with gzip.open(fastq_file, 'rt') as fh:
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
122 for title, seq, qual in FastqGeneralIterator(fh):
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
123 count += seq.count(value)
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
124 else:
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
125 with open(fastq_file, 'r') as fh:
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
126 for title, seq, qual in FastqGeneralIterator(fh):
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
127 count += seq.count(value)
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
128 return(value, count)
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
129
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
130
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
131 def get_species_counts(fastq_list, gzipped):
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
132 count_summary = {}
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
133 oligo_dict = get_oligo_dict()
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
134 for v1 in oligo_dict.values():
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
135 returned_value, count = get_seq_counts(v1, fastq_list, gzipped)
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
136 for key, v2 in oligo_dict.items():
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
137 if returned_value == v2:
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
138 count_summary.update({key: count})
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
139 count_list = []
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
140 for v in count_summary.values():
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
141 count_list.append(v)
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
142 brucella_sum = sum(count_list[:16])
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
143 bovis_sum = sum(count_list[16:24])
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
144 para_sum = sum(count_list[24:])
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
145 return count_summary, count_list, brucella_sum, bovis_sum, para_sum
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
146
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
147
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
148 def get_species_strings(count_summary):
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
149 binary_dictionary = {}
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
150 for k, v in count_summary.items():
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
151 if v > 1:
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
152 binary_dictionary.update({k: 1})
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
153 else:
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
154 binary_dictionary.update({k: 0})
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
155 binary_dictionary = OrderedDict(sorted(binary_dictionary.items()))
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
156 binary_list = []
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
157 for v in binary_dictionary.values():
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
158 binary_list.append(v)
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
159 brucella_binary = binary_list[:16]
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
160 brucella_string = ''.join(str(e) for e in brucella_binary)
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
161 bovis_binary = binary_list[16:24]
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
162 bovis_string = ''.join(str(e) for e in bovis_binary)
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
163 para_binary = binary_list[24:]
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
164 para_string = ''.join(str(e) for e in para_binary)
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
165 return brucella_string, bovis_string, para_string
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
166
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
167
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
168 def output_dbkey(file_name, dbkey, output_file):
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
169 # Output the dbkey.
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
170 with open(output_file, "w") as fh:
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
171 fh.write("%s" % dbkey)
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
172
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
173
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
174 def output_files(fastq_file, count_list, group, dbkey, dbkey_file, metrics_file):
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
175 base_file_name = get_sample_name(fastq_file)
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
176 output_dbkey(base_file_name, dbkey, dbkey_file)
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
177 output_metrics(base_file_name, count_list, group, dbkey, metrics_file)
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
178
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
179
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
180 def output_metrics(file_name, count_list, group, dbkey, output_file):
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
181 # Output the metrics.
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
182 with open(output_file, "w") as fh:
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
183 fh.write("Sample: %s\n" % file_name)
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
184 fh.write("Brucella counts: ")
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
185 for i in count_list[:16]:
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
186 fh.write("%d," % i)
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
187 fh.write("\nTB counts: ")
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
188 for i in count_list[16:24]:
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
189 fh.write("%d," % i)
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
190 fh.write("\nPara counts: ")
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
191 for i in count_list[24:]:
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
192 fh.write("%d," % i)
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
193 fh.write("\nGroup: %s" % group)
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
194 fh.write("\ndbkey: %s\n" % dbkey)
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
195
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
196
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
197 if __name__ == '__main__':
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
198 parser = argparse.ArgumentParser()
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
199
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
200 parser.add_argument('--dnaprint_fields', action='append', dest='dnaprint_fields', nargs=2, help="List of dnaprints data table value, name and path fields")
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
201 parser.add_argument('--read1', action='store', dest='read1', help='Required: single read')
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
202 parser.add_argument('--read2', action='store', dest='read2', required=False, default=None, help='Optional: paired read')
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
203 parser.add_argument('--gzipped', action='store_true', dest='gzipped', help='Input files are gzipped')
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
204 parser.add_argument('--output_dbkey', action='store', dest='output_dbkey', help='Output reference file')
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
205 parser.add_argument('--output_metrics', action='store', dest='output_metrics', help='Output metrics file')
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
206
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
207 args = parser.parse_args()
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
208
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
209 fastq_list = [args.read1]
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
210 if args.read2 is not None:
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
211 fastq_list.append(args.read2)
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
212
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
213 # The value of dnaprint_fields is a list of lists, where each list is
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
214 # the [value, name, path] components of the vsnp_dnaprints data table.
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
215 # The data_manager_vsnp_dnaprints tool assigns the dbkey column from the
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
216 # all_fasta data table to the value column in the vsnp_dnaprints data
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
217 # table to ensure a proper mapping for discovering the dbkey.
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
218 dnaprints_dict = get_dnaprints_dict(args.dnaprint_fields)
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
219
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
220 # Here fastq_list consists of either a single read
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
221 # or a set of paired reads, producing single outputs.
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
222 count_summary, count_list, brucella_sum, bovis_sum, para_sum = get_species_counts(fastq_list, args.gzipped)
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
223 brucella_string, bovis_string, para_string = get_species_strings(count_summary)
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
224 group, dbkey = get_group_and_dbkey(dnaprints_dict, brucella_string, brucella_sum, bovis_string, bovis_sum, para_string, para_sum)
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
225 output_files(args.read1, count_list, group, dbkey, dbkey_file=args.output_dbkey, metrics_file=args.output_metrics)