Mercurial > repos > iuc > data_manager_primer_scheme_bedfiles
comparison data_manager/install_primer_scheme_bedfiles.py @ 6:dd451e45681c draft default tip
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_primer_scheme_bedfiles commit 4880dcfcdddd9ed8415ccde01b2f8e2c28dab5c3"
author | iuc |
---|---|
date | Tue, 16 Nov 2021 08:22:08 +0000 |
parents | 157a5c9ea72a |
children |
comparison legend: equal | deleted | inserted | replaced
5:be70da9dc013 | 6:dd451e45681c |
---|---|
16 import requests | 16 import requests |
17 | 17 |
18 DATA_TABLE_NAME = "primer_scheme_bedfiles" | 18 DATA_TABLE_NAME = "primer_scheme_bedfiles" |
19 | 19 |
20 | 20 |
21 def write_artic_style_bed(input_file, bed_output_filename): | 21 def convert_and_write_bed(input_file, bed_output_filename, scheme_name, force_string=True): |
22 with open(bed_output_filename, "w") as bed_output_file: | 22 with open(bed_output_filename, "w") as bed_output_file: |
23 for line in input_file: | 23 for line in input_file: |
24 fields = line.split("\t") | 24 fields = line.strip().split("\t") |
25 if len(fields) < 6: | 25 if "Midnight" in scheme_name: |
26 # too short to encode the strand format | 26 # Midnight primers are distributed in a tabular file, not a BED file |
27 exit("invalid format in BED file: {}".format(line.rstrip())) | 27 if line.startswith("Primer Name"): |
28 continue | |
29 if len(fields) != 8: | |
30 exit("Unexpected format in Midnight primer file: {}".format(line.rstrip())) | |
31 (primer_name, _, pool, _, _, _, start, end) = fields | |
32 strand = '+' if primer_name.endswith('LEFT') else '-' | |
33 if strand == '-': | |
34 start, end = end, start | |
35 fields = ["MN908947.3", start, end, primer_name, pool, strand] | |
36 else: | |
37 if len(fields) < 5: | |
38 # too short to encode the "ARTIC style BED" format | |
39 exit("invalid format in BED file: {}".format(line.rstrip())) | |
40 # 'BED' format used by ARTIC pipeline uses | |
41 # chrom start end primer_name pool_name | |
42 # see this: https://github.com/artic-network/fieldbioinformatics/blob/master/artic/vcftagprimersites.py#L76 | |
43 # for ARTIC minion and | |
44 # this: https://github.com/andersen-lab/ivar/blob/master/src/primer_bed.cpp#L125 | |
45 # for ivar trim (ivar trim treats the file as BED following the standard but also allows the ARTIC format) | |
28 try: | 46 try: |
29 # try and parse field 5 as a number | 47 float(fields[4]) |
30 score = float(fields[4]) | |
31 except ValueError: | 48 except ValueError: |
32 # Alright, this is an ARTIC-style bed, | 49 # this is a string, we can leave it as is |
33 # which is actually against the specs, but required by the | |
34 # ARTIC pipeline. | |
35 pass | 50 pass |
36 else: | 51 else: |
37 # This is a regular bed with numbers in the score column. | 52 # ensure that it is forced to be a string |
38 # We need to "fix" it for the ARTIC pipeline. | 53 fields[4] = '_{0}'.format(fields[4]) |
39 fields[4] = '_{0}'.format(score) | 54 print('\t'.join(fields), file=bed_output_file) |
40 bed_output_file.write("\t".join(fields)) | |
41 | 55 |
42 | 56 |
43 def fetch_artic_primers(output_directory, primers): | 57 def fetch_primers(output_directory, primers): |
44 primer_sets = { | 58 primer_sets = { |
45 "SARS-CoV-2-ARTICv1": "https://raw.githubusercontent.com/artic-network/artic-ncov2019/master/primer_schemes/nCoV-2019/V1/nCoV-2019.bed", | 59 "SARS-CoV-2-ARTICv1": "https://raw.githubusercontent.com/artic-network/artic-ncov2019/master/primer_schemes/nCoV-2019/V1/nCoV-2019.bed", |
46 "SARS-CoV-2-ARTICv2": "https://raw.githubusercontent.com/artic-network/artic-ncov2019/master/primer_schemes/nCoV-2019/V2/nCoV-2019.bed", | 60 "SARS-CoV-2-ARTICv2": "https://raw.githubusercontent.com/artic-network/artic-ncov2019/master/primer_schemes/nCoV-2019/V2/nCoV-2019.bed", |
47 "SARS-CoV-2-ARTICv3": "https://raw.githubusercontent.com/artic-network/artic-ncov2019/master/primer_schemes/nCoV-2019/V3/nCoV-2019.bed", | 61 "SARS-CoV-2-ARTICv3": "https://raw.githubusercontent.com/artic-network/artic-ncov2019/master/primer_schemes/nCoV-2019/V3/nCoV-2019.bed", |
62 "SARS-CoV-2-ARTICv4": "https://raw.githubusercontent.com/artic-network/artic-ncov2019/master/primer_schemes/nCoV-2019/V4/SARS-CoV-2.scheme.bed", | |
63 "VarSkip-V1a": "https://raw.githubusercontent.com/nebiolabs/VarSkip/main/schemes/NEB_VarSkip/V1a/NEB_VarSkip.scheme.bed", | |
64 "Midnight-v1": "https://zenodo.org/record/3897530/files/SARS-CoV-2_primer_sets_RBK004_nanopore_sequencing.tab?download=1" | |
48 } | 65 } |
49 | 66 |
50 data = [] | 67 data = [] |
51 for name, url in primer_sets.items(): | 68 for name, url in primer_sets.items(): |
52 if name not in primers: | 69 if name not in primers: |
60 response.status_code, | 77 response.status_code, |
61 file=sys.stderr, | 78 file=sys.stderr, |
62 ) | 79 ) |
63 exit(response.status_code) | 80 exit(response.status_code) |
64 bed_output_filename = os.path.join(output_directory, name + ".bed") | 81 bed_output_filename = os.path.join(output_directory, name + ".bed") |
65 write_artic_style_bed(StringIO(response.text), bed_output_filename) | 82 convert_and_write_bed(StringIO(response.text), bed_output_filename, name) |
66 description = name[:-2] + " " + name[-2:] + " primer set" | 83 if 'ARTIC' in name: |
84 # split the vX from the rest of the name in ARTIC primer set description | |
85 description = name[:-2] + " " + name[-2:] + " primer set" | |
86 else: | |
87 description = name + " primer set" | |
67 data.append(dict(value=name, path=bed_output_filename, description=description)) | 88 data.append(dict(value=name, path=bed_output_filename, description=description)) |
68 return data | 89 return data |
69 | 90 |
70 | 91 |
71 def install_primer_file( | 92 def install_primer_file( |
72 output_directory, input_filename, primer_name, primer_description | 93 output_directory, input_filename, scheme_name, primer_description |
73 ): | 94 ): |
74 name = re.sub(r"\W", "", str(primer_name).replace(" ", "_")) | 95 name = re.sub(r"[^\w-]", "", str(scheme_name).replace(" ", "_")) |
75 output_filename = os.path.join(output_directory, name + ".bed") | 96 output_filename = os.path.join(output_directory, name + ".bed") |
76 with open(input_filename) as input_file: | 97 with open(input_filename) as input_file: |
77 write_artic_style_bed(input_file, output_filename) | 98 convert_and_write_bed(input_file, output_filename, scheme_name) |
78 data = [dict(value=name, description=primer_description, path=output_filename)] | 99 data = [dict(value=name, description=primer_description, path=output_filename)] |
79 return data | 100 return data |
80 | 101 |
81 | 102 |
82 class SplitArgs(argparse.Action): | 103 class SplitArgs(argparse.Action): |
84 setattr(namespace, self.dest, values.split(",")) | 105 setattr(namespace, self.dest, values.split(",")) |
85 | 106 |
86 | 107 |
87 if __name__ == "__main__": | 108 if __name__ == "__main__": |
88 parser = argparse.ArgumentParser( | 109 parser = argparse.ArgumentParser( |
89 description="Fetch ARTIC SARS-CoV-2 primer files for Galaxy/IRIDA use" | 110 description="Fetch ARTIC, VarSkip and Midnight SARS-CoV-2 primer files for Galaxy/IRIDA use" |
90 ) | 111 ) |
91 parser.add_argument( | 112 parser.add_argument( |
92 "--output_directory", default="tmp", help="Directory to write output to" | 113 "--output_directory", default="tmp", help="Directory to write output to" |
93 ) | 114 ) |
94 primer_file = parser.add_argument_group() | 115 primer_file = parser.add_argument_group() |
144 if not os.path.isdir(output_directory): | 165 if not os.path.isdir(output_directory): |
145 os.makedirs(output_directory) | 166 os.makedirs(output_directory) |
146 | 167 |
147 data_manager_dict = {} | 168 data_manager_dict = {} |
148 data_manager_dict["data_tables"] = config.get("data_tables", {}) | 169 data_manager_dict["data_tables"] = config.get("data_tables", {}) |
149 data_manager_dict["data_tables"][DATA_TABLE_NAME] = data_manager_dict[ | 170 data_manager_dict["data_tables"][DATA_TABLE_NAME] = [] |
150 "data_tables" | |
151 ].get(DATA_TABLE_NAME, []) | |
152 | 171 |
153 if args.artic_primers: | 172 if args.artic_primers: |
154 data = fetch_artic_primers(output_directory, args.artic_primers) | 173 data = fetch_primers(output_directory, args.artic_primers) |
155 else: | 174 else: |
156 data = install_primer_file( | 175 data = install_primer_file( |
157 output_directory, | 176 output_directory, |
158 args.primer_file, | 177 args.primer_file, |
159 args.primer_name, | 178 args.primer_name, |