comparison data_manager/install_primer_scheme_bedfiles.py @ 6:dd451e45681c draft default tip

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_primer_scheme_bedfiles commit 4880dcfcdddd9ed8415ccde01b2f8e2c28dab5c3"
author iuc
date Tue, 16 Nov 2021 08:22:08 +0000
parents 157a5c9ea72a
children
comparison
equal deleted inserted replaced
5:be70da9dc013 6:dd451e45681c
16 import requests 16 import requests
17 17
18 DATA_TABLE_NAME = "primer_scheme_bedfiles" 18 DATA_TABLE_NAME = "primer_scheme_bedfiles"
19 19
20 20
21 def write_artic_style_bed(input_file, bed_output_filename): 21 def convert_and_write_bed(input_file, bed_output_filename, scheme_name, force_string=True):
22 with open(bed_output_filename, "w") as bed_output_file: 22 with open(bed_output_filename, "w") as bed_output_file:
23 for line in input_file: 23 for line in input_file:
24 fields = line.split("\t") 24 fields = line.strip().split("\t")
25 if len(fields) < 6: 25 if "Midnight" in scheme_name:
26 # too short to encode the strand format 26 # Midnight primers are distributed in a tabular file, not a BED file
27 exit("invalid format in BED file: {}".format(line.rstrip())) 27 if line.startswith("Primer Name"):
28 continue
29 if len(fields) != 8:
30 exit("Unexpected format in Midnight primer file: {}".format(line.rstrip()))
31 (primer_name, _, pool, _, _, _, start, end) = fields
32 strand = '+' if primer_name.endswith('LEFT') else '-'
33 if strand == '-':
34 start, end = end, start
35 fields = ["MN908947.3", start, end, primer_name, pool, strand]
36 else:
37 if len(fields) < 5:
38 # too short to encode the "ARTIC style BED" format
39 exit("invalid format in BED file: {}".format(line.rstrip()))
40 # 'BED' format used by ARTIC pipeline uses
41 # chrom start end primer_name pool_name
42 # see this: https://github.com/artic-network/fieldbioinformatics/blob/master/artic/vcftagprimersites.py#L76
43 # for ARTIC minion and
44 # this: https://github.com/andersen-lab/ivar/blob/master/src/primer_bed.cpp#L125
45 # for ivar trim (ivar trim treats the file as BED following the standard but also allows the ARTIC format)
28 try: 46 try:
29 # try and parse field 5 as a number 47 float(fields[4])
30 score = float(fields[4])
31 except ValueError: 48 except ValueError:
32 # Alright, this is an ARTIC-style bed, 49 # this is a string, we can leave it as is
33 # which is actually against the specs, but required by the
34 # ARTIC pipeline.
35 pass 50 pass
36 else: 51 else:
37 # This is a regular bed with numbers in the score column. 52 # ensure that it is forced to be a string
38 # We need to "fix" it for the ARTIC pipeline. 53 fields[4] = '_{0}'.format(fields[4])
39 fields[4] = '_{0}'.format(score) 54 print('\t'.join(fields), file=bed_output_file)
40 bed_output_file.write("\t".join(fields))
41 55
42 56
43 def fetch_artic_primers(output_directory, primers): 57 def fetch_primers(output_directory, primers):
44 primer_sets = { 58 primer_sets = {
45 "SARS-CoV-2-ARTICv1": "https://raw.githubusercontent.com/artic-network/artic-ncov2019/master/primer_schemes/nCoV-2019/V1/nCoV-2019.bed", 59 "SARS-CoV-2-ARTICv1": "https://raw.githubusercontent.com/artic-network/artic-ncov2019/master/primer_schemes/nCoV-2019/V1/nCoV-2019.bed",
46 "SARS-CoV-2-ARTICv2": "https://raw.githubusercontent.com/artic-network/artic-ncov2019/master/primer_schemes/nCoV-2019/V2/nCoV-2019.bed", 60 "SARS-CoV-2-ARTICv2": "https://raw.githubusercontent.com/artic-network/artic-ncov2019/master/primer_schemes/nCoV-2019/V2/nCoV-2019.bed",
47 "SARS-CoV-2-ARTICv3": "https://raw.githubusercontent.com/artic-network/artic-ncov2019/master/primer_schemes/nCoV-2019/V3/nCoV-2019.bed", 61 "SARS-CoV-2-ARTICv3": "https://raw.githubusercontent.com/artic-network/artic-ncov2019/master/primer_schemes/nCoV-2019/V3/nCoV-2019.bed",
62 "SARS-CoV-2-ARTICv4": "https://raw.githubusercontent.com/artic-network/artic-ncov2019/master/primer_schemes/nCoV-2019/V4/SARS-CoV-2.scheme.bed",
63 "VarSkip-V1a": "https://raw.githubusercontent.com/nebiolabs/VarSkip/main/schemes/NEB_VarSkip/V1a/NEB_VarSkip.scheme.bed",
64 "Midnight-v1": "https://zenodo.org/record/3897530/files/SARS-CoV-2_primer_sets_RBK004_nanopore_sequencing.tab?download=1"
48 } 65 }
49 66
50 data = [] 67 data = []
51 for name, url in primer_sets.items(): 68 for name, url in primer_sets.items():
52 if name not in primers: 69 if name not in primers:
60 response.status_code, 77 response.status_code,
61 file=sys.stderr, 78 file=sys.stderr,
62 ) 79 )
63 exit(response.status_code) 80 exit(response.status_code)
64 bed_output_filename = os.path.join(output_directory, name + ".bed") 81 bed_output_filename = os.path.join(output_directory, name + ".bed")
65 write_artic_style_bed(StringIO(response.text), bed_output_filename) 82 convert_and_write_bed(StringIO(response.text), bed_output_filename, name)
66 description = name[:-2] + " " + name[-2:] + " primer set" 83 if 'ARTIC' in name:
84 # split the vX from the rest of the name in ARTIC primer set description
85 description = name[:-2] + " " + name[-2:] + " primer set"
86 else:
87 description = name + " primer set"
67 data.append(dict(value=name, path=bed_output_filename, description=description)) 88 data.append(dict(value=name, path=bed_output_filename, description=description))
68 return data 89 return data
69 90
70 91
71 def install_primer_file( 92 def install_primer_file(
72 output_directory, input_filename, primer_name, primer_description 93 output_directory, input_filename, scheme_name, primer_description
73 ): 94 ):
74 name = re.sub(r"\W", "", str(primer_name).replace(" ", "_")) 95 name = re.sub(r"[^\w-]", "", str(scheme_name).replace(" ", "_"))
75 output_filename = os.path.join(output_directory, name + ".bed") 96 output_filename = os.path.join(output_directory, name + ".bed")
76 with open(input_filename) as input_file: 97 with open(input_filename) as input_file:
77 write_artic_style_bed(input_file, output_filename) 98 convert_and_write_bed(input_file, output_filename, scheme_name)
78 data = [dict(value=name, description=primer_description, path=output_filename)] 99 data = [dict(value=name, description=primer_description, path=output_filename)]
79 return data 100 return data
80 101
81 102
82 class SplitArgs(argparse.Action): 103 class SplitArgs(argparse.Action):
84 setattr(namespace, self.dest, values.split(",")) 105 setattr(namespace, self.dest, values.split(","))
85 106
86 107
87 if __name__ == "__main__": 108 if __name__ == "__main__":
88 parser = argparse.ArgumentParser( 109 parser = argparse.ArgumentParser(
89 description="Fetch ARTIC SARS-CoV-2 primer files for Galaxy/IRIDA use" 110 description="Fetch ARTIC, VarSkip and Midnight SARS-CoV-2 primer files for Galaxy/IRIDA use"
90 ) 111 )
91 parser.add_argument( 112 parser.add_argument(
92 "--output_directory", default="tmp", help="Directory to write output to" 113 "--output_directory", default="tmp", help="Directory to write output to"
93 ) 114 )
94 primer_file = parser.add_argument_group() 115 primer_file = parser.add_argument_group()
144 if not os.path.isdir(output_directory): 165 if not os.path.isdir(output_directory):
145 os.makedirs(output_directory) 166 os.makedirs(output_directory)
146 167
147 data_manager_dict = {} 168 data_manager_dict = {}
148 data_manager_dict["data_tables"] = config.get("data_tables", {}) 169 data_manager_dict["data_tables"] = config.get("data_tables", {})
149 data_manager_dict["data_tables"][DATA_TABLE_NAME] = data_manager_dict[ 170 data_manager_dict["data_tables"][DATA_TABLE_NAME] = []
150 "data_tables"
151 ].get(DATA_TABLE_NAME, [])
152 171
153 if args.artic_primers: 172 if args.artic_primers:
154 data = fetch_artic_primers(output_directory, args.artic_primers) 173 data = fetch_primers(output_directory, args.artic_primers)
155 else: 174 else:
156 data = install_primer_file( 175 data = install_primer_file(
157 output_directory, 176 output_directory,
158 args.primer_file, 177 args.primer_file,
159 args.primer_name, 178 args.primer_name,