data_manager_primer_scheme_bedfiles: data_manager/install_primer_scheme

comparison data_manager/install_primer_scheme_bedfiles.py @ 6:dd451e45681c draft default tip

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_primer_scheme_bedfiles commit 4880dcfcdddd9ed8415ccde01b2f8e2c28dab5c3"

author	iuc
date	Tue, 16 Nov 2021 08:22:08 +0000
parents	157a5c9ea72a
children

comparison

equal deleted inserted replaced

-:be70da9dc013
+:dd451e45681c
 import requests
 DATA_TABLE_NAME = "primer_scheme_bedfiles"
-def write_artic_style_bed(input_file, bed_output_filename):
+def convert_and_write_bed(input_file, bed_output_filename, scheme_name, force_string=True):
 with open(bed_output_filename, "w") as bed_output_file:
 for line in input_file:
-fields = line.split("\t")
+fields = line.strip().split("\t")
-if len(fields) < 6:
+if "Midnight" in scheme_name:
-# too short to encode the strand format
+# Midnight primers are distributed in a tabular file, not a BED file
-exit("invalid format in BED file: {}".format(line.rstrip()))
+if line.startswith("Primer Name"):
+continue
+if len(fields) != 8:
+exit("Unexpected format in Midnight primer file: {}".format(line.rstrip()))
+(primer_name, _, pool, _, _, _, start, end) = fields
+strand = '+' if primer_name.endswith('LEFT') else '-'
+if strand == '-':
+start, end = end, start
+fields = ["MN908947.3", start, end, primer_name, pool, strand]
+else:
+if len(fields) < 5:
+# too short to encode the "ARTIC style BED" format
+exit("invalid format in BED file: {}".format(line.rstrip()))
+# 'BED' format used by ARTIC pipeline uses
+# chrom  start  end  primer_name  pool_name
+# see this: https://github.com/artic-network/fieldbioinformatics/blob/master/artic/vcftagprimersites.py#L76
+# for ARTIC minion and
+# this: https://github.com/andersen-lab/ivar/blob/master/src/primer_bed.cpp#L125
+# for ivar trim (ivar trim treats the file as BED following the standard but also allows the ARTIC format)
 try:
-# try and parse field 5 as a number
+float(fields[4])
-score = float(fields[4])
 except ValueError:
-# Alright, this is an ARTIC-style bed,
+# this is a string, we can leave it as is
-# which is actually against the specs, but required by the
-# ARTIC pipeline.
 pass
 else:
-# This is a regular bed with numbers in the score column.
+# ensure that it is forced to be a string
-# We need to "fix" it for the ARTIC pipeline.
+fields[4] = '_{0}'.format(fields[4])
-fields[4] = '_{0}'.format(score)
+print('\t'.join(fields), file=bed_output_file)
-bed_output_file.write("\t".join(fields))
-def fetch_artic_primers(output_directory, primers):
+def fetch_primers(output_directory, primers):
 primer_sets = {
 "SARS-CoV-2-ARTICv1": "https://raw.githubusercontent.com/artic-network/artic-ncov2019/master/primer_schemes/nCoV-2019/V1/nCoV-2019.bed",
 "SARS-CoV-2-ARTICv2": "https://raw.githubusercontent.com/artic-network/artic-ncov2019/master/primer_schemes/nCoV-2019/V2/nCoV-2019.bed",
 "SARS-CoV-2-ARTICv3": "https://raw.githubusercontent.com/artic-network/artic-ncov2019/master/primer_schemes/nCoV-2019/V3/nCoV-2019.bed",
+"SARS-CoV-2-ARTICv4": "https://raw.githubusercontent.com/artic-network/artic-ncov2019/master/primer_schemes/nCoV-2019/V4/SARS-CoV-2.scheme.bed",
+"VarSkip-V1a": "https://raw.githubusercontent.com/nebiolabs/VarSkip/main/schemes/NEB_VarSkip/V1a/NEB_VarSkip.scheme.bed",
+"Midnight-v1": "https://zenodo.org/record/3897530/files/SARS-CoV-2_primer_sets_RBK004_nanopore_sequencing.tab?download=1"
 }
 data = []
 for name, url in primer_sets.items():
 if name not in primers:
 response.status_code,
 file=sys.stderr,
 )
 exit(response.status_code)
 bed_output_filename = os.path.join(output_directory, name + ".bed")
-write_artic_style_bed(StringIO(response.text), bed_output_filename)
+convert_and_write_bed(StringIO(response.text), bed_output_filename, name)
-description = name[:-2] + " " + name[-2:] + " primer set"
+if 'ARTIC' in name:
+# split the vX from the rest of the name in ARTIC primer set description
+description = name[:-2] + " " + name[-2:] + " primer set"
+else:
+description = name + " primer set"
 data.append(dict(value=name, path=bed_output_filename, description=description))
 return data
 def install_primer_file(
-output_directory, input_filename, primer_name, primer_description
+output_directory, input_filename, scheme_name, primer_description
 ):
-name = re.sub(r"\W", "", str(primer_name).replace(" ", "_"))
+name = re.sub(r"[^\w-]", "", str(scheme_name).replace(" ", "_"))
 output_filename = os.path.join(output_directory, name + ".bed")
 with open(input_filename) as input_file:
-write_artic_style_bed(input_file, output_filename)
+convert_and_write_bed(input_file, output_filename, scheme_name)
 data = [dict(value=name, description=primer_description, path=output_filename)]
 return data
 class SplitArgs(argparse.Action):
 setattr(namespace, self.dest, values.split(","))
 if __name__ == "__main__":
 parser = argparse.ArgumentParser(
-description="Fetch ARTIC SARS-CoV-2 primer files for Galaxy/IRIDA use"
+description="Fetch ARTIC, VarSkip and Midnight SARS-CoV-2 primer files for Galaxy/IRIDA use"
 )
 parser.add_argument(
 "--output_directory", default="tmp", help="Directory to write output to"
 )
 primer_file = parser.add_argument_group()
 if not os.path.isdir(output_directory):
 os.makedirs(output_directory)
 data_manager_dict = {}
 data_manager_dict["data_tables"] = config.get("data_tables", {})
-data_manager_dict["data_tables"][DATA_TABLE_NAME] = data_manager_dict[
+data_manager_dict["data_tables"][DATA_TABLE_NAME] = []
-"data_tables"
-].get(DATA_TABLE_NAME, [])
 if args.artic_primers:
-data = fetch_artic_primers(output_directory, args.artic_primers)
+data = fetch_primers(output_directory, args.artic_primers)
 else:
 data = install_primer_file(
 output_directory,
 args.primer_file,
 args.primer_name,

Mercurial > repos > iuc > data_manager_primer_scheme_bedfiles

comparison data_manager/install_primer_scheme_bedfiles.py @ 6:dd451e45681c draft default tip