# HG changeset patch
# User estrain
# Date 1653392779 0
# Node ID 5ba68abd41f6b5e6290305f8cc79157988ea92d6
Uploaded
diff -r 000000000000 -r 5ba68abd41f6 data_manager_amrfinderplus/data_manager/data_manager_amrfinderplus.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_amrfinderplus/data_manager/data_manager_amrfinderplus.py Tue May 24 11:46:19 2022 +0000
@@ -0,0 +1,106 @@
+#!/usr/bin/env python
+# Errol Strain, estrain@gmail.com
+# Database downloads for NCBI AMRFinderPlus
+
+import sys
+import os
+import tempfile
+import shutil
+import json
+import re
+import argparse
+from ftplib import FTP
+
+
+def download_from_ncbi(output_directory):
+    """Download the latest AMRFinderPlus database files and index them.
+
+    Entries of the NCBI "latest" folder whose names do not contain "allele"
+    are fetched into output_directory (it must already exist), then BLAST
+    databases and the HMM index are built there.  A failing makeblastdb or
+    hmmpress run is reported on stderr rather than raised.
+
+    Returns the first line of the downloaded version.txt, right-stripped.
+    """
+    import subprocess  # local import; argument lists keep file names away from a shell
+
+    NCBI_FTP_SERVER = 'ftp.ncbi.nlm.nih.gov'
+    NCBI_DOWNLOAD_PATH = '/pathogen/Antimicrobial_resistance/AMRFinderPlus/database/latest/'
+
+    def run(cmd, **streams):
+        # Run one indexing command inside output_directory and report failure.
+        try:
+            status = subprocess.call(cmd, cwd=output_directory, **streams)
+        except OSError as err:
+            status = err
+        if status != 0:
+            sys.stderr.write("Command failed (%s): %s\n" % (status, " ".join(cmd)))
+
+    ftp = FTP(NCBI_FTP_SERVER)
+    try:
+        ftp.login('anonymous', 'anonymous@example.com')
+        ftp.cwd(NCBI_DOWNLOAD_PATH)
+        # exclude the allele counts folder
+        files = [f for f in ftp.nlst() if 'allele' not in f]
+        for f in files:
+            # Close each file so it is flushed to disk before it is indexed.
+            with open(os.path.join(output_directory, f), 'wb') as fh:
+                ftp.retrbinary("RETR " + f, fh.write)
+        ftp.quit()
+    finally:
+        ftp.close()  # release the connection before indexing starts, even on error
+
+    # Make blast databases; per-file nucleotide DBs for AMR_DNA-* names without 'tab'
+    run(["makeblastdb", "-in", "AMRProt", "-dbtype", "prot", "-logfile", "/dev/null"])
+    run(["makeblastdb", "-in", "AMR_CDS", "-dbtype", "nucl", "-logfile", "/dev/null"])
+    for f in files:
+        if 'AMR_DNA-' in f and 'tab' not in f:
+            run(["makeblastdb", "-in", f, "-dbtype", "nucl", "-logfile", "/dev/null"])
+
+    # Make HMM indexes (hmmpress output is discarded)
+    with open(os.devnull, 'w') as devnull:
+        run(["hmmpress", "-f", "AMR.LIB"], stdout=devnull, stderr=devnull)
+    with open(os.path.join(output_directory, "version.txt")) as f:
+        return f.readline().rstrip()
+
+def print_json(version, argspath, argsname, argsout):
+    """Write the data-table JSON describing one downloaded database.
+
+    The file at argsout is overwritten with a single 'amrfinderplus' row
+    whose value, name and path columns are version, argsname and argspath.
+    """
+    row = {}
+    row["value"] = version
+    row["name"] = argsname
+    row["path"] = argspath
+    payload = {'data_tables': {'amrfinderplus': [row]}}
+
+    # Keys are sorted and the output is indented by two spaces.
+    with open(argsout, 'w') as out_handle:
+        json.dump(payload, out_handle, indent=2, sort_keys=True)
+
+
+def main():
+    """Download the database into the job's directory and register it.
+
+    The --out file is read as JSON to find the target directory
+    (output_data[0].extra_files_path) and is then overwritten with the
+    data-table entry.  --name is required but its value is not used.
+    """
+    parser = argparse.ArgumentParser(description='Download NCBI amrFinderPlus Databases')
+    parser.add_argument('--name', type=str, required=True, nargs=1, help='Database name')
+    parser.add_argument('--out', type=str, required=True, nargs=1, help='output file')
+    args = parser.parse_args()
+
+    with open(args.out[0]) as fh:
+        params = json.load(fh)
+    output_directory = params['output_data'][0]['extra_files_path']
+    # Create missing parents too, and tolerate a directory left by an earlier run.
+    if not os.path.isdir(output_directory):
+        os.makedirs(output_directory)
+
+    # Fetch the files and build blast databases
+    version = download_from_ncbi(output_directory)
+    print_json(version, output_directory, "AMRFinderPlus Database " + version, args.out[0])
+
+if __name__ == "__main__": main()  # run main() only when executed as a script, not on import
diff -r 000000000000 -r 5ba68abd41f6 data_manager_amrfinderplus/data_manager/data_manager_amrfinderplus.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_amrfinderplus/data_manager/data_manager_amrfinderplus.xml Tue May 24 11:46:19 2022 +0000
@@ -0,0 +1,24 @@
+
+
+ blast
+ hmmer
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Refer to https://ftp.ncbi.nlm.nih.gov/pathogen/Antimicrobial_resistance/AMRFinderPlus/database/latest/ for the latest database version number.
+
+
diff -r 000000000000 -r 5ba68abd41f6 data_manager_amrfinderplus/data_manager_conf.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_amrfinderplus/data_manager_conf.xml Tue May 24 11:46:19 2022 +0000
@@ -0,0 +1,19 @@
+
+
+
+
+
+
+
+
diff -r 000000000000 -r 5ba68abd41f6 data_manager_amrfinderplus/test-data/amrfinder_databases.loc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_amrfinderplus/test-data/amrfinder_databases.loc Tue May 24 11:46:19 2022 +0000
@@ -0,0 +1,7 @@
+# This is a tab-separated file describing the location of amrfinder databases
+#
+# the columns are:
+# value name path
+#
+# for example
+# 2021-04-14 May22DB /tmp/tool-data/amrfinderdatabases/May22DB
diff -r 000000000000 -r 5ba68abd41f6 data_manager_amrfinderplus/tool-data/amrfinder_databases.loc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_amrfinderplus/tool-data/amrfinder_databases.loc Tue May 24 11:46:19 2022 +0000
@@ -0,0 +1,7 @@
+# This is a tab-separated file describing the location of amrfinder databases
+#
+# the columns are:
+# value name path
+#
+# for example
+# 2021-04-14 May22DB /tmp/tool-data/amrfinderdatabases/May22DB
diff -r 000000000000 -r 5ba68abd41f6 data_manager_amrfinderplus/tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_amrfinderplus/tool_data_table_conf.xml.sample Tue May 24 11:46:19 2022 +0000
@@ -0,0 +1,6 @@
+
+
+
diff -r 000000000000 -r 5ba68abd41f6 data_manager_amrfinderplus/tool_data_table_conf.xml.test
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_amrfinderplus/tool_data_table_conf.xml.test Tue May 24 11:46:19 2022 +0000
@@ -0,0 +1,6 @@
+
+
+