15
|
1 #!/usr/local/bin/python3
|
|
2
|
|
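'''
This script downloads lookup tables (accession lists) and integrates them into the Galaxy instance.

USAGE
    config_lookup.py --galaxy GALAXY [--acclinks ACCLINKS] [--acclists ACCLISTS]

OPTIONS
    -h, --help            show this help message and exit
    --galaxy GALAXY       (absolute) path to the root directory of the Galaxy instance
    --acclinks ACCLINKS   (absolute) path to file containing URLs to the accession lists
                          (default: ./accession_lists_links.txt)
    --acclists ACCLISTS   (absolute) path to directory to save the accession lists to
                          (default: ./acclists)
'''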
|
|
13
|
|
14 import os
|
|
15 import argparse
|
|
16 import requests
|
|
17 import sys
|
|
18 import shutil
|
|
19
|
|
def main():
    """Download the accession lists and register them with the Galaxy instance.

    Reads one URL per line from the ``--acclinks`` file, stores each download
    in the ``--acclists`` directory, writes a tab-separated data table
    (display name, path to the list) to
    ``./tool-data/glassgo_accession_list.txt`` and copies that table into
    ``<galaxy>/tool-data/``.

    Exits with status 1 when the Galaxy directory or the link file does not
    exist. Raises ``requests.HTTPError`` when a download answers with an
    HTTP error status.
    """
    # parse arguments
    parser = argparse.ArgumentParser(description='incorporate the accession lists in GLASSgo/Galaxy to enable clade-specific searches')
    parser.add_argument('--galaxy', required=True, help='(absolute) path to the root directory of the Galaxy instance')
    parser.add_argument('--acclinks', help='(absolute) path to file containing URLs to the accession lists')
    parser.add_argument('--acclists', help='(absolute) path to directory to save the accession lists to')
    args = parser.parse_args()

    # taxonomic id and rank for each known accession list (keyed by file stem);
    # used to build the display name shown in the user interface
    rank = {
        "Alphaproteobacteria": {"tax": 28211, "rank": "class"},
        "Aquificae": {"tax": 200783, "rank": "phylum"},
        "Archaea": {"tax": 2157, "rank": "superkingdom"},
        "Armatimonadetes": {"tax": 67819, "rank": "phylum"},
        "Bacteria": {"tax": 2, "rank": "superkingdom"},
        "Bacteroidetes": {"tax": 976, "rank": "phylum"},
        "Caldiserica": {"tax": 67814, "rank": "phylum"},
        "Chlamydiae": {"tax": 204428, "rank": "phylum"},
        "Chloroflexi": {"tax": 200795, "rank": "phylum"},
        "Chrysiogenetes": {"tax": 200938, "rank": "phylum"},
        "Cyanobacteria": {"tax": 1117, "rank": "phylum"},
        "Deferribacteres": {"tax": 200930, "rank": "phylum"},
        "Deinococcus-thermus": {"tax": 1297, "rank": "phylum"},
        "Dictyoglomi": {"tax": 68297, "rank": "phylum"},
        "Elusimicrobia": {"tax": 74152, "rank": "phylum"},
        "Fibrobacteres": {"tax": 65842, "rank": "phylum"},
        "Firmicutes": {"tax": 1239, "rank": "phylum"},
        "Fusobacteria": {"tax": 32066, "rank": "phylum"},
        "Gemmatimonadetes": {"tax": 142182, "rank": "phylum"},
        "Nitrospinae": {"tax": 1293497, "rank": "phylum"},
        "Nitrospirae": {"tax": 40117, "rank": "phylum"},
        "Planctomycetes": {"tax": 203682, "rank": "phylum"},
        "Proteobacteria": {"tax": 1224, "rank": "phylum"},
        "Spirochaetes": {"tax": 203691, "rank": "phylum"},
        "Synergistetes": {"tax": 508458, "rank": "phylum"},
        "Tenericutes": {"tax": 544448, "rank": "phylum"},
        "Thermodesulfobacteria": {"tax": 200940, "rank": "phylum"},
        "Thermotogae": {"tax": 200918, "rank": "phylum"},
        # NOTE(review): "phylum" looks copy-pasted for Viruses; presumably a
        # higher rank is meant -- confirm against the NCBI taxonomy.
        "Viruses": {"tax": 10239, "rank": "phylum"},
    }

    # ./accession_lists_links.txt as default
    if args.acclinks is None:
        args.acclinks = os.path.join(os.getcwd(), 'accession_lists_links.txt')

    # ./acclists as default folder for the accession lists
    if args.acclists is None:
        args.acclists = os.path.join(os.getcwd(), 'acclists')

    # check for existence of the Galaxy folder and of the file with the URLs;
    # exit with a non-zero status so that calling scripts notice the failure
    if not os.path.exists(args.galaxy):
        print('\tERROR: ' + args.galaxy + ' could not be found!')
        sys.exit(1)
    if not os.path.exists(args.acclinks):
        print('\tERROR: ' + args.acclinks + ' could not be found!')
        sys.exit(1)

    print('################ configure the accession lists ################')
    print('### the accession lists will be saved to ' + args.acclists)

    # create folder for accession lists
    os.makedirs(args.acclists, exist_ok=True)

    # list of lookup tables that populates the user interface; make sure the
    # local tool-data folder exists before the table is written
    accDataTableFile = os.path.join(os.getcwd(), 'tool-data/glassgo_accession_list.txt')
    os.makedirs(os.path.dirname(accDataTableFile), exist_ok=True)

    with open(args.acclinks, 'r') as link, open(accDataTableFile, 'w') as accDataTable:
        accDataTable.write('global\tglobal\n')
        # fetch accession lists
        for line in link:
            # drop the line terminator (and stray whitespace); skip empty lines
            url = line.strip()
            if not url:
                continue

            filename = os.path.basename(url)
            filenameStem = os.path.splitext(filename)[0]
            print('### fetch: ' + filename)

            acc = requests.get(url, timeout=60)
            # do not store an HTTP error page as if it were an accession list
            acc.raise_for_status()

            listPath = os.path.join(args.acclists, filename)
            with open(listPath, 'wb') as accFile:
                accFile.write(acc.content)

            # known clades get their taxonomic id and rank appended to the name
            if filenameStem in rank:
                taxid = str(rank[filenameStem]["tax"])
                rankname = rank[filenameStem]["rank"]
                filenameStem = filenameStem + ' (tax:' + taxid + ', rank:' + rankname + ')'

            accDataTable.write(filenameStem + '\t' + listPath + '\n')

    print('### create tab-separated list '+ accDataTableFile)

    # copy the list of accession lists to <galaxy>/tool-data
    galaxyToolData = os.path.join(args.galaxy, 'tool-data/')
    try:
        shutil.copy(accDataTableFile, galaxyToolData)
    except shutil.SameFileError:
        # the script was started from the Galaxy root: the table is already in place
        pass
    print('### move tab-separated list to ' + str(galaxyToolData))
|
|
116
|
|
# run the configuration only when executed as a script, not when imported
if __name__ == '__main__':
    main()
|