comparison config_lookup.py @ 22:ba52692d7a95 draft

Uploaded
author computationaltranscriptomics
date Thu, 26 Mar 2020 08:57:29 -0400
parents 4ac32c671a40
children
comparison
equal deleted inserted replaced
21:875cdb72f965 22:ba52692d7a95
22 parser = argparse.ArgumentParser(description='incorporate the accession lists in GLASSgo/Galaxy to enable clade-specific searches') 22 parser = argparse.ArgumentParser(description='incorporate the accession lists in GLASSgo/Galaxy to enable clade-specific searches')
23 parser.add_argument('--galaxy', required=True, help='(absolute) path to the root directory of the Galaxy instance') 23 parser.add_argument('--galaxy', required=True, help='(absolute) path to the root directory of the Galaxy instance')
24 parser.add_argument('--acclinks', help='(absolute) path to file containing URLs to the accession lists') 24 parser.add_argument('--acclinks', help='(absolute) path to file containing URLs to the accession lists')
25 parser.add_argument('--acclists', help='(absolute) path to directory to save the accession lists to') 25 parser.add_argument('--acclists', help='(absolute) path to directory to save the accession lists to')
26 args = parser.parse_args() 26 args = parser.parse_args()
27
28 # load taxonomic rank and taxonomy ID for each clade, keyed by accession-list file stem
29 rank = {}
30 rank["Alphaproteobacteria"] = {"tax": 28211, "rank": "class"}
31 rank["Aquificae"] = {"tax": 200783, "rank": "phylum"}
32 rank["Archaea"] = {"tax": 2157, "rank": "superkingdom"}
33 rank["Armatimonadetes"] = {"tax": 67819, "rank": "phylum"}
34 rank["Bacteria"] = {"tax": 2, "rank": "superkingdom"}
35 rank["Bacteroidetes"] = {"tax": 976, "rank": "phylum"}
36 rank["Caldiserica"] = {"tax": 67814, "rank": "phylum"}
37 rank["Chlamydiae"] = {"tax": 204428, "rank": "phylum"}
38 rank["Chloroflexi"] = {"tax": 200795, "rank": "phylum"}
39 rank["Chrysiogenetes"] = {"tax": 200938, "rank": "phylum"}
40 rank["Cyanobacteria"] = {"tax": 1117, "rank":"phylum"}
41 rank["Deferribacteres"] = {"tax": 200930, "rank": "phylum"}
42 rank["Deinococcus-thermus"] = {"tax": 1297, "rank": "phylum"}
43 rank["Dictyoglomi"] = {"tax": 68297, "rank": "phylum"}
44 rank["Elusimicrobia"] = {"tax": 74152, "rank": "phylum"}
45 rank["Fibrobacteres"] = {"tax": 65842, "rank": "phylum"}
46 rank["Firmicutes"] = {"tax": 1239, "rank": "phylum"}
47 rank["Fusobacteria"] = {"tax": 32066, "rank": "phylum"}
48 rank["Gemmatimonadetes"] = {"tax": 142182, "rank": "phylum"}
49 rank["Nitrospinae"] = {"tax": 1293497, "rank": "phylum"}
50 rank["Nitrospirae"] = {"tax": 40117, "rank": "phylum"}
51 rank["Planctomycetes"] = {"tax": 203682, "rank": "phylum"}
52 rank["Proteobacteria"] = {"tax": 1224, "rank": "phylum"}
53 rank["Spirochaetes"] = {"tax": 203691, "rank": "phylum"}
54 rank["Synergistetes"] = {"tax": 508458, "rank": "phylum"}
55 rank["Tenericutes"] = {"tax": 544448, "rank": "phylum"}
56 rank["Thermodesulfobacteria"] = {"tax": 200940, "rank": "phylum"}
57 rank["Thermotogae"] = {"tax": 200918, "rank": "phylum"}
58 rank["Viruses"] = {"tax": 10239, "rank": "phylum"}
27 59
28 60
29 # ./accession_lists_links.txt as default 61 # ./accession_lists_links.txt as default
30 if args.acclinks == None: 62 if args.acclinks == None:
31 args.acclinks = os.path.join(os.getcwd(), 'accession_lists_links.txt') 63 args.acclinks = os.path.join(os.getcwd(), 'accession_lists_links.txt')
58 accDataTable.write('global\tglobal\n') 90 accDataTable.write('global\tglobal\n')
59 # fetch accession lists 91 # fetch accession lists
60 for url in link: 92 for url in link:
61 acc = requests.get(url) 93 acc = requests.get(url)
62 filename = str(os.path.basename(url)).replace('\n','') 94 filename = str(os.path.basename(url)).replace('\n','')
95 filenameStem = str(os.path.splitext(filename)[0])
96
97 #filename = str(os.path.basename(url)).replace('\n','')
63 print('### fetch: ' + filename) 98 print('### fetch: ' + filename)
64 open(os.path.join(args.acclists,filename),'wb').write(acc.content) 99 open(os.path.join(args.acclists,filename),'wb').write(acc.content)
65 100
66 # 101 #
67 accDataTable.write(filename + '\t') 102 if filenameStem in rank:
103 taxid = str(rank[filenameStem]["tax"])
104 rankname = rank[filenameStem]["rank"]
105 filenameStem = filenameStem + ' (tax:' + taxid + ', rank:' + rankname + ')'
106
107 accDataTable.write(filenameStem + '\t')
68 accDataTable.write(os.path.join(args.acclists,filename) + '\n') 108 accDataTable.write(os.path.join(args.acclists,filename) + '\n')
69 109
70 accDataTable.close() 110 accDataTable.close()
71 print('### create tab-separated list '+ accDataTableFile) 111 print('### create tab-separated list '+ accDataTableFile)
72 112