changeset 22:ba52692d7a95 draft

Uploaded
author computationaltranscriptomics
date Thu, 26 Mar 2020 08:57:29 -0400
parents 875cdb72f965
children 6af9965ab41c
files config_lookup.py
diffstat 1 files changed, 41 insertions(+), 1 deletions(-) [+]
line wrap: on
line diff
--- a/config_lookup.py	Thu Mar 26 08:57:02 2020 -0400
+++ b/config_lookup.py	Thu Mar 26 08:57:29 2020 -0400
@@ -25,6 +25,38 @@
 	parser.add_argument('--acclists', help='(absolute) path to directory to save the accession lists to')
 	args = parser.parse_args()
 
+	# load taxonomic rank and taxonomy ID for each known accession list name
+	rank = {}
+	rank["Alphaproteobacteria"] = {"tax": 28211, "rank": "class"}
+	rank["Aquificae"] = {"tax": 200783, "rank": "phylum"}
+	rank["Archaea"] = {"tax": 2157, "rank": "superkingdom"}
+	rank["Armatimonadetes"] = {"tax": 67819, "rank": "phylum"}
+	rank["Bacteria"] = {"tax": 2, "rank": "superkingdom"}
+	rank["Bacteroidetes"] = {"tax": 976, "rank": "phylum"}
+	rank["Caldiserica"] = {"tax": 67814, "rank": "phylum"}
+	rank["Chlamydiae"] = {"tax": 204428, "rank": "phylum"}
+	rank["Chloroflexi"] = {"tax": 200795, "rank": "phylum"}
+	rank["Chrysiogenetes"] = {"tax": 200938, "rank": "phylum"}
+	rank["Cyanobacteria"] = {"tax": 1117, "rank":"phylum"}
+	rank["Deferribacteres"] = {"tax": 200930, "rank": "phylum"}
+	rank["Deinococcus-thermus"] = {"tax": 1297, "rank": "phylum"}
+	rank["Dictyoglomi"] = {"tax": 68297, "rank": "phylum"}
+	rank["Elusimicrobia"] = {"tax": 74152, "rank": "phylum"}
+	rank["Fibrobacteres"] = {"tax": 65842, "rank": "phylum"}
+	rank["Firmicutes"] = {"tax": 1239, "rank": "phylum"}
+	rank["Fusobacteria"] = {"tax": 32066, "rank": "phylum"}
+	rank["Gemmatimonadetes"] = {"tax": 142182, "rank": "phylum"}
+	rank["Nitrospinae"] = {"tax": 1293497, "rank": "phylum"}
+	rank["Nitrospirae"] = {"tax": 40117, "rank": "phylum"}
+	rank["Planctomycetes"] = {"tax": 203682, "rank": "phylum"}
+	rank["Proteobacteria"] = {"tax": 1224, "rank": "phylum"}
+	rank["Spirochaetes"] = {"tax": 203691, "rank": "phylum"}
+	rank["Synergistetes"] = {"tax": 508458, "rank": "phylum"}
+	rank["Tenericutes"] = {"tax": 544448, "rank": "phylum"}
+	rank["Thermodesulfobacteria"] = {"tax": 200940, "rank": "phylum"}
+	rank["Thermotogae"] = {"tax": 200918, "rank": "phylum"}
+	rank["Viruses"] = {"tax": 10239, "rank": "phylum"}
+
 
 	# ./accession_lists_links.txt as default
 	if args.acclinks == None:
@@ -60,11 +92,19 @@
 		for url in link:
 			acc = requests.get(url)
 			filename = str(os.path.basename(url)).replace('\n','')
+			filenameStem = str(os.path.splitext(filename)[0])
+
+			#filename = str(os.path.basename(url)).replace('\n','')
 			print('### fetch: ' + filename)
 			open(os.path.join(args.acclists,filename),'wb').write(acc.content)
 			
 			# 
-			accDataTable.write(filename + '\t')
+			if filenameStem in rank:
+				taxid = str(rank[filenameStem]["tax"])
+				rankname = rank[filenameStem]["rank"]
+				filenameStem = filenameStem + ' (tax:' + taxid + ', rank:' + rankname + ')'
+				
+			accDataTable.write(filenameStem + '\t')	
 			accDataTable.write(os.path.join(args.acclists,filename) + '\n')
 
 		accDataTable.close()