# HG changeset patch # User computationaltranscriptomics # Date 1585227449 14400 # Node ID ba52692d7a953ee71728aa9c46a2a9cfdeda59b3 # Parent 875cdb72f965215242d5d47fe513b419196b8c86 Uploaded diff -r 875cdb72f965 -r ba52692d7a95 config_lookup.py --- a/config_lookup.py Thu Mar 26 08:57:02 2020 -0400 +++ b/config_lookup.py Thu Mar 26 08:57:29 2020 -0400 @@ -25,6 +25,38 @@ parser.add_argument('--acclists', help='(absolute) path to directory to save the accession lists to') args = parser.parse_args() + # load taxonomic rank and + rank = {} + rank["Alphaproteobacteria"] = {"tax": 28211, "rank": "class"} + rank["Aquificae"] = {"tax": 200783, "rank": "phylum"} + rank["Archaea"] = {"tax": 2157, "rank": "superkingdom"} + rank["Armatimonadetes"] = {"tax": 67819, "rank": "phylum"} + rank["Bacteria"] = {"tax": 2, "rank": "superkingdom"} + rank["Bacteroidetes"] = {"tax": 976, "rank": "phylum"} + rank["Caldiserica"] = {"tax": 67814, "rank": "phylum"} + rank["Chlamydiae"] = {"tax": 204428, "rank": "phylum"} + rank["Chloroflexi"] = {"tax": 200795, "rank": "phylum"} + rank["Chrysiogenetes"] = {"tax": 200938, "rank": "phylum"} + rank["Cyanobacteria"] = {"tax": 1117, "rank":"phylum"} + rank["Deferribacteres"] = {"tax": 200930, "rank": "phylum"} + rank["Deinococcus-thermus"] = {"tax": 1297, "rank": "phylum"} + rank["Dictyoglomi"] = {"tax": 68297, "rank": "phylum"} + rank["Elusimicrobia"] = {"tax": 74152, "rank": "phylum"} + rank["Fibrobacteres"] = {"tax": 65842, "rank": "phylum"} + rank["Firmicutes"] = {"tax": 1239, "rank": "phylum"} + rank["Fusobacteria"] = {"tax": 32066, "rank": "phylum"} + rank["Gemmatimonadetes"] = {"tax": 142182, "rank": "phylum"} + rank["Nitrospinae"] = {"tax": 1293497, "rank": "phylum"} + rank["Nitrospirae"] = {"tax": 40117, "rank": "phylum"} + rank["Planctomycetes"] = {"tax": 203682, "rank": "phylum"} + rank["Proteobacteria"] = {"tax": 1224, "rank": "phylum"} + rank["Spirochaetes"] = {"tax": 203691, "rank": "phylum"} + rank["Synergistetes"] = {"tax": 508458, "rank": "phylum"} + rank["Tenericutes"] = {"tax": 544448, "rank": "phylum"} + rank["Thermodesulfobacteria"] = {"tax": 200940, "rank": "phylum"} + rank["Thermotogae"] = {"tax": 200918, "rank": "phylum"} + rank["Viruses"] = {"tax": 10239, "rank": "phylum"} + # ./accession_lists_links.txt as default if args.acclinks == None: @@ -60,11 +92,19 @@ for url in link: acc = requests.get(url) filename = str(os.path.basename(url)).replace('\n','') + filenameStem = str(os.path.splitext(filename)[0]) + + #filename = str(os.path.basename(url)).replace('\n','') print('### fetch: ' + filename) open(os.path.join(args.acclists,filename),'wb').write(acc.content) # - accDataTable.write(filename + '\t') + if filenameStem in rank: + taxid = str(rank[filenameStem]["tax"]) + rankname = rank[filenameStem]["rank"] + filenameStem = filenameStem + ' (tax:' + taxid + ', rank:' + rankname + ')' + + accDataTable.write(filenameStem + '\t') accDataTable.write(os.path.join(args.acclists,filename) + '\n') accDataTable.close()