Mercurial > repos > computationaltranscriptomics > glassgo
comparison config_lookup.py @ 22:ba52692d7a95 draft
Uploaded
| author | computationaltranscriptomics |
|---|---|
| date | Thu, 26 Mar 2020 08:57:29 -0400 |
| parents | 4ac32c671a40 |
| children |
comparison
equal
deleted
inserted
replaced
| 21:875cdb72f965 | 22:ba52692d7a95 |
|---|---|
| 22 parser = argparse.ArgumentParser(description='incorporate the accession lists in GLASSgo/Galaxy to enable clade-specific searches') | 22 parser = argparse.ArgumentParser(description='incorporate the accession lists in GLASSgo/Galaxy to enable clade-specific searches') |
| 23 parser.add_argument('--galaxy', required=True, help='(absolute) path to the root directory of the Galaxy instance') | 23 parser.add_argument('--galaxy', required=True, help='(absolute) path to the root directory of the Galaxy instance') |
| 24 parser.add_argument('--acclinks', help='(absolute) path to file containing URLs to the accession lists') | 24 parser.add_argument('--acclinks', help='(absolute) path to file containing URLs to the accession lists') |
| 25 parser.add_argument('--acclists', help='(absolute) path to directory to save the accession lists to') | 25 parser.add_argument('--acclists', help='(absolute) path to directory to save the accession lists to') |
| 26 args = parser.parse_args() | 26 args = parser.parse_args() |
| 27 | |
| 28 # load taxonomic rank and taxonomy ID for each clade name | |
| 29 rank = {} | |
| 30 rank["Alphaproteobacteria"] = {"tax": 28211, "rank": "class"} | |
| 31 rank["Aquificae"] = {"tax": 200783, "rank": "phylum"} | |
| 32 rank["Archaea"] = {"tax": 2157, "rank": "superkingdom"} | |
| 33 rank["Armatimonadetes"] = {"tax": 67819, "rank": "phylum"} | |
| 34 rank["Bacteria"] = {"tax": 2, "rank": "superkingdom"} | |
| 35 rank["Bacteroidetes"] = {"tax": 976, "rank": "phylum"} | |
| 36 rank["Caldiserica"] = {"tax": 67814, "rank": "phylum"} | |
| 37 rank["Chlamydiae"] = {"tax": 204428, "rank": "phylum"} | |
| 38 rank["Chloroflexi"] = {"tax": 200795, "rank": "phylum"} | |
| 39 rank["Chrysiogenetes"] = {"tax": 200938, "rank": "phylum"} | |
| 40 rank["Cyanobacteria"] = {"tax": 1117, "rank":"phylum"} | |
| 41 rank["Deferribacteres"] = {"tax": 200930, "rank": "phylum"} | |
| 42 rank["Deinococcus-thermus"] = {"tax": 1297, "rank": "phylum"} | |
| 43 rank["Dictyoglomi"] = {"tax": 68297, "rank": "phylum"} | |
| 44 rank["Elusimicrobia"] = {"tax": 74152, "rank": "phylum"} | |
| 45 rank["Fibrobacteres"] = {"tax": 65842, "rank": "phylum"} | |
| 46 rank["Firmicutes"] = {"tax": 1239, "rank": "phylum"} | |
| 47 rank["Fusobacteria"] = {"tax": 32066, "rank": "phylum"} | |
| 48 rank["Gemmatimonadetes"] = {"tax": 142182, "rank": "phylum"} | |
| 49 rank["Nitrospinae"] = {"tax": 1293497, "rank": "phylum"} | |
| 50 rank["Nitrospirae"] = {"tax": 40117, "rank": "phylum"} | |
| 51 rank["Planctomycetes"] = {"tax": 203682, "rank": "phylum"} | |
| 52 rank["Proteobacteria"] = {"tax": 1224, "rank": "phylum"} | |
| 53 rank["Spirochaetes"] = {"tax": 203691, "rank": "phylum"} | |
| 54 rank["Synergistetes"] = {"tax": 508458, "rank": "phylum"} | |
| 55 rank["Tenericutes"] = {"tax": 544448, "rank": "phylum"} | |
| 56 rank["Thermodesulfobacteria"] = {"tax": 200940, "rank": "phylum"} | |
| 57 rank["Thermotogae"] = {"tax": 200918, "rank": "phylum"} | |
| 58 rank["Viruses"] = {"tax": 10239, "rank": "phylum"} | |
| 27 | 59 |
| 28 | 60 |
| 29 # ./accession_lists_links.txt as default | 61 # ./accession_lists_links.txt as default |
| 30 if args.acclinks == None: | 62 if args.acclinks == None: |
| 31 args.acclinks = os.path.join(os.getcwd(), 'accession_lists_links.txt') | 63 args.acclinks = os.path.join(os.getcwd(), 'accession_lists_links.txt') |
| 58 accDataTable.write('global\tglobal\n') | 90 accDataTable.write('global\tglobal\n') |
| 59 # fetch accession lists | 91 # fetch accession lists |
| 60 for url in link: | 92 for url in link: |
| 61 acc = requests.get(url) | 93 acc = requests.get(url) |
| 62 filename = str(os.path.basename(url)).replace('\n','') | 94 filename = str(os.path.basename(url)).replace('\n','') |
| 95 filenameStem = str(os.path.splitext(filename)[0]) | |
| 96 | |
| 97 #filename = str(os.path.basename(url)).replace('\n','') | |
| 63 print('### fetch: ' + filename) | 98 print('### fetch: ' + filename) |
| 64 open(os.path.join(args.acclists,filename),'wb').write(acc.content) | 99 open(os.path.join(args.acclists,filename),'wb').write(acc.content) |
| 65 | 100 |
| 66 # | 101 # |
| 67 accDataTable.write(filename + '\t') | 102 if filenameStem in rank: |
| 103 taxid = str(rank[filenameStem]["tax"]) | |
| 104 rankname = rank[filenameStem]["rank"] | |
| 105 filenameStem = filenameStem + ' (tax:' + taxid + ', rank:' + rankname + ')' | |
| 106 | |
| 107 accDataTable.write(filenameStem + '\t') | |
| 68 accDataTable.write(os.path.join(args.acclists,filename) + '\n') | 108 accDataTable.write(os.path.join(args.acclists,filename) + '\n') |
| 69 | 109 |
| 70 accDataTable.close() | 110 accDataTable.close() |
| 71 print('### create tab-separated list '+ accDataTableFile) | 111 print('### create tab-separated list '+ accDataTableFile) |
| 72 | 112 |
