annotate config_lookup.py @ 15:4ac32c671a40 draft

Uploaded
author computationaltranscriptomics
date Wed, 15 Jan 2020 13:34:02 -0500
parents
children ba52692d7a95
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
15
4ac32c671a40 Uploaded
computationaltranscriptomics
parents:
diff changeset
1 #!/usr/local/bin/python3
4ac32c671a40 Uploaded
computationaltranscriptomics
parents:
diff changeset
2
4ac32c671a40 Uploaded
computationaltranscriptomics
parents:
diff changeset
3 '''
4ac32c671a40 Uploaded
computationaltranscriptomics
parents:
diff changeset
4 This script downloads lookup tables and integrates these into the Galaxy instance
4ac32c671a40 Uploaded
computationaltranscriptomics
parents:
diff changeset
5
4ac32c671a40 Uploaded
computationaltranscriptomics
parents:
diff changeset
6 USAGE
4ac32c671a40 Uploaded
computationaltranscriptomics
parents:
diff changeset
7 config_lookup.py --galaxy GALAXY [--acclinks ACCLINKS] [--acclists ACCLISTS]
4ac32c671a40 Uploaded
computationaltranscriptomics
parents:
diff changeset
8
4ac32c671a40 Uploaded
computationaltranscriptomics
parents:
diff changeset
9 OPTIONS
4ac32c671a40 Uploaded
computationaltranscriptomics
parents:
diff changeset
10 -h, --help show this help message and exit
4ac32c671a40 Uploaded
computationaltranscriptomics
parents:
diff changeset
11
4ac32c671a40 Uploaded
computationaltranscriptomics
parents:
diff changeset
12 '''
4ac32c671a40 Uploaded
computationaltranscriptomics
parents:
diff changeset
13
4ac32c671a40 Uploaded
computationaltranscriptomics
parents:
diff changeset
14 import os
4ac32c671a40 Uploaded
computationaltranscriptomics
parents:
diff changeset
15 import argparse
4ac32c671a40 Uploaded
computationaltranscriptomics
parents:
diff changeset
16 import requests
4ac32c671a40 Uploaded
computationaltranscriptomics
parents:
diff changeset
17 import sys
4ac32c671a40 Uploaded
computationaltranscriptomics
parents:
diff changeset
18 import shutil
4ac32c671a40 Uploaded
computationaltranscriptomics
parents:
diff changeset
19
4ac32c671a40 Uploaded
computationaltranscriptomics
parents:
diff changeset
def main():
    """Download the accession lists and register them with a Galaxy instance.

    Command-line arguments:
        --galaxy    (required) path to the root directory of the Galaxy instance.
        --acclinks  file with one accession-list URL per line; defaults to
                    ``./accession_lists_links.txt``.
        --acclists  directory the downloaded lists are saved to; defaults to
                    ``./acclists`` and is created when missing.

    Every URL is downloaded into the ``--acclists`` directory. A tab-separated
    table (``<file name>\\t<path to the downloaded file>``, preceded by a
    ``global\\tglobal`` row) is written to
    ``./tool-data/glassgo_accession_list.txt`` and then copied into
    ``<galaxy>/tool-data/``.

    Exits with status 1 when the Galaxy directory or the links file does not
    exist. Raises ``requests.HTTPError`` when a download returns an HTTP
    error status, so an error page is never stored as an accession list.
    """
    # parse arguments
    parser = argparse.ArgumentParser(description='incorporate the accession lists in GLASSgo/Galaxy to enable clade-specific searches')
    parser.add_argument('--galaxy', required=True, help='(absolute) path to the root directory of the Galaxy instance')
    parser.add_argument('--acclinks', help='(absolute) path to file containing URLs to the accession lists')
    parser.add_argument('--acclists', help='(absolute) path to directory to save the accession lists to')
    args = parser.parse_args()

    # ./accession_lists_links.txt as default
    if args.acclinks is None:
        args.acclinks = os.path.join(os.getcwd(), 'accession_lists_links.txt')

    # ./acclists as default folder for the accession lists
    if args.acclists is None:
        args.acclists = os.path.join(os.getcwd(), 'acclists')

    # check for existence of the galaxy folder and of the file with the URLs;
    # exit with a non-zero status so that callers can detect the failure
    for required_path in (args.galaxy, args.acclinks):
        if not os.path.exists(required_path):
            print('\tERROR: ' + required_path + ' could not be found!', file=sys.stderr)
            sys.exit(1)

    print('################ configure the accession lists ################')
    print('### the accession lists will be saved to ' + args.acclists)

    # create folder for accession lists
    os.makedirs(args.acclists, exist_ok=True)

    # list with lookup tables that populates the user interface
    accDataTableFile = os.path.join(os.getcwd(), 'tool-data/glassgo_accession_list.txt')
    # the local tool-data folder may not exist when run from another directory
    os.makedirs(os.path.dirname(accDataTableFile), exist_ok=True)

    with open(args.acclinks, 'r') as link, open(accDataTableFile, 'w') as accDataTable:
        accDataTable.write('global\tglobal\n')
        # fetch accession lists
        for line in link:
            url = line.strip()
            if not url:
                # tolerate blank lines in the links file
                continue

            filename = os.path.basename(url)
            print('### fetch: ' + filename)
            acc = requests.get(url)
            # abort instead of saving an HTTP error page as an accession list
            acc.raise_for_status()

            target = os.path.join(args.acclists, filename)
            with open(target, 'wb') as accFile:
                accFile.write(acc.content)

            # one row per list: display name and location of the file
            accDataTable.write(filename + '\t' + target + '\n')

    print('### create tab-separated list ' + accDataTableFile)

    # copy list with accession lists to /galaxy/tool-data
    galaxyToolData = os.path.join(args.galaxy, 'tool-data/')
    shutil.copy(accDataTableFile, galaxyToolData)
    print('### move tab-separated list to ' + str(galaxyToolData))


if __name__ == "__main__":
    main()