annotate data_manager_build_alfa_indexes/data_manager/data_manager_build_alfa_indexes.py @ 4:6f0be85be8fb draft

Uploaded
author charles-bernard
date Thu, 27 Oct 2016 06:49:58 -0400
parents 016200d4e379
children 54c2c66e45a9
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
016200d4e379 Uploaded
charles-bernard
parents:
diff changeset
1 #!/usr/bin/python
016200d4e379 Uploaded
charles-bernard
parents:
diff changeset
2
016200d4e379 Uploaded
charles-bernard
parents:
diff changeset
3 import sys
016200d4e379 Uploaded
charles-bernard
parents:
diff changeset
4 import shutil
016200d4e379 Uploaded
charles-bernard
parents:
diff changeset
5 import re
016200d4e379 Uploaded
charles-bernard
parents:
diff changeset
6 import urllib2
016200d4e379 Uploaded
charles-bernard
parents:
diff changeset
7 import subprocess
016200d4e379 Uploaded
charles-bernard
parents:
diff changeset
8 import gzip
016200d4e379 Uploaded
charles-bernard
parents:
diff changeset
9 import os
016200d4e379 Uploaded
charles-bernard
parents:
diff changeset
10 import tempfile
016200d4e379 Uploaded
charles-bernard
parents:
diff changeset
11 import logging
016200d4e379 Uploaded
charles-bernard
parents:
diff changeset
12 from optparse import OptionParser
016200d4e379 Uploaded
charles-bernard
parents:
diff changeset
13 from galaxy.util.json import from_json_string, to_json_string
016200d4e379 Uploaded
charles-bernard
parents:
diff changeset
14
016200d4e379 Uploaded
charles-bernard
parents:
diff changeset
15
016200d4e379 Uploaded
charles-bernard
parents:
diff changeset
def get_arg():
    """Parse the command line of the data manager.

    Two options are declared:

    * ``-e`` / ``--ensembl``: two consecutive values stored as a tuple in
      ``options.ensembl_info``.
    * ``-o`` / ``--output``: one value stored in ``options.output_filename``.

    Returns:
        The ``(options, args)`` pair produced by ``OptionParser.parse_args``,
        where ``args`` holds the remaining positional arguments.
    """
    cli = OptionParser()
    cli.add_option(
        "-e", "--ensembl",
        dest='ensembl_info',
        action="store",
        nargs=2,
        metavar=("kingdom", "species_name"),
        type="str"
    )
    cli.add_option(
        "-o", "--output",
        dest='output_filename',
        action="store",
        nargs=1,
        metavar='JSON_FILE'
    )
    parsed_options, positional = cli.parse_args()
    return parsed_options, positional
016200d4e379 Uploaded
charles-bernard
parents:
diff changeset
22
016200d4e379 Uploaded
charles-bernard
parents:
diff changeset
def cleanup_before_exit(tmp_dir):
    """Delete the directory tree ``tmp_dir`` when it is set and exists.

    Nothing happens when ``tmp_dir`` is empty/None or does not exist.
    """
    if not tmp_dir:
        return
    if not os.path.exists(tmp_dir):
        return
    shutil.rmtree(tmp_dir)
016200d4e379 Uploaded
charles-bernard
parents:
diff changeset
26
016200d4e379 Uploaded
charles-bernard
parents:
diff changeset
def get_page_content(url):
    """Open ``url`` and return everything that can be read from it.

    Args:
        url: address handed to ``urllib2.Request``.

    Returns:
        The result of ``read()`` on the opened response.

    Raises:
        Whatever ``urllib2.urlopen`` raises when the url cannot be opened.
    """
    req = urllib2.Request(url)
    page = urllib2.urlopen(req)
    try:
        return page.read()
    finally:
        # Release the connection even when read() fails.
        page.close()
016200d4e379 Uploaded
charles-bernard
parents:
diff changeset
31
016200d4e379 Uploaded
charles-bernard
parents:
diff changeset
32
016200d4e379 Uploaded
charles-bernard
parents:
diff changeset
def download_file(link, local_file_name):
    """Download ``link`` and write its content to ``local_file_name``.

    Args:
        link: address handed to ``urllib2.Request``.
        local_file_name: path of the file to create (opened in ``'wb'`` mode,
            so an existing file is overwritten).

    Raises:
        Whatever ``urllib2.urlopen`` or ``open`` raise on failure.
    """
    req = urllib2.Request(link)
    src_file = urllib2.urlopen(req)
    try:
        with open(local_file_name, 'wb') as local_file:
            # Copy chunk by chunk so a large archive is never held entirely
            # in memory.
            shutil.copyfileobj(src_file, local_file)
    finally:
        # Close the remote handle whether or not the copy succeeded.
        src_file.close()
016200d4e379 Uploaded
charles-bernard
parents:
diff changeset
39
016200d4e379 Uploaded
charles-bernard
parents:
diff changeset
40
016200d4e379 Uploaded
charles-bernard
parents:
diff changeset
def uncompress_gz(gz_file_name, uncompressed_file_name):
    """Decompress the gzip file ``gz_file_name`` into ``uncompressed_file_name``.

    Args:
        gz_file_name: path of the gzip archive to read.
        uncompressed_file_name: path of the file to write (overwritten if it
            already exists).

    Raises:
        IOError/OSError: when either file cannot be opened or read.
    """
    logging.info("____________________________________________________________")
    logging.info("*** Uncompressing %s" % gz_file_name)
    # The archive is opened first so that a missing or unreadable archive does
    # not leave an empty output file behind.
    with gzip.open(gz_file_name, 'rb') as src_file:
        with open(uncompressed_file_name, 'wb') as uncompressed_file:
            # Stream the data instead of loading the whole file in memory.
            shutil.copyfileobj(src_file, uncompressed_file)
    logging.info("-> Uncompressed !\n")
016200d4e379 Uploaded
charles-bernard
parents:
diff changeset
49
016200d4e379 Uploaded
charles-bernard
parents:
diff changeset
50
016200d4e379 Uploaded
charles-bernard
parents:
diff changeset
def add_data_table_entry( data_manager_dict, data_table_entry ):
    """Register ``data_table_entry`` under ``data_tables -> alfa_indexes``.

    The ``'data_tables'`` mapping is created when absent. An already present
    ``'alfa_indexes'`` value is kept untouched; ``data_table_entry`` is only
    stored when that key is missing.

    Returns:
        The same ``data_manager_dict`` object, modified in place.
    """
    tables = data_manager_dict.setdefault( 'data_tables', {} )
    tables.setdefault( 'alfa_indexes', data_table_entry )
    return data_manager_dict
016200d4e379 Uploaded
charles-bernard
parents:
diff changeset
55
016200d4e379 Uploaded
charles-bernard
parents:
diff changeset
56
016200d4e379 Uploaded
charles-bernard
parents:
diff changeset
def standardize_species_name(species_name):
    """Turn a free-text species name into a lowercase, underscore-separated one.

    A closing parenthesis at the very end is dropped first; then every run of
    separator characters (space, underscore, parentheses, comma, hyphen, dot,
    equals sign), together with any spaces that follow it, becomes a single
    underscore. The result is lowercased.
    """
    trailing_paren = re.compile(r'[)]$')
    separators = re.compile(r'[ _),-.(=]+ *')
    trimmed = trailing_paren.sub('', species_name)
    return separators.sub('_', trimmed).lower()
016200d4e379 Uploaded
charles-bernard
parents:
diff changeset
61
016200d4e379 Uploaded
charles-bernard
parents:
diff changeset
62
016200d4e379 Uploaded
charles-bernard
parents:
diff changeset
def get_ensembl_url_root(kingdom):
    """Return the Ensembl ftp root url used for ``kingdom``.

    ``'vertebrates'`` maps to the ftp.ensembl.org ``current_gtf`` directory;
    any other value is inserted into the ftp.ensemblgenomes.org ``current``
    url.
    """
    logging.info("____________________________________________________________")
    logging.info("*** Determining Ensembl ftp root url")
    if kingdom != 'vertebrates':
        ftp_root = 'ftp://ftp.ensemblgenomes.org/pub/%s/current/' % kingdom
    else:
        ftp_root = 'ftp://ftp.ensembl.org/pub/current_gtf/'
    logging.info("-> Determined !\n")
    return ftp_root
016200d4e379 Uploaded
charles-bernard
parents:
diff changeset
72
016200d4e379 Uploaded
charles-bernard
parents:
diff changeset
73
016200d4e379 Uploaded
charles-bernard
parents:
diff changeset
def test_ensembl_species_exists(kingdom, url, species_name):
    """Check that ``species_name`` is listed for ``kingdom`` on Ensembl.

    The species list is downloaded into the current directory and searched for
    lines containing ``species_name`` as a literal substring.

    Args:
        kingdom: Ensembl division name; ``'vertebrates'`` is special-cased.
        url: root url returned by ``get_ensembl_url_root``.
        species_name: standardized species name (or a fragment of it).

    Returns:
        A ``(species_name, species_line)`` pair. When exactly one line matches,
        the name found on that line is returned (it may be longer than the
        fragment given) together with the raw grep output. When several lines
        match, the line whose name equals ``species_name`` exactly is returned.

    Exits (``sys.exit``) when the list cannot be searched, when nothing
    matches, or when several lines match but none exactly.
    """
    logging.info("____________________________________________________________")
    logging.info("*** Testing whether %s is referenced in Ensembl %s" % (species_name, kingdom))
    list_species_file_name = 'species_Ensembl%s%s.txt' % (kingdom[0].upper(), kingdom[1:])
    if kingdom == 'vertebrates':
        # For vertebrates the root url itself is downloaded and used as the list.
        download_file(url, list_species_file_name)
    else:
        download_file(url + list_species_file_name, list_species_file_name)

    # -F: the name is a literal string, not a regular expression.
    # -e / --: a name or file starting with '-' is not taken for an option.
    # stderr is kept apart so grep's own error text is never parsed as a match.
    grep_result = subprocess.Popen(
        ['grep', '-F', '-e', species_name, '--', list_species_file_name],
        stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    species_lines_matched, grep_error = grep_result.communicate()
    if grep_result.returncode > 1:
        # grep exits with 0 (match), 1 (no match) or >1 (real error).
        msg = 'Unable to search the Ensembl species list \'%s\': %s' % (list_species_file_name, grep_error)
        logging.critical(msg)
        sys.exit(msg)
    if not species_lines_matched:
        msg = 'The species \'%s\' is not referenced on Ensembl (%s)' % (species_name, kingdom)
        logging.critical(msg)
        sys.exit(msg)

    species_lines = species_lines_matched.split('\n')
    # grep output ends with a newline, which leaves one empty trailing item.
    if species_lines and species_lines[-1] == '':
        del species_lines[-1]
    list_species = [_species_name_from_line(line) for line in species_lines]

    if len(species_lines) == 1:
        found_species_name = list_species[0]
        if species_name != found_species_name:
            logging.info('-> \'%s\' has been replace with the complete species name \'%s\'' % (species_name, found_species_name))
            return found_species_name, species_lines_matched
        logging.info("-> Referenced !\n")
        return species_name, species_lines_matched

    # Several candidates: only an exact name is accepted.
    for species_line, candidate in zip(species_lines, list_species):
        if candidate == species_name:
            logging.info("-> Referenced !\n")
            return species_name, species_line
    msg = 'The string \'%s\' has been matched against the list of Ensembl Species but is not a complete species name.\nPlease retry with one of the following species names:\n%s' % (species_name, list_species)
    logging.critical(msg)
    sys.exit(msg)


def _species_name_from_line(species_line):
    """Return the species name carried by one matched line of the species list."""
    columns = species_line.split('\t')
    if len(columns) > 1:
        # Tab-separated list: the name is read from the second column.
        return columns[1]
    # NOTE(review): a line without tabs is presumably an entry of the directory
    # listing downloaded for vertebrates, where the name would be the last
    # whitespace-separated field - confirm against the real server output.
    fields = species_line.split()
    if fields:
        return fields[-1]
    return ''
016200d4e379 Uploaded
charles-bernard
parents:
diff changeset
114
016200d4e379 Uploaded
charles-bernard
parents:
diff changeset
115
016200d4e379 Uploaded
charles-bernard
parents:
diff changeset
def get_ensembl_collection(kingdom, species_line):
    """Extract the ``<kingdom>_..._collection`` token from ``species_line``.

    Returns:
        The matched text, or None when ``species_line`` contains no such
        token for the lowercased ``kingdom``.
    """
    logging.info("*** Extracting the %s_collection of the species" % kingdom)
    pattern = re.compile('%s_.+_collection' % kingdom.lower())
    found = pattern.search(species_line)
    if found:
        logging.info("-> Extracted !\n")
        return found.group(0)
    logging.info("-> Skiped: this species is not classified in a Ensembl %s collection\n" % kingdom)
    return None
016200d4e379 Uploaded
charles-bernard
parents:
diff changeset
125
016200d4e379 Uploaded
charles-bernard
parents:
diff changeset
126
016200d4e379 Uploaded
charles-bernard
parents:
diff changeset
def get_ensembl_gtf_archive_name(url_dir, species_name):
    """Find the name of the species' gtf archive in the content of ``url_dir``.

    The name searched for is ``<species_name>.<anything>.<digits>.gtf.gz``,
    matched case-insensitively.

    Args:
        url_dir: url whose content is fetched with ``get_page_content``.
        species_name: species name the archive name must start with.

    Returns:
        The first archive name found in the fetched content.

    Exits (``sys.exit``) when no archive name matches.
    """
    logging.info("____________________________________________________________")
    logging.info("*** Extracting the gtf archive name of %s" % species_name)
    # The species name is escaped so it is matched literally, whatever
    # characters it contains.
    gtf_archive_regex = re.compile(r'%s\..*\.[0-9]+\.gtf\.gz' % re.escape(species_name), flags=re.IGNORECASE)
    dir_content = get_page_content(url_dir)
    gtf_archive_match = gtf_archive_regex.search(dir_content)
    if not gtf_archive_match:
        msg = 'The species is referenced on Ensembl but error of nomenclature led to download failure'
        # Logged as well as printed, like every other fatal error of this script.
        logging.critical(msg)
        sys.exit(msg)
    gtf_archive_name = gtf_archive_match.group(0)
    logging.info("-> Extracted !\n")
    return gtf_archive_name
016200d4e379 Uploaded
charles-bernard
parents:
diff changeset
138
016200d4e379 Uploaded
charles-bernard
parents:
diff changeset
139
016200d4e379 Uploaded
charles-bernard
parents:
diff changeset
def get_ensembl_gtf_archive(kingdom, url, species_name, species_line):
    """Download the gtf archive of ``species_name`` into the current directory.

    The directory url is assembled from ``url``: ``gtf/`` is appended for every
    kingdom except ``'vertebrates'``, then the collection (bacteria, protists
    and fungi only, when one is found in ``species_line``), then the species
    name.

    Returns:
        The name of the downloaded archive, which is also the local file name.
    """
    base_url = url
    if kingdom != 'vertebrates':
        base_url += 'gtf/'
    if kingdom in ('bacteria', 'protists', 'fungi'):
        collection = get_ensembl_collection(kingdom, species_line)
        if collection is not None:
            base_url += "%s/" % collection
    final_url = base_url + species_name + '/'
    archive_name = get_ensembl_gtf_archive_name(final_url, species_name)
    logging.info("____________________________________________________________")
    logging.info("*** Download the gtf archive of %s" % species_name)
    download_file(final_url + archive_name, archive_name)
    logging.info("-> Downloaded !\n")
    return archive_name
016200d4e379 Uploaded
charles-bernard
parents:
diff changeset
154
016200d4e379 Uploaded
charles-bernard
parents:
diff changeset
155
016200d4e379 Uploaded
charles-bernard
parents:
diff changeset
def generate_alfa_indexes(path_to_alfa, gtf_file_name):
    """Run ``python <path_to_alfa> -a <gtf_file_name>`` to build the ALFA indexes.

    Args:
        path_to_alfa: path of the ALFA script to execute.
        gtf_file_name: gtf file passed to the ``-a`` option.

    Exits (``sys.exit``) with the captured output when the process ends with a
    non-zero status without having printed ``### End of program``.
    """
    logging.info("____________________________________________________________")
    logging.info("*** Generating alfa indexes from %s" % gtf_file_name)
    # stderr is merged into stdout, so communicate() never returns anything on
    # its second slot: success is judged from the exit status and the merged
    # output instead.
    alfa_result = subprocess.Popen(
        ['python', path_to_alfa, '-a', gtf_file_name],
        stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
        universal_newlines=True)
    alfa_out, _ = alfa_result.communicate()
    # A run that printed the end-of-program marker is accepted whatever its
    # exit status, as the original stderr check intended.
    if alfa_result.returncode != 0 and '### End of program' not in alfa_out:
        msg = 'Generation Failed due an alfa error: %s' % (alfa_out)
        logging.critical(msg)
        sys.exit(msg)
    logging.info("-> Generated !\n")
016200d4e379 Uploaded
charles-bernard
parents:
diff changeset
166
016200d4e379 Uploaded
charles-bernard
parents:
diff changeset
167
016200d4e379 Uploaded
charles-bernard
parents:
diff changeset
def get_data_table_new_entry(gtf_archive_name):
    """Build the data table entry describing ``gtf_archive_name``.

    Args:
        gtf_archive_name: archive name of the form
            ``<species>.<version>.<release>.gtf.gz``.

    Returns:
        A dict with the keys ``species``, ``version``, ``release``, ``value``,
        ``dbkey``, ``name`` and ``prefix``.

    Raises:
        IndexError: when the name has fewer than three dot-separated parts.
    """
    # The release is the numeric part right before ".gtf.gz"; everything
    # between the species and the release is the version, so a version that
    # itself contains dots (e.g. "IRGSP-1.0") is kept whole.
    archive_match = re.match(r'^([^.]+)\.(.+)\.([0-9]+)\.gtf\.gz$', gtf_archive_name)
    if archive_match:
        species, version, release = archive_match.groups()
    else:
        # Any other name is split positionally, as before.
        info_list = gtf_archive_name.split('.')
        species = info_list[0]
        version = info_list[1]
        release = info_list[2]
    value = '%s_%s_%s' % (species, version, release)
    dbkey = value
    name = '%s: %s (release %s)' % (species, version, release)
    prefix = '%s.%s.%s' % (species, version, release)
    entry_dict = {
        'species': species,
        'version': version,
        'release': release,
        'value': value,
        'dbkey': dbkey,
        'name': name,
        'prefix': prefix,
    }
    return entry_dict
016200d4e379 Uploaded
charles-bernard
parents:
diff changeset
179
016200d4e379 Uploaded
charles-bernard
parents:
diff changeset
180
016200d4e379 Uploaded
charles-bernard
parents:
diff changeset
def main():
    """Build the ALFA indexes of an Ensembl species and describe them as JSON.

    Command line: ``-e KINGDOM SPECIES -o JSON_FILE GALAXY_ROOT TOOL_DIR``.

    The JSON file is first read to find the target directory
    (``output_data[0].extra_files_path``). The gtf archive is then downloaded
    and indexed inside a temporary directory, the two index files and the log
    are copied to the target directory, and the JSON file is finally
    overwritten with the new data table entry.

    Exits (``sys.exit``) when a positional argument or the output file is
    missing, or when one of the steps fails.
    """
    options, args = get_arg()
    if len(args) < 2:
        msg = 'Two positional arguments are required: the Galaxy root directory and the tool directory'
        logging.critical(msg)
        sys.exit(msg)
    galaxy_root_dir = args[0]
    tool_dir = args[1]

    # Paths used after the chdir below are made absolute first, so relative
    # command line values keep pointing to the same files.
    path_to_alfa = os.path.abspath(os.path.join(tool_dir, 'ALFA.py'))
    path_to_tmp_dir = os.path.join(galaxy_root_dir, 'database/tmp/')

    if options.output_filename is None:
        msg = 'No json output file specified'
        logging.critical(msg)
        sys.exit(msg)
    output_filename = os.path.abspath(options.output_filename)
    with open(output_filename) as params_file:
        params = from_json_string(params_file.read())
    target_directory = os.path.abspath(params['output_data'][0]['extra_files_path'])
    os.mkdir(target_directory)

    initial_dir = os.getcwd()
    tmp_dir = tempfile.mkdtemp(prefix='tmp', suffix='', dir=path_to_tmp_dir)
    data_manager_dict = {}
    try:
        # Every intermediate file (species list, archive, gtf, indexes, log)
        # is created relative to the temporary directory.
        os.chdir(tmp_dir)
        log_file_name = 'galaxy_log_report.log'
        logging.basicConfig(level=logging.INFO, filename=log_file_name, filemode="a+", format='%(message)s')

        if options.ensembl_info:
            kingdom, species_name = options.ensembl_info
            species_name = standardize_species_name(species_name)
            url = get_ensembl_url_root(kingdom)
            species_name, species_line = test_ensembl_species_exists(kingdom, url, species_name)
            gtf_archive_name = get_ensembl_gtf_archive(kingdom, url, species_name, species_line)
            data_table_entry = get_data_table_new_entry(gtf_archive_name)
            gtf_file_name = '%s.gtf' % data_table_entry['prefix']
            uncompress_gz(gtf_archive_name, gtf_file_name)
            generate_alfa_indexes(path_to_alfa, gtf_file_name)
            stranded_index_name = '%s.stranded.index' % data_table_entry['prefix']
            unstranded_index_name = '%s.unstranded.index' % data_table_entry['prefix']
            add_data_table_entry(data_manager_dict, data_table_entry)

            logging.info("____________________________________________________________")
            logging.info("*** General Info")
            logging.info("TMP DIR:\t%s" % tmp_dir)
            logging.info("TARGET DIR:\t%s" % target_directory)
            logging.info("URL ROOT:\t%s" % url)
            logging.info("SPECIES:\t%s" % data_table_entry['species'])
            logging.info("VERSION:\t%s" % data_table_entry['version'])
            logging.info("RELEASE:\t%s" % data_table_entry['release'])
            logging.info("VALUE:\t%s" % data_table_entry['value'])
            logging.info("DBKEY:\t%s" % data_table_entry['dbkey'])
            logging.info("NAME:\t%s" % data_table_entry['name'])
            logging.info("PREFIX:\t%s" % data_table_entry['prefix'])
            logging.info("____________________________________________________________")
            logging.info("*** Intial dictionary")
            logging.info("%s" % params)

            shutil.copyfile(stranded_index_name, os.path.join(target_directory, stranded_index_name))
            shutil.copyfile(unstranded_index_name, os.path.join(target_directory, unstranded_index_name))
        else:
            # Without -e there is nothing to build: the steps above are
            # skipped instead of failing on names that were never assigned.
            logging.warning("No Ensembl species given (-e): no index generated")

        shutil.copyfile(log_file_name, os.path.join(target_directory, log_file_name))
    finally:
        # Leave the temporary directory before deleting it, and delete it even
        # when one of the steps called sys.exit().
        os.chdir(initial_dir)
        cleanup_before_exit(tmp_dir)

    with open(output_filename, 'wb') as output_file:
        output_file.write(to_json_string(data_manager_dict))
016200d4e379 Uploaded
charles-bernard
parents:
diff changeset
# Run the data manager only when this file is executed as a script, so that
# importing it does not start any download or index generation.
if __name__ == "__main__":
    main()