Mercurial > repos > charles-bernard > data_manager_build_alfa_indexes
comparison data_manager_build_alfa_indexes/data_manager/data_manager_build_alfa_indexes.py @ 24:424d6204a298 draft
Uploaded
author | charles-bernard |
---|---|
date | Tue, 01 Nov 2016 06:40:26 -0400 |
parents | fbb5a2468fa8 |
children | 5dafa8e43d3e |
comparison legend: equal | deleted | inserted | replaced
23:c7dbef0e9ed7 | 24:424d6204a298 |
---|---|
6 import urllib2 | 6 import urllib2 |
7 import subprocess | 7 import subprocess |
8 import gzip | 8 import gzip |
9 import os | 9 import os |
10 import tempfile | 10 import tempfile |
11 import logging | |
12 from optparse import OptionParser | 11 from optparse import OptionParser |
13 from galaxy.util.json import from_json_string, to_json_string | 12 from galaxy.util.json import from_json_string, to_json_string |
14 | 13 |
15 def get_arg(): | 14 def get_arg(): |
16 parser = OptionParser() | 15 parser = OptionParser() |
78 | 77 |
79 grep_result = subprocess.Popen(['grep', species_name, list_species_file_name], stdout=subprocess.PIPE, stderr=subprocess.STDOUT) | 78 grep_result = subprocess.Popen(['grep', species_name, list_species_file_name], stdout=subprocess.PIPE, stderr=subprocess.STDOUT) |
80 species_lines_matched, grep_error = grep_result.communicate() | 79 species_lines_matched, grep_error = grep_result.communicate() |
81 if grep_error != None or species_lines_matched == "": | 80 if grep_error != None or species_lines_matched == "": |
82 msg = 'The species \'%s\' is not referenced on Ensembl (%s)' % (species_name, kingdom) | 81 msg = 'The species \'%s\' is not referenced on Ensembl (%s)' % (species_name, kingdom) |
83 logging.critical(msg) | |
84 sys.exit(msg) | 82 sys.exit(msg) |
85 | 83 |
86 species_lines = species_lines_matched.split('\n') | 84 species_lines = species_lines_matched.split('\n') |
87 del species_lines[-1] | 85 del species_lines[-1] |
88 nb_lines = len(species_lines) | 86 nb_lines = len(species_lines) |
113 exact_match = re.search('^%s$' % species_name, list_species[i]) | 111 exact_match = re.search('^%s$' % species_name, list_species[i]) |
114 if exact_match: | 112 if exact_match: |
115 print("-> Referenced !\n") | 113 print("-> Referenced !\n") |
116 return species_name, species_lines[i] | 114 return species_name, species_lines[i] |
117 msg = 'The string \'%s\' has been matched against the list of Ensembl Species but is not a complete species name.\nPlease retry with one of the following species names:\n%s' % (species_name, list_species[0:]) | 115 msg = 'The string \'%s\' has been matched against the list of Ensembl Species but is not a complete species name.\nPlease retry with one of the following species names:\n%s' % (species_name, list_species[0:]) |
118 logging.critical(msg) | |
119 sys.exit(msg) | 116 sys.exit(msg) |
120 | 117 |
121 def get_ensembl_collection(kingdom, species_line): | 118 def get_ensembl_collection(kingdom, species_line): |
122 print("*** Extracting the %s_collection of the species" % kingdom) | 119 print("*** Extracting the %s_collection of the species" % kingdom) |
123 collection_regex = re.compile('%s_.+_collection' % kingdom.lower()) | 120 collection_regex = re.compile('%s_.+_collection' % kingdom.lower()) |
160 print("*** Generating alfa indexes from %s" % gtf_file_name) | 157 print("*** Generating alfa indexes from %s" % gtf_file_name) |
161 alfa_result = subprocess.Popen(['python', path_to_alfa, '-a', gtf_file_name], stdout=subprocess.PIPE, stderr=subprocess.STDOUT) | 158 alfa_result = subprocess.Popen(['python', path_to_alfa, '-a', gtf_file_name], stdout=subprocess.PIPE, stderr=subprocess.STDOUT) |
162 alfa_out, alfa_err = alfa_result.communicate() | 159 alfa_out, alfa_err = alfa_result.communicate() |
163 if alfa_err != None and not re.search('### End of program', alfa_err): | 160 if alfa_err != None and not re.search('### End of program', alfa_err): |
164 msg = 'Generation Failed due an alfa error: %s' % (alfa_err) | 161 msg = 'Generation Failed due an alfa error: %s' % (alfa_err) |
165 logging.critical(msg) | |
166 sys.exit(msg) | 162 sys.exit(msg) |
167 print("Alfa prompt:\n%s" % alfa_out) | 163 print("Alfa prompt:\n%s" % alfa_out) |
168 print("-> Generated !\n") | 164 print("-> Generated !\n") |
169 | 165 |
170 def get_data_table_new_entry(gtf_archive_name): | 166 def get_data_table_new_entry(gtf_archive_name): |
185 | 181 |
186 path_to_alfa = os.path.join(tool_dir, 'ALFA.py') | 182 path_to_alfa = os.path.join(tool_dir, 'ALFA.py') |
187 | 183 |
188 if options.output_filename == None: | 184 if options.output_filename == None: |
189 msg = 'No json output file specified' | 185 msg = 'No json output file specified' |
190 logging.critical(msg) | |
191 sys.exit(msg) | 186 sys.exit(msg) |
192 output_filename = options.output_filename | 187 output_filename = options.output_filename |
193 params = from_json_string(open(output_filename).read()) | 188 params = from_json_string(open(output_filename).read()) |
194 target_directory = params['output_data'][0]['extra_files_path'] | 189 target_directory = params['output_data'][0]['extra_files_path'] |
195 os.mkdir(target_directory) | 190 os.mkdir(target_directory) |