comparison data_manager_build_alfa_indexes/data_manager/data_manager_build_alfa_indexes.py @ 24:424d6204a298 draft

Uploaded
author charles-bernard
date Tue, 01 Nov 2016 06:40:26 -0400
parents fbb5a2468fa8
children 5dafa8e43d3e
comparison
equal deleted inserted replaced
23:c7dbef0e9ed7 24:424d6204a298
6 import urllib2 6 import urllib2
7 import subprocess 7 import subprocess
8 import gzip 8 import gzip
9 import os 9 import os
10 import tempfile 10 import tempfile
11 import logging
12 from optparse import OptionParser 11 from optparse import OptionParser
13 from galaxy.util.json import from_json_string, to_json_string 12 from galaxy.util.json import from_json_string, to_json_string
14 13
15 def get_arg(): 14 def get_arg():
16 parser = OptionParser() 15 parser = OptionParser()
78 77
79 grep_result = subprocess.Popen(['grep', species_name, list_species_file_name], stdout=subprocess.PIPE, stderr=subprocess.STDOUT) 78 grep_result = subprocess.Popen(['grep', species_name, list_species_file_name], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
80 species_lines_matched, grep_error = grep_result.communicate() 79 species_lines_matched, grep_error = grep_result.communicate()
81 if grep_error != None or species_lines_matched == "": 80 if grep_error != None or species_lines_matched == "":
82 msg = 'The species \'%s\' is not referenced on Ensembl (%s)' % (species_name, kingdom) 81 msg = 'The species \'%s\' is not referenced on Ensembl (%s)' % (species_name, kingdom)
83 logging.critical(msg)
84 sys.exit(msg) 82 sys.exit(msg)
85 83
86 species_lines = species_lines_matched.split('\n') 84 species_lines = species_lines_matched.split('\n')
87 del species_lines[-1] 85 del species_lines[-1]
88 nb_lines = len(species_lines) 86 nb_lines = len(species_lines)
113 exact_match = re.search('^%s$' % species_name, list_species[i]) 111 exact_match = re.search('^%s$' % species_name, list_species[i])
114 if exact_match: 112 if exact_match:
115 print("-> Referenced !\n") 113 print("-> Referenced !\n")
116 return species_name, species_lines[i] 114 return species_name, species_lines[i]
117 msg = 'The string \'%s\' has been matched against the list of Ensembl Species but is not a complete species name.\nPlease retry with one of the following species names:\n%s' % (species_name, list_species[0:]) 115 msg = 'The string \'%s\' has been matched against the list of Ensembl Species but is not a complete species name.\nPlease retry with one of the following species names:\n%s' % (species_name, list_species[0:])
118 logging.critical(msg)
119 sys.exit(msg) 116 sys.exit(msg)
120 117
121 def get_ensembl_collection(kingdom, species_line): 118 def get_ensembl_collection(kingdom, species_line):
122 print("*** Extracting the %s_collection of the species" % kingdom) 119 print("*** Extracting the %s_collection of the species" % kingdom)
123 collection_regex = re.compile('%s_.+_collection' % kingdom.lower()) 120 collection_regex = re.compile('%s_.+_collection' % kingdom.lower())
160 print("*** Generating alfa indexes from %s" % gtf_file_name) 157 print("*** Generating alfa indexes from %s" % gtf_file_name)
161 alfa_result = subprocess.Popen(['python', path_to_alfa, '-a', gtf_file_name], stdout=subprocess.PIPE, stderr=subprocess.STDOUT) 158 alfa_result = subprocess.Popen(['python', path_to_alfa, '-a', gtf_file_name], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
162 alfa_out, alfa_err = alfa_result.communicate() 159 alfa_out, alfa_err = alfa_result.communicate()
163 if alfa_err != None and not re.search('### End of program', alfa_err): 160 if alfa_err != None and not re.search('### End of program', alfa_err):
164 msg = 'Generation Failed due an alfa error: %s' % (alfa_err) 161 msg = 'Generation Failed due an alfa error: %s' % (alfa_err)
165 logging.critical(msg)
166 sys.exit(msg) 162 sys.exit(msg)
167 print("Alfa prompt:\n%s" % alfa_out) 163 print("Alfa prompt:\n%s" % alfa_out)
168 print("-> Generated !\n") 164 print("-> Generated !\n")
169 165
170 def get_data_table_new_entry(gtf_archive_name): 166 def get_data_table_new_entry(gtf_archive_name):
185 181
186 path_to_alfa = os.path.join(tool_dir, 'ALFA.py') 182 path_to_alfa = os.path.join(tool_dir, 'ALFA.py')
187 183
188 if options.output_filename == None: 184 if options.output_filename == None:
189 msg = 'No json output file specified' 185 msg = 'No json output file specified'
190 logging.critical(msg)
191 sys.exit(msg) 186 sys.exit(msg)
192 output_filename = options.output_filename 187 output_filename = options.output_filename
193 params = from_json_string(open(output_filename).read()) 188 params = from_json_string(open(output_filename).read())
194 target_directory = params['output_data'][0]['extra_files_path'] 189 target_directory = params['output_data'][0]['extra_files_path']
195 os.mkdir(target_directory) 190 os.mkdir(target_directory)