data_manager_build_alfa_indexes: data_manager_build_alfa_indexes/data_manager/data_manager_build_alfa

comparison data_manager_build_alfa_indexes/data_manager/data_manager_build_alfa_indexes.py @ 28:9139892d06a2 draft

Uploaded

author	charles-bernard
date	Thu, 08 Dec 2016 03:43:26 -0500
parents	5dafa8e43d3e
children	0c821f76e2e5

comparison

equal deleted inserted replaced

-:4f70c9afd89d
+:9139892d06a2
 def get_page_content(url):
     """Fetch *url* and return the body of the response.

     :param url: address handed to ``urllib2.Request``.
     :return: whatever ``read()`` yields for the opened response.

     Errors raised by ``urllib2.urlopen`` or ``read()`` are not caught here
     and propagate to the caller.
     """
     req = urllib2.Request(url)
     page = urllib2.urlopen(req)
     try:
         return page.read()
     finally:
         # Close the response explicitly so the underlying connection is
         # released even when read() raises, instead of waiting for garbage
         # collection.
         page.close()
 def download_file(link, local_file_name):
 req = urllib2.Request(link)
 src_file = urllib2.urlopen(req)
 local_file = open(local_file_name, 'wb')
 local_file.write(src_file.read())
 data_manager_dict['data_tables'] = data_manager_dict.get( 'data_tables', {} )
 data_manager_dict['data_tables']['alfa_indexes'] = data_manager_dict['data_tables'].get( 'alfa_indexes', data_table_entry )
 return data_manager_dict
 def standardize_species_name(species_name):
     """Return a lowercase, underscore-separated form of *species_name*.

     Steps, in order:

     1. a single ')' at the end of the string is removed;
     2. every run of the characters space, '_', ')', ',', '-', '.', '(' and
        '=' is replaced by one underscore;
     3. the result is lowercased.

     Characters outside that set (letters, digits, '+', '/', ...) are kept.

     :param species_name: species name as a string.
     :return: the standardized name.
     """
     # Drop a closing parenthesis at the end first, otherwise step 2 would
     # turn it into a trailing underscore.
     standard_species_name = re.sub(r'[)]$', '', species_name)
     # The hyphen is escaped on purpose: written as ',-.' it is a character
     # range that only happens to cover ',', '-' and '.'. No extra ' *' is
     # needed after the class because the space is already part of it.
     standard_species_name = re.sub(r'[ _),\-.(=]+', '_', standard_species_name)
     return standard_species_name.lower()
 def get_ensembl_url_root(kingdom):
 root = 'ftp://ftp.ensemblgenomes.org/pub/%s/current/' % kingdom
 print("-> Determined !\n")
 return root
 def test_ensembl_species_exists(kingdom, url, species_name):
+"""
+Test if a species exists on the ftp & return the species name with the species_line if so.
+if the species_name matches a single string, then this string will be returned as the species name
+if the species_name matches several strings, then an error is printed with all the possible species to enter for a new run
+"""
 print("____________________________________________________________")
 print ("*** Testing whether %s is referenced in Ensembl %s" % (species_name, kingdom))
 list_species_file_name = 'species_Ensembl%s%s.txt' % (kingdom[0].upper(), kingdom[1:])
 if kingdom=='vertebrates':
 download_file(url, list_species_file_name)
 list_species[i] = columns[1]
 exact_match = re.search('^%s$' % species_name, list_species[i])
 if exact_match:
 print("-> Referenced !\n")
 return species_name, species_lines[i]
-msg = 'The string \'%s\' has been matched against the list of Ensembl Species but is not a complete species name.\nPlease retry with one of the following species names:\n%s' % (species_name, list_species[0:])
+msg = ("The string \'%s\' has been matched against the list of Ensembl Species but is not a complete species name.\n"
+"Please retry with one of these following species names:\n" % species_name)
+for s in list_species:
+msg = ("%s- %s\n" % (msg, s))
 sys.exit(msg)
 def get_ensembl_collection(kingdom, species_line):
 print("*** Extracting the %s_collection of the species" % kingdom)
 collection_regex = re.compile('%s_.+_collection' % kingdom.lower())
 if options.output_filename == None:
 msg = 'No json output file specified'
 sys.exit(msg)
 output_filename = options.output_filename
+# Interestingly, the output file to return is not empty initially.
+# It contains a dictionary holding, notably, the path to the directory where
+# the alfa_indexes are expected to be found.
 params = from_json_string(open(output_filename).read())
 target_directory = params['output_data'][0]['extra_files_path']
 os.mkdir(target_directory)
 tmp_dir = tempfile.mkdtemp(prefix='tmp', suffix='')
 unstranded_index_name = '%s.unstranded.index' % data_table_entry['prefix']
 add_data_table_entry(data_manager_dict, data_table_entry)
 print("____________________________________________________________")
 print("*** General Info")
-print("TMP DIR:\t%s" % tmp_dir)
-print("TARGET DIR:\t%s" % target_directory)
 print("URL ROOT:\t%s" % url)
 print("SPECIES:\t%s" % data_table_entry['species'])
 print("VERSION:\t%s" % data_table_entry['version'])
 print("RELEASE:\t%s" % data_table_entry['release'])
 print("VALUE:\t%s" % data_table_entry['value'])
 print("DBKEY:\t%s" % data_table_entry['dbkey'])
 print("NAME:\t%s" % data_table_entry['name'])
 print("PREFIX:\t%s" % data_table_entry['prefix'])
-print("____________________________________________________________")
-print("*** Intial dictionary")
-print("%s" % params)
 shutil.copyfile(stranded_index_name, os.path.join(target_directory, stranded_index_name))
 shutil.copyfile(unstranded_index_name, os.path.join(target_directory, unstranded_index_name))
 cleanup_before_exit(tmp_dir)

Mercurial > repos > charles-bernard > data_manager_build_alfa_indexes

comparison data_manager_build_alfa_indexes/data_manager/data_manager_build_alfa_indexes.py @ 28:9139892d06a2 draft