0
|
1 #!/usr/bin/env python
|
|
2 import argparse
|
|
3 import os
|
|
4 import shutil
|
|
5 import string
|
|
6 import sys
|
|
7 import threading
|
|
8 import time
|
|
9
|
|
10 from bioblend import galaxy
|
|
11 from six.moves import configparser
|
|
12
|
|
# Command line interface.  argparse derives each destination from the long
# option name, so an explicit dest is only spelled out where the derived
# name would differ from the one used here.
parser = argparse.ArgumentParser()
parser.add_argument('--affy_metadata', help='Input Affymetrix 96 well plate metadata file')
parser.add_argument('--annot', help='Probeset annotation file')
parser.add_argument('--api_key', help='Current user API key')
parser.add_argument('--calls', help='Apt-probeset genotype calls file')
parser.add_argument('--confidences', help='Apt-probeset genotype confidences file')
parser.add_argument('--config_file', help='qgw_config.ini')
parser.add_argument('--dbkey', help='Reference genome dbkey')
parser.add_argument('--reference_genome', help='Reference genome')
parser.add_argument('--history_id', help='Encoded id of current history')
parser.add_argument('--output', help='Output dataset')
parser.add_argument('--report', help='Apt-probeset genotype report file')
parser.add_argument('--sample_attributes', help='Sample attributes tabular file')
# The hyphenated dest is kept explicit: argparse would otherwise store this
# value under 'snp_posteriors'.
parser.add_argument('--snp-posteriors', dest='snp-posteriors', help='Apt-probeset genotype snp-posteriors file')
parser.add_argument('--summary', help='Apt-probeset genotype summary file')
args = parser.parse_args()
|
|
29
|
|
30
|
|
def add_library_dataset_to_history(gi, history_id, dataset_id, history_datasets, outputfh):
    """Import a data library dataset into a history.

    The dictionary returned by the Galaxy API call is stored in
    history_datasets under its 'name' value, and that same (mutated)
    history_datasets mapping is returned.
    """
    outputfh.write('\nImporting dataset into current history.\n')
    imported_hda = gi.histories.upload_dataset_from_library(history_id, dataset_id)
    history_datasets[imported_hda['name']] = imported_hda
    return history_datasets
|
|
38
|
|
39
|
|
def copy_history_dataset_to_library(gi, library_id, dataset_id, outputfh):
    """Copy a history dataset into a data library.

    Returns whatever gi.libraries.copy_from_dataset() returns for the
    newly created library dataset.
    """
    message = '\nCopying history dataset with id %s to data library with id %s.\n'
    outputfh.write(message % (str(dataset_id), str(library_id)))
    return gi.libraries.copy_from_dataset(library_id, dataset_id)
|
|
45
|
|
46
|
|
def copy_dataset_to_storage(src_path, dst_base_path, dataset_name, output_fh):
    """Copy a dataset, via its file path, to a storage directory on disk.

    src_path is the file to copy, dst_base_path is the storage directory
    (created if it does not exist yet) and dataset_name is the file name
    given to the copy inside that directory.  A confirmation line is
    written to output_fh.
    """
    if not os.path.isdir(dst_base_path):
        os.makedirs(dst_base_path)
    dst_path = os.path.join(dst_base_path, dataset_name)
    shutil.copyfile(src_path, dst_path)
    # Write to the handle that was passed in.  The previous code referenced
    # "outputfh", which is not this function's parameter and therefore only
    # worked when a module level variable of that name happened to exist.
    output_fh.write("Copied %s to storage.\n" % dataset_name)
|
|
54
|
|
55
|
|
def delete_history_dataset(gi, history_id, dataset_id, outputfh, purge=False):
    """Delete a history dataset, forwarding purge to the Galaxy API call."""
    notice = "\nDeleting history dataset with id %s.\n" % dataset_id
    outputfh.write(notice)
    gi.histories.delete_dataset(history_id, dataset_id, purge=purge)
|
|
60
|
|
61
|
|
def delete_library_dataset(gi, library_id, dataset_id, outputfh, purged=False):
    """Delete a library dataset and return the Galaxy API response.

    purged is forwarded unchanged to gi.libraries.delete_library_dataset().
    """
    notice = "\nDeleting library dataset with id %s.\n" % dataset_id
    outputfh.write(notice)
    return gi.libraries.delete_library_dataset(library_id, dataset_id, purged=purged)
|
|
67
|
|
68
|
|
def get_config_settings(config_file, section='defaults'):
    """Return the key / value pairs of one section of config_file as a dict.

    Keys of the 'defaults' section are upper-cased; keys of any other
    section are returned exactly as the config parser reports them.
    Errors raised by the config parser (for example when the section does
    not exist) are not caught here.
    """
    d = {}
    config_parser = configparser.ConfigParser()
    config_parser.read(config_file)
    uppercase_keys = section == 'defaults'
    for key, value in config_parser.items(section):
        # str.upper() works on both Python 2 and 3, whereas the
        # string.upper() function used previously exists only on Python 2.
        d[key.upper() if uppercase_keys else key] = value
    return d
|
|
81
|
|
82
|
|
def get_data_library_dict(gi, name, outputfh):
    """Return the dictionary of the non-deleted data library called name.

    Returns None when the Galaxy API reports no such library.
    """
    outputfh.write("\nSearching for data library named %s.\n" % name)
    # The API call does not reliably filter out deleted libraries, so the
    # 'deleted' value of every candidate is checked again below.
    deleted_markers = (True, 'true', 'True')
    for candidate in gi.libraries.get_libraries(library_id=None, name=name, deleted=False):
        if candidate['name'] != name:
            continue
        if candidate['deleted'] in deleted_markers:
            continue
        outputfh.write("Found data library named %s.\n" % name)
        outputfh.write("%s\n" % str(candidate))
        return candidate
    return None
|
|
94
|
|
95
|
|
def get_history_status(gi, history_id):
    """Return the status reported by the Galaxy API for history_id."""
    status = gi.histories.get_status(history_id)
    return status
|
|
98
|
|
99
|
|
def get_history_dataset_id_by_name(gi, history_id, dataset_name, outputfh):
    """Return the id of a history dataset whose name starts with dataset_name.

    The comparison ignores case.  The datasets are looked up with
    get_history_datasets(); None is returned when no name matches.
    """
    outputfh.write("\nSearching for history dataset named %s.\n" % str(dataset_name))
    for hda_name, hda_dict in get_history_datasets(gi, history_id).items():
        if hda_name.lower().startswith(dataset_name.lower()):
            outputfh.write("Found dataset named %s.\n" % str(dataset_name))
            return hda_dict['id']
    return None
|
|
111
|
|
112
|
|
def get_history_datasets(gi, history_id):
    """Return a dict mapping dataset name to its history contents dictionary.

    Only items whose 'history_content_type' is 'dataset' are included, and
    datasets whose name starts with "Queue genotype workflow" are left out.
    """
    contents = gi.histories.show_history(history_id, contents=True, deleted='false', details=None)
    datasets_by_name = {}
    for item in contents:
        if item['history_content_type'] != 'dataset':
            continue
        item_name = item['name']
        # Don't include the "Queue genotype workflow" dataset.
        if item_name.startswith("Queue genotype workflow"):
            continue
        datasets_by_name[item_name] = item
    return datasets_by_name
|
|
124
|
|
125
|
|
def get_library_dataset_file_path(gi, library_id, dataset_id, outputfh):
    """Return the 'file_name' value of a library dataset.

    Returns None when the dictionary returned by the Galaxy API has no
    'file_name' entry.
    """
    library_dataset = gi.libraries.show_dataset(library_id, dataset_id)
    outputfh.write("\nReturning file path of library dataset.\n")
    if 'file_name' in library_dataset:
        return library_dataset['file_name']
    return None
|
|
130
|
|
131
|
|
def get_library_dataset_id_by_name(gi, data_lib_id, dataset_name, outputfh):
    """Return the id of the library file whose name starts with dataset_name.

    Only items of type 'file' in the contents of the data library
    data_lib_id are considered.  Leading slashes are stripped from the
    item names and the comparison ignores case.  Returns None when no
    file matches.
    """
    outputfh.write("\nSearching for library dataset named %s.\n" % str(dataset_name))
    # Keep the requested name in its own variable.  The previous code
    # overwrote dataset_name with each library item name and then compared
    # that name with itself, so the first file in the library always
    # "matched" regardless of what was asked for.
    target_name = str(dataset_name).lstrip('/').lower()
    lib_item_dicts = gi.libraries.show_library(data_lib_id, contents=True)
    for lib_item_dict in lib_item_dicts:
        if lib_item_dict['type'] != 'file':
            continue
        lib_item_name = lib_item_dict['name'].lstrip('/').lower()
        if lib_item_name.startswith(target_name):
            outputfh.write("Found dataset named %s.\n" % str(dataset_name))
            return lib_item_dict['id']
    return None
|
|
144
|
|
145
|
|
def get_value_from_config(config_defaults, value):
    """Return the entry stored under the key value, or None if it is absent."""
    setting = config_defaults.get(value)
    return setting
|
|
148
|
|
149
|
|
def get_workflow(gi, name, outputfh, galaxy_base_url=None, api_key=None):
    """Look up a published workflow by name.

    Returns a (workflow_id, workflow_dict) tuple for the first workflow
    the Galaxy API reports for name, or (None, None) when it reports none.
    galaxy_base_url and api_key are accepted but not used here.
    """
    outputfh.write("\nSearching for workflow named %s\n" % name)
    matches = gi.workflows.get_workflows(name=name, published=True)
    if len(matches) < 1:
        return None, None
    workflow_id = matches[0]['id']
    # The listing above is only used for the id; fetch the complete
    # workflow separately.
    workflow_dict = gi.workflows.show_workflow(workflow_id)
    outputfh.write("Found workflow named %s.\n" % name)
    return workflow_id, workflow_dict
|
|
161
|
|
162
|
|
def get_workflow_input_datasets(gi, history_datasets, workflow_name, workflow_dict, outputfh):
    """Map history datasets to the input dataset steps of a workflow.

    Returns a dict keyed by step index whose values have the form
    {'src': 'hda', 'id': <history dataset id>}.  A step is treated as an
    input dataset when its tool_id is None, its type is 'data_input' and
    it has an annotation.  Such a step is mapped to the first history
    dataset whose name contains the annotation, ignoring case.  gi is
    accepted but not used here.
    """
    workflow_inputs = {}
    outputfh.write("\nMapping datasets from history to workflow %s.\n" % workflow_name)
    steps_dict = workflow_dict.get('steps', None)
    if steps_dict is None:
        return workflow_inputs
    for step_index, step_dict in steps_dict.items():
        # Dicts that define dataset inputs for a workflow look like this.
        # "0": {
        #     "tool_id": null,
        #     "tool_version": null,
        #     "id": 0,
        #     "input_steps": {},
        #     "tool_inputs": {},
        #     "type": "data_input",
        #     "annotation": null
        # },
        #
        # The workflow input dataset annotation must be a string (e.g.,
        # report) that enables it to be appropriately matched to a dataset
        # (e.g., axiongt1_report.txt).
        # 1. affy_metadata.tabular - must have the word "metadata" in the
        #    file name.
        # 2. sample_attributes.tabular - must have the word "attributes"
        #    in the file name.
        # 3. probeset_annotation.csv - must have the word "annotation" in
        #    the file name.
        # 4. <summary file>.txt - must have the word "summary" in the
        #    file name.
        # 5. <snp-posteriors file>.txt - must have the word
        #    "snp-posteriors" in the file name.
        # 6. <report file>.txt - must have the word "report" in the
        #    file name.
        # 7. <confidences file>.txt - must have the word "confidences" in
        #    the file name.
        # 8. <calls file>.txt - must have the word "calls" in the
        #    file name.
        # 9. all_genotyped_samples.vcf - must have "all_genotyped_samples"
        #    in the file name.
        annotation = step_dict.get('annotation', None)
        is_input_step = step_dict.get('tool_id', None) is None and step_dict.get('type', None) == 'data_input'
        if not is_input_step or annotation is None:
            continue
        wanted = annotation.lower()
        for hda_name, hda_dict in history_datasets.items():
            if wanted in hda_name.lower():
                workflow_inputs[step_index] = {'src': 'hda', 'id': hda_dict['id']}
                outputfh.write(" - Mapped dataset %s from history to workflow input dataset with annotation %s.\n" % (hda_name, annotation))
                break
    return workflow_inputs
|
|
216
|
|
217
|
|
def start_workflow(gi, workflow_id, workflow_name, inputs, params, history_id, outputfh):
    """Invoke a workflow in a history and log the response of the API call."""
    outputfh.write("\nExecuting workflow %s.\n" % workflow_name)
    invocation = gi.workflows.invoke_workflow(
        workflow_id,
        inputs=inputs,
        params=params,
        history_id=history_id,
    )
    outputfh.write("Response from executing workflow %s:\n" % workflow_name)
    outputfh.write("%s\n" % str(invocation))
|
|
223
|
|
224
|
|
def rename_library_dataset(gi, dataset_id, name, outputfh):
    """Rename a library dataset and return the Galaxy API response."""
    notice = "\nRenaming library dataset with id %s to be named %s.\n"
    outputfh.write(notice % (str(dataset_id), str(name)))
    return gi.libraries.update_library_dataset(dataset_id, name=name)
|
|
229
|
|
230
|
|
def update_workflow_params(workflow_dict, dbkey, outputfh):
    """Collect the tool parameter updates needed to run a workflow with dbkey.

    Every step whose tool_id contains 'affy2vcf' is inspected.  When the
    'locally_cached_item' value of its 'reference_genome_source_cond'
    tool input differs from dbkey, that value is replaced by dbkey (the
    dictionary inside workflow_dict is modified in place) and the
    conditional dictionary is recorded for the step.

    Returns a dict mapping step id to the updated conditional dictionary,
    or None when no step needed an update.
    """
    parameter_updates = None
    name = workflow_dict['name']
    outputfh.write("\nChecking for tool parameter updates for workflow %s using dbkey %s.\n" % (name, dbkey))
    # A workflow dictionary without steps has nothing to update.
    step_dicts = workflow_dict.get('steps', None) or {}
    for step_id, step_dict in step_dicts.items():
        tool_id = step_dict['tool_id']
        # Handle reference_source entries.  A substring test is used so
        # that a tool id that starts with "affy2vcf" is matched as well.
        if tool_id is None or 'affy2vcf' not in tool_id:
            continue
        tool_inputs_dict = step_dict['tool_inputs']
        # The queue_genotype_workflow tool provides a selection of only
        # a locally cached reference genome (not a history item), so dbkey
        # will always refer to a locally cached genome.
        # The affy2vcf tool allows the user to select either a locally
        # cached reference genome or a history item, but the workflow is
        # defined to use a locally cached reference genome by default.
        reference_genome_source_cond_dict = tool_inputs_dict['reference_genome_source_cond']
        # The value of reference_genome_source_cond_dict['reference_genome_source']
        # will always be 'cached'.
        workflow_db_key = reference_genome_source_cond_dict['locally_cached_item']
        if dbkey == workflow_db_key:
            continue
        reference_genome_source_cond_dict['locally_cached_item'] = dbkey
        # Create the result only once so that the updates of several
        # matching steps accumulate instead of replacing each other.
        if parameter_updates is None:
            parameter_updates = {}
        parameter_updates[step_id] = reference_genome_source_cond_dict
        outputfh.write("Updated step id %s with the following entry:\n%s\n" % (step_id, str(reference_genome_source_cond_dict)))
    return parameter_updates
|
|
259
|
|
260
|
|
def _history_finished_without_errors(gi, history_id, outputfh):
    # Poll the history until at most one dataset is in the "running" state
    # and return True if no dataset is in the "error" state at that point.
    # The workflow itself simply schedules all of the jobs, so it cannot be
    # checked for a state.  The queue_genotype_workflow tool will continue
    # to be in a "running" state while inside this loop, so we know that
    # the workflow has completed if only 1 dataset has this state.
    while True:
        history_status_dict = get_history_status(gi, history_id)
        sd_dict = history_status_dict['state_details']
        outputfh.write("\nsd_dict: %s\n" % str(sd_dict))
        if sd_dict['running'] <= 1:
            return sd_dict['error'] == 0
        outputfh.write("\nSleeping for 5 seconds...\n")
        time.sleep(5)


outputfh = open(args.output, "w")
config_defaults = get_config_settings(args.config_file)
# Close the API key file after reading it and drop any surrounding
# whitespace (e.g., a trailing newline), which is never part of a key.
with open(args.api_key, 'r') as api_key_fh:
    user_api_key = api_key_fh.read().strip()
admin_api_key = get_value_from_config(config_defaults, 'ADMIN_API_KEY')
galaxy_base_url = get_value_from_config(config_defaults, 'GALAXY_BASE_URL')
gi = galaxy.GalaxyInstance(url=galaxy_base_url, key=user_api_key)
ags_dataset_name = get_value_from_config(config_defaults, 'ALL_GENOTYPED_SAMPLES_DATASET_NAME')
ags_library_name = get_value_from_config(config_defaults, 'ALL_GENOTYPED_SAMPLES_LIBRARY_NAME')
ags_storage_dir = get_value_from_config(config_defaults, 'ALL_GENOTYPED_SAMPLES_STORAGE_DIR')
coralsnp_workflow_name = get_value_from_config(config_defaults, 'CORALSNP_WORKFLOW_NAME')
es_workflow_name = get_value_from_config(config_defaults, 'ENSURE_SYNCED_WORKFLOW_NAME')
vam_workflow_name = get_value_from_config(config_defaults, 'VALIDATE_AFFY_METADATA_WORKFLOW_NAME')

affy_metadata_is_valid = False
datasets_have_queued = False
stag_database_updated = False
synced = False
lock = threading.Lock()
lock.acquire(True)
# The lock is released and the output file is closed in exactly one place,
# the finally clause below.  sys.exit() raises SystemExit, which still runs
# that clause, so the error paths must not release the lock themselves:
# releasing an unlocked lock raises an error.
try:
    # Get the current history datasets.  At this point, the
    # history will ideally contain only the datasets to be
    # used as inputs to the 3 workflows, EnsureSynced,
    # ValidateAffyMetadata and CoralSNP.
    history_datasets = get_history_datasets(gi, args.history_id)

    # Get the All Genotyped Samples data library.
    ags_data_library_dict = get_data_library_dict(gi, ags_library_name, outputfh)
    if ags_data_library_dict is None:
        # Report a missing library explicitly instead of failing with an
        # opaque TypeError on the subscript below.
        raise Exception("The data library named %s was not found." % str(ags_library_name))
    ags_library_id = ags_data_library_dict['id']
    # Get the public all_genotyped_samples.vcf library dataset id.
    ags_ldda_id = get_library_dataset_id_by_name(gi, ags_library_id, ags_dataset_name, outputfh)

    # Import the public all_genotyped_samples dataset from
    # the data library to the current history.
    history_datasets = add_library_dataset_to_history(gi, args.history_id, ags_ldda_id, history_datasets, outputfh)
    outputfh.write("\nSleeping for 5 seconds...\n")
    time.sleep(5)

    # Get the EnsureSynced workflow
    es_workflow_id, es_workflow_dict = get_workflow(gi, es_workflow_name, outputfh)
    outputfh.write("\nEnsureSynced workflow id: %s\n" % str(es_workflow_id))
    # Map the history datasets to the input datasets for
    # the EnsureSynced workflow.
    es_workflow_input_datasets = get_workflow_input_datasets(gi, history_datasets, es_workflow_name, es_workflow_dict, outputfh)
    # Start the EnsureSynced workflow.
    start_workflow(gi, es_workflow_id, es_workflow_name, es_workflow_input_datasets, None, args.history_id, outputfh)
    outputfh.write("\nSleeping for 15 seconds...\n")
    time.sleep(15)
    # Wait until the workflow is finished.  Without errors, the
    # all_genotyped_samples.vcf file is in sync with the stag database.
    synced = _history_finished_without_errors(gi, args.history_id, outputfh)

    if synced:
        # Get the ValidateAffyMetadata workflow.
        vam_workflow_id, vam_workflow_dict = get_workflow(gi, vam_workflow_name, outputfh)
        outputfh.write("\nValidateAffyMetadata workflow id: %s\n" % str(vam_workflow_id))
        # Map the history datasets to the input datasets for
        # the ValidateAffyMetadata workflow.
        vam_workflow_input_datasets = get_workflow_input_datasets(gi, history_datasets, vam_workflow_name, vam_workflow_dict, outputfh)
        # Start the ValidateAffyMetadata workflow.
        start_workflow(gi, vam_workflow_id, vam_workflow_name, vam_workflow_input_datasets, None, args.history_id, outputfh)
        outputfh.write("\nSleeping for 15 seconds...\n")
        time.sleep(15)
        # Wait until the workflow is finished.  Without errors, the
        # metadata is valid.
        affy_metadata_is_valid = _history_finished_without_errors(gi, args.history_id, outputfh)
    else:
        outputfh.write("\nProcessing ended in error...\n")
        sys.exit(1)

    # NOTE(review): when the metadata is not valid, nothing more is done and
    # the script finishes without an error exit - confirm this is intended.
    if affy_metadata_is_valid:
        # Get the CoralSNP workflow.
        coralsnp_workflow_id, coralsnp_workflow_dict = get_workflow(gi, coralsnp_workflow_name, outputfh)
        outputfh.write("\nCoralSNP workflow id: %s\n" % str(coralsnp_workflow_id))
        # Map the history datasets to the input datasets for
        # the CoralSNP workflow.
        coralsnp_workflow_input_datasets = get_workflow_input_datasets(gi, history_datasets, coralsnp_workflow_name, coralsnp_workflow_dict, outputfh)
        outputfh.write("\nCoralSNP workflow input datasets: %s\n" % str(coralsnp_workflow_input_datasets))
        # Get the CoralSNP workflow params that could be updated.
        coralsnp_params = update_workflow_params(coralsnp_workflow_dict, args.dbkey, outputfh)
        outputfh.write("\nCoralSNP params: %s\n" % str(coralsnp_params))
        # Start the CoralSNP workflow.
        start_workflow(gi, coralsnp_workflow_id, coralsnp_workflow_name, coralsnp_workflow_input_datasets, coralsnp_params, args.history_id, outputfh)
        outputfh.write("\nSleeping for 15 seconds...\n")
        time.sleep(15)
        # Poll the history datasets, checking the statuses, and wait until
        # the workflow is finished.  The workflow itself simply schedules
        # all of the jobs, so it cannot be checked for a state.
        while True:
            history_status_dict = get_history_status(gi, args.history_id)
            sd_dict = history_status_dict['state_details']
            outputfh.write("\nsd_dict: %s\n" % str(sd_dict))
            # The queue_genotype_workflow tool will continue to be in a
            # "running" state while inside this loop, so we know that
            # the workflow has completed if no datasets are in the "new" or
            # "queued" state and there is only 1 dataset in the "running"
            # state.  We cannot filter on datasets in the "paused" state
            # because any datasets downstream from one in an "error" state
            # will automatically be given a "paused" state.  Of course, we'll
            # always break if any datasets are in the "error" state.  At
            # least one dataset must have reached the "queued" state before
            # the workflow is complete.
            if not datasets_have_queued:
                if sd_dict['queued'] > 0:
                    datasets_have_queued = True
            if sd_dict['error'] != 0:
                break
            if datasets_have_queued and sd_dict['queued'] == 0 and sd_dict['new'] == 0 and sd_dict['running'] <= 1:
                # The stag database has been updated.
                stag_database_updated = True
                break
            outputfh.write("\nSleeping for 5 seconds...\n")
            time.sleep(5)
        if stag_database_updated:
            # Get the id of the "bcftools merge" dataset in the current history.
            bcftools_merge_dataset_id = get_history_dataset_id_by_name(gi, args.history_id, "bcftools merge", outputfh)
            # Create a new dataset in the All Genotyped Samples data library by
            # importing the "bcftools merge" dataset from the current history.
            # We'll do this as the coraldmin user.
            admin_gi = galaxy.GalaxyInstance(url=galaxy_base_url, key=admin_api_key)
            new_ags_dataset_dict = copy_history_dataset_to_library(admin_gi, ags_library_id, bcftools_merge_dataset_id, outputfh)
            # Rename the ldda to be all_genotyped_samples.vcf.
            new_ags_ldda_id = new_ags_dataset_dict['id']
            rename_library_dataset(admin_gi, new_ags_ldda_id, ags_dataset_name, outputfh)
            # Get the full path of the all_genotyped_samples.vcf library dataset.
            ags_ldda_file_path = get_library_dataset_file_path(gi, ags_library_id, ags_ldda_id, outputfh)
            # Copy the all_genotyped_samples.vcf dataset to storage.  We
            # will only keep a single copy of this file since this tool
            # will end in an error before the CoralSNP workflow is started
            # if the all_genotyped_samples.vcf file is not sync'd with the
            # stag database.
            copy_dataset_to_storage(ags_ldda_file_path, ags_storage_dir, ags_dataset_name, outputfh)
            # Delete the original all_genotyped_samples library dataset.
            delete_library_dataset(admin_gi, ags_library_id, ags_ldda_id, outputfh)
            # To save disk space, delete the all_genotyped_samples hda
            # in the current history to enable later purging by an admin.
            ags_hda_id = get_history_dataset_id_by_name(gi, args.history_id, "all_genotyped_samples", outputfh)
            delete_history_dataset(gi, args.history_id, ags_hda_id, outputfh)
        else:
            outputfh.write("\nProcessing ended in error...\n")
            sys.exit(1)
except Exception as e:
    outputfh.write("Exception preparing or executing either the ValidateAffyMetadata workflow or the CoralSNP workflow:\n%s\n" % str(e))
    outputfh.write("\nProcessing ended in error...\n")
    sys.exit(1)
else:
    outputfh.write("\nFinished processing...\n")
finally:
    lock.release()
    outputfh.close()
|