changeset 0:c80fae8c94c1 draft

Uploaded
author greg
date Thu, 15 Aug 2019 11:17:33 -0400
parents
children d00c4cc7e8c2
files .shed.yml all_fasta.loc.sample qgw_config.ini.sample queue_genotype_workflow.py queue_genotype_workflow.xml tool_data_table_conf.xml.sample
diffstat 6 files changed, 565 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/.shed.yml	Thu Aug 15 11:17:33 2019 -0400
@@ -0,0 +1,11 @@
+name: queue_genotype_workflow
+owner: greg
+description: |
+  Contains a tool that uses the Galaxy API to execute the complete multilocus genotype pipeline for corals or symbionts.
+homepage_url: http://baumslab.org
+long_description: |
+  Contains a tool that uses the Galaxy API to execute the complete multilocus genotype pipeline for corals or symbionts.
+remote_repository_url: https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/corals/queue_genotype_workflow
+type: unrestricted
+categories:
+  - Micro-array Analysis
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/all_fasta.loc.sample	Thu Aug 15 11:17:33 2019 -0400
@@ -0,0 +1,18 @@
+#This file lists the locations and dbkeys of all the fasta files
+#under the "genome" directory (a directory that contains a directory
+#for each build). The script extract_fasta.py will generate the file
+#all_fasta.loc. This file has the format (white space characters are
+#TAB characters):
+#
+#<unique_build_id>	<dbkey>	<display_name>	<file_path>
+#
+#So, all_fasta.loc could look something like this:
+#
+#apiMel3	apiMel3	Honeybee (Apis mellifera): apiMel3	/path/to/genome/apiMel3/apiMel3.fa
+#hg19canon	hg19	Human (Homo sapiens): hg19 Canonical	/path/to/genome/hg19/hg19canon.fa
+#hg19full	hg19	Human (Homo sapiens): hg19 Full	/path/to/genome/hg19/hg19full.fa
+#
+#Your all_fasta.loc file should contain an entry for each individual
+#fasta file. So there will be multiple fasta files for each build,
+#such as with hg19 above.
+#
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/qgw_config.ini.sample	Thu Aug 15 11:17:33 2019 -0400
@@ -0,0 +1,15 @@
+# Configuration file for the queue_genotype_workflow tool.
+
+[defaults]
+
+# This section contains default settings for command line parameters that
+# can be overridden when they are passed to executed scripts.
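+#
+# The values below are sample placeholders; replace them with settings for
+# your own Galaxy instance (in particular ADMIN_API_KEY and GALAXY_BASE_URL).
+# The *_WORKFLOW_NAME entries must match the names of workflows that are
+# published on that Galaxy instance so the script can locate them.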
+
+ADMIN_API_KEY = 89erituieru83ihefi3838928358kefdk9
+ALL_GENOTYPED_SAMPLES_STORAGE_DIR = /tmp/all_genotyped_samples
+ALL_GENOTYPED_SAMPLES_DATASET_NAME = all_genotyped_samples.vcf
+ALL_GENOTYPED_SAMPLES_LIBRARY_NAME = All Genotyped Samples
+CORALSNP_WORKFLOW_NAME = CoralSNP
+ENSURE_SYNCED_WORKFLOW_NAME = EnsureSynced
+GALAXY_BASE_URL = http://localhost:8763
+VALIDATE_AFFY_METADATA_WORKFLOW_NAME = ValidateAffyMetadata
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/queue_genotype_workflow.py	Thu Aug 15 11:17:33 2019 -0400
@@ -0,0 +1,444 @@
+#!/usr/bin/env python
+import argparse
+import os
+import shutil
+import sys
+import threading
+import time
+
+from bioblend import galaxy
+from six.moves import configparser
+
+parser = argparse.ArgumentParser()
+parser.add_argument('--affy_metadata', dest='affy_metadata', help='Input Affymetrix 96 well plate metadata file')
+parser.add_argument('--annot', dest='annot', help='Probeset annotation file')
+parser.add_argument('--api_key', dest='api_key', help='Current user API key')
+parser.add_argument('--calls', dest='calls', help='Apt-probeset genotype calls file')
+parser.add_argument('--confidences', dest='confidences', help='Apt-probeset genotype confidences file')
+parser.add_argument('--config_file', dest='config_file', help='qgw_config.ini')
+parser.add_argument('--dbkey', dest='dbkey', help='Reference genome dbkey')
+parser.add_argument('--reference_genome', dest='reference_genome', help='Reference genome')
+parser.add_argument('--history_id', dest='history_id', help='Encoded id of current history')
+parser.add_argument('--output', dest='output', help='Output dataset')
+parser.add_argument('--report', dest='report', help='Apt-probeset genotype report file')
+parser.add_argument('--sample_attributes', dest='sample_attributes', help='Sample attributes tabular file')
+parser.add_argument('--snp-posteriors', dest='snp_posteriors', help='Apt-probeset genotype snp-posteriors file')
+parser.add_argument('--summary', dest='summary', help='Apt-probeset genotype summary file')
+args = parser.parse_args()
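+
+# Note: several of the dataset arguments above (e.g., affy_metadata, annot,
+# calls) are not read directly by this script; the corresponding history
+# datasets are located by name and mapped to the workflow inputs below.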
+
+
+def add_library_dataset_to_history(gi, history_id, dataset_id, history_datasets, outputfh):
+    # Add a data library dataset to a history.
+    outputfh.write('\nImporting dataset into current history.\n')
+    new_hda_dict = gi.histories.upload_dataset_from_library(history_id, dataset_id)
+    new_hda_name = new_hda_dict['name']
+    history_datasets[new_hda_name] = new_hda_dict
+    return history_datasets
+
+
+def copy_history_dataset_to_library(gi, library_id, dataset_id, outputfh):
+    # Copy a history dataset to a data library.
+    outputfh.write('\nCopying history dataset with id %s to data library with id %s.\n' % (str(dataset_id), str(library_id)))
+    new_library_dataset_dict = gi.libraries.copy_from_dataset(library_id, dataset_id)
+    return new_library_dataset_dict
+
+
+def copy_dataset_to_storage(src_path, dst_base_path, dataset_name, outputfh):
+    # Copy a dataset via its file path to a storage directory on disk.
+    if not os.path.isdir(dst_base_path):
+        os.makedirs(dst_base_path)
+    dst_path = os.path.join(dst_base_path, dataset_name)
+    shutil.copyfile(src_path, dst_path)
+    outputfh.write("Copied %s to storage.\n" % dataset_name)
+
+
+def delete_history_dataset(gi, history_id, dataset_id, outputfh, purge=False):
+    # Delete a history dataset.
+    outputfh.write("\nDeleting history dataset with id %s.\n" % dataset_id)
+    gi.histories.delete_dataset(history_id, dataset_id, purge=purge)
+
+
+def delete_library_dataset(gi, library_id, dataset_id, outputfh, purged=False):
+    # Delete a library dataset.
+    outputfh.write("\nDeleting library dataset with id %s.\n" % dataset_id)
+    deleted_dataset_dict = gi.libraries.delete_library_dataset(library_id, dataset_id, purged=purged)
+    return deleted_dataset_dict
+
+
+def get_config_settings(config_file, section='defaults'):
+    # Return a dictionary consisting of the key / value pairs
+    # of the defaults section of config_file.
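+    # For the sample qgw_config.ini this yields upper-cased keys such as
+    # ADMIN_API_KEY, GALAXY_BASE_URL and CORALSNP_WORKFLOW_NAME mapped to
+    # their configured values.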
+    d = {}
+    config_parser = configparser.ConfigParser()
+    config_parser.read(config_file)
+    for key, value in config_parser.items(section):
+        if section == 'defaults':
+            d[key.upper()] = value
+        else:
+            d[key] = value
+    return d
+
+
+def get_data_library_dict(gi, name, outputfh):
+    # Use the Galaxy API to get the data library named name.
+    outputfh.write("\nSearching for data library named %s.\n" % name)
+    # The following is not correctly filtering out deleted libraries.
+    data_lib_dicts = gi.libraries.get_libraries(library_id=None, name=name, deleted=False)
+    for data_lib_dict in data_lib_dicts:
+        if data_lib_dict['name'] == name and data_lib_dict['deleted'] not in [True, 'true', 'True']:
+            outputfh.write("Found data library named %s.\n" % name)
+            outputfh.write("%s\n" % str(data_lib_dict))
+            return data_lib_dict
+    return None
+
+
+def get_history_status(gi, history_id):
+    return gi.histories.get_status(history_id)
+
+
+def get_history_dataset_id_by_name(gi, history_id, dataset_name, outputfh):
+    # Use the Galaxy API to get the id of the dataset in the current
+    # history whose name starts with dataset_name (e.g., "bcftools merge").
+    outputfh.write("\nSearching for history dataset named %s.\n" % str(dataset_name))
+    history_dataset_dicts = get_history_datasets(gi, history_id)
+    for name, hd_dict in history_dataset_dicts.items():
+        name = name.lower()
+        if name.startswith(dataset_name.lower()):
+            outputfh.write("Found dataset named %s.\n" % str(dataset_name))
+            return hd_dict['id']
+    return None
+
+
+def get_history_datasets(gi, history_id):
+    history_datasets = {}
+    # With contents=True, show_history returns a list of history content dicts.
+    history_contents = gi.histories.show_history(history_id, contents=True, deleted='false', details=None)
+    for contents_dict in history_contents:
+        if contents_dict['history_content_type'] == 'dataset':
+            dataset_name = contents_dict['name']
+            # Don't include the "Queue genotype workflow" dataset.
+            if dataset_name.startswith("Queue genotype workflow"):
+                continue
+            history_datasets[dataset_name] = contents_dict
+    return history_datasets
+
+
+def get_library_dataset_file_path(gi, library_id, dataset_id, outputfh):
+    dataset_dict = gi.libraries.show_dataset(library_id, dataset_id)
+    outputfh.write("\nReturning file path of library dataset.\n")
+    return dataset_dict.get('file_name', None)
+
+
+def get_library_dataset_id_by_name(gi, data_lib_id, dataset_name, outputfh):
+    # Use the Galaxy API to get the all_genotyped_samples.vcf dataset id.
+    # We're assuming it is in the root folder.
+    outputfh.write("\nSearching for library dataset named %s.\n" % str(dataset_name))
+    lib_item_dicts = gi.libraries.show_library(data_lib_id, contents=True)
+    for lib_item_dict in lib_item_dicts:
+        if lib_item_dict['type'] == 'file':
+            lib_item_name = lib_item_dict['name'].lstrip('/').lower()
+            if lib_item_name.startswith(dataset_name.lower()):
+                outputfh.write("Found dataset named %s.\n" % str(dataset_name))
+                return lib_item_dict['id']
+    return None
+
+
+def get_value_from_config(config_defaults, value):
+    return config_defaults.get(value, None)
+
+
+def get_workflow(gi, name, outputfh, galaxy_base_url=None, api_key=None):
+    outputfh.write("\nSearching for workflow named %s\n" % name)
+    workflow_info_dicts = gi.workflows.get_workflows(name=name, published=True)
+    if len(workflow_info_dicts) == 0:
+        return None, None
+    wf_info_dict = workflow_info_dicts[0]
+    workflow_id = wf_info_dict['id']
+    # Get the complete workflow.
+    workflow_dict = gi.workflows.show_workflow(workflow_id)
+    outputfh.write("Found workflow named %s.\n" % name)
+    return workflow_id, workflow_dict
+
+
+def get_workflow_input_datasets(gi, history_datasets, workflow_name, workflow_dict, outputfh):
+    # Map the history datasets to the input datasets for the workflow.
+    workflow_inputs = {}
+    outputfh.write("\nMapping datasets from history to workflow %s.\n" % workflow_name)
+    steps_dict = workflow_dict.get('steps', None)
+    if steps_dict is not None:
+        for step_index, step_dict in steps_dict.items():
+            # Dicts that define dataset inputs for a workflow
+            # look like this.
+            # "0": {
+            #      "tool_id": null,
+            #      "tool_version": null,
+            #      "id": 0,
+            #      "input_steps": {},
+            #      "tool_inputs": {},
+            #      "type": "data_input",
+            #      "annotation": null
+            # },
+            tool_id = step_dict.get('tool_id', None)
+            tool_type = step_dict.get('type', None)
+            # This requires each workflow input dataset annotation to be a
+            # string (e.g., report) that enables it to be appropriately
+            # matched to a dataset (e.g., axiongt1_report.txt).
+            # 1. affy_metadata.tabular - must have the word "metadata" in
+            #                            the file name.
+            # 2. sample_attributes.tabular - must have the word "attributes"
+            #                                in the file name.
+            # 3. probeset_annotation.csv - must have the word "annotation" in
+            #                              the file name.
+            # 4. <summary file>.txt - must have the word "summary" in the
+            #                         file name.
+            # 5. <snp-posteriors file>.txt - must have the word
+            #                                "snp-posteriors" in the file name.
+            # 6. <report file>.txt - must have the word "report" in the
+            #                        file name.
+            # 7. <confidences file>.txt - must have the word "confidences"
+            #                             in the file name.
+            # 8. <calls file>.txt - must have the word "calls" in the
+            #                       file name.
+            # 9. all_genotyped_samples.vcf - must have "all_genotyped_samples"
+            #                                in the file name.
+            annotation = step_dict.get('annotation', None)
+            if tool_id is None and tool_type == 'data_input' and annotation is not None:
+                annotation_check = annotation.lower()
+                # Match each history dataset whose name contains the
+                # annotation string to this workflow input step.
+                for input_hda_name, input_hda_dict in history_datasets.items():
+                    input_hda_name_check = input_hda_name.lower()
+                    if input_hda_name_check.find(annotation_check) >= 0:
+                        workflow_inputs[step_index] = {'src': 'hda', 'id': input_hda_dict['id']}
+                        outputfh.write(" - Mapped dataset %s from history to workflow input dataset with annotation %s.\n" % (input_hda_name, annotation))
+                        break
+    return workflow_inputs
+
+
+def start_workflow(gi, workflow_id, workflow_name, inputs, params, history_id, outputfh):
+    outputfh.write("\nExecuting workflow %s.\n" % workflow_name)
+    workflow_invocation_dict = gi.workflows.invoke_workflow(workflow_id, inputs=inputs, params=params, history_id=history_id)
+    outputfh.write("Response from executing workflow %s:\n" % workflow_name)
+    outputfh.write("%s\n" % str(workflow_invocation_dict))
+
+
+def rename_library_dataset(gi, dataset_id, name, outputfh):
+    outputfh.write("\nRenaming library dataset with id %s to be named %s.\n" % (str(dataset_id), str(name)))
+    library_dataset_dict = gi.libraries.update_library_dataset(dataset_id, name=name)
+    return library_dataset_dict
+
+
+def update_workflow_params(workflow_dict, dbkey, outputfh):
+    parameter_updates = None
+    name = workflow_dict['name']
+    outputfh.write("\nChecking for tool parameter updates for workflow %s using dbkey %s.\n" % (name, dbkey))
+    step_dicts = workflow_dict.get('steps', None)
+    for step_id, step_dict in step_dicts.items():
+        tool_id = step_dict['tool_id']
+        if tool_id is None:
+            continue
+        # Handle reference_source entries
+        if tool_id.find('affy2vcf') >= 0:
+            tool_inputs_dict = step_dict['tool_inputs']
+            # The queue_genotype_workflow tool provides a selection of only
+            # a locally cached reference genome (not a history item), so dbkey
+            # will always refer to a locally cached genome.
+            # The affy2vcf tool allows the user to select either a locally
+            # cached reference genome or a history item, but the workflow is
+            # defined to use a locally cached reference genome by default.
+            reference_genome_source_cond_dict = tool_inputs_dict['reference_genome_source_cond']
+            # The value of reference_genome_source_cond_dict['reference_genome_source']
+            # will always be 'cached'.
+            workflow_db_key = reference_genome_source_cond_dict['locally_cached_item']
+            if dbkey != workflow_db_key:
+                reference_genome_source_cond_dict['locally_cached_item'] = dbkey
+                parameter_updates = {}
+                parameter_updates[step_id] = reference_genome_source_cond_dict
+                outputfh.write("Updated step id %s with the following entry:\n%s\n" % (step_id, str(reference_genome_source_cond_dict)))
+    return parameter_updates
+
+
+outputfh = open(args.output, "w")
+config_defaults = get_config_settings(args.config_file)
+user_api_key = open(args.api_key, 'r').read()
+admin_api_key = get_value_from_config(config_defaults, 'ADMIN_API_KEY')
+galaxy_base_url = get_value_from_config(config_defaults, 'GALAXY_BASE_URL')
+gi = galaxy.GalaxyInstance(url=galaxy_base_url, key=user_api_key)
+ags_dataset_name = get_value_from_config(config_defaults, 'ALL_GENOTYPED_SAMPLES_DATASET_NAME')
+ags_library_name = get_value_from_config(config_defaults, 'ALL_GENOTYPED_SAMPLES_LIBRARY_NAME')
+ags_storage_dir = get_value_from_config(config_defaults, 'ALL_GENOTYPED_SAMPLES_STORAGE_DIR')
+coralsnp_workflow_name = get_value_from_config(config_defaults, 'CORALSNP_WORKFLOW_NAME')
+es_workflow_name = get_value_from_config(config_defaults, 'ENSURE_SYNCED_WORKFLOW_NAME')
+vam_workflow_name = get_value_from_config(config_defaults, 'VALIDATE_AFFY_METADATA_WORKFLOW_NAME')
+
+affy_metadata_is_valid = False
+datasets_have_queued = False
+stag_database_updated = False
+synced = False
+lock = threading.Lock()
+lock.acquire(True)
+try:
+    # Get the current history datasets.  At this point, the
+    # history will ideally contain only the datasets to be
+    # used as inputs to the 3 workflows, EnsureSynced,
+    # ValidateAffyMetadata and CoralSNP.
+    history_datasets = get_history_datasets(gi, args.history_id)
+
+    # Get the All Genotyped Samples data library.
+    ags_data_library_dict = get_data_library_dict(gi, ags_library_name, outputfh)
+    ags_library_id = ags_data_library_dict['id']
+    # Get the public all_genotyped_samples.vcf library dataset id.
+    ags_ldda_id = get_library_dataset_id_by_name(gi, ags_library_id, ags_dataset_name, outputfh)
+
+    # Import the public all_genotyped_samples dataset from
+    # the data library to the current history.
+    history_datasets = add_library_dataset_to_history(gi, args.history_id, ags_ldda_id, history_datasets, outputfh)
+    outputfh.write("\nSleeping for 5 seconds...\n")
+    time.sleep(5)
+
+    # Get the EnsureSynced workflow
+    es_workflow_id, es_workflow_dict = get_workflow(gi, es_workflow_name, outputfh)
+    outputfh.write("\nEnsureSynced workflow id: %s\n" % str(es_workflow_id))
+    # Map the history datasets to the input datasets for
+    # the EnsureSynced workflow.
+    es_workflow_input_datasets = get_workflow_input_datasets(gi, history_datasets, es_workflow_name, es_workflow_dict, outputfh)
+    # Start the EnsureSynced workflow.
+    start_workflow(gi, es_workflow_id, es_workflow_name, es_workflow_input_datasets, None, args.history_id, outputfh)
+    outputfh.write("\nSleeping for 15 seconds...\n")
+    time.sleep(15)
+    # Poll the history datasets, checking the statuses, and wait until
+    # the workflow is finished.  The workflow itself simply schedules
+    # all of the jobs, so it cannot be checked for a state.
+    while True:
+        history_status_dict = get_history_status(gi, args.history_id)
+        sd_dict = history_status_dict['state_details']
+        outputfh.write("\nsd_dict: %s\n" % str(sd_dict))
+        # The queue_genotype_workflow tool will continue to be in a
+        # "running" state while inside this for loop, so  we know that
+        # the workflow has completed if only 1 dataset has this state.
+        if sd_dict['running'] <= 1:
+            if sd_dict['error'] == 0:
+                # The all_genotyped_samples.vcf file is
+                # in sync with the stag database.
+                synced = True
+                break
+        outputfh.write("\nSleeping for 5 seconds...\n")
+        time.sleep(5)
+
+    if synced:
+        # Get the ValidateAffyMetadata workflow.
+        vam_workflow_id, vam_workflow_dict = get_workflow(gi, vam_workflow_name, outputfh)
+        outputfh.write("\nValidateAffyMetadata workflow id: %s\n" % str(vam_workflow_id))
+        # Map the history datasets to the input datasets for
+        # the ValidateAffyMetadata workflow.
+        vam_workflow_input_datasets = get_workflow_input_datasets(gi, history_datasets, vam_workflow_name, vam_workflow_dict, outputfh)
+        # Start the ValidateAffyMetadata workflow.
+        start_workflow(gi, vam_workflow_id, vam_workflow_name, vam_workflow_input_datasets, None, args.history_id, outputfh)
+        outputfh.write("\nSleeping for 15 seconds...\n")
+        time.sleep(15)
+        # Poll the history datasets, checking the statuses, and wait until
+        # the workflow is finished.
+        while True:
+            history_status_dict = get_history_status(gi, args.history_id)
+            sd_dict = history_status_dict['state_details']
+            outputfh.write("\nsd_dict: %s\n" % str(sd_dict))
+            # The queue_genotype_workflow tool will continue to be in a
+            # "running" state while inside this for loop, so  we know that
+            # the workflow has completed if only 1 dataset has this state.
+            if sd_dict['running'] <= 1:
+                if sd_dict['error'] == 0:
+                    # The metadata is valid.
+                    affy_metadata_is_valid = True
+                    break
+            outputfh.write("\nSleeping for 5 seconds...\n")
+            time.sleep(5)
+    else:
+        outputfh.write("\nProcessing ended in error...\n")
+        outputfh.close()
+        lock.release()
+        sys.exit(1)
+
+    if affy_metadata_is_valid:
+        # Get the CoralSNP workflow.
+        coralsnp_workflow_id, coralsnp_workflow_dict = get_workflow(gi, coralsnp_workflow_name, outputfh)
+        outputfh.write("\nCoralSNP workflow id: %s\n" % str(coralsnp_workflow_id))
+        # Map the history datasets to the input datasets for
+        # the CoralSNP workflow.
+        coralsnp_workflow_input_datasets = get_workflow_input_datasets(gi, history_datasets, coralsnp_workflow_name, coralsnp_workflow_dict, outputfh)
+        outputfh.write("\nCoralSNP workflow input datasets: %s\n" % str(coralsnp_workflow_input_datasets))
+        # Get the CoralSNP workflow params that could be updated.
+        coralsnp_params = update_workflow_params(coralsnp_workflow_dict, args.dbkey, outputfh)
+        outputfh.write("\nCoralSNP params: %s\n" % str(coralsnp_params))
+        # Start the CoralSNP workflow.
+        start_workflow(gi, coralsnp_workflow_id, coralsnp_workflow_name, coralsnp_workflow_input_datasets, coralsnp_params, args.history_id, outputfh)
+        outputfh.write("\nSleeping for 15 seconds...\n")
+        time.sleep(15)
+        # Poll the history datasets, checking the statuses, and wait until
+        # the workflow is finished.  The workflow itself simply schedules
+        # all of the jobs, so it cannot be checked for a state.
+        while True:
+            history_status_dict = get_history_status(gi, args.history_id)
+            sd_dict = history_status_dict['state_details']
+            outputfh.write("\nsd_dict: %s\n" % str(sd_dict))
+            # The queue_genotype_workflow tool will continue to be in a
+            # "running" state while inside this for loop, so  we know that
+            # the workflow has completed if no datasets are in the "new" or
+            # "queued" state and there is only 1 dataset in the "running"
+            # state.  We cannot filter on datasets in the "paused" state
+            # because any datasets downstream from one in an "error" state
+            # will automatically be given a "paused" state. Of course, we'll
+            # always break if any datasets are in the "error" state.  At
+            # least one dataset must have reached the "queued" state before
+            # the workflow is complete.
+            if not datasets_have_queued:
+                if sd_dict['queued'] > 0:
+                    datasets_have_queued = True
+            if sd_dict['error'] != 0:
+                break
+            if datasets_have_queued and sd_dict['queued'] == 0 and sd_dict['new'] == 0 and sd_dict['running'] <= 1:
+                # The stag database has been updated.
+                stag_database_updated = True
+                break
+            outputfh.write("\nSleeping for 5 seconds...\n")
+            time.sleep(5)
+        if stag_database_updated:
+            # Get the id of the "bcftools merge" dataset in the current history.
+            bcftools_merge_dataset_id = get_history_dataset_id_by_name(gi, args.history_id, "bcftools merge", outputfh)
+            # Create a new dataset in the All Genotyped Samples data library by
+            # importing the "bcftools merge" dataset from the current history.
+            # We'll do this as the admin user configured via ADMIN_API_KEY.
+            admin_gi = galaxy.GalaxyInstance(url=galaxy_base_url, key=admin_api_key)
+            new_ags_dataset_dict = copy_history_dataset_to_library(admin_gi, ags_library_id, bcftools_merge_dataset_id, outputfh)
+            # Rename the ldda to be all_genotyped_samples.vcf.
+            new_ags_ldda_id = new_ags_dataset_dict['id']
+            renamed_ags_dataset_dict = rename_library_dataset(admin_gi, new_ags_ldda_id, ags_dataset_name, outputfh)
+            # Get the full path of the all_genotyped_samples.vcf library dataset.
+            ags_ldda_file_path = get_library_dataset_file_path(gi, ags_library_id, ags_ldda_id, outputfh)
+            # Copy the all_genotyped_samples.vcf dataset to storage.  We
+            # will only keep a single copy of this file since this tool
+            # will end in an error before the CoralSNP workflow is started
+            # if the all_genotyped_samples.vcf file is not sync'd with the
+            # stag database.
+            copy_dataset_to_storage(ags_ldda_file_path, ags_storage_dir, ags_dataset_name, outputfh)
+            # Delete the original all_genotyped_samples library dataset.
+            deleted_dataset_dict = delete_library_dataset(admin_gi, ags_library_id, ags_ldda_id, outputfh)
+            # To save disk space, delete the all_genotyped_samples hda
+            # in the current history to enable later purging by an admin.
+            ags_hda_id = get_history_dataset_id_by_name(gi, args.history_id, "all_genotyped_samples", outputfh)
+            delete_history_dataset(gi, args.history_id, ags_hda_id, outputfh)
+    else:
+        outputfh.write("\nProcessing ended in error...\n")
+        outputfh.close()
+        lock.release()
+        sys.exit(1)
+except Exception as e:
+    outputfh.write("Exception preparing or executing either the ValidateAffyMetadata workflow or the CoralSNP workflow:\n%s\n" % str(e))
+    outputfh.write("\nProcessing ended in error...\n")
+    outputfh.close()
+    lock.release()
+    sys.exit(1)
+finally:
+    lock.release()
+
+outputfh.write("\nFinished processing...\n")
+outputfh.close()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/queue_genotype_workflow.xml	Thu Aug 15 11:17:33 2019 -0400
@@ -0,0 +1,70 @@
+<tool id="queue_genotype_workflow" name="Queue genotype workflow" version="1.0.0">
+    <description></description>
+    <command detect_errors="exit_code"><![CDATA[
+#set history_id = $__app__.security.encode_id($affy_metadata.history.id)
+python $__tool_directory__/queue_genotype_workflow.py
+--affy_metadata '$affy_metadata'
+--annot '$annot'
+--api_key $get_user_api_key
+--calls '$calls'
+--confidences '$confidences'
+--config_file $__tool_directory__/qgw_config.ini
+--history_id $history_id
+--reference_genome '$locally_cached_item.fields.path'
+--dbkey '$locally_cached_item.fields.value'
+--report '$report'
+--sample_attributes '$sample_attributes'
+--snp-posteriors '$snp_posteriors'
+--summary '$summary'
+--output '$output']]></command>
+    <configfiles>
+        <configfile name="get_user_api_key"><![CDATA[#from galaxy.managers import api_keys#${api_keys.ApiKeyManager( $__app__ ).get_or_create_api_key($__user__)} ]]></configfile>
+    </configfiles>
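+    <!--
+        The get_user_api_key configfile writes the submitting user's API key to a
+        file at job run time; queue_genotype_workflow.py reads that file and uses
+        the key to connect to this Galaxy instance as the current user.
+    -->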
+    <inputs>
+        <param name="affy_metadata" type="data" format="tabular" label="Affymetrix 96 well plate metadata file" help="The word 'metadata' must be in the file name.">
+            <validator type="expression" message="96 well plate data must have 32 columns and no more than 96 lines of data"><![CDATA[value is not None and value.metadata.columns==32 and value.metadata.data_lines<=96]]></validator>
+        </param>
+        <param name="sample_attributes" type="data" format="tabular" label="Sample attributes file" help="The word 'attributes' must be in the file name."/>
+        <param name="annot" type="data" format="csv" label="Probeset annotation file" help="The word 'annotation' must be in the file name."/>
+        <param name="summary" type="data" format="txt" label="Apt-probeset genotype summary file" help="The word 'summary' must be in the file name."/>
+        <param name="snp_posteriors" type="data" format="txt" label="Apt-probeset genotype snp-posteriors file" help="The word 'snp-posteriors' must be in the file name."/>
+        <param name="report" type="data" format="txt" label="Apt-probeset genotype report file" help="The word 'report' must be in the file name."/>
+        <param name="confidences" type="data" format="txt" label="Apt-probeset genotype confidences file" help="The word 'confidences' must be in the file name."/>
+        <param name="calls" type="data" format="txt" label="Apt-probeset genotype calls file" help="The word 'calls' must be in the file name."/>
+        <param name="locally_cached_item" type="select" format="fasta" label="Fasta reference sequence">
+            <options from_data_table="all_fasta">
+                <column name="name" index="2"/>
+                <column name="value" index="0"/>
+                <column name="path" index="3"/>
+                <filter type="sort_by" column="1"/>
+                <validator type="no_options" message="No cached Fasta genome references are available for the build associated with the selected probeset annotation file." />
+            </options>
+        </param>
+    </inputs>
+    <outputs>
+        <data name="output" label="${tool.name} execution log" format="txt" />
+    </outputs>
+    <tests>
+        <test>
+            <!--Testing this tool is a bit difficult at the current time.-->
+        </test>
+    </tests>
+    <help>
+**What it does**
+
+Uses the Galaxy/BioBlend API to execute the complete multilocus genotype analysis pipeline for corals or symbionts.
+The tool runs the EnsureSynced, ValidateAffyMetadata and CoralSNP workflows in sequence within the current history.
+This tool must be able to access the corals (stag) database.
+
+    </help>
+    <citations>
+        <citation type="bibtex">
+            @misc{None,
+            journal = {None},
+            author = {Baums I},
+            title = {Manuscript in preparation},
+            year = {None},
+            url = {http://baumslab.org}
+            }
+        </citation>
+    </citations>
+</tool>
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample	Thu Aug 15 11:17:33 2019 -0400
@@ -0,0 +1,7 @@
+<tables>
+    <!-- Locations of all fasta files under genome directory -->
+    <table name="all_fasta" comment_char="#" allow_duplicate_entries="False">
+        <columns>value, dbkey, name, path</columns>
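+        <!-- Each non-comment line of all_fasta.loc supplies these four columns,
+             tab separated; see all_fasta.loc.sample for example entries. -->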
+        <file path="tool-data/all_fasta.loc" />
+    </table>
+</tables>