Mercurial > repos > brenninc > data_manager_for_directory_data
changeset 0:43083927dfc5 draft default tip
Uploaded
author | brenninc |
---|---|
date | Sat, 07 May 2016 16:59:47 -0400 |
parents | |
children | |
files | data_manager/directory_data_manager.py data_manager/directory_data_manager.xml data_manager_conf.xml tool-data/directory_data.loc.sample tool_data_table_conf.xml.sample |
diffstat | 5 files changed, 220 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager/directory_data_manager.py Sat May 07 16:59:47 2016 -0400 @@ -0,0 +1,92 @@ +#!/usr/bin/env python + +import gzip +import json +import optparse # using optparse as hydra still python 2.6 +import os.path +import shutil + +def _add_data_table_entry( data_manager_dict, data_table_name, data_table_entry ): + data_manager_dict['data_tables'] = data_manager_dict.get( 'data_tables', {} ) + data_manager_dict['data_tables'][ data_table_name ] = data_manager_dict['data_tables'].get( data_table_name, [] ) + data_manager_dict['data_tables'][ data_table_name ].append( data_table_entry ) + return data_manager_dict + + +def get_param(name, params, default=None, check_tab=True): + value = params.get(name) + print name, value + return check_param(name, value, default=default, check_tab=check_tab) + + +def check_param(name, value, default=None, check_tab=True): + if value in [ None, '', '?' ]: + if default: + print "Using {0} for {1} as no value provided".format( default, name ) + value = default + else: + raise Exception( '{0} is not a valid {1}. You must specify a valid {1}.'.format( value, name ) ) + if check_tab and "\t" in value: + raise Exception( '{0} is not a valid {1}. It may not contain a tab because these are used as seperators by galaxy .'.format( value, name ) ) + return value + +def check_extension(extension): + extension = extension.strip() + if extension[0] == ".": + extension = extension[1:] + return extension + + +def check_path(path, original_extension): + files = os.listdir(path) + check = "." 
+ original_extension + for a_file in files: + if a_file.endswith(check): + return True + raise Exception( 'path {0} does not contain any files ending with {1}'.format( path, check ) ) + + +def main(): + + #Parse Command Line + parser = optparse.OptionParser() + parser.add_option( '--data_table_name', action='store', type="string", default=None, help='path' ) + parser.add_option( '--json_output_file', action='store', type="string", default=None, help='path' ) + (options, args) = parser.parse_args() + + data_table_name = check_param("data_table_name", options.data_table_name) + json_output_file = check_param("json_output_file", options.json_output_file, check_tab=False) + + param_dict = json.loads( open( json_output_file ).read() ) + params = param_dict.get("param_dict") + print "input params:" + print params + + data_table_entry = {} + data_table_entry["original_extension"] = check_extension(get_param("original_extension", params)) + data_table_entry["galaxy_extension"] = check_extension(get_param("galaxy_extension", params)) + data_table_entry["decompress"] = get_param("decompress", params) + if not (data_table_entry["decompress"] in ["No","Yes"]): + raise Exception( "Only legal values for dcompress are No and Yes." 
) + data_table_entry["path"] = get_param("path", params) + check_path(data_table_entry["path"], data_table_entry["original_extension"]) + + basename = os.path.basename(data_table_entry["path"]) + filename = os.path.splitext(basename)[0] + data_table_entry["name"] = get_param("name", params, default=filename) + data_table_entry["value"] = get_param("value", params, default=data_table_entry["name"]) + data_table_entry["dbkey"] = get_param("dbkey", params, default=data_table_entry["value"]) + + data_manager_dict = {} + _add_data_table_entry( data_manager_dict, data_table_name, data_table_entry ) + + print "output:" + print data_manager_dict + # save info to json file + with open( json_output_file, 'wb' ) as output_file: + output_file.write( json.dumps( data_manager_dict ) ) + output_file.write( "\n" ) + + +if __name__ == "__main__": + main()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager/directory_data_manager.xml Sat May 07 16:59:47 2016 -0400 @@ -0,0 +1,86 @@ +<tool id="directory_data_manager" name="directory data manager" tool_type="manage_data" version="0.0.1"> + <description>path inputer</description> + <command interpreter="python"> + directory_data_manager.py + --data_table_name "directory_data" + --json_output_file "${json_output_file}" + </command> + <inputs> + <param name="path" type="text" value="" label="path field for the entry" /> + <param name="original_extension" type="text" value="" label="Extensions as found in directory." /> + <param name="galaxy_extension" type="text" value="" label="Galaxy Extension." /> + <param name="decompress" type="select" label="Will files need to be decompressed."> + <option value="Yes">Yes. Files should be decompressed (unzipped)</option> + <option value="No">No. Files are not compressed</option> + </param> + <param name="value" type="text" value="" label="value field for the entry. Defaults to name if left blank." /> + <param name="dbkey" type="text" value="" label="dbkey field for the entry. Defaults to value if left blank." /> + <param name="name" type="text" value="" label="name field for the entry. Defaults to the file name from path if left blank." /> + </inputs> + <outputs> + <data name="json_output_file" format="data_manager_json"/> + </outputs> + + <help> +Adds information for downloading data to the directory_data data table. + +Required inputs +=============== + +path +---- + +Full path on the server where galaxy can find the files + +Original Extension +------------------ + +The Extension as found on the server. + +Parts before the extension that could be used to split the data into subsections, such as Read1 and Read2, should not be included. + +Galaxy_Extension +---------------- + +The Extension to give the file before loading into galaxy so galaxy can detect the file type. 
+ +This should be one of the extensions listed in galaxy/config/datatypes_conf.xml (or xml.sample) + +For example, use tabular for tsv, txt for text, and fasta rather than fa + +Ideally use fastqsanger, fastqsolexa, fastqcssanger, or fastqillumina instead of just fastq as many tools need this level of detail. + +Decompress +---------- + +'Yes' to ask the tool to decompress the files, otherwise 'No'. + +Any other value will cause an error. + +Optional Inputs +=============== + +Name +---- + +If not provided the last bit of **path** is used. + +Value +----- + +If value is not provided, the **name** will be used (or its default) + +dbkey +----- + +If not provided, the **value** will be used (or its default) + +=== + +The tool will check the path exists and that it holds at least one file with the required extension. + + </help> + <citations> + </citations> + +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager_conf.xml Sat May 07 16:59:47 2016 -0400 @@ -0,0 +1,17 @@ +<?xml version="1.0"?> +<data_managers> + <data_manager tool_file="data_manager/directory_data_manager.xml" id="directory_data_manager" version="0.0.1"> + <data_table name="directory_data"> + <output> + <column name="value" /> + <column name="dbkey" /> + <column name="name" /> + <column name="original_extension" /> + <column name="galaxy_extension" /> + <column name="decompress" /> + <column name="path" /> + </output> + </data_table> + </data_manager> + +</data_managers>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/directory_data.loc.sample Sat May 07 16:59:47 2016 -0400 @@ -0,0 +1,19 @@ +#This file lists the directories that can be read in + +#This file has the format (white space characters are TAB characters): +# +#<unique_build_id> <dbkey> <display_name> <original_extension> <galaxy_extension> <decompress> <path> +# +#original_extension should not include the leading dot (.) +# +#galaxy_extension should be one listed in galaxy/config/datatypes_conf.xml (or xml.sample) +# +#decompress should be No or Yes +# +#So, directory_data.loc could look something like this: (whitespace is tabs) +# +#john_12 john_12 John's fastq files batch 12 fastq.gz fastqsanger Yes /data/john/batch12 +# +#Your directory_data.loc file should contain an entry for each path and extension pair +# +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.sample Sat May 07 16:59:47 2016 -0400 @@ -0,0 +1,6 @@ +<tables> + <table name="directory_data" comment_char="#"> + <columns>value, dbkey, name, original_extension, galaxy_extension, decompress, path</columns> + <file path="tool-data/directory_data.loc" /> + </table> +</tables>