changeset 0:43083927dfc5 draft default tip

Uploaded
author brenninc
date Sat, 07 May 2016 16:59:47 -0400
parents
children
files data_manager/directory_data_manager.py data_manager/directory_data_manager.xml data_manager_conf.xml tool-data/directory_data.loc.sample tool_data_table_conf.xml.sample
diffstat 5 files changed, 220 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/directory_data_manager.py	Sat May 07 16:59:47 2016 -0400
@@ -0,0 +1,92 @@
+#!/usr/bin/env python
+
+import gzip
+import json
+import optparse  # using optparse as hydra still python 2.6
+import os.path
+import shutil
+
+def _add_data_table_entry( data_manager_dict, data_table_name, data_table_entry ):
+    # Append the entry under data_manager_dict['data_tables'][data_table_name], creating missing levels.
+    entries_by_table = data_manager_dict.setdefault( 'data_tables', {} )
+    entries_by_table.setdefault( data_table_name, [] ).append( data_table_entry )
+    return data_manager_dict  # the same (mutated) dict that was passed in
+
+
+def get_param(name, params, default=None,  check_tab=True):
+    raw_value = params.get(name)  # None when the key is absent from params
+    print name, raw_value  # echo the raw lookup to stdout before validating it
+    return check_param(name, raw_value, default, check_tab)  # validation/defaulting is delegated
+
+
+def check_param(name, value, default=None,  check_tab=True):
+    # Validate one parameter: fall back to `default` when value is None, '' or '?'; optionally reject tabs.
+    if value in [ None, '', '?' ]:
+        if not default:  # no usable fallback, so a missing value is an error
+            raise Exception( '{0} is not a valid {1}. You must specify a valid {1}.'.format( value, name ) )
+        print "Using {0} for {1} as no value provided".format( default, name )
+        value = default
+    if check_tab and "\t" in value:
+        raise Exception( '{0} is not a valid {1}. It may not contain a tab because these are used as seperators by galaxy .'.format( value, name ) )
+    return value
+
+def check_extension(extension):  # normalise an extension: trim whitespace, drop one leading "."
+    extension = extension.strip()
+    if extension.startswith("."):  # safe on "" (indexing extension[0] raised IndexError for blank input)
+        extension = extension[1:]
+    return extension  # may be "" when the input was blank or just "."
+
+
+def check_path(path, original_extension):
+    # True if any entry of directory `path` ends with "." + original_extension; otherwise raise Exception.
+    entries = os.listdir(path)
+    suffix = "." + original_extension
+    if any( entry.endswith(suffix) for entry in entries ):
+        return True
+    raise Exception( 'path {0} does not contain any files ending with {1}'.format( path, suffix ) )
+
+
+def main():
+    # Read the parameter JSON, validate it into one data table entry, and overwrite the file with that entry.
+    #Parse Command Line
+    parser = optparse.OptionParser()
+    parser.add_option( '--data_table_name', action='store', type="string", default=None, help='path' )
+    parser.add_option( '--json_output_file', action='store', type="string", default=None, help='path' )
+    (options, args) = parser.parse_args()
+
+    data_table_name = check_param("data_table_name", options.data_table_name)
+    json_output_file = check_param("json_output_file", options.json_output_file, check_tab=False)
+    with open( json_output_file ) as input_file:  # close the handle before this same path is rewritten below
+        param_dict = json.loads( input_file.read() )
+    params = param_dict.get("param_dict")
+    print "input params:"
+    print params
+
+    data_table_entry = {}
+    data_table_entry["original_extension"] = check_extension(get_param("original_extension", params))
+    data_table_entry["galaxy_extension"] = check_extension(get_param("galaxy_extension", params))
+    data_table_entry["decompress"] = get_param("decompress", params)
+    if data_table_entry["decompress"] not in ["No","Yes"]:
+        raise Exception( "Only legal values for decompress are No and Yes." )
+    data_table_entry["path"] = get_param("path", params)
+    check_path(data_table_entry["path"], data_table_entry["original_extension"])  # raises if no matching file
+
+    basename = os.path.basename(data_table_entry["path"])
+    filename = os.path.splitext(basename)[0]
+    data_table_entry["name"] = get_param("name", params, default=filename)  # each default chains from the previous field
+    data_table_entry["value"] = get_param("value", params, default=data_table_entry["name"])
+    data_table_entry["dbkey"] = get_param("dbkey", params, default=data_table_entry["value"])
+
+    data_manager_dict = {}
+    _add_data_table_entry( data_manager_dict, data_table_name, data_table_entry )
+
+    print "output:"
+    print data_manager_dict
+    # save info to json file
+    with open( json_output_file, 'wb' ) as output_file:
+        output_file.write( json.dumps( data_manager_dict ) )
+        output_file.write( "\n" )
+
+
+if __name__ == "__main__":  # run main() only when executed as a script, not when imported
+    main()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/directory_data_manager.xml	Sat May 07 16:59:47 2016 -0400
@@ -0,0 +1,86 @@
+<tool id="directory_data_manager" name="directory data manager" tool_type="manage_data" version="0.0.1">
+    <description>path inputer</description>
+    <command interpreter="python">
+        directory_data_manager.py
+        --data_table_name "directory_data" 
+        --json_output_file "${json_output_file}"
+    </command>
+    <inputs>
+        <param name="path" type="text" value="" label="path field for the entry" />
+        <param name="original_extension" type="text" value="" label="Extensions as found in directory." />
+        <param name="galaxy_extension" type="text" value="" label="Galaxy Extension." />
+        <param name="decompress" type="select" label="Will files need to be decompressed.">
+            <option value="Yes">Yes. Files should be decompressed (unzipped)</option>
+            <option value="No">No. Files are not compressed</option>
+        </param>
+        <param name="value" type="text" value="" label="value field for the entry.  Defaults to name if left blank." />
+        <param name="dbkey" type="text" value="" label="dbkey field for the entry.  Defaults to value if left blank." />
+        <param name="name" type="text" value="" label="name field for the entry. Defaults to the file name from path if left blank." />
+    </inputs>
+    <outputs>
+        <data name="json_output_file" format="data_manager_json"/>
+    </outputs>
+
+    <help>
+Adds information for downloading data to the directory_data data table.
+
+Required inputs
+===============
+
+path
+----
+
+Full path on the server where galaxy can find the files
+
+Original Extension
+------------------
+
+The extension as found on the server.
+
+Parts before the extension that could be used to split the data into subsections, such as Read1 and Read2, should not be included.
+
+Galaxy_Extension
+----------------
+
+The extension to give the file before loading into Galaxy so that Galaxy can detect the file type.
+
+This should be one as listed in galaxy/config/datatypes_conf.xml (or xml.sample)
+
+For example, use tabular for tsv, txt for text, and fasta rather than fa.
+
+Ideally use fastqsanger, fastqsolexa, fastqcssanger, or fastqillumina instead of just fastq as many tools need this level of detail.
+
+Decompress
+----------
+
+'Yes' to ask the tool to decompress the files otherwise 'No'.
+
+Any other value will cause an error. 
+
+Optional Inputs
+===============
+
+Name
+----
+
+If not provided, the last component of **path** (with any extension removed) is used.
+
+Value
+-----
+
+If value is not provided, the **name** will be used (or its default)
+
+dbkey
+-----
+
+If not provided, the **value** will be used (or its default)
+
+===
+
+The tool will check the path exists and that it holds at least one file with the required extension.
+
+    </help>
+    <citations>
+    </citations>
+
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_conf.xml	Sat May 07 16:59:47 2016 -0400
@@ -0,0 +1,17 @@
+<?xml version="1.0"?>
+<data_managers>
+    <data_manager tool_file="data_manager/directory_data_manager.xml" id="directory_data_manager" version="0.0.1">
+        <data_table name="directory_data">
+            <output>
+                <column name="value" />
+                <column name="dbkey" />
+                <column name="name" />
+                <column name="original_extension" />
+                <column name="galaxy_extension" />
+                <column name="decompress" />
+                <column name="path" />
+            </output>
+        </data_table>
+    </data_manager>
+
+</data_managers>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/directory_data.loc.sample	Sat May 07 16:59:47 2016 -0400
@@ -0,0 +1,19 @@
+#This file lists the directories that can be read in
+
+#This file has the format (white space characters are TAB characters):
+#
+#<unique_build_id>	<dbkey>	<display_name>	<original_extension>	<galaxy_extension>	<decompress>	<path>
+#
+#original_extension should not include the starting .
+#
+#galaxy_extension should be one listed in galaxy/config/datatypes_conf.xml (or xml.sample)
+#
+#decompress should be No or Yes
+#
+#So, directory_data.loc could look something like this: (whitespace is tabs)
+#
+#john_12	john_12	John's fastq files batch 12	fastq.gz	fastqsanger	Yes	/data/john/batch12
+#
+#Your directory_data.loc file should contain an entry for each path and extension pair
+#
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample	Sat May 07 16:59:47 2016 -0400
@@ -0,0 +1,6 @@
+<tables>
+    <table name="directory_data" comment_char="#">
+        <columns>value, dbkey, name, original_extension, galaxy_extension, decompress, path</columns>
+        <file path="tool-data/directory_data.loc" />
+    </table>
+</tables>