Repository 'data_manager_select_index_by_path'
hg clone https://toolshed.g2.bx.psu.edu/repos/rhpvorderman/data_manager_select_index_by_path

Changeset 1:8495c49cd056 (2018-07-16)
Previous changeset 0:5f8d9309058b (2017-09-25)
Commit message:
planemo upload for repository https://github.com/LUMC/lumc-galaxy-tools/tree/master/data_manager_select_index_by_path commit 9061997af3bc94f49653ffd42f10b973578e371d
modified:
data_manager/data_manager_select_index_by_path.xml
data_manager/indexes.yml
data_manager/path_name_value_key_manager.py
data_manager_conf.xml
added:
README
data_manager/.pytest_cache/v/cache/lastfailed
data_manager/.pytest_cache/v/cache/nodeids
data_manager/__pycache__/path_name_value_key_manager.cpython-35.pyc
data_manager/__pycache__/test_path_name_value_key_manager.cpython-35-PYTEST.pyc
tool-data/rnastar_index2.loc.sample
removed:
test.json
b
diff -r 5f8d9309058b -r 8495c49cd056 README
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/README Mon Jul 16 10:58:36 2018 -0400
b
@@ -0,0 +1,11 @@
+This is a fork of the data_manager_all_fasta_by_path data manager
+by Christian-B (https://github.com/Christian-B)
+(https://github.com/Christian-B/galaxy_shedtools/tree/master/all_fasta_by_path).
+The all_fasta_by_path data manager was forked on 2017-09-07 from
+Christian-B's galaxy_shedtools repository at commit d9f5343.
+(https://github.com/Christian-B/galaxy_shedtools).
+
+The tool has drastically changed since then, with a complete refactoring
+of the python script and the addition of unit tests for all functionality
+within the python script. A lot of indexes are now supported in addition to
+just the all_fasta one.
b
diff -r 5f8d9309058b -r 8495c49cd056 data_manager/.pytest_cache/v/cache/lastfailed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/.pytest_cache/v/cache/lastfailed Mon Jul 16 10:58:36 2018 -0400
b
@@ -0,0 +1,1 @@
+{}
\ No newline at end of file
b
diff -r 5f8d9309058b -r 8495c49cd056 data_manager/.pytest_cache/v/cache/nodeids
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/.pytest_cache/v/cache/nodeids Mon Jul 16 10:58:36 2018 -0400
[
@@ -0,0 +1,27 @@
+[
+  "test_path_name_value_key_manager.py::test_validate_indexes_yaml",
+  "test_path_name_value_key_manager.py::test_schema",
+  "test_path_name_value_key_manager.py::test_schema_fail",
+  "test_path_name_value_key_manager.py::test_application",
+  "test_path_name_value_key_manager.py::test_application_overwrite_file",
+  "test_path_name_value_key_manager.py::test_application_star_index",
+  "test_path_name_value_key_manager.py::test_application_star_index_fail_wrong_yaml",
+  "test_path_name_value_key_manager.py::test_check_tab",
+  "test_path_name_value_key_manager.py::test_check_tab_fail",
+  "test_path_name_value_key_manager.py::test_data_table",
+  "test_path_name_value_key_manager.py::test_non_existing_table",
+  "test_path_name_value_key_manager.py::test_rnastar_index_fail_no_extra_column",
+  "test_path_name_value_key_manager.py::test_rnastar_index_fail_wrong_dir",
+  "test_path_name_value_key_manager.py::test_all_fasta_table_fail_extra_columns",
+  "test_path_name_value_key_manager.py::test_all_fasta_table",
+  "test_path_name_value_key_manager.py::test_index_path_not_exist",
+  "test_path_name_value_key_manager.py::test_index_path_is_a_dir",
+  "test_path_name_value_key_manager.py::test_bowtie2_index",
+  "test_path_name_value_key_manager.py::test_bowtie2_index_fail",
+  "test_path_name_value_key_manager.py::test_bwa_index",
+  "test_path_name_value_key_manager.py::test_bowtie_index",
+  "test_path_name_value_key_manager.py::test_bowtie_index_color",
+  "test_path_name_value_key_manager.py::test_hisat2_index",
+  "test_path_name_value_key_manager.py::test_picard_index",
+  "test_path_name_value_key_manager.py::test_sam_index"
+]
\ No newline at end of file
b
diff -r 5f8d9309058b -r 8495c49cd056 data_manager/__pycache__/path_name_value_key_manager.cpython-35.pyc
b
Binary file data_manager/__pycache__/path_name_value_key_manager.cpython-35.pyc has changed
b
diff -r 5f8d9309058b -r 8495c49cd056 data_manager/__pycache__/test_path_name_value_key_manager.cpython-35-PYTEST.pyc
b
Binary file data_manager/__pycache__/test_path_name_value_key_manager.cpython-35-PYTEST.pyc has changed
b
diff -r 5f8d9309058b -r 8495c49cd056 data_manager/data_manager_select_index_by_path.xml
--- a/data_manager/data_manager_select_index_by_path.xml Mon Sep 25 03:35:26 2017 -0400
+++ b/data_manager/data_manager_select_index_by_path.xml Mon Jul 16 10:58:36 2018 -0400
[
@@ -1,39 +1,56 @@
-<tool id="data_manager_select_index_by_path" name="Select index by path manager" tool_type="manage_data" version="0.0.2">
-    <description>path inputer</description>
-    <command interpreter="python">
-        path_name_value_key_manager.py
+<tool id="data_manager_select_index_by_path" name="Select index by path manager" tool_type="manage_data" version="1.0.0a1" profile="18.09">
+    <requirements>
+        <!-- Away with python 2! -->
+        <requirement type="package" version="3.5">python</requirement>
+        <requirement type="package" version="3.12">pyyaml</requirement>
+        <requirement type="package" version="0.6.6">schema</requirement>
+    </requirements>
+    <description>Link to indexes on the filesystem that have already been built</description>
+    <command detect_errors="exit_code"><![CDATA[
+        python $__tool_directory__/path_name_value_key_manager.py
         --value "${value}"
         --dbkey "${dbkey}"
         --name "${name}"
         --path "${path}"
-        --data_table_name "${data_table}"
+        --data_table_name "${data_table_set.data_table}"
         --json_output_file "${json_output_file}"
-    </command>
+        #if $data_table_set.data_table == "rnastar_index2"
+        --extra-columns '{"with-gtf": "$data_table_set.with_gtf"}'
+        #end if
+        ]]></command>
     <inputs>
         <param name="value" type="text" value="" label="value field for the entry.  Defaults to name if left blank." />
         <param name="dbkey" type="text" value="" label="dbkey field for the entry.  Defaults to value if left blank." />
         <param name="name" type="text" value="" label="name field for the entry. Defaults to the file name from path if left blank." />
         <param name="path" type="text" value="" label="path field for the entry" />
-        <param name="data_table" type="select" value="" label="data table for the index">
-          <option value='all_fasta'>all_fasta</option>
-          <option value='bowtie2_indexes'>bowtie2_indexes</option>
-          <option value='bowtie_indexes'>bowtie_indexes</option>
-          <option value='bowtie_indexes_color'>bowtie_indexes_color</option>
-          <option value='bwa_mem_indexes'>bwa_mem_indexes</option>
-          <option value='bwameth_indexes'>bwameth_indexes</option>
-          <option value='fasta_indexes'>fasta_indexes</option>
-          <option value='gatk_picard_indexes'>gatk_picard_indexes</option>
-          <option value='gene_transfer'>gene_transfer</option>
-          <option value='hisat2_indexes'>hisat2_indexes</option>
-          <option value='kallisto_indexes'>kallisto_indexes</option>
-          <option value='picard_indexes'>picard_indexes</option>
-          <option value='tophat2_indexes'>tophat2_indexes</option>
-        </param>
+        <conditional name="data_table_set">
+            <param name="data_table" type="select" value="" label="data table for the index">
+                <option value='all_fasta'>all_fasta</option>
+                <option value='bowtie2_indexes'>bowtie2_indexes</option>
+                <option value='bowtie_indexes'>bowtie_indexes</option>
+                <option value='bowtie_indexes_color'>bowtie_indexes_color</option>
+                <option value='bwa_mem_indexes'>bwa_mem_indexes</option>
+                <option value='bwameth_indexes'>bwameth_indexes</option>
+                <option value='fasta_indexes'>fasta_indexes</option>
+                <option value='gatk_picard_indexes'>gatk_picard_indexes</option>
+                <option value='gene_transfer'>gene_transfer</option>
+                <option value='hisat2_indexes'>hisat2_indexes</option>
+                <option value='kallisto_indexes'>kallisto_indexes</option>
+                <option value='picard_indexes'>picard_indexes</option>
+                <option value='tophat2_indexes'>tophat2_indexes</option>
+                <option value="rnastar_index2">rnastar_index2</option>
+            </param>
+            <when value="rnastar_index2">
+                <param name="with_gtf" type="select" value="" label="Index with embedded gtf?">
+                    <option value="0">No</option>
+                    <option value="1">Yes</option>
+                </param>
+            </when>
+        </conditional>
     </inputs>
     <outputs>
         <data name="json_output_file" format="data_manager_json"/>
     </outputs>
-
     <help>
 Adds a server path to the selected data table.
 
b
diff -r 5f8d9309058b -r 8495c49cd056 data_manager/indexes.yml
--- a/data_manager/indexes.yml Mon Sep 25 03:35:26 2017 -0400
+++ b/data_manager/indexes.yml Mon Jul 16 10:58:36 2018 -0400
[
@@ -1,20 +1,63 @@
+---
+# This file contains information about all the indexes.
+#
+# Top keys are table names as used in Galaxy.
+# These names can be viewed in the 'local data' part of the admin menu
+#
+# Keys for each table
+#  name:
+#    (STRING) The name of the index.
+#    This is used for error reporting in the program
+#
+#  prefix:
+#    (BOOLEAN) whether the index is a prefix. For example
+#    for bwa_mem_indexes, the index path is 'reference.fa'.
+#    This is a prefix because all the reference files are:
+#    'reference.fa.amb', 'reference.fa.ann' etc.
+#
+#  prefix_strip_extension:
+#    (BOOLEAN) whether the prefix should be stripped
+#    of its extension, i.e. from 'reference.fa' to
+#    'reference'. A picard index also requires a
+#    'reference.dict' file, so the prefix needs to be stripped of
+#    its extension to look for the index files.
+#
+#  extensions:
+#    (LIST[STRING]) a list of strings with the extensions:
+#      for example:
+#      extensions:
+#        - .fai
+#
+#  folder:
+#    (LIST[STRING]) Use this when the index is not a prefix but a folder.
+#    The program will check whether all the files in the list are present.
+#    If they are not, an exception will be raised.
+#
+#  extra_columns:
+#    (LIST[STRING]) Most indexes have 4 columns in the data table: path, name,
+#    value, dbkey. But some indexes have additional columns. rnastar_index2
+#    needs a 'with-gtf' column, for instance. Add these columns to the list to
+#    make sure their presence or absence is checked.
+
 all_fasta:
   name: fasta file
-  extensions:
-    - .fa
-  no_prefix: True
+  prefix: false
+
 bowtie2_indexes:
   name: bowtie2 index
   extensions:
     - .bt2
+
 bowtie_indexes:
   name: bowtie index
   extensions:
     - .ebwt
+
 bowtie_indexes_color:
   name: bowtie color index
   extensions:
     - .ebwt
+
 bwa_mem_indexes:
   name: bwa mem index
   extensions:
@@ -23,27 +66,53 @@
     - .bwt
     - .pac
     - .sa
+
 bwameth_indexes:
   name: bwa_meth_index
 fasta_indexes:
   name: fasta index
   extensions:
     - .fai
+
 gatk_picard_index:
   name: picard index for GATK
+
 gene_transfer:
   name: Gene Transfer File
   extensions:
     - .gtf
+
 hisat2_indexes:
   name: hisat2 index
   extensions:
     - .ht2
+
 kallisto_indexes:
   name: kallisto index
-  no_prefix: True
+  prefix: false
+
 picard_indexes:
   name: picard index
+  prefix_strip_extension: true
+  extensions:
+    - ".fa"
+    - ".dict"
+
+rnastar_index2:
+  name: "Star index"
+  prefix: false
+  extra_columns:
+    - with-gtf
+  folder:
+    - chrLength.txt
+    - chrNameLength.txt
+    - chrStart.txt
+    - chrName.txt
+    - Genome
+    - SA
+    - SAindex
+    - genomeParameters.txt
+
 tophat2_indexes:
   name: tophat2 index
   extensions:
b
diff -r 5f8d9309058b -r 8495c49cd056 data_manager/path_name_value_key_manager.py
--- a/data_manager/path_name_value_key_manager.py Mon Sep 25 03:35:26 2017 -0400
+++ b/data_manager/path_name_value_key_manager.py Mon Jul 16 10:58:36 2018 -0400
[
b'@@ -1,104 +1,222 @@\n-#!/usr/bin/env python\n+#!/usr/bin/env python3\n+"""Script to create data manager jsons"""\n \n+import argparse\n import json\n-import argparse\n-import os\n+from pathlib import Path\n+\n import yaml\n+from schema import Schema, Optional\n \n-def _add_data_table_entry( data_manager_dict, data_table_name, data_table_entry ):\n-    data_manager_dict[\'data_tables\'] = data_manager_dict.get( \'data_tables\', {} )\n-    data_manager_dict[\'data_tables\'][ data_table_name ] = data_manager_dict[\'data_tables\'].get( data_table_name, [] )\n-    data_manager_dict[\'data_tables\'][ data_table_name ].append( data_table_entry )\n-    return data_manager_dict\n+\n+def indexes_schema():\n+    return Schema(\n+        {\'name\': str,\n+         Optional(\'prefix\'): bool,\n+         Optional(\'extensions\'): [str],\n+         Optional(\'prefix_strip_extension\'): bool,\n+         Optional(\'extra_columns\'): [str],\n+         Optional(\'folder\'): [str]})\n \n \n-def check_param(name, value, default=None,  check_tab=True):\n-    if value in [ None, \'\', \'?\' ]:\n-        if default:\n-            print "Using {0} for {1} as no value provided".format( default, name )\n-            value = default\n-        else:\n-            raise Exception( \'{0} is not a valid {1}. You must specify a valid {1}.\'.format( value, name ) )\n-    if check_tab and "\\t" in value:\n-        raise Exception( \'{0} is not a valid {1}. 
It may not contain a tab because these are used as seperators by galaxy .\'.format( value, name ) )\n-    return value\n+def argument_parser():\n+    parser = argparse.ArgumentParser()\n+    parser.add_argument(\'--value\', type=str, help=\'value\')\n+    parser.add_argument(\'--dbkey\', type=str, help=\'dbkey\')\n+    parser.add_argument(\'--name\', type=str, help=\'name\')\n+    parser.add_argument(\'--path\', type=Path, help=\'path\',\n+                        required=True)\n+    parser.add_argument(\'--data_table_name\', action=\'store\', type=str,\n+                        help=\'Name of the data table\',\n+                        required=True)\n+    parser.add_argument(\'--json_output_file\', action=\'store\', type=Path,\n+                        help=\'Json output file\',\n+                        required=True)\n+    parser.add_argument("--extra-columns", type=str,\n+                        help=\'Yaml formatted string with extra columns \'\n+                             \'and their values. For example \'\n+                             \'\\\'{"with-gtf":"0"}\\\' for STAR indexes\')\n+    return parser\n \n-def prefix_exists(directory, prefix):\n-    \'\'\'checks if files exist with prefix in a directory. Returns Boolean\'\'\'\n-    matched_files = []\n-    directory_files = os.listdir(directory)\n-    for directory_file in directory_files:\n-        if directory_file.startswith(prefix):\n-            matched_files.append(directory_file)\n-    # Empty list should return False\n-    return bool(matched_files)\n+\n+def check_tab(name: str, value: str):\n+    if \'\\t\' in value:\n+        raise ValueError(\n+            "\'{0}\' is not a valid \'{1}\'. It may not contain a tab because "\n+            "these are used as seperators by galaxy .".format(\n+                value, name))\n \n-def prefix_plus_extension_exists(directory, prefix, extension):\n-    \'\'\'checks if files exist with prefix in a directory. 
Returns Boolean\'\'\'\n-    matched_files = []\n-    directory_files = os.listdir(directory)\n-    for directory_file in directory_files:\n-        if directory_file.startswith(prefix) and directory_file.endswith(extension):\n-            matched_files.append(directory_file)\n+\n+def prefix_plus_extension_exists(directory: Path, prefix: str, extension: str):\n+    """checks if files exist with prefix in a directory. Returns Boolean"""\n+    matched_files = [directory_file for directory_file in directory.iterdir()\n+                     if\n+                     directory_file.name.startswith(\n+                         prefix) and directory_file.suffix == extension]\n     # Empty list should return False\n     return bool(matched_files)\n \n-def main():\n+\n+class DataTable('..b'plus_extension_exists(self.index_path.parent,\n+                                                    prefix, extension):\n+                    raise FileNotFoundError(\n+                        "Unable to find files with prefix \'{0}\' "\n+                        "and extension \'{1}\' in {2}. 
Is this a valid {3}?"\n+                        .format(\n+                            prefix,\n+                            extension,\n+                            str(self.index_path.parent),\n+                            index_name))\n+        elif self.index_properties.get(\'folder\') is not None:\n+            for file in self.index_properties.get(\'folder\'):\n+                if not (self.index_path / Path(file)).exists():\n+                    raise FileNotFoundError(\n+                        "A file named \'{0}\' was not found in \'{1}\'".format(\n+                            file, str(self.index_path)))\n+        elif not self.index_path.exists() and not self.index_path.is_dir():\n+            raise FileNotFoundError(\n+                \'Unable to find path {0}.\'.format(self.index_path))\n+        elif self.index_path.is_dir() and self.index_properties.get(\n+                \'folder\') is None:\n+            raise IsADirectoryError(\n+                \'{0} is a directory not a file\'.format(self.index_path))\n+        elif self.index_path.exists():\n+            pass\n+        else:\n+            raise NotImplementedError("This condition was not expected "\n+                                      "and should not be reached. 
Please "\n+                                      "contact the developers.")\n+\n+    @property\n+    def data_manager_dict(self) -> dict:\n+        data_table_entry = dict(value=self.value, dbkey=self.dbkey,\n+                                name=self.name,\n+                                path=str(self.index_path),\n+                                **self.extra_columns)\n+        data_manager_dict = dict(data_tables=dict())\n+        data_manager_dict["data_tables"][\n+            self.data_table_name] = [data_table_entry]\n+        return data_manager_dict\n+\n+    @property\n+    def data_manager_json(self) -> str:\n+        return json.dumps(self.data_manager_dict)\n+\n+\n+def main():\n+    options = argument_parser().parse_args()\n+\n+    if options.json_output_file.exists():\n+        pass  # Do not raise error.\n+\n+    if options.extra_columns is None:\n+        extra_columns = dict()\n     else:\n-        if not os.path.exists(path):\n-            raise Exception( \'Unable to find path {0}.\'.format( path ) )\n-\n-    if os.path.exists(json_output_file):\n-        params = json.loads( open( json_output_file ).read() )\n-        print "params", params\n-    else:\n-        params = {}\n+        try:\n+            extra_columns = yaml.safe_load(options.extra_columns)\n+        except yaml.parser.ParserError as e:\n+            raise yaml.parser.ParserError(\n+                "Invalid yaml string for --extra_indexes. 
\\nError {0}".format(\n+                    e))\n \n-    data_manager_dict = {}\n-    data_table_entry = dict( value=value, dbkey=dbkey, name=name, path=path )\n-    _add_data_table_entry( data_manager_dict, data_table_name, data_table_entry )\n+    index_properties_file = Path(__file__).parent / Path("indexes.yml")\n+    data_table = DataTable(index_path=options.path,\n+                           data_table_name=options.data_table_name,\n+                           name=options.name,\n+                           value=options.value,\n+                           dbkey=options.dbkey,\n+                           indexes_properties_file=index_properties_file,\n+                           extra_columns=extra_columns)\n \n-    #save info to json file\n-    with open( json_output_file, \'wb\' ) as output_file:\n-        output_file.write( json.dumps( data_manager_dict ) )\n-        output_file.write( "\\n" )\n+    # save info to json file\n+    with options.json_output_file.open(\'w\') as output_file:\n+        output_file.write(data_table.data_manager_json)\n+\n \n if __name__ == "__main__":\n     main()\n'
b
diff -r 5f8d9309058b -r 8495c49cd056 data_manager_conf.xml
--- a/data_manager_conf.xml Mon Sep 25 03:35:26 2017 -0400
+++ b/data_manager_conf.xml Mon Jul 16 10:58:36 2018 -0400
b
@@ -1,6 +1,6 @@
 <?xml version="1.0"?>
 <data_managers>
-    <data_manager tool_file="data_manager/data_manager_select_index_by_path.xml" id="data_manager_select_index_by_path" version="0.0.2">
+    <data_manager tool_file="data_manager/data_manager_select_index_by_path.xml" id="data_manager_select_index_by_path" version="1.0.0a1">
         <data_table name="all_fasta">
             <output>
                 <column name="value" />
@@ -105,6 +105,15 @@
                 <column name="path" />
             </output>
         </data_table>
+        <data_table name="rnastar_index2">
+            <output>
+                <column name="value" />
+                <column name="dbkey" />
+                <column name="name" />
+                <column name="path" />
+                <column name="with-gtf" />
+            </output>
+        </data_table>
     </data_manager>
 
 </data_managers>
b
diff -r 5f8d9309058b -r 8495c49cd056 test.json
--- a/test.json Mon Sep 25 03:35:26 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,1 +0,0 @@
-{"data_tables": {"all_fasta": [{"path": "test-data/EboVir3.fa", "dbkey": "EboVir3", "name": "EboVir3", "value": "EboVir3"}]}}
b
diff -r 5f8d9309058b -r 8495c49cd056 tool-data/rnastar_index2.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/rnastar_index2.loc.sample Mon Jul 16 10:58:36 2018 -0400
b
@@ -0,0 +1,23 @@
+#This is a sample file distributed with Galaxy that enables tools
+#to use a directory of rna-star indexed sequences data files. You will
+#need to create these data files and then create a rnastar_index2.loc
+#file similar to this one (store it in this directory) that points to
+#the directories in which those files are stored. The rnastar_index2.loc
+#file has this format (longer white space characters are TAB characters):
+#
+#<unique_build_id>   <dbkey>   <display_name>   <file_base_path> <with-gtf>
+#
+#The <with-gtf> column should be 1 or 0, indicating whether the index was made
+#with an annotation (i.e., --sjdbGTFfile and --sjdbOverhang were used) or not,
+#respectively.
+#
+#Note that STAR indices can become quite large. Consequently, it is only
+#advisable to create indices with annotations if it's known ahead of time that
+#(A) the annotations won't be frequently updated and (B) the read lengths used
+#will also rarely vary. If either of these is not the case, it's advisable to
+#create indices without annotations and then specify an annotation file and
+#maximum read length (minus 1) when running STAR.
+#
+#hg19   hg19    hg19 full   /mnt/galaxyIndices/genomes/hg19/rnastar 0
+#hg19Ensembl   hg19Ensembl    hg19 full with Ensembl annotation   /mnt/galaxyIndices/genomes/hg19Ensembl/rnastar 1
+