Previous changeset 0:5f8d9309058b (2017-09-25) |
Commit message:
planemo upload for repository https://github.com/LUMC/lumc-galaxy-tools/tree/master/data_manager_select_index_by_path commit 9061997af3bc94f49653ffd42f10b973578e371d |
modified:
data_manager/data_manager_select_index_by_path.xml data_manager/indexes.yml data_manager/path_name_value_key_manager.py data_manager_conf.xml |
added:
README data_manager/.pytest_cache/v/cache/lastfailed data_manager/.pytest_cache/v/cache/nodeids data_manager/__pycache__/path_name_value_key_manager.cpython-35.pyc data_manager/__pycache__/test_path_name_value_key_manager.cpython-35-PYTEST.pyc tool-data/rnastar_index2.loc.sample |
removed:
test.json |
b |
diff -r 5f8d9309058b -r 8495c49cd056 README --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/README Mon Jul 16 10:58:36 2018 -0400 |
b |
@@ -0,0 +1,11 @@ +This is a fork of the data_manager_all_fasta_by_path data manager +by Christian-B (https://github.com/Christian-B) +(https://github.com/Christian-B/galaxy_shedtools/tree/master/all_fasta_by_path). +The all_fasta_by_path data manager was forked on 2017-09-07 from +Christian-B's galaxy_shedtools repository at commit d9f5343. +(https://github.com/Christian-B/galaxy_shedtools). + +The tool has drastically changed since then, with a complete refactoring +of the python script and the addition of unit tests for all functionality +within the python script. A lot of indexes are now supported in addition to +just the all_fasta one. |
b |
diff -r 5f8d9309058b -r 8495c49cd056 data_manager/.pytest_cache/v/cache/lastfailed --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager/.pytest_cache/v/cache/lastfailed Mon Jul 16 10:58:36 2018 -0400 |
b |
@@ -0,0 +1,1 @@ +{} \ No newline at end of file |
b |
diff -r 5f8d9309058b -r 8495c49cd056 data_manager/.pytest_cache/v/cache/nodeids --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager/.pytest_cache/v/cache/nodeids Mon Jul 16 10:58:36 2018 -0400 |
[ |
@@ -0,0 +1,27 @@ +[ + "test_path_name_value_key_manager.py::test_validate_indexes_yaml", + "test_path_name_value_key_manager.py::test_schema", + "test_path_name_value_key_manager.py::test_schema_fail", + "test_path_name_value_key_manager.py::test_application", + "test_path_name_value_key_manager.py::test_application_overwrite_file", + "test_path_name_value_key_manager.py::test_application_star_index", + "test_path_name_value_key_manager.py::test_application_star_index_fail_wrong_yaml", + "test_path_name_value_key_manager.py::test_check_tab", + "test_path_name_value_key_manager.py::test_check_tab_fail", + "test_path_name_value_key_manager.py::test_data_table", + "test_path_name_value_key_manager.py::test_non_existing_table", + "test_path_name_value_key_manager.py::test_rnastar_index_fail_no_extra_column", + "test_path_name_value_key_manager.py::test_rnastar_index_fail_wrong_dir", + "test_path_name_value_key_manager.py::test_all_fasta_table_fail_extra_columns", + "test_path_name_value_key_manager.py::test_all_fasta_table", + "test_path_name_value_key_manager.py::test_index_path_not_exist", + "test_path_name_value_key_manager.py::test_index_path_is_a_dir", + "test_path_name_value_key_manager.py::test_bowtie2_index", + "test_path_name_value_key_manager.py::test_bowtie2_index_fail", + "test_path_name_value_key_manager.py::test_bwa_index", + "test_path_name_value_key_manager.py::test_bowtie_index", + "test_path_name_value_key_manager.py::test_bowtie_index_color", + "test_path_name_value_key_manager.py::test_hisat2_index", + "test_path_name_value_key_manager.py::test_picard_index", + "test_path_name_value_key_manager.py::test_sam_index" +] \ No newline at end of file |
b |
diff -r 5f8d9309058b -r 8495c49cd056 data_manager/__pycache__/path_name_value_key_manager.cpython-35.pyc |
b |
Binary file data_manager/__pycache__/path_name_value_key_manager.cpython-35.pyc has changed |
b |
diff -r 5f8d9309058b -r 8495c49cd056 data_manager/__pycache__/test_path_name_value_key_manager.cpython-35-PYTEST.pyc |
b |
Binary file data_manager/__pycache__/test_path_name_value_key_manager.cpython-35-PYTEST.pyc has changed |
b |
diff -r 5f8d9309058b -r 8495c49cd056 data_manager/data_manager_select_index_by_path.xml --- a/data_manager/data_manager_select_index_by_path.xml Mon Sep 25 03:35:26 2017 -0400 +++ b/data_manager/data_manager_select_index_by_path.xml Mon Jul 16 10:58:36 2018 -0400 |
[ |
@@ -1,39 +1,56 @@ -<tool id="data_manager_select_index_by_path" name="Select index by path manager" tool_type="manage_data" version="0.0.2"> - <description>path inputer</description> - <command interpreter="python"> - path_name_value_key_manager.py +<tool id="data_manager_select_index_by_path" name="Select index by path manager" tool_type="manage_data" version="1.0.0a1" profile="18.09"> + <requirements> + <!-- Away with python 2! --> + <requirement type="package" version="3.5">python</requirement> + <requirement type="package" version="3.12">pyyaml</requirement> + <requirement type="package" version="0.6.6">schema</requirement> + </requirements> + <description>Link to indexes on the filesystem that have already been built</description> + <command detect_errors="exit_code"><![CDATA[ + python $__tool_directory__/path_name_value_key_manager.py --value "${value}" --dbkey "${dbkey}" --name "${name}" --path "${path}" - --data_table_name "${data_table}" + --data_table_name "${data_table_set.data_table}" --json_output_file "${json_output_file}" - </command> + #if $data_table_set.data_table == "rnastar_index2" + --extra-columns '{"with-gtf": "$data_table_set.with_gtf"}' + #end if + ]]></command> <inputs> <param name="value" type="text" value="" label="value field for the entry. Defaults to name if left blank." /> <param name="dbkey" type="text" value="" label="dbkey field for the entry. Defaults to value if left blank." /> <param name="name" type="text" value="" label="name field for the entry. Defaults to the file name from path if left blank." 
/> <param name="path" type="text" value="" label="path field for the entry" /> - <param name="data_table" type="select" value="" label="data table for the index"> - <option value='all_fasta'>all_fasta</option> - <option value='bowtie2_indexes'>bowtie2_indexes</option> - <option value='bowtie_indexes'>bowtie_indexes</option> - <option value='bowtie_indexes_color'>bowtie_indexes_color</option> - <option value='bwa_mem_indexes'>bwa_mem_indexes</option> - <option value='bwameth_indexes'>bwameth_indexes</option> - <option value='fasta_indexes'>fasta_indexes</option> - <option value='gatk_picard_indexes'>gatk_picard_indexes</option> - <option value='gene_transfer'>gene_transfer</option> - <option value='hisat2_indexes'>hisat2_indexes</option> - <option value='kallisto_indexes'>kallisto_indexes</option> - <option value='picard_indexes'>picard_indexes</option> - <option value='tophat2_indexes'>tophat2_indexes</option> - </param> + <conditional name="data_table_set"> + <param name="data_table" type="select" value="" label="data table for the index"> + <option value='all_fasta'>all_fasta</option> + <option value='bowtie2_indexes'>bowtie2_indexes</option> + <option value='bowtie_indexes'>bowtie_indexes</option> + <option value='bowtie_indexes_color'>bowtie_indexes_color</option> + <option value='bwa_mem_indexes'>bwa_mem_indexes</option> + <option value='bwameth_indexes'>bwameth_indexes</option> + <option value='fasta_indexes'>fasta_indexes</option> + <option value='gatk_picard_indexes'>gatk_picard_indexes</option> + <option value='gene_transfer'>gene_transfer</option> + <option value='hisat2_indexes'>hisat2_indexes</option> + <option value='kallisto_indexes'>kallisto_indexes</option> + <option value='picard_indexes'>picard_indexes</option> + <option value='tophat2_indexes'>tophat2_indexes</option> + <option value="rnastar_index2">rnastar_index2</option> + </param> + <when value="rnastar_index2"> + <param name="with_gtf" type="select" value="" label="Index with embedded gtf?"> 
+ <option value="0">No</option> + <option value="1">Yes</option> + </param> + </when> + </conditional> </inputs> <outputs> <data name="json_output_file" format="data_manager_json"/> </outputs> - <help> Adds a server path to the selected data table. |
b |
diff -r 5f8d9309058b -r 8495c49cd056 data_manager/indexes.yml --- a/data_manager/indexes.yml Mon Sep 25 03:35:26 2017 -0400 +++ b/data_manager/indexes.yml Mon Jul 16 10:58:36 2018 -0400 |
[ |
@@ -1,20 +1,63 @@ +--- +# This file contains information about all the indexes. +# +# Top keys are table names as used in Galaxy. +# These names can be viewed in the 'local data' part of the admin menu +# +# Keys for each table +# name: +# (STRING) The name of the index. +# This is used for error reporting in the program +# +# prefix: +# (BOOLEAN) whether the index is a prefix. For example +# for bwa_mem_indexes, the index path is 'reference.fa'. +# This is a prefix because all the reference files are: +# 'reference.fa.amb', 'reference.fa.ann' etc. +# +# prefix_strip_extension: +# (BOOLEAN) whether the prefix should be stripped +# of its extensions, i.e. from 'reference.fa' to +# 'reference'. For a picard index also a 'reference.dict' +# should be present, so the prefix needs to be stripped of +# its extension to look for the index files. +# +# extensions: +# (LIST[STRING]) a list of strings with the extensions: +# for example: +# extensions: +# - .fai +# +# folder: +# (LIST[STRING]) Use this when the index is not a prefix but a folder; +# the program will check if all the files in the list are present. +# If they are not, an exception will follow. +# +# extra_columns: +# (LIST[STRING]) Usual indexes have 4 columns in the data table: path, name, +# value, dbkey. But some indexes have additional columns. rnastar_index2 +# needs a 'with-gtf' column for instance. Add these columns to the list to +# make sure their presence or absence is checked. 
+ all_fasta: name: fasta file - extensions: - - .fa - no_prefix: True + prefix: false + bowtie2_indexes: name: bowtie2 index extensions: - .bt2 + bowtie_indexes: name: bowtie index extensions: - .ebwt + bowtie_indexes_color: name: bowtie color index extensions: - .ebwt + bwa_mem_indexes: name: bwa mem index extensions: @@ -23,27 +66,53 @@ - .bwt - .pac - .sa + bwameth_indexes: name: bwa_meth_index fasta_indexes: name: fasta index extensions: - .fai + gatk_picard_index: name: picard index for GATK + gene_transfer: name: Gene Transfer File extensions: - .gtf + hisat2_indexes: name: hisat2 index extensions: - .ht2 + kallisto_indexes: name: kallisto index - no_prefix: True + prefix: false + picard_indexes: name: picard index + prefix_strip_extension: true + extensions: + - ".fa" + - ".dict" + +rnastar_index2: + name: "Star index" + prefix: false + extra_columns: + - with-gtf + folder: + - chrLength.txt + - chrNameLength.txt + - chrStart.txt + - chrName.txt + - Genome + - SA + - SAindex + - genomeParameters.txt + tophat2_indexes: name: tophat2 index extensions: |
b |
diff -r 5f8d9309058b -r 8495c49cd056 data_manager/path_name_value_key_manager.py --- a/data_manager/path_name_value_key_manager.py Mon Sep 25 03:35:26 2017 -0400 +++ b/data_manager/path_name_value_key_manager.py Mon Jul 16 10:58:36 2018 -0400 |
[ |
b'@@ -1,104 +1,222 @@\n-#!/usr/bin/env python\n+#!/usr/bin/env python3\n+"""Script to create data manager jsons"""\n \n+import argparse\n import json\n-import argparse\n-import os\n+from pathlib import Path\n+\n import yaml\n+from schema import Schema, Optional\n \n-def _add_data_table_entry( data_manager_dict, data_table_name, data_table_entry ):\n- data_manager_dict[\'data_tables\'] = data_manager_dict.get( \'data_tables\', {} )\n- data_manager_dict[\'data_tables\'][ data_table_name ] = data_manager_dict[\'data_tables\'].get( data_table_name, [] )\n- data_manager_dict[\'data_tables\'][ data_table_name ].append( data_table_entry )\n- return data_manager_dict\n+\n+def indexes_schema():\n+ return Schema(\n+ {\'name\': str,\n+ Optional(\'prefix\'): bool,\n+ Optional(\'extensions\'): [str],\n+ Optional(\'prefix_strip_extension\'): bool,\n+ Optional(\'extra_columns\'): [str],\n+ Optional(\'folder\'): [str]})\n \n \n-def check_param(name, value, default=None, check_tab=True):\n- if value in [ None, \'\', \'?\' ]:\n- if default:\n- print "Using {0} for {1} as no value provided".format( default, name )\n- value = default\n- else:\n- raise Exception( \'{0} is not a valid {1}. You must specify a valid {1}.\'.format( value, name ) )\n- if check_tab and "\\t" in value:\n- raise Exception( \'{0} is not a valid {1}. 
It may not contain a tab because these are used as seperators by galaxy .\'.format( value, name ) )\n- return value\n+def argument_parser():\n+ parser = argparse.ArgumentParser()\n+ parser.add_argument(\'--value\', type=str, help=\'value\')\n+ parser.add_argument(\'--dbkey\', type=str, help=\'dbkey\')\n+ parser.add_argument(\'--name\', type=str, help=\'name\')\n+ parser.add_argument(\'--path\', type=Path, help=\'path\',\n+ required=True)\n+ parser.add_argument(\'--data_table_name\', action=\'store\', type=str,\n+ help=\'Name of the data table\',\n+ required=True)\n+ parser.add_argument(\'--json_output_file\', action=\'store\', type=Path,\n+ help=\'Json output file\',\n+ required=True)\n+ parser.add_argument("--extra-columns", type=str,\n+ help=\'Yaml formatted string with extra columns \'\n+ \'and their values. For example \'\n+ \'\\\'{"with-gtf":"0"}\\\' for STAR indexes\')\n+ return parser\n \n-def prefix_exists(directory, prefix):\n- \'\'\'checks if files exist with prefix in a directory. Returns Boolean\'\'\'\n- matched_files = []\n- directory_files = os.listdir(directory)\n- for directory_file in directory_files:\n- if directory_file.startswith(prefix):\n- matched_files.append(directory_file)\n- # Empty list should return False\n- return bool(matched_files)\n+\n+def check_tab(name: str, value: str):\n+ if \'\\t\' in value:\n+ raise ValueError(\n+ "\'{0}\' is not a valid \'{1}\'. It may not contain a tab because "\n+ "these are used as seperators by galaxy .".format(\n+ value, name))\n \n-def prefix_plus_extension_exists(directory, prefix, extension):\n- \'\'\'checks if files exist with prefix in a directory. 
Returns Boolean\'\'\'\n- matched_files = []\n- directory_files = os.listdir(directory)\n- for directory_file in directory_files:\n- if directory_file.startswith(prefix) and directory_file.endswith(extension):\n- matched_files.append(directory_file)\n+\n+def prefix_plus_extension_exists(directory: Path, prefix: str, extension: str):\n+ """checks if files exist with prefix in a directory. Returns Boolean"""\n+ matched_files = [directory_file for directory_file in directory.iterdir()\n+ if\n+ directory_file.name.startswith(\n+ prefix) and directory_file.suffix == extension]\n # Empty list should return False\n return bool(matched_files)\n \n-def main():\n+\n+class DataTable('..b'plus_extension_exists(self.index_path.parent,\n+ prefix, extension):\n+ raise FileNotFoundError(\n+ "Unable to find files with prefix \'{0}\' "\n+ "and extension \'{1}\' in {2}. Is this a valid {3}?"\n+ .format(\n+ prefix,\n+ extension,\n+ str(self.index_path.parent),\n+ index_name))\n+ elif self.index_properties.get(\'folder\') is not None:\n+ for file in self.index_properties.get(\'folder\'):\n+ if not (self.index_path / Path(file)).exists():\n+ raise FileNotFoundError(\n+ "A file named \'{0}\' was not found in \'{1}\'".format(\n+ file, str(self.index_path)))\n+ elif not self.index_path.exists() and not self.index_path.is_dir():\n+ raise FileNotFoundError(\n+ \'Unable to find path {0}.\'.format(self.index_path))\n+ elif self.index_path.is_dir() and self.index_properties.get(\n+ \'folder\') is None:\n+ raise IsADirectoryError(\n+ \'{0} is a directory not a file\'.format(self.index_path))\n+ elif self.index_path.exists():\n+ pass\n+ else:\n+ raise NotImplementedError("This condition was not expected "\n+ "and should not be reached. 
Please "\n+ "contact the developers.")\n+\n+ @property\n+ def data_manager_dict(self) -> dict:\n+ data_table_entry = dict(value=self.value, dbkey=self.dbkey,\n+ name=self.name,\n+ path=str(self.index_path),\n+ **self.extra_columns)\n+ data_manager_dict = dict(data_tables=dict())\n+ data_manager_dict["data_tables"][\n+ self.data_table_name] = [data_table_entry]\n+ return data_manager_dict\n+\n+ @property\n+ def data_manager_json(self) -> str:\n+ return json.dumps(self.data_manager_dict)\n+\n+\n+def main():\n+ options = argument_parser().parse_args()\n+\n+ if options.json_output_file.exists():\n+ pass # Do not raise error.\n+\n+ if options.extra_columns is None:\n+ extra_columns = dict()\n else:\n- if not os.path.exists(path):\n- raise Exception( \'Unable to find path {0}.\'.format( path ) )\n-\n- if os.path.exists(json_output_file):\n- params = json.loads( open( json_output_file ).read() )\n- print "params", params\n- else:\n- params = {}\n+ try:\n+ extra_columns = yaml.safe_load(options.extra_columns)\n+ except yaml.parser.ParserError as e:\n+ raise yaml.parser.ParserError(\n+ "Invalid yaml string for --extra_indexes. 
\\nError {0}".format(\n+ e))\n \n- data_manager_dict = {}\n- data_table_entry = dict( value=value, dbkey=dbkey, name=name, path=path )\n- _add_data_table_entry( data_manager_dict, data_table_name, data_table_entry )\n+ index_properties_file = Path(__file__).parent / Path("indexes.yml")\n+ data_table = DataTable(index_path=options.path,\n+ data_table_name=options.data_table_name,\n+ name=options.name,\n+ value=options.value,\n+ dbkey=options.dbkey,\n+ indexes_properties_file=index_properties_file,\n+ extra_columns=extra_columns)\n \n- #save info to json file\n- with open( json_output_file, \'wb\' ) as output_file:\n- output_file.write( json.dumps( data_manager_dict ) )\n- output_file.write( "\\n" )\n+ # save info to json file\n+ with options.json_output_file.open(\'w\') as output_file:\n+ output_file.write(data_table.data_manager_json)\n+\n \n if __name__ == "__main__":\n main()\n' |
b |
diff -r 5f8d9309058b -r 8495c49cd056 data_manager_conf.xml --- a/data_manager_conf.xml Mon Sep 25 03:35:26 2017 -0400 +++ b/data_manager_conf.xml Mon Jul 16 10:58:36 2018 -0400 |
b |
@@ -1,6 +1,6 @@ <?xml version="1.0"?> <data_managers> - <data_manager tool_file="data_manager/data_manager_select_index_by_path.xml" id="data_manager_select_index_by_path" version="0.0.2"> + <data_manager tool_file="data_manager/data_manager_select_index_by_path.xml" id="data_manager_select_index_by_path" version="1.0.0a1"> <data_table name="all_fasta"> <output> <column name="value" /> @@ -105,6 +105,15 @@ <column name="path" /> </output> </data_table> + <data_table name="rnastar_index2"> + <output> + <column name="value" /> + <column name="dbkey" /> + <column name="name" /> + <column name="path" /> + <column name="with-gtf" /> + </output> + </data_table> </data_manager> </data_managers> |
b |
diff -r 5f8d9309058b -r 8495c49cd056 test.json --- a/test.json Mon Sep 25 03:35:26 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,1 +0,0 @@ -{"data_tables": {"all_fasta": [{"path": "test-data/EboVir3.fa", "dbkey": "EboVir3", "name": "EboVir3", "value": "EboVir3"}]}} |
b |
diff -r 5f8d9309058b -r 8495c49cd056 tool-data/rnastar_index2.loc.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/rnastar_index2.loc.sample Mon Jul 16 10:58:36 2018 -0400 |
b |
@@ -0,0 +1,23 @@ +#This is a sample file distributed with Galaxy that enables tools +#to use a directory of rna-star indexed sequences data files. You will +#need to create these data files and then create a rnastar_index2.loc +#file similar to this one (store it in this directory) that points to +#the directories in which those files are stored. The rnastar_index2.loc +#file has this format (longer white space characters are TAB characters): +# +#<unique_build_id> <dbkey> <display_name> <file_base_path> <with-gtf> +# +#The <with-gtf> column should be 1 or 0, indicating whether the index was made +#with an annotation (i.e., --sjdbGTFfile and --sjdbOverhang were used) or not, +#respectively. +# +#Note that STAR indices can become quite large. Consequently, it is only +#advisable to create indices with annotations if it's known ahead of time that +#(A) the annotations won't be frequently updated and (B) the read lengths used +#will also rarely vary. If either of these is not the case, it's advisable to +#create indices without annotations and then specify an annotation file and +#maximum read length (minus 1) when running STAR. +# +#hg19 hg19 hg19 full /mnt/galaxyIndices/genomes/hg19/rnastar 0 +#hg19Ensembl hg19Ensembl hg19 full with Ensembl annotation /mnt/galaxyIndices/genomes/hg19Ensembl/rnastar 1 + |