Mercurial > repos > dfornika > data_manager_build_kraken2_database
changeset 18:f005b6efd096 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_build_kraken2_database/ commit a6877055190331683367394d1d1ca6ff47cf4fa7-dirty
author | dfornika |
---|---|
date | Fri, 24 May 2019 13:39:08 -0400 |
parents | 4c9f9d6098eb |
children | ffeb852407d6 |
files | data_manager/kraken2_build_database.py data_manager/kraken2_build_database.xml data_manager_conf.xml test-data/adapter.fa test-data/adapter.fastq test-data/kraken2_custom_data_manager.json |
diffstat | 6 files changed, 95 insertions(+), 55 deletions(-) [+] |
line wrap: on
line diff
--- a/data_manager/kraken2_build_database.py Mon May 06 19:42:14 2019 -0400 +++ b/data_manager/kraken2_build_database.py Fri May 24 13:39:08 2019 -0400 @@ -50,7 +50,7 @@ return self.value -def kraken2_build_standard(data_manager_dict, kraken2_args, target_directory, data_table_name=DATA_TABLE_NAME): +def kraken2_build_standard(kraken2_args, target_directory, data_table_name=DATA_TABLE_NAME): now = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H%M%SZ") database_value = "_".join([ @@ -92,15 +92,21 @@ subprocess.check_call(['kraken2-build'] + args, cwd=target_directory) data_table_entry = { - "value": database_value, - "name": database_name, - "path": database_path, + 'data_tables': { + data_table_name: [ + { + "value": database_value, + "name": database_name, + "path": database_path, + } + ] + } } - _add_data_table_entry(data_manager_dict, data_table_entry) + return data_table_entry -def kraken2_build_minikraken(data_manager_dict, minikraken2_version, target_directory, data_table_name=DATA_TABLE_NAME): +def kraken2_build_minikraken(minikraken2_version, target_directory, data_table_name=DATA_TABLE_NAME): now = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H%M%SZ") @@ -118,6 +124,8 @@ now + ")" ]) + database_path = database_value + # download the minikraken2 data src = urlopen( 'ftp://ftp.ccb.jhu.edu/pub/data/kraken2_dbs/minikraken2_%s_8GB_201904_UPDATE.tgz' @@ -127,18 +135,27 @@ shutil.copyfileobj(src, dst) # unpack the downloaded archive to the target directory with tarfile.open('tmp_data.tar.gz', 'r:gz') as fh: - fh.extractall(target_directory) + for member in fh.getmembers(): + if member.isreg(): + member.name = os.path.basename(member.name) + fh.extract(member, os.path.join(target_directory, database_path)) data_table_entry = { - "value": database_value, - "name": database_name, - "path": database_value, + 'data_tables': { + data_table_name: [ + { + "value": database_value, + "name": database_name, + "path": database_path, + } + ] + } } - _add_data_table_entry(data_manager_dict, data_table_entry) + return data_table_entry -def kraken2_build_special(data_manager_dict, kraken2_args, target_directory, data_table_name=DATA_TABLE_NAME): +def kraken2_build_special(kraken2_args, target_directory, data_table_name=DATA_TABLE_NAME): now = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H%M%SZ") @@ -187,22 +204,31 @@ subprocess.check_call(['kraken2-build'] + args, cwd=target_directory) data_table_entry = { - "value": database_value, - "name": database_name, - "path": database_path, + 'data_tables': { + data_table_name: [ + { + "value": database_value, + "name": database_name, + "path": database_path, + } + ] + } } - _add_data_table_entry(data_manager_dict, data_table_entry) + return data_table_entry -def kraken2_build_custom(data_manager_dict, kraken2_args, custom_database_name, target_directory, data_table_name=DATA_TABLE_NAME): +def kraken2_build_custom(kraken2_args, custom_database_name, target_directory, data_table_name=DATA_TABLE_NAME): args = [ '--threads', str(kraken2_args["threads"]), '--download-taxonomy', - '--db', custom_database_name + '--db', custom_database_name, ] + if kraken2_args['skip_maps']: + args.append('--skip-maps') + subprocess.check_call(['kraken2-build'] + args, cwd=target_directory) args = [ @@ -230,22 +256,21 @@ '--db', custom_database_name ] - subprocess.check_call(['kraken2-build'] + args, target_directory) + subprocess.check_call(['kraken2-build'] + args, cwd=target_directory) data_table_entry = { - "value": custom_database_name, - "name": custom_database_name, - "path": custom_database_name + 'data_tables': { + data_table_name: [ + { + "value": custom_database_name, + "name": custom_database_name, + "path": custom_database_name + } + ] + } } - _add_data_table_entry(data_manager_dict, data_table_name, data_table_entry) - - -def _add_data_table_entry(data_manager_dict, data_table_entry, data_table_name=DATA_TABLE_NAME): - data_manager_dict['data_tables'] = data_manager_dict.get( 'data_tables', {} ) - data_manager_dict['data_tables'][data_table_name] = data_manager_dict['data_tables'].get( data_table_name, [] ) - data_manager_dict['data_tables'][data_table_name].append( data_table_entry ) - return data_manager_dict + return data_table_entry def main(): @@ -259,7 +284,8 @@ parser.add_argument('--minikraken2-version', dest='minikraken2_version', type=Minikraken2Versions, choices=list(Minikraken2Versions), help='MiniKraken2 version (only applies to --database-type minikraken)') parser.add_argument('--special-database-type', dest='special_database_type', type=SpecialDatabaseTypes, choices=list(SpecialDatabaseTypes), help='type of special database to build (only applies to --database-type special)') parser.add_argument('--custom-fasta', dest='custom_fasta', help='fasta file for custom database (only applies to --database-type custom)') - parser.add_argument( '--custom-database-name', dest='custom_database_name', help='Name for custom database (only applies to --database-type custom)' ) + parser.add_argument('--custom-database-name', dest='custom_database_name', help='Name for custom database (only applies to --database-type custom)') + parser.add_argument('--skip-maps', dest='skip_maps', action='store_true', help='') args = parser.parse_args() data_manager_input = json.loads(open(args.data_manager_json).read()) @@ -276,7 +302,6 @@ data_manager_output = {} - print(args.database_type) if str(args.database_type) == 'standard': kraken2_args = { "kmer_len": args.kmer_len, @@ -284,14 +309,12 @@ "minimizer_spaces": args.minimizer_spaces, "threads": args.threads, } - kraken2_build_standard( - data_manager_output, + data_manager_output = kraken2_build_standard( kraken2_args, target_directory, ) elif str(args.database_type) == 'minikraken': - kraken2_build_minikraken( - data_manager_output, + data_manager_output = kraken2_build_minikraken( str(args.minikraken2_version), target_directory ) @@ -303,21 +326,20 @@ "minimizer_spaces": args.minimizer_spaces, "threads": args.threads, } - kraken2_build_special( - data_manager_output, + data_manager_output = kraken2_build_special( kraken2_args, target_directory, ) elif str(args.database_type) == 'custom': kraken2_args = { "custom_fasta": args.custom_fasta, + "skip_maps": args.skip_maps, "kmer_len": args.kmer_len, "minimizer_len": args.minimizer_len, "minimizer_spaces": args.minimizer_spaces, "threads": args.threads, } - kraken2_build_custom( - data_manager_output, + data_manager_output = kraken2_build_custom( kraken2_args, args.custom_database_name, target_directory,
--- a/data_manager/kraken2_build_database.xml Mon May 06 19:42:14 2019 -0400 +++ b/data_manager/kraken2_build_database.xml Fri May 24 13:39:08 2019 -0400 @@ -1,5 +1,5 @@ <?xml version="1.0"?> -<tool id="kraken2_build_database" name="Kraken2 Database Builder" tool_type="manage_data" version="2.0.8_beta"> +<tool id="kraken2_build_database" name="Kraken2" tool_type="manage_data" version="2.0.8_beta"> <macros> <xml name="common_params"> <param name="kmer_len" type="integer" value="35" label="K-mer length in BP" /> @@ -32,6 +32,7 @@ --minimizer-spaces ${database_type.minimizer_spaces} #else if $database_type.database_type == "custom" --threads \${GALAXY_SLOTS:-1} + ${database_type.skip_maps} --custom-fasta ${database_type.custom_fasta} --custom-database-name ${database_type.custom_database_name} --kmer-len ${database_type.kmer_len} @@ -68,6 +69,7 @@ <when value="custom"> <param name="custom_fasta" type="data" format="fasta" multiple="False" optional="true" label="Select history item" /> <param name="custom_database_name" type="text" label="Name for this database" /> + <param name="skip_maps" type="boolean" truevalue="--skip-maps" falsevalue="" label="Skip downloading accession number to taxid maps during taxonomy download." /> <expand macro="common_params" /> </when> @@ -76,6 +78,15 @@ <outputs> <data name="out_file" format="data_manager_json" /> </outputs> + <tests> + <test> + <param name="database_type" value="custom" /> + <param name="custom_fasta" value="adapter.fa" /> + <param name="custom_database_name" value="database" /> + <param name="skip_maps" value="true" /> + <output name="out_file" value="kraken2_custom_data_manager.json" /> + </test> + </tests> <help> </help> <citations>
--- a/data_manager_conf.xml Mon May 06 19:42:14 2019 -0400 +++ b/data_manager_conf.xml Fri May 24 13:39:08 2019 -0400 @@ -1,18 +1,18 @@ <data_managers> - <data_manager tool_file="data_manager/kraken2_build_database.xml" id="kraken2_build_database" version="1.0"> - <data_table name="kraken2_databases"> - <output> - <column name="value"/> - <column name="name"/> - <column name="path" output_ref="out_file"> - <move type="directory"> - <source>${path}</source> - <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">kraken2_databases/${path}</target> - </move> + <data_manager tool_file="data_manager/kraken2_build_database.xml" id="kraken2_build_database" version="2.0.8_beta"> + <data_table name="kraken2_databases"> + <output> + <column name="value"/> + <column name="name"/> + <column name="path" output_ref="out_file"> + <move type="directory"> + <source>${path}</source> + <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">kraken2_databases/${path}</target> + </move> <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/kraken2_databases/${path}</value_translation> - <value_translation type="function">abspath</value_translation> - </column> - </output> - </data_table> + <value_translation type="function">abspath</value_translation> + </column> + </output> + </data_table> </data_manager> </data_managers>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/adapter.fa Fri May 24 13:39:08 2019 -0400 @@ -0,0 +1,2 @@ +>sequence16|kraken:taxid|32630 Adapter sequence +CAAGCAGAAGACGGCATACGAGATCTTCGAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA