Mercurial > repos > iuc > data_manager_build_kraken2_database
comparison data_manager/kraken2_build_database.py @ 6:9002633b4737 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_build_kraken2_database/ commit 5d74f13e59c4a2862c108ac1a08c067b0cfb2d97
author | iuc |
---|---|
date | Fri, 24 Jun 2022 12:44:33 +0000 |
parents | 2f27f3b86827 |
children | 90b4d4f0a3a4 |
comparison
equal
deleted
inserted
replaced
5:2f27f3b86827 | 6:9002633b4737 |
---|---|
52 def __str__(self): | 52 def __str__(self): |
53 return self.value | 53 return self.value |
54 | 54 |
55 | 55 |
56 class StandardPrebuiltSizes(Enum): | 56 class StandardPrebuiltSizes(Enum): |
57 full = 'full' | 57 viral = "viral" |
58 gb_16 = '16' | 58 minusb = "minusb" |
59 gb_8 = '8' | 59 standard = "standard" |
60 standard_08gb = "standard_08gb" | |
61 standard_16gb = "standard_16gb" | |
62 pluspf = "pluspf" | |
63 pluspf_08gb = "pluspf_08gb" | |
64 pluspf_16gb = "pluspf_16gb" | |
65 pluspfp = "pluspfp" | |
66 pluspfp_08gb = "pluspfp_08gb" | |
67 pluspfp_16gb = "pluspfp_16gb" | |
60 | 68 |
61 def __str__(self): | 69 def __str__(self): |
62 return self.value | 70 return self.value |
63 | 71 |
64 | 72 |
120 } | 128 } |
121 | 129 |
122 return data_table_entry | 130 return data_table_entry |
123 | 131 |
124 | 132 |
125 def kraken2_build_standard_prebuilt(standard_prebuilt_size, prebuilt_date, target_directory, data_table_name=DATA_TABLE_NAME): | 133 def kraken2_build_standard_prebuilt(prebuilt_db, prebuilt_date, target_directory, data_table_name=DATA_TABLE_NAME): |
126 | 134 |
127 now = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H%M%SZ") | 135 now = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H%M%SZ") |
136 | |
137 prebuild_name = { | |
138 'viral': "Viral", | |
139 'minusb': "MinusB (archaea, viral, plasmid, human, UniVec_Core)", | |
140 'standard': "Standard-Full (archaea, bacteria, viral, plasmid, human,UniVec_Core)", | |
141 'standard_08gb': "Standard-8 (Standard with DB capped at 8 GB)", | |
142 'standard_16gb': "Standard-16 (Standard with DB capped at 16 GB)", | |
143 'pluspf': "PlusPF (Standard plus protozoa and fungi)", | |
144 'pluspf_08gb': "PlusPF-8 (PlusPF with DB capped at 8 GB)", | |
145 'pluspf_16gb': "PlusPF-16 (PlusPF with DB capped at 16 GB)", | |
146 'pluspfp': "PlusPFP (Standard plus protozoa, fungi and plant)", | |
147 'pluspfp_08gb': "PlusPFP-8 (PlusPFP with DB capped at 8 GB)", | |
148 'pluspfp_16gb': "PlusPFP-16 (PlusPFP with DB capped at 16 GB)" | |
149 } | |
128 | 150 |
129 database_value = "_".join([ | 151 database_value = "_".join([ |
130 now, | 152 now, |
131 "standard_prebuilt", | 153 "standard_prebuilt", |
132 standard_prebuilt_size | 154 prebuilt_db, |
155 prebuilt_date | |
133 ]) | 156 ]) |
134 | 157 |
135 database_name = " ".join([ | 158 database_name = " ".join([ |
136 "Standard (Prebuilt)", | 159 "Prebuilt Refseq indexes: ", |
137 standard_prebuilt_size, | 160 prebuild_name[prebuilt_db], |
138 "(Downloaded:", | 161 "(Version: ", |
162 prebuilt_date, | |
163 "- Downloaded:", | |
139 now + ")" | 164 now + ")" |
140 ]) | 165 ]) |
141 | 166 |
142 database_path = database_value | 167 database_path = database_value |
143 | 168 |
144 size_to_url_str = { | |
145 'full': '', | |
146 '16': '_16gb', | |
147 '8': '_8gb', | |
148 } | |
149 # we may need to let the user choose the date when new DBs are posted. | 169 # we may need to let the user choose the date when new DBs are posted. |
150 date_url_str = prebuilt_date.replace('-', '') | 170 date_url_str = prebuilt_date.replace('-', '') |
151 standard_prebuilt_size_url = size_to_url_str[standard_prebuilt_size] | |
152 # download the pre-built database | 171 # download the pre-built database |
153 try: | 172 try: |
154 download_url = 'https://genome-idx.s3.amazonaws.com/kraken/k2_standard%s_%s.tar.gz' % (standard_prebuilt_size_url, date_url_str) | 173 download_url = 'https://genome-idx.s3.amazonaws.com/kraken/k2_%s_%s.tar.gz' % (prebuilt_db, date_url_str) |
155 src = urlopen(download_url) | 174 src = urlopen(download_url) |
156 except URLError as e: | 175 except URLError as e: |
157 print('url: ' + download_url, file=sys.stderr) | 176 print('url: ' + download_url, file=sys.stderr) |
158 print(e, file=sys.stderr) | 177 print(e, file=sys.stderr) |
159 exit(1) | 178 exit(1) |
367 parser.add_argument('--minimizer-spaces', dest='minimizer_spaces', default=6, help='minimizer spaces') | 386 parser.add_argument('--minimizer-spaces', dest='minimizer_spaces', default=6, help='minimizer spaces') |
368 parser.add_argument('--load-factor', dest='load_factor', type=float, default=0.7, help='load factor') | 387 parser.add_argument('--load-factor', dest='load_factor', type=float, default=0.7, help='load factor') |
369 parser.add_argument('--threads', dest='threads', default=1, help='threads') | 388 parser.add_argument('--threads', dest='threads', default=1, help='threads') |
370 parser.add_argument('--database-type', dest='database_type', type=KrakenDatabaseTypes, choices=list(KrakenDatabaseTypes), required=True, help='type of kraken database to build') | 389 parser.add_argument('--database-type', dest='database_type', type=KrakenDatabaseTypes, choices=list(KrakenDatabaseTypes), required=True, help='type of kraken database to build') |
371 parser.add_argument('--minikraken2-version', dest='minikraken2_version', type=Minikraken2Versions, choices=list(Minikraken2Versions), help='MiniKraken2 version (only applies to --database-type minikraken)') | 390 parser.add_argument('--minikraken2-version', dest='minikraken2_version', type=Minikraken2Versions, choices=list(Minikraken2Versions), help='MiniKraken2 version (only applies to --database-type minikraken)') |
372 parser.add_argument('--standard-prebuilt-size', dest='standard_prebuilt_size', type=StandardPrebuiltSizes, choices=list(StandardPrebuiltSizes), help='Size of standard prebuilt database to download (only applies to --database-type standard_prebuilt. Options are: "8", "16", "full".)') | 391 parser.add_argument('--prebuilt-db', dest='prebuilt_db', type=StandardPrebuiltSizes, choices=list(StandardPrebuiltSizes), help='Prebuilt database to download. Only applies to --database-type standard_prebuilt.') |
373 parser.add_argument('--prebuilt-date', dest='prebuilt_date', help='Database build date (YYYY-MM-DD). Only applies to --database-type standard_prebuilt.') | 392 parser.add_argument('--prebuilt-date', dest='prebuilt_date', help='Database build date (YYYY-MM-DD). Only applies to --database-type standard_prebuilt.') |
374 parser.add_argument('--special-database-type', dest='special_database_type', type=SpecialDatabaseTypes, choices=list(SpecialDatabaseTypes), help='type of special database to build (only applies to --database-type special)') | 393 parser.add_argument('--special-database-type', dest='special_database_type', type=SpecialDatabaseTypes, choices=list(SpecialDatabaseTypes), help='type of special database to build (only applies to --database-type special)') |
375 parser.add_argument('--custom-fasta', dest='custom_fasta', help='fasta file for custom database (only applies to --database-type custom)') | 394 parser.add_argument('--custom-fasta', dest='custom_fasta', help='fasta file for custom database (only applies to --database-type custom)') |
376 parser.add_argument('--custom-database-name', dest='custom_database_name', help='Name for custom database (only applies to --database-type custom)') | 395 parser.add_argument('--custom-database-name', dest='custom_database_name', help='Name for custom database (only applies to --database-type custom)') |
377 parser.add_argument('--skip-maps', dest='skip_maps', action='store_true', help='') | 396 parser.add_argument('--skip-maps', dest='skip_maps', action='store_true', help='') |
406 kraken2_args, | 425 kraken2_args, |
407 target_directory, | 426 target_directory, |
408 ) | 427 ) |
409 elif str(args.database_type) == 'standard_prebuilt': | 428 elif str(args.database_type) == 'standard_prebuilt': |
410 data_manager_output = kraken2_build_standard_prebuilt( | 429 data_manager_output = kraken2_build_standard_prebuilt( |
411 str(args.standard_prebuilt_size), | 430 str(args.prebuilt_db), |
412 str(args.prebuilt_date), | 431 str(args.prebuilt_date), |
413 target_directory | 432 target_directory |
414 ) | 433 ) |
415 elif str(args.database_type) == 'minikraken': | 434 elif str(args.database_type) == 'minikraken': |
416 data_manager_output = kraken2_build_minikraken( | 435 data_manager_output = kraken2_build_minikraken( |