comparison data_manager/kraken2_build_database.py @ 6:9002633b4737 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_build_kraken2_database/ commit 5d74f13e59c4a2862c108ac1a08c067b0cfb2d97
author iuc
date Fri, 24 Jun 2022 12:44:33 +0000
parents 2f27f3b86827
children 90b4d4f0a3a4
comparison
equal deleted inserted replaced
5:2f27f3b86827 6:9002633b4737
52 def __str__(self): 52 def __str__(self):
53 return self.value 53 return self.value
54 54
55 55
56 class StandardPrebuiltSizes(Enum): 56 class StandardPrebuiltSizes(Enum):
57 full = 'full' 57 viral = "viral"
58 gb_16 = '16' 58 minusb = "minusb"
59 gb_8 = '8' 59 standard = "standard"
60 standard_08gb = "standard_08gb"
61 standard_16gb = "standard_16gb"
62 pluspf = "pluspf"
63 pluspf_08gb = "pluspf_08gb"
64 pluspf_16gb = "pluspf_16gb"
65 pluspfp = "pluspfp"
66 pluspfp_08gb = "pluspfp_08gb"
67 pluspfp_16gb = "pluspfp_16gb"
60 68
61 def __str__(self): 69 def __str__(self):
62 return self.value 70 return self.value
63 71
64 72
120 } 128 }
121 129
122 return data_table_entry 130 return data_table_entry
123 131
124 132
125 def kraken2_build_standard_prebuilt(standard_prebuilt_size, prebuilt_date, target_directory, data_table_name=DATA_TABLE_NAME): 133 def kraken2_build_standard_prebuilt(prebuilt_db, prebuilt_date, target_directory, data_table_name=DATA_TABLE_NAME):
126 134
127 now = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H%M%SZ") 135 now = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H%M%SZ")
136
137 prebuild_name = {
138 'viral': "Viral",
139 'minusb': "MinusB (archaea, viral, plasmid, human, UniVec_Core)",
140 'standard': "Standard-Full (archaea, bacteria, viral, plasmid, human,UniVec_Core)",
141 'standard_08gb': "Standard-8 (Standard with DB capped at 8 GB)",
142 'standard_16gb': "Standard-16 (Standard with DB capped at 16 GB)",
143 'pluspf': "PlusPF (Standard plus protozoa and fungi)",
144 'pluspf_08gb': "PlusPF-8 (PlusPF with DB capped at 8 GB)",
145 'pluspf_16gb': "PlusPF-16 (PlusPF with DB capped at 16 GB)",
146 'pluspfp': "PlusPFP (Standard plus protozoa, fungi and plant)",
147 'pluspfp_08gb': "PlusPFP-8 (PlusPFP with DB capped at 8 GB)",
148 'pluspfp_16gb': "PlusPFP-16 (PlusPFP with DB capped at 16 GB)"
149 }
128 150
129 database_value = "_".join([ 151 database_value = "_".join([
130 now, 152 now,
131 "standard_prebuilt", 153 "standard_prebuilt",
132 standard_prebuilt_size 154 prebuilt_db,
155 prebuilt_date
133 ]) 156 ])
134 157
135 database_name = " ".join([ 158 database_name = " ".join([
136 "Standard (Prebuilt)", 159 "Prebuilt Refseq indexes: ",
137 standard_prebuilt_size, 160 prebuild_name[prebuilt_db],
138 "(Downloaded:", 161 "(Version: ",
162 prebuilt_date,
163 "- Downloaded:",
139 now + ")" 164 now + ")"
140 ]) 165 ])
141 166
142 database_path = database_value 167 database_path = database_value
143 168
144 size_to_url_str = {
145 'full': '',
146 '16': '_16gb',
147 '8': '_8gb',
148 }
149 # we may need to let the user choose the date when new DBs are posted. 169 # we may need to let the user choose the date when new DBs are posted.
150 date_url_str = prebuilt_date.replace('-', '') 170 date_url_str = prebuilt_date.replace('-', '')
151 standard_prebuilt_size_url = size_to_url_str[standard_prebuilt_size]
152 # download the pre-built database 171 # download the pre-built database
153 try: 172 try:
154 download_url = 'https://genome-idx.s3.amazonaws.com/kraken/k2_standard%s_%s.tar.gz' % (standard_prebuilt_size_url, date_url_str) 173 download_url = 'https://genome-idx.s3.amazonaws.com/kraken/k2_%s_%s.tar.gz' % (prebuilt_db, date_url_str)
155 src = urlopen(download_url) 174 src = urlopen(download_url)
156 except URLError as e: 175 except URLError as e:
157 print('url: ' + download_url, file=sys.stderr) 176 print('url: ' + download_url, file=sys.stderr)
158 print(e, file=sys.stderr) 177 print(e, file=sys.stderr)
159 exit(1) 178 exit(1)
367 parser.add_argument('--minimizer-spaces', dest='minimizer_spaces', default=6, help='minimizer spaces') 386 parser.add_argument('--minimizer-spaces', dest='minimizer_spaces', default=6, help='minimizer spaces')
368 parser.add_argument('--load-factor', dest='load_factor', type=float, default=0.7, help='load factor') 387 parser.add_argument('--load-factor', dest='load_factor', type=float, default=0.7, help='load factor')
369 parser.add_argument('--threads', dest='threads', default=1, help='threads') 388 parser.add_argument('--threads', dest='threads', default=1, help='threads')
370 parser.add_argument('--database-type', dest='database_type', type=KrakenDatabaseTypes, choices=list(KrakenDatabaseTypes), required=True, help='type of kraken database to build') 389 parser.add_argument('--database-type', dest='database_type', type=KrakenDatabaseTypes, choices=list(KrakenDatabaseTypes), required=True, help='type of kraken database to build')
371 parser.add_argument('--minikraken2-version', dest='minikraken2_version', type=Minikraken2Versions, choices=list(Minikraken2Versions), help='MiniKraken2 version (only applies to --database-type minikraken)') 390 parser.add_argument('--minikraken2-version', dest='minikraken2_version', type=Minikraken2Versions, choices=list(Minikraken2Versions), help='MiniKraken2 version (only applies to --database-type minikraken)')
372 parser.add_argument('--standard-prebuilt-size', dest='standard_prebuilt_size', type=StandardPrebuiltSizes, choices=list(StandardPrebuiltSizes), help='Size of standard prebuilt database to download (only applies to --database-type standard_prebuilt. Options are: "8", "16", "full".)') 391 parser.add_argument('--prebuilt-db', dest='prebuilt_db', type=StandardPrebuiltSizes, choices=list(StandardPrebuiltSizes), help='Prebuilt database to download. Only applies to --database-type standard_prebuilt.')
373 parser.add_argument('--prebuilt-date', dest='prebuilt_date', help='Database build date (YYYY-MM-DD). Only applies to --database-type standard_prebuilt.') 392 parser.add_argument('--prebuilt-date', dest='prebuilt_date', help='Database build date (YYYY-MM-DD). Only applies to --database-type standard_prebuilt.')
374 parser.add_argument('--special-database-type', dest='special_database_type', type=SpecialDatabaseTypes, choices=list(SpecialDatabaseTypes), help='type of special database to build (only applies to --database-type special)') 393 parser.add_argument('--special-database-type', dest='special_database_type', type=SpecialDatabaseTypes, choices=list(SpecialDatabaseTypes), help='type of special database to build (only applies to --database-type special)')
375 parser.add_argument('--custom-fasta', dest='custom_fasta', help='fasta file for custom database (only applies to --database-type custom)') 394 parser.add_argument('--custom-fasta', dest='custom_fasta', help='fasta file for custom database (only applies to --database-type custom)')
376 parser.add_argument('--custom-database-name', dest='custom_database_name', help='Name for custom database (only applies to --database-type custom)') 395 parser.add_argument('--custom-database-name', dest='custom_database_name', help='Name for custom database (only applies to --database-type custom)')
377 parser.add_argument('--skip-maps', dest='skip_maps', action='store_true', help='') 396 parser.add_argument('--skip-maps', dest='skip_maps', action='store_true', help='')
406 kraken2_args, 425 kraken2_args,
407 target_directory, 426 target_directory,
408 ) 427 )
409 elif str(args.database_type) == 'standard_prebuilt': 428 elif str(args.database_type) == 'standard_prebuilt':
410 data_manager_output = kraken2_build_standard_prebuilt( 429 data_manager_output = kraken2_build_standard_prebuilt(
411 str(args.standard_prebuilt_size), 430 str(args.prebuilt_db),
412 str(args.prebuilt_date), 431 str(args.prebuilt_date),
413 target_directory 432 target_directory
414 ) 433 )
415 elif str(args.database_type) == 'minikraken': 434 elif str(args.database_type) == 'minikraken':
416 data_manager_output = kraken2_build_minikraken( 435 data_manager_output = kraken2_build_minikraken(