diff data_manager/kraken2_build_database.py @ 6:9002633b4737 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_build_kraken2_database/ commit 5d74f13e59c4a2862c108ac1a08c067b0cfb2d97
author iuc
date Fri, 24 Jun 2022 12:44:33 +0000
parents 2f27f3b86827
children 90b4d4f0a3a4
line wrap: on
line diff
--- a/data_manager/kraken2_build_database.py	Mon Nov 08 15:40:34 2021 +0000
+++ b/data_manager/kraken2_build_database.py	Fri Jun 24 12:44:33 2022 +0000
@@ -54,9 +54,17 @@
 
 
 class StandardPrebuiltSizes(Enum):
-    full = 'full'
-    gb_16 = '16'
-    gb_8 = '8'
+    viral = "viral"
+    minusb = "minusb"
+    standard = "standard"
+    standard_08gb = "standard_08gb"
+    standard_16gb = "standard_16gb"
+    pluspf = "pluspf"
+    pluspf_08gb = "pluspf_08gb"
+    pluspf_16gb = "pluspf_16gb"
+    pluspfp = "pluspfp"
+    pluspfp_08gb = "pluspfp_08gb"
+    pluspfp_16gb = "pluspfp_16gb"
 
     def __str__(self):
         return self.value
@@ -122,36 +130,47 @@
     return data_table_entry
 
 
-def kraken2_build_standard_prebuilt(standard_prebuilt_size, prebuilt_date, target_directory, data_table_name=DATA_TABLE_NAME):
+def kraken2_build_standard_prebuilt(prebuilt_db, prebuilt_date, target_directory, data_table_name=DATA_TABLE_NAME):
 
     now = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H%M%SZ")
 
+    prebuild_name = {
+        'viral': "Viral",
+        'minusb': "MinusB (archaea, viral, plasmid, human, UniVec_Core)",
+        'standard': "Standard-Full (archaea, bacteria, viral, plasmid, human,UniVec_Core)",
+        'standard_08gb': "Standard-8 (Standard with DB capped at 8 GB)",
+        'standard_16gb': "Standard-16 (Standard with DB capped at 16 GB)",
+        'pluspf': "PlusPF (Standard plus protozoa and fungi)",
+        'pluspf_08gb': "PlusPF-8 (PlusPF with DB capped at 8 GB)",
+        'pluspf_16gb': "PlusPF-16 (PlusPF with DB capped at 16 GB)",
+        'pluspfp': "PlusPFP (Standard plus protozoa, fungi and plant)",
+        'pluspfp_08gb': "PlusPFP-8 (PlusPFP with DB capped at 8 GB)",
+        'pluspfp_16gb': "PlusPFP-16 (PlusPFP with DB capped at 16 GB)"
+    }
+
     database_value = "_".join([
         now,
         "standard_prebuilt",
-        standard_prebuilt_size
+        prebuilt_db,
+        prebuilt_date
     ])
 
     database_name = " ".join([
-        "Standard (Prebuilt)",
-        standard_prebuilt_size,
-        "(Downloaded:",
+        "Prebuilt Refseq indexes: ",
+        prebuild_name[prebuilt_db],
+        "(Version: ",
+        prebuilt_date,
+        "- Downloaded:",
         now + ")"
     ])
 
     database_path = database_value
 
-    size_to_url_str = {
-        'full': '',
-        '16': '_16gb',
-        '8': '_8gb',
-    }
     # we may need to let the user choose the date when new DBs are posted.
     date_url_str = prebuilt_date.replace('-', '')
-    standard_prebuilt_size_url = size_to_url_str[standard_prebuilt_size]
     # download the pre-built database
     try:
-        download_url = 'https://genome-idx.s3.amazonaws.com/kraken/k2_standard%s_%s.tar.gz' % (standard_prebuilt_size_url, date_url_str)
+        download_url = 'https://genome-idx.s3.amazonaws.com/kraken/k2_%s_%s.tar.gz' % (prebuilt_db, date_url_str)
         src = urlopen(download_url)
     except URLError as e:
         print('url: ' + download_url, file=sys.stderr)
@@ -369,7 +388,7 @@
     parser.add_argument('--threads', dest='threads', default=1, help='threads')
     parser.add_argument('--database-type', dest='database_type', type=KrakenDatabaseTypes, choices=list(KrakenDatabaseTypes), required=True, help='type of kraken database to build')
     parser.add_argument('--minikraken2-version', dest='minikraken2_version', type=Minikraken2Versions, choices=list(Minikraken2Versions), help='MiniKraken2 version (only applies to --database-type minikraken)')
-    parser.add_argument('--standard-prebuilt-size', dest='standard_prebuilt_size', type=StandardPrebuiltSizes, choices=list(StandardPrebuiltSizes), help='Size of standard prebuilt database to download (only applies to --database-type standard_prebuilt. Options are: "8", "16", "full".)')
+    parser.add_argument('--prebuilt-db', dest='prebuilt_db', type=StandardPrebuiltSizes, choices=list(StandardPrebuiltSizes), help='Prebuilt database to download. Only applies to --database-type standard_prebuilt.')
     parser.add_argument('--prebuilt-date', dest='prebuilt_date', help='Database build date (YYYY-MM-DD). Only applies to --database-type standard_prebuilt.')
     parser.add_argument('--special-database-type', dest='special_database_type', type=SpecialDatabaseTypes, choices=list(SpecialDatabaseTypes), help='type of special database to build (only applies to --database-type special)')
     parser.add_argument('--custom-fasta', dest='custom_fasta', help='fasta file for custom database (only applies to --database-type custom)')
@@ -408,7 +427,7 @@
         )
     elif str(args.database_type) == 'standard_prebuilt':
         data_manager_output = kraken2_build_standard_prebuilt(
-            str(args.standard_prebuilt_size),
+            str(args.prebuilt_db),
             str(args.prebuilt_date),
             target_directory
         )