changeset 56:c0097a584a8a draft

planemo upload for repository https://github.com/usegalaxy-eu/temporary-tools/tree/master/jbrowse2 commit 4b5df41484f6bdf316edaf95b53c92d328ec1674
author fubar
date Tue, 19 Mar 2024 02:33:40 +0000
parents 469c0f6d87d7
children 94264fe60478
files jbrowse2.py repeathumfish.txt
diffstat 2 files changed, 36 insertions(+), 124 deletions(-) [+]
line wrap: on
line diff
--- a/jbrowse2.py	Sat Mar 16 06:50:17 2024 +0000
+++ b/jbrowse2.py	Tue Mar 19 02:33:40 2024 +0000
@@ -19,7 +19,7 @@
 logging.basicConfig(level=logging.INFO)
 log = logging.getLogger("jbrowse")
 
-JB2VER = "v2.10.2"
+JB2VER = "v2.10.3"
 # version pinned for cloning
 
 TODAY = datetime.datetime.now().strftime("%Y-%m-%d")
@@ -471,8 +471,6 @@
                     self.genome_sequence_adapter = assem["sequence"]["adapter"]
                     self.genome_firstcontig = None
                     if not useuri:
-                        # https://lazarus.name/jbrowse/fish/bigwig_0_coverage_bedgraph_cov_count_count_bw.bigwig
-                        # https://lazarus.name/jbrowse/fish/klBraLanc5.haps_combined.decontam.20230620.fasta.fa.gz
                         fl = open(fapath, "r").readline()
                         fls = fl.strip().split(">")
                         if len(fls) > 1:
@@ -606,11 +604,13 @@
             uri = data
         else:
             uri = trackData["hic_url"]
+        categ = trackData['category']
         trackDict = {
             "type": "HicTrack",
             "trackId": tId,
             "name": uri,
             "assemblyNames": [self.genome_name],
+            "category": [categ,],
             "adapter": {
                 "type": "HicAdapter",
                 "hicLocation": uri,
@@ -643,7 +643,7 @@
                 }
             ]
         }
-
+        categ = trackData['category']
         fname = "%s.bed" % tId
         dest = "%s/%s" % (self.outdir, fname)
         gname = self.genome_name
@@ -669,6 +669,7 @@
             "type": "MafTrack",
             "trackId": tId,
             "name": trackData["name"],
+            "category": [categ,],
             "adapter": {
                 "type": "MafTabixAdapter",
                 "samples": samples,
@@ -738,11 +739,13 @@
         self._sort_gff(gff3, dest)
         url = url + ".gz"
         tId = trackData["label"]
+        categ = trackData['category']
         trackDict = {
             "type": "FeatureTrack",
             "trackId": tId,
             "name": trackData["name"],
             "assemblyNames": [self.genome_name],
+            "category": [categ,],
             "adapter": {
                 "type": "Gff3TabixAdapter",
                 "gffGzLocation": {
@@ -791,10 +794,12 @@
             self.subprocess_check_call(cmd)
         bwloc = {"uri": url}
         tId = trackData["label"]
+        categ = trackData['category']
         trackDict = {
             "type": "QuantitativeTrack",
             "trackId": tId,
             "name": trackData["name"],
+            "category": [categ,],
             "assemblyNames": [
                 self.genome_name,
             ],
@@ -818,6 +823,7 @@
         tId = trackData["label"]
         useuri = trackData["useuri"].lower() == "yes"
         bindex = bam_index
+        categ = trackData['category']
         if useuri:
             url = data
         else:
@@ -845,6 +851,7 @@
             "type": "AlignmentsTrack",
             "trackId": tId,
             "name": trackData["name"],
+            "category": [categ,],
             "assemblyNames": [self.genome_name],
             "adapter": {
                 "type": "BamAdapter",
@@ -869,6 +876,7 @@
 
     def add_cram(self, data, trackData, cram_index=None, **kwargs):
         tId = trackData["label"]
+        categ = trackData['category']
         useuri = trackData["useuri"].lower() == "yes"
         if useuri:
             url = data
@@ -892,6 +900,7 @@
             "type": "AlignmentsTrack",
             "trackId": tId,
             "name": trackData["name"],
+            "category": [categ,],
             "assemblyNames": [self.genome_name],
             "adapter": {
                 "type": "CramAdapter",
@@ -919,7 +928,7 @@
         # self.giURL,
         # trackData["metadata"]["dataset_id"],
         # )
-
+        categ = trackData['category']
         useuri = trackData["useuri"].lower() == "yes"
         if useuri:
             url = data
@@ -935,6 +944,7 @@
             "trackId": tId,
             "name": trackData["name"],
             "assemblyNames": [self.genome_name],
+            "category": [categ,],
             "adapter": {
                 "type": "VcfTabixAdapter",
                 "vcfGzLocation": {
@@ -993,11 +1003,13 @@
             dest = "%s/%s" % (self.outdir, url)
             self._sort_gff(data, dest)
         tId = trackData["label"]
+        categ = trackData['category']
         trackDict = {
             "type": "FeatureTrack",
             "trackId": tId,
             "name": trackData["name"],
             "assemblyNames": [self.genome_name],
+            "category": [categ,],
             "adapter": {
                 "type": "Gff3TabixAdapter",
                 "gffGzLocation": {
@@ -1027,6 +1039,7 @@
 
     def add_bed(self, data, ext, trackData):
         tId = trackData["label"]
+        categ = trackData['category']
         useuri = trackData["useuri"].lower() == "yes"
         if useuri:
             url = data
@@ -1040,6 +1053,7 @@
             "name": trackData["name"],
             "assemblyNames": [self.genome_name],
+            "category": [categ,],
             "adapter": {
                 "type": "BedTabixAdapter",
                 "bedGzLocation": {
                     "uri": url,
@@ -1073,6 +1087,7 @@
     def add_paf(self, data, trackData, pafOpts, **kwargs):
         tname = trackData["name"]
         tId = trackData["label"]
+        categ = trackData['category']
         pgnames = [x.strip() for x in pafOpts["genome_label"].split(",")]
         pgpaths = [x.strip() for x in pafOpts["genome"].split(",")]
         passnames = [self.genome_name]  # always first
@@ -1099,22 +1114,23 @@
             "type": "SyntenyTrack",
             "trackId": tId,
             "assemblyNames": passnames,
+            "category": [categ,],
             "name": tname,
             "adapter": {
                 "type": "PAFAdapter",
                 "pafLocation": {"uri": url},
                 "assemblyNames": passnames,
             },
-            # "displays": [
-            # {
-            # "type": "LinearSyntenyDisplay",
-            # "displayId": "%s-LinearSyntenyDisplay" % tId,
-            # },
-            # {
-            # "type": "DotPlotDisplay",
-            # "displayId": "%s-DotPlotDisplay" % tId,
-            # },
-            # ],
+            "displays": [
+            {
+            "type": "LinearSyntenyDisplay",
+            "displayId": "%s-LinearSyntenyDisplay" % tId,
+            },
+            {
+            "type": "DotPlotDisplay",
+            "displayId": "%s-DotPlotDisplay" % tId,
+            },
+            ],
         }
         style_json = self._prepare_track_style(trackDict)
         trackDict["style"] = style_json
@@ -1362,11 +1378,13 @@
         with open(config_path, "w") as config_file:
             json.dump(self.config_json, config_file, indent=2)
 
-    def clone_jbrowse(self):
+    def clone_jbrowse(self, realclone=True):
         """Clone a JBrowse directory into a destination directory. This also works in Biocontainer testing now"""
         dest = self.outdir
-        # self.subprocess_check_call(['jbrowse', 'create', dest, '--tag', f"{JB_VER}"])
-        shutil.copytree(self.jbrowse2path, dest, dirs_exist_ok=True)
+        if realclone:
+            self.subprocess_check_call(['jbrowse', 'create', dest,"-f", '--tag', f"{JB2VER}"])
+        else:
+            shutil.copytree(self.jbrowse2path, dest, dirs_exist_ok=True)
         for fn in [
             "asset-manifest.json",
             "favicon.ico",
--- a/repeathumfish.txt	Sat Mar 16 06:50:17 2024 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,106 +0,0 @@
-     Calypte Anna
-
-     ==================================================
-     file name: rm_input.fasta
-     sequences:           159
-     total length: 1059687259 bp  (1043590636 bp excl N/X-runs)
-     GC level:         41.49 %
-     bases masked:   79638300 bp ( 7.63 %)
-     ==================================================
-                    number of      length   percentage
-                    elements*    occupied  of sequence
-     --------------------------------------------------
-     SINEs:            32491      3044030 bp    0.29 %
-           ALUs            0            0 bp    0.00 %
-           MIRs        23309      1792450 bp    0.17 %
-
-     LINEs:           168150     56100653 bp    5.38 %
-           LINE1         446        51456 bp    0.00 %
-           LINE2        3520       419909 bp    0.04 %
-           L3/CR1     161974     55206716 bp    5.29 %
-
-     LTR elements:      3415      1802798 bp    0.17 %
-           ERVL          649       266145 bp    0.03 %
-           ERVL-MaLRs     15          814 bp    0.00 %
-           ERV_classI   1341       795364 bp    0.08 %
-           ERV_classII  1159       705705 bp    0.07 %
-
-     DNA elements:     12073      1651025 bp    0.16 %
-          hAT-Charlie    338        55376 bp    0.01 %
-          TcMar-Tigger   787        83392 bp    0.01 %
-
-     Unclassified:      3325       445147 bp    0.04 %
-
-     Total interspersed repeats: 63043653 bp    6.04 %
-
-
-     Small RNA:         5543       745245 bp    0.07 %
-
-     Satellites:         109         8273 bp    0.00 %
-     Simple repeats:  284418     12956749 bp    1.24 %
-     Low complexity:   61652      3494878 bp    0.33 %
-     ==================================================
-
-     * most repeats fragmented by insertions or deletions
-       have been counted as one element
-       Runs of >=20 X/Ns in query were excluded in % calcs
-
-
-     The query species was assumed to be homo sapiens
-     RepeatMasker version 4.1.5 , default mode
-
-     run with rmblastn version 2.13.0+
-     FamDB: CONS-Dfam_3.7
-
-Amphioxus
-==================================================
-file name: rm_input.fasta
-sequences:            96
-total length:  458494623 bp  (458485423 bp excl N/X-runs)
-GC level:         41.61 %
-bases masked:    9756877 bp ( 2.13 %)
-==================================================
-               number of      length   percentage
-               elements*    occupied  of sequence
---------------------------------------------------
-SINEs:            10212       946135 bp    0.21 %
-      ALUs            0            0 bp    0.00 %
-      MIRs         5121       527992 bp    0.12 %
-
-LINEs:             4019       622661 bp    0.14 %
-      LINE1         179        11937 bp    0.00 %
-      LINE2         662        63166 bp    0.01 %
-      L3/CR1       2144       286741 bp    0.06 %
-
-LTR elements:       689       138371 bp    0.03 %
-      ERVL           20         1005 bp    0.00 %
-      ERVL-MaLRs      4          322 bp    0.00 %
-      ERV_classI    584       118702 bp    0.03 %
-      ERV_classII    28         1778 bp    0.00 %
-
-DNA elements:       850        57478 bp    0.01 %
-     hAT-Charlie     46         2708 bp    0.00 %
-     TcMar-Tigger    73         5236 bp    0.00 %
-
-Unclassified:       517        41430 bp    0.01 %
-
-Total interspersed repeats:  1806075 bp    0.39 %
-
-
-Small RNA:         5487       857233 bp    0.19 %
-
-Satellites:         629        50630 bp    0.01 %
-Simple repeats:  115914      6556304 bp    1.43 %
-Low complexity:    9124       485904 bp    0.11 %
-==================================================
-
-* most repeats fragmented by insertions or deletions
-  have been counted as one element
-  Runs of >=20 X/Ns in query were excluded in % calcs
-
-
-The query species was assumed to be homo sapiens
-RepeatMasker version 4.1.5 , default mode
-
-run with rmblastn version 2.13.0+
-FamDB: CONS-Dfam_3.7