Mercurial > repos > fubar > jbrowse2
changeset 56:c0097a584a8a draft
planemo upload for repository https://github.com/usegalaxy-eu/temporary-tools/tree/master/jbrowse2 commit 4b5df41484f6bdf316edaf95b53c92d328ec1674
author | fubar |
---|---|
date | Tue, 19 Mar 2024 02:33:40 +0000 |
parents | 469c0f6d87d7 |
children | 94264fe60478 |
files | jbrowse2.py repeathumfish.txt |
diffstat | 2 files changed, 36 insertions(+), 124 deletions(-) [+] |
line wrap: on
line diff
--- a/jbrowse2.py Sat Mar 16 06:50:17 2024 +0000 +++ b/jbrowse2.py Tue Mar 19 02:33:40 2024 +0000 @@ -19,7 +19,7 @@ logging.basicConfig(level=logging.INFO) log = logging.getLogger("jbrowse") -JB2VER = "v2.10.2" +JB2VER = "v2.10.3" # version pinned for cloning TODAY = datetime.datetime.now().strftime("%Y-%m-%d") @@ -471,8 +471,6 @@ self.genome_sequence_adapter = assem["sequence"]["adapter"] self.genome_firstcontig = None if not useuri: - # https://lazarus.name/jbrowse/fish/bigwig_0_coverage_bedgraph_cov_count_count_bw.bigwig - # https://lazarus.name/jbrowse/fish/klBraLanc5.haps_combined.decontam.20230620.fasta.fa.gz fl = open(fapath, "r").readline() fls = fl.strip().split(">") if len(fls) > 1: @@ -606,11 +604,13 @@ uri = data else: uri = trackData["hic_url"] + categ = trackData['category'] trackDict = { "type": "HicTrack", "trackId": tId, "name": uri, "assemblyNames": [self.genome_name], + "category": [categ,], "adapter": { "type": "HicAdapter", "hicLocation": uri, @@ -643,7 +643,7 @@ } ] } - + categ = trackData['category'] fname = "%s.bed" % tId dest = "%s/%s" % (self.outdir, fname) gname = self.genome_name @@ -669,6 +669,7 @@ "type": "MafTrack", "trackId": tId, "name": trackData["name"], + "category": [categ,], "adapter": { "type": "MafTabixAdapter", "samples": samples, @@ -738,11 +739,13 @@ self._sort_gff(gff3, dest) url = url + ".gz" tId = trackData["label"] + categ = trackData['category'] trackDict = { "type": "FeatureTrack", "trackId": tId, "name": trackData["name"], "assemblyNames": [self.genome_name], + "category": [categ,], "adapter": { "type": "Gff3TabixAdapter", "gffGzLocation": { @@ -791,10 +794,12 @@ self.subprocess_check_call(cmd) bwloc = {"uri": url} tId = trackData["label"] + categ = trackData['category'] trackDict = { "type": "QuantitativeTrack", "trackId": tId, "name": trackData["name"], + "category": [categ,], "assemblyNames": [ self.genome_name, ], @@ -818,6 +823,7 @@ tId = trackData["label"] useuri = trackData["useuri"].lower() == "yes" bindex = bam_index + categ = trackData['category'] if useuri: url = data else: @@ -845,6 +851,7 @@ "type": "AlignmentsTrack", "trackId": tId, "name": trackData["name"], + "category": [categ,], "assemblyNames": [self.genome_name], "adapter": { "type": "BamAdapter", @@ -869,6 +876,7 @@ def add_cram(self, data, trackData, cram_index=None, **kwargs): tId = trackData["label"] + categ = trackData['category'] useuri = trackData["useuri"].lower() == "yes" if useuri: url = data @@ -892,6 +900,7 @@ "type": "AlignmentsTrack", "trackId": tId, "name": trackData["name"], + "category": [categ,], "assemblyNames": [self.genome_name], "adapter": { "type": "CramAdapter", @@ -919,7 +928,7 @@ # self.giURL, # trackData["metadata"]["dataset_id"], # ) - + categ = trackData['category'] useuri = trackData["useuri"].lower() == "yes" if useuri: url = data @@ -935,6 +944,7 @@ "trackId": tId, "name": trackData["name"], "assemblyNames": [self.genome_name], + "category": [categ,], "adapter": { "type": "VcfTabixAdapter", "vcfGzLocation": { @@ -993,11 +1003,13 @@ dest = "%s/%s" % (self.outdir, url) self._sort_gff(data, dest) tId = trackData["label"] + categ = trackData['category'] trackDict = { "type": "FeatureTrack", "trackId": tId, "name": trackData["name"], "assemblyNames": [self.genome_name], + "category": [categ,], "adapter": { "type": "Gff3TabixAdapter", "gffGzLocation": { @@ -1027,6 +1039,7 @@ def add_bed(self, data, ext, trackData): tId = trackData["label"] + categ = trackData['category'] useuri = trackData["useuri"].lower() == "yes" if useuri: url = data @@ -1040,6 +1053,7 @@ "name": trackData["name"], "assemblyNames": [self.genome_name], "adapter": { + "category": [categ,], "type": "BedTabixAdapter", "bedGzLocation": { "uri": url, @@ -1073,6 +1087,7 @@ def add_paf(self, data, trackData, pafOpts, **kwargs): tname = trackData["name"] tId = trackData["label"] + categ = trackData['category'] pgnames = [x.strip() for x in pafOpts["genome_label"].split(",")] pgpaths = [x.strip() for x in pafOpts["genome"].split(",")] passnames = [self.genome_name] # always first @@ -1099,22 +1114,23 @@ "type": "SyntenyTrack", "trackId": tId, "assemblyNames": passnames, + "category": [categ,], "name": tname, "adapter": { "type": "PAFAdapter", "pafLocation": {"uri": url}, "assemblyNames": passnames, }, - # "displays": [ - # { - # "type": "LinearSyntenyDisplay", - # "displayId": "%s-LinearSyntenyDisplay" % tId, - # }, - # { - # "type": "DotPlotDisplay", - # "displayId": "%s-DotPlotDisplay" % tId, - # }, - # ], + "displays": [ + { + "type": "LinearSyntenyDisplay", + "displayId": "%s-LinearSyntenyDisplay" % tId, + }, + { + "type": "DotPlotDisplay", + "displayId": "%s-DotPlotDisplay" % tId, + }, + ], } style_json = self._prepare_track_style(trackDict) trackDict["style"] = style_json @@ -1362,11 +1378,13 @@ with open(config_path, "w") as config_file: json.dump(self.config_json, config_file, indent=2) - def clone_jbrowse(self): + def clone_jbrowse(self, realclone=True): """Clone a JBrowse directory into a destination directory. This also works in Biocontainer testing now""" dest = self.outdir - # self.subprocess_check_call(['jbrowse', 'create', dest, '--tag', f"{JB_VER}"]) - shutil.copytree(self.jbrowse2path, dest, dirs_exist_ok=True) + if realclone: + self.subprocess_check_call(['jbrowse', 'create', dest,"-f", '--tag', f"{JB2VER}"]) + else: + shutil.copytree(self.jbrowse2path, dest, dirs_exist_ok=True) for fn in [ "asset-manifest.json", "favicon.ico",
--- a/repeathumfish.txt Sat Mar 16 06:50:17 2024 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,106 +0,0 @@ - Calypte Anna - - ================================================== - file name: rm_input.fasta - sequences: 159 - total length: 1059687259 bp (1043590636 bp excl N/X-runs) - GC level: 41.49 % - bases masked: 79638300 bp ( 7.63 %) - ================================================== - number of length percentage - elements* occupied of sequence - -------------------------------------------------- - SINEs: 32491 3044030 bp 0.29 % - ALUs 0 0 bp 0.00 % - MIRs 23309 1792450 bp 0.17 % - - LINEs: 168150 56100653 bp 5.38 % - LINE1 446 51456 bp 0.00 % - LINE2 3520 419909 bp 0.04 % - L3/CR1 161974 55206716 bp 5.29 % - - LTR elements: 3415 1802798 bp 0.17 % - ERVL 649 266145 bp 0.03 % - ERVL-MaLRs 15 814 bp 0.00 % - ERV_classI 1341 795364 bp 0.08 % - ERV_classII 1159 705705 bp 0.07 % - - DNA elements: 12073 1651025 bp 0.16 % - hAT-Charlie 338 55376 bp 0.01 % - TcMar-Tigger 787 83392 bp 0.01 % - - Unclassified: 3325 445147 bp 0.04 % - - Total interspersed repeats: 63043653 bp 6.04 % - - - Small RNA: 5543 745245 bp 0.07 % - - Satellites: 109 8273 bp 0.00 % - Simple repeats: 284418 12956749 bp 1.24 % - Low complexity: 61652 3494878 bp 0.33 % - ================================================== - - * most repeats fragmented by insertions or deletions - have been counted as one element - Runs of >=20 X/Ns in query were excluded in % calcs - - - The query species was assumed to be homo sapiens - RepeatMasker version 4.1.5 , default mode - - run with rmblastn version 2.13.0+ - FamDB: CONS-Dfam_3.7 - -Amphioxus -================================================== -file name: rm_input.fasta -sequences: 96 -total length: 458494623 bp (458485423 bp excl N/X-runs) -GC level: 41.61 % -bases masked: 9756877 bp ( 2.13 %) -================================================== - number of length percentage - elements* occupied of sequence --------------------------------------------------- -SINEs: 10212 946135 bp 0.21 % - ALUs 0 0 bp 0.00 % - MIRs 5121 527992 bp 0.12 % - -LINEs: 4019 622661 bp 0.14 % - LINE1 179 11937 bp 0.00 % - LINE2 662 63166 bp 0.01 % - L3/CR1 2144 286741 bp 0.06 % - -LTR elements: 689 138371 bp 0.03 % - ERVL 20 1005 bp 0.00 % - ERVL-MaLRs 4 322 bp 0.00 % - ERV_classI 584 118702 bp 0.03 % - ERV_classII 28 1778 bp 0.00 % - -DNA elements: 850 57478 bp 0.01 % - hAT-Charlie 46 2708 bp 0.00 % - TcMar-Tigger 73 5236 bp 0.00 % - -Unclassified: 517 41430 bp 0.01 % - -Total interspersed repeats: 1806075 bp 0.39 % - - -Small RNA: 5487 857233 bp 0.19 % - -Satellites: 629 50630 bp 0.01 % -Simple repeats: 115914 6556304 bp 1.43 % -Low complexity: 9124 485904 bp 0.11 % -================================================== - -* most repeats fragmented by insertions or deletions - have been counted as one element - Runs of >=20 X/Ns in query were excluded in % calcs - - -The query species was assumed to be homo sapiens -RepeatMasker version 4.1.5 , default mode - -run with rmblastn version 2.13.0+ -FamDB: CONS-Dfam_3.7