changeset 30:8f02a84ee278 draft

planemo upload for repository https://github.com/usegalaxy-eu/temporary-tools/tree/master/jbrowse2 commit 48bc917d34af182e9158915862c8a35723660919
author fubar
date Wed, 21 Feb 2024 02:57:30 +0000
parents f728cf0df71d
children cb4b32ca9968
files autogenJB2.py autogenJB2.xml jbrowse2.py jbrowse2.xml macros.xml
diffstat 5 files changed, 296 insertions(+), 12 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/autogenJB2.py	Wed Feb 21 02:57:30 2024 +0000
@@ -0,0 +1,147 @@
+import argparse
+import re
+import sys
+
+from jbrowse2 import jbrowseConnector as jbC
+
+
+def makeDefaultLocation(jc, defLoc=None):
+    """Build the default-session location dict for a JBrowse2 view.
+
+    Args:
+        jc: connector object; this function reads ``jc.genome_name`` and
+            ``jc.genome_firstcontig`` and logs through ``jc.logging.info``.
+        defLoc: optional location string shaped like ``contig:start..end``.
+            Thousands separators (``100,000``) are accepted and ignored.
+            Missing start/end keep the defaults 0 and 100000.
+
+    Returns:
+        A dict with ``reversed``, ``assemblyName``, ``start``, ``end`` and
+        ``refName`` keys, or None when no contig name could be determined
+        (``defLoc`` did not parse, or the first contig name is empty).
+    """
+    drdict = {
+        "reversed": False,
+        "assemblyName": jc.genome_name,
+        "start": 0,
+        "end": 100000,
+    }
+
+    if defLoc:
+        loc_match = re.search(r"^([^:]+):([\d,]*)\.*([\d,]*)$", defLoc)
+        if loc_match:
+            drdict["refName"] = loc_match.group(1)
+            for key, raw in (
+                ("start", loc_match.group(2)),
+                ("end", loc_match.group(3)),
+            ):
+                # Strip commas BEFORE testing for emptiness: a coordinate made
+                # only of commas would otherwise reach int("") and raise.
+                digits = raw.replace(",", "")
+                if digits:
+                    drdict[key] = int(digits)
+        else:
+            jc.logging.info(
+                "@@@ regexp could not match contig:start..end in the supplied location %s - please fix"
+                % defLoc
+            )
+    else:
+        # No location supplied: open the view on the first contig.
+        drdict["refName"] = jc.genome_firstcontig
+
+    if drdict.get("refName"):
+        jc.logging.info("@@@ defaultlocation %s for default session" % drdict)
+        return drdict
+    jc.logging.info("@@@ no contig name found for default session - please add one!")
+    return None
+
+
+if __name__ == "__main__":
+    # Command-line entry point: build a JBrowse2 configuration from a flat list
+    # of "name,path,ext" triples, one triple per --collection option. Entries
+    # whose name starts with REFERENCE_ are genomes; everything else is a track.
+    parser = argparse.ArgumentParser(description="", epilog="")
+    # Accept the camelCase spelling too, so callers using --sessName keep working.
+    parser.add_argument(
+        "--sessname",
+        "--sessName",
+        dest="sessname",
+        help="Session name",
+        default="AutoJBrowse",
+    )
+    parser.add_argument(
+        "--collection",
+        help="One 'name,path,ext' triple for JBrowse2; repeat the option once per file",
+        default=[],
+        # "append" keeps each triple as one string; "extend" without nargs
+        # would explode the string into single characters.
+        action="append",
+    )
+    # jbC() below needs an output directory; without this option args.outdir
+    # does not exist and the script dies with AttributeError.
+    parser.add_argument("--outdir", help="Output directory", default="out")
+    parser.add_argument("--version", "-V", action="version", version="%(prog)s 0.0.1")
+    args = parser.parse_args()
+    sessName = args.sessname
+    malformed = [x for x in args.collection if len(x.split(",")) != 3]
+    if malformed:
+        # Fail with a usage error rather than an IndexError further down.
+        parser.error("--collection expects 'name,path,ext'; got %s" % malformed)
+    flistList = [x.split(",") for x in args.collection]
+    if flistList:
+        listgenomes = [f for f in flistList if f[0].startswith("REFERENCE_")]
+        if listgenomes:
+            # The genome display name is whatever follows the REFERENCE_ marker.
+            jc = jbC(
+                outdir=args.outdir,
+                genomes=[
+                    {
+                        "path": gpath,
+                        "meta": {
+                            "name": gname.split("REFERENCE_")[1],
+                        },
+                    }
+                    for gname, gpath, _gext in listgenomes
+                ],
+            )
+            jc.process_genomes()
+            # NOTE(review): this dict is filled in below but never handed to
+            # jc.add_default_session - confirm whether it should be.
+            default_session_data = {
+                "visibility": {
+                    "default_on": [],
+                    "default_off": [],
+                },
+                "style": {},
+                "style_labels": {},
+            }
+            defLoc = makeDefaultLocation(jc)
+            listtracks = [f for f in flistList if not f[0].startswith("REFERENCE_")]
+            # foo.paf must have a foo_paf.fasta or fasta.gz to match
+            for tname, _tpath, text in listtracks:
+                if text == "paf":
+                    refname = tname + "_paf.fasta"
+                    refdat = [x[1] for x in listtracks if x[0] == refname]
+                    if not refdat:
+                        jc.logging.warning(
+                            "!! No reference file %s corresponding to paf file %s found. Not building - there must be a corresponding fasta for each paf"
+                            % (refname, tname)
+                        )
+                        sys.exit(3)
+                    track_conf = {
+                        "conf": {
+                            "options": {
+                                "paf": {"genome": refdat, "genome_label": tname}
+                            }
+                        }
+                    }
+                else:
+                    track_conf = {}
+                track_conf["format"] = text
+                track_conf["name"] = tname
+                track_conf["label"] = tname
+                # NOTE(review): the track's path (second field of the triple) is
+                # never passed on - trackfiles is left empty, as in the original.
+                # Confirm what jc.process_annotations expects here.
+                track_conf["trackfiles"] = []
+                keys = jc.process_annotations(track_conf)
+
+                if keys:
+                    # Tracks here are plain lists with no per-track visibility
+                    # attribute, so every key goes to "default_off".
+                    default_session_data["visibility"]["default_off"].extend(keys)
+            # Track styling and the general (analytics/colour/font) settings
+            # are not configured on this autogenerated path.
+            trackconf = jc.config_json.get("tracks", None)
+            if trackconf:
+                jc.config_json["tracks"].update(jc.tracksToAdd)
+            else:
+                jc.config_json["tracks"] = jc.tracksToAdd
+            jc.write_config()
+            defaultData = {"defaultLocation": defLoc, "session_name": sessName}
+            jc.add_default_session(defaultData)
+            # jc.text_index() not sure what broke here.
+    else:
+        sys.stderr.write("!! empty collection supplied - nothing to process")
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/autogenJB2.xml	Wed Feb 21 02:57:30 2024 +0000
@@ -0,0 +1,141 @@
+ <tool id="autogenjb2" name="autogenjb2" version="2.10.0_0" profile="22.05">
+    <description>Files to JBrowse2</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="edamInc"/>
+    <xrefs>
+        <xref type="bio.tools">jbrowse2</xref>
+    </xrefs>
+    <expand macro="requirements"/>
+    <version_command>python '${__tool_directory__}/autogenJB2.py' --version</version_command>
+    <command detect_errors="aggressive"><![CDATA[
+## One --collection option per element, as 'identifier,path,extension'.
+python '$__tool_directory__/autogenJB2.py'
+#for $key in $jbrowseme.keys():
+--collection '$key,$jbrowseme[$key],$jbrowseme[$key].ext'
+#end for
+## Option names are case-sensitive: the script declares --sessname.
+--sessname "Autogen JBrowse"
+  ]]></command>
+    <inputs>
+        <!-- The command template iterates this collection's keys and passes one
+             'key,path,ext' triple per element to autogenJB2.py. -->
+        <param
+                    label="Collection of files specially named to become tracks"
+                    name="jbrowseme"
+                    type="data_collection">
+        </param>
+    </inputs>
+    <outputs>
+        <!-- NOTE(review): the command template never references $output;
+             confirm how this HTML dataset gets populated. -->
+        <data format="html" name="output" label="JBrowse2"/>
+    </outputs>
+
+    <help><![CDATA[
+
+JBrowse2-in-Galaxy
+==================
+
+JBrowse2-in-Galaxy offers a highly configurable, workflow-compatible
+alternative to JBrowse1-in-Galaxy and Trackster.
+
+Compared to JBrowse1-in-Galaxy, there is no support for alternative codons for unusual genomes,
+and detailed track styling is not yet implemented. Send code.
+JBrowse1 development has now ceased in favour of JBrowse2.
+
+Use and local viewing
+=====================
+
+
+A JBrowse2 history item can be opened by viewing it (the "eye" icon).
+
+The same browser data and setup can also be downloaded as a compressed zip archive by clicking the download ("floppy disk") icon in the history.
+This can be shared and viewed without Galaxy.
+
+Outside Galaxy, a replacement application is required to serve the browser. A local python web server can be started using a script included in each archive,
+assuming that Python3 is already working on your desktop - if not, you will have to install it first. Unzip the archive (*unzip [filename].zip*) and change
+directory to the first level in that zip archive. It contains a file named *jb2_webserver.py*.
+
+With python3 installed,
+
+*python3 jb2_webserver.py*
+
+will serve the unarchived JBrowse2 configuration from the same directory as the python script automatically. If a new browser window does not open,
+but the script appears to be running, try pointing your web browser to the default of *localhost:8080*
+
+Overview
+--------
+
+JBrowse is a fast, embeddable genome browser built completely with
+JavaScript and HTML5.
+
+The JBrowse-in-Galaxy (JiG) tool was written to help build complex
+JBrowse installations straight from Galaxy. It allows you to build up a JBrowse instance without worrying
+about how to run the command line tools to format your data, and which
+options need to be supplied and where.
+
+The JBrowse-in-Galaxy tool has been rejected by `a Galaxy IUC
+<https://github.com/galaxyproject/tools-iuc/issues>`__ reviewer.
+It is maintained by https://github.com/fubar2, who can help you
+with missing features or bugs in the tool. For the record, he remains unconvinced by the reviewer's logic,
+and disturbed by the distinctly coercive approach to introducing new code,
+compared to the more usual method of providing a working PR.
+
+Options
+-------
+
+**Reference or Assembly**
+
+Choose either a built-in reference or select one from your history.
+
+Track coordinates and contig names *must* match this reference precisely
+or they will not display.
+
+**Track Groups** represent a set of tracks in a single category.
+
+Annotation Tracks
+-----------------
+
+GFF3/BED
+~~~~~~~~
+
+Standard feature tracks. They usually highlight genes, mRNAs and other features of interest along a genomic region.
+
+When these contain tens of millions of features, such as repeat regions from a VGP assembly, displaying one at a time leads
+to extremely slow loading times when a large region is in view, unless the "LinearPileupDisplay" display option is
+selected for that track in the styling options section. The default is LinearBasicDisplay, which shows all details and works
+well for relatively sparse bed files. A better option is to make a bigwig track using a set of windows based on the
+lengths of each assembly or reference contig.
+
+BAM Pileups
+~~~~~~~~~~~
+
+We support BAM files and can automatically generate SNP tracks based on
+that bam data.
+
+
+BlastXML
+~~~~~~~~
+
+JiG now supports both blastn and blastp datasets. JiG internally uses a
+blastXML to gapped GFF3 tool to convert your blastxml datasets into a
+format amenable to visualization in JBrowse. This tool is also
+available separately from the IUC on the toolshed.
+
+**Minimum Gap Size** reflects how long a gap must be before it becomes a
+real gap in the processed gff3 file. In the picture above, various sizes
+of gaps can be seen. If the minimum gap size was set much higher, say
+100nt, many of the smaller gaps would disappear, and the features on
+both sides would be merged into one, longer feature. This setting is
+inversely proportional to runtime and output file size. *Do not set this
+to a low value for large datasets*. By setting this number lower, you
+will have extremely large outputs and extremely long runtimes. The
+default was configured based off of the author's experience, but the
+author only works on small viruses. It is *strongly* recommended that
+you filter your blast results before display, e.g. picking out the top
+10 hits or so.
+
+**Protein blast search** option merely informs underlying tools that
+they should adjust feature locations by 3x.
+
+
+@ATTRIBUTION@
+]]></help>
+    <expand macro="citations"/>
+</tool>
--- a/jbrowse2.py	Fri Feb 16 00:04:37 2024 +0000
+++ b/jbrowse2.py	Wed Feb 21 02:57:30 2024 +0000
@@ -1119,6 +1119,7 @@
             # Unsanitize labels (element_identifiers are always sanitized by Galaxy)
             for key, value in mapped_chars.items():
                 track_human_label = track_human_label.replace(value, key)
+            track_human_label = track_human_label.replace(" ","_")
             outputTrackConfig = {
                 "category": category,
                 "style": {},
@@ -1141,13 +1142,13 @@
                     dataset_ext,
                     outputTrackConfig,
                 )
-            elif dataset_ext in ("hic",):
+            elif dataset_ext in ("hic","juicebox_hic"):
                 self.add_hic(
                     dataset_path,
                     outputTrackConfig,
                 )
             elif dataset_ext in ("cool", "mcool", "scool"):
-                hic_url = "%s_%d.hic" % (track_human_label, i)
+                hic_url = "%s_%d.juicebox_hic" % (track_human_label, i)
                 hic_path = os.path.join(self.outdir, hic_url)
                 self.subprocess_check_call(
                     [
--- a/jbrowse2.xml	Fri Feb 16 00:04:37 2024 +0000
+++ b/jbrowse2.xml	Wed Feb 21 02:57:30 2024 +0000
@@ -93,7 +93,7 @@
         <track cat="${tg.category}" format="${track.data_format.data_format_select}" visibility="${track.data_format.track_visibility}">
             #if $track.data_format.data_format_select != "sparql":
             <files>
-              #for $dataset in $track.data_format.annotation:
+              #set dataset = $track.data_format.annotation
               <trackFile path="${dataset}" ext="${dataset.ext}" label="${dataset.element_identifier}">
                 <metadata>
                   <dataset id="${__app__.security.encode_id($dataset.id)}" hid="${dataset.hid}"
@@ -128,7 +128,6 @@
                       />
                 </metadata>
               </trackFile>
-              #end for
             </files>
             #end if
 
@@ -138,17 +137,13 @@
             #if str($track.data_format.data_format_select) == "pileup":
                 <pileup>
                     <bam_indices>
-                        #for $dataset in $track.data_format.annotation:
                         <bam_index>${dataset.metadata.bam_index}</bam_index>
-                        #end for
                     </bam_indices>
                 </pileup>
             #else if str($track.data_format.data_format_select) == "cram":
                 <cram>
                     <cram_indices>
-                        #for $dataset in $track.data_format.annotation:
                         <cram_index>${dataset.metadata.cram_index}</cram_index>
-                        #end for
                     </cram_indices>
                 </cram>
             #else if str($track.data_format.data_format_select) == "blast":
@@ -234,11 +229,11 @@
                         <option value="pileup">BAM Pileup track</option>
                         <option value="wiggle">BigWig track</option>
                         <option value="blast">Blast XML track - converted to GFF</option>
-                        <option value="cool">cool/mcool/scool data in hdf5 data</option>
+                        <option value="cool">HiC as cool/mcool/scool format files</option>
                         <option value="cram">CRAM</option>
                         <option value="gene_calls" selected="true">GFF/GFF3/BED feature track</option>
-                        <option value="hic">HiC (compressed binary) track. Existing cool format must be converted to binary hic - hic_matrix will NOT work.</option>
-                        <option value="maf">Multiple alignment format track. Reference name must match the MAF name exactly to work correctly</option>
+                        <option value="hic">HiC as juicebox_hic format file. Tabular hic_matrix will NOT work.</option>
+                        <option value="maf">Multiple alignment format. Reference name must match the MAF name exactly to work correctly</option>
                         <option value="paf">PAF - approximate mapping positions between two set of sequences</option>
                         <option value="sparql">SPARQL</option>
                        <option value="vcf">VCF SNP annotation</option>
--- a/macros.xml	Fri Feb 16 00:04:37 2024 +0000
+++ b/macros.xml	Wed Feb 21 02:57:30 2024 +0000
@@ -504,7 +504,7 @@
 
 
     <xml name="input_conditional" token_label="Track Data" token_format="data">
-        <param label="@LABEL@" format="@FORMAT@" name="annotation" type="data" multiple="True"/>
+        <param label="@LABEL@" format="@FORMAT@" name="annotation" type="data" multiple="False"/>
     </xml>
     <xml name="citations">
         <citations>