changeset 31:cb4b32ca9968 draft

planemo upload for repository https://github.com/usegalaxy-eu/temporary-tools/tree/master/jbrowse2 commit 48bc917d34af182e9158915862c8a35723660919-dirty
author fubar
date Fri, 23 Feb 2024 07:15:42 +0000
parents 8f02a84ee278
children fe48ed5810bd
files jbrowse2.py jbrowse2.xml macros.xml
diffstat 3 files changed, 161 insertions(+), 130 deletions(-) [+]
line wrap: on
line diff
--- a/jbrowse2.py	Wed Feb 21 02:57:30 2024 +0000
+++ b/jbrowse2.py	Fri Feb 23 07:15:42 2024 +0000
@@ -450,22 +450,24 @@
             if len(genome_name.split()) > 1:
                 genome_name = genome_name.split()[0]
                 # spaces and cruft break scripts when substituted
-            fapath = genome_node["path"]
-            assem = self.make_assembly(fapath, genome_name)
-            assemblies.append(assem)
-            self.genome_names.append(genome_name)
-            if self.genome_name is None:
-                self.genome_name = (
-                    genome_name  # first one for all tracks - other than paf
-                )
-                self.genome_firstcontig = None
-                fl = open(fapath, "r").readline().strip().split(">")
-                if len(fl) > 1:
-                    fl = fl[1]
-                    if len(fl.split()) > 1:
-                        self.genome_firstcontig = fl.split()[0].strip()
-                    else:
-                        self.genome_firstcontig = fl
+            if genome_name not  in self.genome_names:
+                # ignore dupes - can have multiple pafs with same references?
+                fapath = genome_node["path"]
+                assem = self.make_assembly(fapath, genome_name)
+                assemblies.append(assem)
+                self.genome_names.append(genome_name)
+                if self.genome_name is None:
+                    self.genome_name = (
+                        genome_name  # first one for all tracks - other than paf
+                    )
+                    self.genome_firstcontig = None
+                    fl = open(fapath, "r").readline().strip().split(">")
+                    if len(fl) > 1:
+                        fl = fl[1]
+                        if len(fl.split()) > 1:
+                            self.genome_firstcontig = fl.split()[0].strip()
+                        else:
+                            self.genome_firstcontig = fl
         if self.config_json.get("assemblies", None):
             self.config_json["assemblies"] += assemblies
         else:
@@ -1013,26 +1015,31 @@
     def add_paf(self, data, trackData, pafOpts, **kwargs):
         tname = trackData["name"]
         tId = trackData["label"]
-        pgname = pafOpts["genome_label"]
-        if len(pgname.split()) > 1:
-            pgname = pgname.split()[
-                0
-            ]  # trouble from spacey names in command lines avoidance
-        asstrack = self.make_assembly(pafOpts["genome"], pgname)
-        self.genome_names.append(pgname)
-        if self.config_json.get("assemblies", None):
-            self.config_json["assemblies"].append(asstrack)
-        else:
-            self.config_json["assemblies"] = [
-                asstrack,
-            ]
+        pgnames = [x.strip() for x in pafOpts["genome_label"].split(',')]
+        pgpaths =  [x.strip() for x in pafOpts["genome"].split(',')]
+        passnames = [self.genome_name] # always first
+        for i, gname in enumerate(pgnames):
+            if not gname:
+                continue  # trailing comma in the generated XML leaves an empty entry
+            # trouble from spacey names in command lines avoidance - keep first token
+            gname = gname.split()[0]
+            if gname not in passnames:
+                passnames.append(gname)  # every paf genome must be in assemblyNames
+            if gname not in self.genome_names:
+                # register an assembly once only - eg for duplicates among pafs.
+                # pgpaths[i] pairs with pgnames[i]; both come from the same XML loop
+                asstrack = self.make_assembly(pgpaths[i], gname)
+                self.genome_names.append(gname)
+                if self.config_json.get("assemblies", None):
+                    self.config_json["assemblies"].append(asstrack)
+                else:
+                    self.config_json["assemblies"] = [asstrack]
         url = "%s.paf" % (trackData["label"])
         dest = "%s/%s" % (self.outdir, url)
         self.symlink_or_copy(os.path.realpath(data), dest)
         trackDict = {
             "type": "SyntenyTrack",
             "trackId": tId,
-            "assemblyNames": [self.genome_name, pgname],
+            "assemblyNames": passnames,
             "name": tname,
             "adapter": {
                 "type": "PAFAdapter",
@@ -1533,6 +1540,9 @@
     default_session_data["session_name"] = root.find(
         "metadata/general/session_name"
     ).text
+    jc.zipOut =  root.find(
+        "metadata/general/zipOut"
+    ).text == "true"
     general_data = {
         "analytics": root.find("metadata/general/analytics").text,
         "primary_color": root.find("metadata/general/primary_color").text,
--- a/jbrowse2.xml	Wed Feb 21 02:57:30 2024 +0000
+++ b/jbrowse2.xml	Fri Feb 23 07:15:42 2024 +0000
@@ -17,9 +17,12 @@
 python '$__tool_directory__/jbrowse2.py'
 --outdir '$output.files_path'
 --xml '$trackxml' &&
-
-cp '$output.files_path/index.html' '$output'
-
+#if $jbgen.zipOut == "true":
+    (cd '$output.files_path' && zip -r - . ) > JBrowse2.zip &&
+    mv JBrowse2.zip '$output'
+#else
+  cp '$output.files_path/index.html' '$output'
+#end if
 ## Ugly testing hack since I cannot get <extra_files> to test the files I want to test. Hmph.
 #if str($uglyTestingHack) == "enabled":
  &&   cp '$trackxml' '$output'
@@ -77,6 +80,7 @@
         </genomes>
         <general>
             <defaultLocation>${jbgen.defaultLocation}</defaultLocation>
+            <zipOut>${jbgen.zipOut}</zipOut>
             <analytics>${jbgen.enableAnalytics}</analytics>
             <primary_color>${jbgen.primary_color}</primary_color>
             <secondary_color>${jbgen.secondary_color}</secondary_color>
@@ -89,100 +93,109 @@
     </metadata>
     <tracks>
         #for $tg in $track_groups:
-        #for $track in $tg.data_tracks:
-        <track cat="${tg.category}" format="${track.data_format.data_format_select}" visibility="${track.data_format.track_visibility}">
-            #if $track.data_format.data_format_select != "sparql":
-            <files>
-              #set dataset = $track.data_format.annotation
-              <trackFile path="${dataset}" ext="${dataset.ext}" label="${dataset.element_identifier}">
-                <metadata>
-                  <dataset id="${__app__.security.encode_id($dataset.id)}" hid="${dataset.hid}"
-                      size="${dataset.get_size(nice_size=True)}"
-                      edam_format="${dataset.datatype.edam_format}"
-                      file_ext="${dataset.ext}" />
-                  <history id="${__app__.security.encode_id($dataset.history_id)}"
-                      #if $dataset.history.user:
-                      user_email="${dataset.history.user.email}"
-                      user_id="${dataset.history.user_id}"
-                      display_name="${dataset.history.get_display_name()}"/>
-                      #else
-                      user_email="anonymous"
-                      user_id="-1"
-                      display_name="Unnamed History"/>
-                      #end if
-                  <metadata
-                    #for (key, value) in $dataset.get_metadata().items():
-                    #if "_types" not in $key and $value is not None and len(str($value)) < 5000:
-                      #if isinstance($value, list):
-                        #set value_str = "[%s]" % ','.join([str(val) for val in value])
-                        ${key}="$value_str"
-                      #else
-                        ${key}="${value}"
-                      #end if
-                    #end if
-                    #end for
-                      />
-                  <tool
-                      tool_id="${dataset.creating_job.tool_id}"
-                      tool_version="${dataset.creating_job.tool_version}"
-                      />
-                </metadata>
-              </trackFile>
-            </files>
-            #end if
+            #for $track in $tg.data_tracks:
+            <track cat="${tg.category}" format="${track.data_format.data_format_select}" visibility="${track.data_format.track_visibility}">
+                #if $track.data_format.data_format_select != "sparql":
+                    <files>
+                    #set dataset = $track.data_format.annotation
+                      <trackFile path="${dataset}" ext="${dataset.ext}" label="${dataset.element_identifier}">
+                        <metadata>
+                          <dataset id="${__app__.security.encode_id($dataset.id)}" hid="${dataset.hid}"
+                              size="${dataset.get_size(nice_size=True)}"
+                              edam_format="${dataset.datatype.edam_format}"
+                              file_ext="${dataset.ext}" />
+                          <history id="${__app__.security.encode_id($dataset.history_id)}"
+                              #if $dataset.history.user:
+                              user_email="${dataset.history.user.email}"
+                              user_id="${dataset.history.user_id}"
+                              display_name="${dataset.history.get_display_name()}"/>
+                              #else
+                              user_email="anonymous"
+                              user_id="-1"
+                              display_name="Unnamed History"/>
+                              #end if
+                          <metadata
+                            #for (key, value) in $dataset.get_metadata().items():
+                            #if "_types" not in $key and $value is not None and len(str($value)) < 5000:
+                              #if isinstance($value, list):
+                                #set value_str = "[%s]" % ','.join([str(val) for val in value])
+                                ${key}="$value_str"
+                              #else
+                                ${key}="${value}"
+                              #end if
+                            #end if
+                            #end for
+                              />
+                          <tool
+                              tool_id="${dataset.creating_job.tool_id}"
+                              tool_version="${dataset.creating_job.tool_version}"
+                              />
+                        </metadata>
+                      </trackFile>
+                    </files>
+                #else
+                    ## sparql tracks have no history dataset, so no files element is written; the enclosing track tag is already open
+                #end if
 
-            <options>
-
+                <options>
 
-            #if str($track.data_format.data_format_select) == "pileup":
-                <pileup>
-                    <bam_indices>
-                        <bam_index>${dataset.metadata.bam_index}</bam_index>
-                    </bam_indices>
-                </pileup>
-            #else if str($track.data_format.data_format_select) == "cram":
-                <cram>
-                    <cram_indices>
-                        <cram_index>${dataset.metadata.cram_index}</cram_index>
-                    </cram_indices>
-                </cram>
-            #else if str($track.data_format.data_format_select) == "blast":
-                <blast>
-                  #if str($track.data_format.blast_parent) != "":
-                    <parent>${track.data_format.blast_parent}</parent>
-                  #end if
-                    <protein>${track.data_format.is_protein}</protein>
-                    <min_gap>${track.data_format.min_gap}</min_gap>
-                </blast>
-            #else if str($track.data_format.data_format_select) == "gene_calls":
-                <gff>
-                  #if $track.data_format.match_part.match_part_select == "true":
-                    <match>${track.data_format.match_part.name}</match>
-                  #end if
-                </gff>
-            #else if str($track.data_format.data_format_select) == "paf":
-                <paf>
-                    <genome>${track.data_format.synteny_genome}</genome>
-                    <genome_label>${track.data_format.synteny_genome.name}</genome_label>
-                </paf>
-            #else if str($track.data_format.data_format_select) == "hic":
-                <hic>
-                </hic>
-            #else if str($track.data_format.data_format_select) == "cool":
-                <cool>
-                </cool>
-            #else if str($track.data_format.data_format_select) == "sparql":
-                <label>${track.data_format.label}</label>
-                <sparql>
-                    <url>${track.data_format.url}</url>
-                    <query>${track.data_format.query}</query>
-                    <query_refnames>${track.data_format.query_refnames}</query_refnames>
-                </sparql>
-            #end if
-            </options>
-        </track>
+                #if str($track.data_format.data_format_select) == "pileup":
+                    <pileup>
+                        <bam_indices>
+                            <bam_index>${dataset.metadata.bam_index}</bam_index>
+                        </bam_indices>
+                    </pileup>
+                #else if str($track.data_format.data_format_select) == "cram":
+                    <cram>
+                        <cram_indices>
+                            <cram_index>${dataset.metadata.cram_index}</cram_index>
+                        </cram_indices>
+                    </cram>
+                #else if str($track.data_format.data_format_select) == "blast":
+                    <blast>
+                      #if str($track.data_format.blast_parent) != "":
+                        <parent>${track.data_format.blast_parent}</parent>
+                      #end if
+                        <protein>${track.data_format.is_protein}</protein>
+                        <min_gap>${track.data_format.min_gap}</min_gap>
+                    </blast>
+                #else if str($track.data_format.data_format_select) == "gene_calls":
+                    <gff>
+                      #if $track.data_format.match_part.match_part_select == "true":
+                        <match>${track.data_format.match_part.name}</match>
+                      #end if
+                    </gff>
+                #else if str($track.data_format.data_format_select) == "paf":
+                    <paf>
+                        <genome>
+                        #for gnome in $track.data_format.synteny_genome:
+                            $gnome,
+                        #end for
+                        </genome>
+                        <genome_label>
+                        #for gnome in $track.data_format.synteny_genome:
+                            $gnome.name,
+                        #end for
+                        </genome_label>
+                    </paf>
+                #else if str($track.data_format.data_format_select) == "hic":
+                    <hic>
+                    </hic>
+                #else if str($track.data_format.data_format_select) == "cool":
+                    <cool>
+                    </cool>
+                #else if str($track.data_format.data_format_select) == "sparql":
+                    <label>${track.data_format.label}</label>
+                    <sparql>
+                        <url>${track.data_format.url}</url>
+                        <query>${track.data_format.query}</query>
+                        <query_refnames>${track.data_format.query_refnames}</query_refnames>
+                    </sparql>
+                #end if
+                </options>
+            </track>
         #end for
-        #end for
+    #end for
     </tracks>
 </root>
 ]]></configfile>
@@ -236,7 +249,7 @@
                         <option value="maf">Multiple alignment format. Reference name must match the MAF name exactly to work correctly</option>
                         <option value="paf">PAF - approximate mapping positions between two set of sequences</option>
                         <option value="sparql">SPARQL</option>
-                       <option value="vcf">VCF SNP annotation</option>
+                       <option value="vcf">VCF SNP</option>
                     </param>
                     <when value="blast">
                         <expand macro="input_conditional" label="BlastXML Track Data" format="blastxml" />
@@ -302,11 +315,12 @@
                         <expand macro="track_visibility" />
                     </when>
                     <when value="paf">
-                        <param label="Comparison genome sequence" help="Paf from this as the reference, using the real reference as the reads to map"
+                        <param label="Comparison genome sequence" help="Paf from these as the references, using the real reference as the reads to map"
                             format="fasta"
                             name="synteny_genome"
-                            type="data" />
-                        <expand macro="input_conditional" label="Synteny data" format="paf" help="Make paf with minimap2 mapping real reference onto desired syntenic reference"/>
+                            type="data"
+                            multiple="True"/>
+                        <expand macro="input_conditional" label="Synteny data" format="paf" help="Make paf with mashmap or minimap2 mapping real reference onto desired syntenic references"/>
                         <expand macro="track_visibility" />
                     </when>
 
@@ -376,7 +390,11 @@
         <param type="hidden" name="uglyTestingHack" value="" />
     </inputs>
     <outputs>
-        <data format="html" name="output" label="JBrowse2 on $reference_genome.genome.name"/>
+        <data format="html" name="output" label="JBrowse2 on $reference_genome.genome.name">
+            <change_format> <!-- zipOut is defined inside the jbgen section, so the section prefix is required -->
+                <when input="jbgen.zipOut" value="true" format="zip" />
+            </change_format>
+        </data>
     </outputs>
     <tests>
          <test>
@@ -405,6 +423,7 @@
                 </assert_contents>
             </output>
         </test>
+
         <test>
             <param name="reference_genome|genome_type_select" value="history"/>
             <param name="reference_genome|genome" value="merlin.fa"/>
@@ -739,6 +758,7 @@
                 </assert_contents>
             </output>
         </test>
+
         <!-- TODO add a synteny test -->
         <!-- TODO add a bam and a cram test -->
         <!-- TODO add an hic test -->
--- a/macros.xml	Wed Feb 21 02:57:30 2024 +0000
+++ b/macros.xml	Fri Feb 23 07:15:42 2024 +0000
@@ -140,6 +140,8 @@
 
     <xml name="general_options">
         <section name="jbgen" title="General JBrowse Options [Advanced]" expanded="false">
+            <param label="Create a zip archive for downloading rather than viewing " name="zipOut" help="Default is to make an interactive browser appear when the 'eye' icon is activated"
+            type="boolean" checked="false" truevalue="true" falsevalue="false" />
             <param label="Subset to display to new users" type="text" name="defaultLocation" value="" help="Initial subset to be shown for users who have never visited the browser before. Example: 'ctgA:1234..5678'"/>
             <param label="Session name" type="text" name="session_name" value="New session" help="Displayed at the top of the window"/>
             <param label="Enable analytics" help="Will send usage data to Google Analytics, see https://github.com/GMOD/jbrowse-components/issues/1166" name="enableAnalytics" type="boolean" checked="false" truevalue="true" falsevalue="false" />
@@ -502,7 +504,6 @@
         </sanitizer>
     </xml>
 
-
     <xml name="input_conditional" token_label="Track Data" token_format="data">
         <param label="@LABEL@" format="@FORMAT@" name="annotation" type="data" multiple="False"/>
     </xml>