Mercurial > repos > fubar > jbrowse2
changeset 35:15da358c3108 draft
planemo upload for repository https://github.com/usegalaxy-eu/temporary-tools/tree/master/jbrowse2 commit 80b849766a962bac4bd0bb8cb69c118cc42699cd-dirty
author | fubar |
---|---|
date | Wed, 28 Feb 2024 10:08:57 +0000 |
parents | 2893ef33fba9 |
children | 5f39f745682f |
files | autogenJB2.py autogenJB2.xml jbrowse2.py jbrowse2.xml macros.xml |
diffstat | 5 files changed, 211 insertions(+), 294 deletions(-) [+] |
line wrap: on
line diff
--- a/autogenJB2.py Sun Feb 25 04:18:53 2024 +0000 +++ b/autogenJB2.py Wed Feb 28 10:08:57 2024 +0000 @@ -1,65 +1,66 @@ import argparse -import re +import logging import sys -from jbrowse2 import jbrowseConnector as jbC +from jbrowse2 import JbrowseConnector as jbC + +logging.basicConfig(level=logging.debug) +log = logging.getLogger("jbrowse") -def makeDefaultLocation(jc, defLoc=None): - - refName = None - drdict = { - "reversed": False, - "assemblyName": jc.genome_name, - "start": 0, - "end": 100000, - } +def makeDefaultLocation(): - if defLoc: - loc_match = re.search(r"^([^:]+):([\d,]*)\.*([\d,]*)$", defLoc) - # allow commas like 100,000 but ignore as integer - if loc_match: - refName = loc_match.group(1) - drdict["refName"] = refName - if loc_match.group(2) > "": - drdict["start"] = int(loc_match.group(2).replace(",", "")) - if loc_match.group(3) > "": - drdict["end"] = int(loc_match.group(3).replace(",", "")) - else: - jc.logging.info( - "@@@ regexp could not match contig:start..end in the supplied location %s - please fix" - % defLoc - ) - else: - drdict["refName"] = jc.genome_firstcontig - if drdict.get("refName", None): - jc.logging.info("@@@ defaultlocation %s for default session" % drdict) - return drdict - else: - jc.logging.info("@@@ no contig name found for default session - please add one!") - return None + refName = jc.genome_firstcontig + defloc = "%s:100..10000" % refName + print ('defloc',defloc) + return defloc if __name__ == "__main__": parser = argparse.ArgumentParser(description="", epilog="") - parser.add_argument("--sessname", help="Session name", default="AutoJBrowse") + parser.add_argument("--sessName", help="Session name", default="AutoJBrowse") + parser.add_argument( + "--trackmeta", + help="Repeatable of 'filename, filext,filepath, ... ,' for JBrowse2 tracks", + default=[], + action="append", + ) parser.add_argument( - "--collection", - help="Collection of 'filepath, filename, filext' for JBrowse2", + "--referencemeta", + help="Repeatable 'filename, filext, filepath, ... ,' for JBrowse2 reference tracks", default=[], - action="extend", + action="append", + ) + parser.add_argument( + "--pafmeta", + help="Repeatable. Each is a 'filename, filext, filepath, ... ,' for a JBrowse2 paf track", + default=[], + action="append", ) - parser.add_argument("--version", "-V", action="version", version="%(prog)s 0.0.1") + parser.add_argument( + "--pafreferencemeta", + help="Repeatable. Each is a 'pafname,filepath,refname,filepath,refname....'. Every paf must have a corresponding one ", + default=[], + action="append", + ) + + parser.add_argument("--version", "-V", action="version", version="%(prog)s 2.10.2") + parser.add_argument("--outdir", help="Output directory", required=True) args = parser.parse_args() - sessName = args.sessname - flistList = [x.split(",") for x in args.collection] - if flistList: - listgenomes = [f for f in flistList if f[0].startswith("REFERENCE_")] + sessName = args.sessName + # --trackmeta $jbrowseme[$key],$jbrowseme[$key].ext,'$key' + trackList = [x.strip().split(",") for x in args.trackmeta if x > ''] + refList = [x.strip().split(",") for x in args.referencemeta if x > ''] + print("tracklist = %s\nreflist = %s" % (trackList,refList)) + if len(refList) > 0: + listgenomes = [f for f in refList if f[1] in ['fasta', 'fasta.gz']] + # assume no pafs here + print('genomes=%s' % listgenomes) if len(listgenomes) > 0: genome_paths = [ - x[1] for x in listgenomes + x[0] for x in listgenomes ] # expect genome_1_genomename.fasta etc - genome_names = [x[0].split("REFERENCE_")[1] for x in listgenomes] + genome_names = [x[2] for x in listgenomes] jc = jbC( outdir=args.outdir, genomes=[ @@ -67,11 +68,13 @@ "path": x, "meta": { "name": genome_names[i], + "dataset_dname": genome_names[i], }, } for i, x in enumerate(genome_paths) ], ) + sys.stdout.write('$$$ genome_paths:%s genome_names: %s' % (genome_paths,genome_names)) jc.process_genomes() default_session_data = { "visibility": { @@ -81,42 +84,49 @@ "style": {}, "style_labels": {}, } - defLoc = makeDefaultLocation(jc) - listtracks = [f for f in flistList if not f[0].startswith("REFERENCE_")] + + listtracks = trackList # foo.paf must have a foo_paf.fasta or fasta.gz to match - tnames = [x[0] for x in listtracks] - texts = [x[2] for x in listtracks] + tnames = [x[2] for x in listtracks] + texts = [x[1] for x in listtracks] for i, track in enumerate(listtracks): - if track[2] == "paf": - refname = track[0] + "_paf.fasta" - refdat = [x[1] for x in listtracks if x[0] == refname] + tpath, trext, trackname = track[:3] + if trext == "paf": + refname = trackname + "_paf.fasta" + refdat = [x[2] for x in listtracks if x[2] == refname] if not refdat: jc.logging.warn( "!! No reference file %s corresponding to paf file %s found. Not building - there must be a corresponding fasta for each paf" - % (refname, tnames[i]) + % (refname, trackname) ) sys.exit(3) else: track_conf = { "conf": { "options": { - "paf": {"genome": refdat, "genome_label": track[0]} + "paf": {"genome": refdat, "genome_label": trackname} } } } + elif trext == 'bam': + track_conf["conf"] = {"options": {"bam": {"bam_indices": {"bam_index": track[3]}}}} + elif trext == 'cram': + track_conf["conf"] = {"options": {"cram": {"cram_indices": {"cram_index": track[3]}}}} else: track_conf = {} - track_conf["format"] = track[2] - track_conf["name"] = track[0] - track_conf["label"] = track[0] - track_conf["trackfiles"] = [] + track_conf["format"] = trext + track_conf["name"] = trackname + track_conf["label"] = trackname + track_conf["trackfiles"] = [(tpath, trext, trackname,{}),] + track_conf["category"] = "Autogenerated" keys = jc.process_annotations(track_conf) if keys: for key in keys: - default_session_data["visibility"][ - track.attrib.get("visibility", "default_off") - ].append(key) + if trext in ["bigwig", "gff3", "gff", "vcf", "maf",]: + default_session_data["visibility"]["default_on"].append(key) + else: + default_session_data["visibility"]["default_off"].append(key) # if track_conf.get("style", None): # default_session_data["style"][key] = track_conf[ # "style" @@ -140,8 +150,10 @@ else: jc.config_json["tracks"] = jc.tracksToAdd jc.write_config() - defaultData = {"defaultLocation": defLoc, "session_name": sessName} - jc.add_default_session(defaultData) + defLoc = makeDefaultLocation() + default_session_data.update({"defaultLocation": defLoc, "session_name": sessName}) + track_conf.update(default_session_data) + jc.add_default_session(default_session_data) # jc.text_index() not sure what broke here. else: - sys.stderr.write("!! empty collection supplied - nothing to process") + print("!! empty collection supplied - nothing to process")
--- a/autogenJB2.xml Sun Feb 25 04:18:53 2024 +0000 +++ b/autogenJB2.xml Wed Feb 28 10:08:57 2024 +0000 @@ -1,5 +1,5 @@ - <tool id="autogenjb2" name="autogenjb2" version="2.10.0_0" profile="22.05"> - <description>Files to JBrowse2</description> + <tool id="autogenjb2" name="autogenjb2" version="2.10.2_0" profile="22.05"> + <description>Track collection to JBrowse2</description> <macros> <import>macros.xml</import> </macros> @@ -11,20 +11,35 @@ <version_command>python '${__tool_directory__}/autogenJB2.py' --version</version_command> <command detect_errors="aggressive"><![CDATA[ python '$__tool_directory__/autogenJB2.py' -#for $key in $jbrowseme.keys(): ---collection '$key,$jbrowseme[$key],$jbrowseme[$key].ext' +#for $key in $autoCollection.keys(): + #if $autoCollection[$key].ext == 'fasta': + --referencemeta '$autoCollection[$key],$autoCollection[$key].ext,$key' + #else if $autoCollection[$key].ext in ['bed', 'bigwig', 'cool', 'gff', 'gff3', 'hic', 'maf', 'mcool', 'scool', 'vcf'] + --trackmeta '$autoCollection[$key],$autoCollection[$key].ext,$key' + #else if $autoCollection[$key].ext in ['bam',] + --trackmeta '$autoCollection[$key],$autoCollection[$key].ext,$key,$autoCollection[$key].metadata.bam_index' + #else if $autoCollection[$key].ext in ['cram',] + --trackmeta '$autoCollection[$key],$autoCollection[$key].ext,$key,$autoCollection[$key].metadata.cram_index' + #end if #end for ---sessName "Autogen JBrowse" +--outdir '$output.files_path' +--sessName "Autogen JBrowse" && + + cp '$output.files_path/index.html' '$output' + +## Ugly testing hack since I cannot get <extra_files> to test the files I want to test. Hmph. + + ]]></command> <inputs> <param - label="Collection of files specially named to become tracks" - name="jbrowseme" + label="Collection of files to become tracks - they must have short, informative names" + name="autoCollection" type="data_collection"> </param> </inputs> <outputs> - <data format="html" name="output" label="JBrowse2"/> + <data format="html" name="output" label="AutoJBrowse2"/> </outputs> <help><![CDATA[
--- a/jbrowse2.py Sun Feb 25 04:18:53 2024 +0000 +++ b/jbrowse2.py Wed Feb 28 10:08:57 2024 +0000 @@ -1062,59 +1062,6 @@ self.tracksToAdd.append(trackDict) self.trackIdlist.append(tId) - def add_hicab(self, data, trackData, hicOpts, **kwargs): - rel_dest = os.path.join("data", trackData["label"] + ".hic") - dest = os.path.join(self.outdir, rel_dest) - - self.symlink_or_copy(os.path.realpath(data), dest) - - self._add_track( - trackData["label"], - trackData["key"], - trackData["category"], - rel_dest, - config={}, - ) - - def add_sparql(self, url, query, query_refnames, trackData): - - json_track_data = { - "type": "FeatureTrack", - "trackId": id, - "name": trackData["label"], - "adapter": { - "type": "SPARQLAdapter", - "endpoint": {"uri": url, "locationType": "UriLocation"}, - "queryTemplate": query, - }, - "category": [trackData["category"]], - "assemblyNames": [self.genome_name], - } - - if query_refnames: - json_track_data["adapter"]["refNamesQueryTemplate"]: query_refnames - - self.subprocess_check_call( - [ - "jbrowse", - "add-track-json", - "--target", - os.path.join(self.outdir, "data"), - json_track_data, - ] - ) - - # Doesn't work as of 1.6.4, might work in the future - # self.subprocess_check_call([ - # 'jbrowse', 'add-track', - # '--trackType', 'sparql', - # '--name', trackData['label'], - # '--category', trackData['category'], - # '--target', os.path.join(self.outdir, 'data'), - # '--trackId', id, - # '--config', '{"queryTemplate": "%s"}' % query, - # url]) - def process_annotations(self, track): category = track["category"].replace("__pd__date__pd__", TODAY) for i, ( @@ -1190,7 +1137,7 @@ outputTrackConfig, ) elif dataset_ext == "bam": - real_indexes = track["conf"]["options"]["pileup"]["bam_indices"][ + real_indexes = track["conf"]["options"]["bam"]["bam_indices"][ "bam_index" ] if not isinstance(real_indexes, list): @@ -1199,7 +1146,7 @@ self.add_bam( dataset_path, outputTrackConfig, - track["conf"]["options"]["pileup"], + track["conf"]["options"]["bam"], bam_index=real_indexes[i], ) elif dataset_ext == "cram": @@ -1475,16 +1422,6 @@ metadata, ) ) - else: - # For tracks without files (rest, sparql) - track_conf["trackfiles"].append( - ( - "", # N/A, no path for rest or sparql - track.attrib["format"], - track.find("options/label").text, - {}, - ) - ) if is_multi_bigwig: metadata = metadata_from_node(x.find("metadata"))
--- a/jbrowse2.xml Sun Feb 25 04:18:53 2024 +0000 +++ b/jbrowse2.xml Wed Feb 28 10:08:57 2024 +0000 @@ -93,107 +93,110 @@ <tracks> #for $tg in $track_groups: #for $track in $tg.data_tracks: - <track cat="${tg.category}" format="${track.data_format.data_format_select}" visibility="${track.data_format.track_visibility}"> - #if $track.data_format.data_format_select != "sparql": - <files> - #set dataset = $track.data_format.annotation - <trackFile path="${dataset}" ext="${dataset.ext}" label="${dataset.name}"> - <metadata> - <dataset id="${__app__.security.encode_id($dataset.id)}" hid="${dataset.hid}" - size="${dataset.get_size(nice_size=True)}" - edam_format="${dataset.datatype.edam_format}" - file_ext="${dataset.ext}" /> - <history id="${__app__.security.encode_id($dataset.history_id)}" - #if $dataset.history.user: - user_email="${dataset.history.user.email}" - user_id="${dataset.history.user_id}" - display_name="${dataset.history.get_display_name()}"/> - #else - user_email="anonymous" - user_id="-1" - display_name="Unnamed History"/> - #end if - <metadata - #for (key, value) in $dataset.get_metadata().items(): - #if "_types" not in $key and $value is not None and len(str($value)) < 5000: - #if isinstance($value, list): - #set value_str = "[%s]" % ','.join([str(val) for val in value]) - ${key}="$value_str" - #else - ${key}="${value}" - #end if - #end if - #end for - /> - <tool - tool_id="${dataset.creating_job.tool_id}" - tool_version="${dataset.creating_job.tool_version}" - /> - </metadata> - </trackFile> - </files> - #else - <track cat="${tg.category}" format="sparql" visibility="off"> - #end if + <track cat="${tg.category}" format="${track.data_format.data_format_select}" visibility="${track.data_format.track_visibility}"> + #if $track.data_format.data_format_select != "sparql": + #for $dataset in $track.data_format.annotation: + <files> + <trackFile path="${dataset}" ext="${dataset.ext}" label="${dataset.name}"> + <metadata> + <dataset id="${__app__.security.encode_id($dataset.id)}" hid="${dataset.hid}" + size="${dataset.get_size(nice_size=True)}" + edam_format="${dataset.datatype.edam_format}" + file_ext="${dataset.ext}" /> + <history id="${__app__.security.encode_id($dataset.history_id)}" + #if $dataset.history.user: + user_email="${dataset.history.user.email}" + user_id="${dataset.history.user_id}" + display_name="${dataset.history.get_display_name()}"/> + #else + user_email="anonymous" + user_id="-1" + display_name="Unnamed History"/> + #end if + <metadata + #for (key, value) in $dataset.get_metadata().items(): + #if "_types" not in $key and $value is not None and len(str($value)) < 5000: + #if isinstance($value, list): + #set value_str = "[%s]" % ','.join([str(val) for val in value]) + ${key}="$value_str" + #else + ${key}="${value}" + #end if + #end if + #end for + /> + <tool + tool_id="${dataset.creating_job.tool_id}" + tool_version="${dataset.creating_job.tool_version}" + /> + </metadata> + </trackFile> + </files> + #end for + #else + <track cat="${tg.category}" format="sparql" visibility="off"> + #end if - <options> + <options> - #if str($track.data_format.data_format_select) == "pileup": - <pileup> - <bam_indices> - <bam_index>${dataset.metadata.bam_index}</bam_index> - </bam_indices> - </pileup> - #else if str($track.data_format.data_format_select) == "cram": - <cram> - <cram_indices> - <cram_index>${dataset.metadata.cram_index}</cram_index> - </cram_indices> - </cram> - #else if str($track.data_format.data_format_select) == "blast": - <blast> - #if str($track.data_format.blast_parent) != "": - <parent>${track.data_format.blast_parent}</parent> - #end if - <protein>${track.data_format.is_protein}</protein> - <min_gap>${track.data_format.min_gap}</min_gap> - </blast> - #else if str($track.data_format.data_format_select) == "gene_calls": - <gff> - #if $track.data_format.match_part.match_part_select == "true": - <match>${track.data_format.match_part.name}</match> - #end if - </gff> - #else if str($track.data_format.data_format_select) == "paf": - <paf> - <genome> - #for gnome in $track.data_format.synteny_genome: - $gnome, - #end for - </genome> - <genome_label> - #for gnome in $track.data_format.synteny_genome: - $gnome.name, - #end for - </genome_label> - </paf> - #else if str($track.data_format.data_format_select) == "hic": - <hic> - </hic> - #else if str($track.data_format.data_format_select) == "cool": - <cool> - </cool> - #else if str($track.data_format.data_format_select) == "sparql": - <label>${track.data_format.label}</label> - <sparql> - <url>${track.data_format.url}</url> - <query>${track.data_format.query}</query> - <query_refnames>${track.data_format.query_refnames}</query_refnames> - </sparql> - #end if - </options> - </track> - #end for + #if str($track.data_format.data_format_select) == "bam": + <pileup> + #for $dataset in $track.data_format.annotation: + <bam_index>${dataset.metadata.bam_index}</bam_index> + #end for + </pileup> + #else if str($track.data_format.data_format_select) == "cram": + <cram> + <cram_indices> + #for $dataset in $track.data_format.annotation: + <cram_index>${dataset.metadata.cram_index}</cram_index> + #end for + </cram_indices> + </cram> + #else if str($track.data_format.data_format_select) == "blastxml": + <blast> + #if str($track.data_format.blast_parent) != "": + <parent>${track.data_format.blast_parent}</parent> + #end if + <protein>${track.data_format.is_protein}</protein> + <min_gap>${track.data_format.min_gap}</min_gap> + </blast> + #else if str($track.data_format.data_format_select) == "gene_calls": + <gff> + #if $track.data_format.match_part.match_part_select == "true": + <match>${track.data_format.match_part.name}</match> + #end if + </gff> + #else if str($track.data_format.data_format_select) == "paf": + <paf> + <genome> + #for gnome in $track.data_format.synteny_genome: + $gnome, + #end for + </genome> + <genome_label> + #for gnome in $track.data_format.synteny_genome: + $gnome.name, + #end for + </genome_label> + </paf> + #else if str($track.data_format.data_format_select) == "hic": + <hic> + </hic> + #else if str($track.data_format.data_format_select) == "cool": + <cool> + </cool> + #else if str($track.data_format.data_format_select) == "sparql": + <label>${track.data_format.label}</label> + <sparql> + <url>${track.data_format.url}</url> + <query>${track.data_format.query}</query> + <query_refnames>${track.data_format.query_refnames}</query_refnames> + </sparql> + #end if + </options> + </track> + #end for #end for </tracks> </root> @@ -238,19 +241,18 @@ <repeat name="data_tracks" title="Annotation Track"> <conditional name="data_format" label="Track Data Selection Options"> <param type="select" label="Track Type" name="data_format_select"> - <option value="pileup">BAM Pileup track</option> - <option value="wiggle">BigWig track</option> - <option value="blast">Blast XML track - converted to GFF</option> + <option value="bam">BAM Pileup track</option> + <option value="bigwig">BigWig track</option> + <option value="blastxml">Blast XML track - converted to GFF</option> <option value="cool">HiC as cool/mcool/scool format files</option> <option value="cram">CRAM</option> <option value="gene_calls" selected="true">GFF/GFF3/BED feature track</option> <option value="hic">HiC as juicebox_hic format file. Tabular hic_matrix will NOT work.</option> <option value="maf">Multiple alignment format. Reference name must match the MAF name exactly to work correctly</option> <option value="paf">PAF - approximate mapping positions between two set of sequences</option> - <option value="sparql">SPARQL</option> <option value="vcf">VCF SNP</option> </param> - <when value="blast"> + <when value="blastxml"> <expand macro="input_conditional" label="BlastXML Track Data" format="blastxml" /> <param label="Features used in Blast Search" @@ -297,7 +299,7 @@ </conditional> <expand macro="track_visibility" /> </when> - <when value="pileup"> + <when value="bam"> <expand macro="input_conditional" label="BAM Track Data" format="bam" /> <expand macro="track_visibility" /> </when> @@ -309,12 +311,12 @@ <expand macro="input_conditional" label="MAF Track Data" format="maf" /> <expand macro="track_visibility" /> </when> - <when value="wiggle"> + <when value="bigwig"> <expand macro="input_conditional" label="BigWig Track Data" format="bigwig" /> <expand macro="track_visibility" /> </when> <when value="paf"> - <param label="Comparison genome sequence" help="Paf from these as the references, using the real reference as the reads to map" + <param label="Comparison genome sequence" help="Paf from these as the reference(s), using the common reference as the reads to map" format="fasta" name="synteny_genome" type="data" @@ -331,55 +333,6 @@ <expand macro="input_conditional" label="HiC data in cool/mcool/scool format" format="cool,mcool,scool" /> <expand macro="track_visibility" /> </when> - <when value="sparql"> - <param type="text" label="SPARQL Server URL" name="url" /> - <param type="text" label="Track Label" name="label" value="SPARQL Genes" /> - <param type="text" label="SPARQL Query" name="query" area="true"> - <sanitizer> - <mapping initial="galaxy.util.mapped_chars"> - <add source=" " target=" " /> - <add source=">" target="__gt__" /> - <add source="<" target="__lt__" /> - </mapping> - <valid initial="default"> - <add value="|" /> - <add value="#" /> - <add value="{"/> - <add value="}"/> - <add value="!"/> - <add value="?"/> - <add value="&"/> - <add value="+"/> - <add value="="/> - <add value="'"/> - <add value='"'/> - </valid> - </sanitizer> - </param> - <param type="text" label="SPARQL reference names query" help="This query should return a line for each reference name in a `refName` column" name="query_refnames" area="true"> - <sanitizer> - <mapping initial="galaxy.util.mapped_chars"> - <add source=" " target=" " /> - <add source=">" target="__gt__" /> - <add source="<" target="__lt__" /> - </mapping> - <valid initial="default"> - <add value="|" /> - <add value="#" /> - <add value="{"/> - <add value="}"/> - <add value="!"/> - <add value="?"/> - <add value="&"/> - <add value="+"/> - <add value="="/> - <add value="'"/> - <add value='"'/> - </valid> - </sanitizer> - </param> - <expand macro="track_visibility" /> - </when> </conditional> </repeat> </repeat>
--- a/macros.xml Sun Feb 25 04:18:53 2024 +0000 +++ b/macros.xml Wed Feb 28 10:08:57 2024 +0000 @@ -505,7 +505,7 @@ </xml> <xml name="input_conditional" token_label="Track Data" token_format="data"> - <param label="@LABEL@" format="@FORMAT@" name="annotation" type="data" multiple="False"/> + <param label="@LABEL@" format="@FORMAT@" name="annotation" type="data" multiple="True"/> </xml> <xml name="citations"> <citations>