# HG changeset patch
# User iuc
# Date 1451588323 18000
# Node ID 7342f467507b5debff2db14025b715f51f1b47f1
# Parent b6a0e126dbeec2bfa3d0d962fa4e4713a938c6d0
Uploaded v0.4 of JBrowse
diff -r b6a0e126dbee -r 7342f467507b blastxml_to_gapped_gff3.py
--- a/blastxml_to_gapped_gff3.py Tue Jun 23 12:10:15 2015 -0400
+++ b/blastxml_to_gapped_gff3.py Thu Dec 31 13:58:43 2015 -0500
@@ -28,6 +28,12 @@
blast_records = NCBIXML.parse(blastxml)
records = []
for record in blast_records:
+ # http://www.sequenceontology.org/browser/release_2.4/term/SO:0000343
+ match_type = { # Currently we can only handle BLASTN, BLASTP
+ 'BLASTN': 'nucleotide_match',
+ 'BLASTP': 'protein_match',
+ }.get(record.application, 'match')
+
rec = SeqRecord(Seq("ACTG"), id=record.query)
for hit in record.alignments:
for hsp in hit.hsps:
@@ -67,10 +73,10 @@
if parent_match_end > hsp.query_end:
parent_match_end = hsp.query_end + 1
- # The ``protein_match`` feature will hold one or more ``match_part``s
+ # The ``match`` feature will hold one or more ``match_part``s
top_feature = SeqFeature(
FeatureLocation(parent_match_start, parent_match_end),
- type="protein_match", strand=0,
+ type=match_type, strand=0,
qualifiers=qualifiers
)
@@ -87,7 +93,7 @@
if trim:
# If trimming, then we start relative to the
- # protein_match's start
+ # match's start
match_part_start = parent_match_start + start
else:
# Otherwise, we have to account for the subject start's location
@@ -108,6 +114,7 @@
)
rec.features.append(top_feature)
+ rec.annotations = {}
records.append(rec)
return records
@@ -252,5 +259,4 @@
args = parser.parse_args()
result = blastxml2gff3(**vars(args))
-
GFF.write(result, sys.stdout)
diff -r b6a0e126dbee -r 7342f467507b gff3_rebase.py
--- a/gff3_rebase.py Tue Jun 23 12:10:15 2015 -0400
+++ b/gff3_rebase.py Thu Dec 31 13:58:43 2015 -0500
@@ -3,6 +3,7 @@
import logging
logging.basicConfig(level=logging.INFO)
import argparse
+import copy
from BCBio import GFF
from Bio.SeqFeature import FeatureLocation
log = logging.getLogger(__name__)
@@ -13,6 +14,70 @@
__email__ = "esr@tamu.edu"
+def feature_lambda(feature_list, test, test_kwargs, subfeatures=True):
+ """Recursively search through features, testing each with a test function, yielding matches.
+
+ GFF3 is a hierarchical data structure, so we need to be able to recursively
+ search through features. E.g. if you're looking for a feature with
+ ID='bob.42', you can't just do a simple list comprehension with a test
+ case: you don't know how deeply buried bob.42 will be in the feature tree.
+ This is where feature_lambda steps in.
+
+ :type feature_list: list
+ :param feature_list: an iterable of features
+
+ :type test: function reference
+ :param test: a function with the signature (feature, **kwargs), where the
+ kwargs are those passed in the next argument. It should return
+ True if the feature is to be yielded by feature_lambda, or
+ False if it is to be ignored. This function CAN mutate the
+ features passed to it (think "apply").
+
+ :type test_kwargs: dictionary
+ :param test_kwargs: kwargs to pass to your closure when it is called.
+
+ :type subfeatures: boolean
+ :param subfeatures: when a feature is matched, should just that feature be
+ yielded to the caller, or should the entire sub_feature
+ tree for that feature be included? subfeatures=True is
+ useful in cases such as searching for a gene feature,
+ and wanting to know what RBS/Shine_Dalgarno_sequences
+ are in the sub_feature tree (which can be accomplished
+ with two feature_lambda calls). subfeatures=False is
+ useful in cases when you want to process (and possibly
+ return) the entire feature tree, such as applying a
+ qualifier to every single feature.
+
+ :rtype: generator
+ :return: yields matching features, one at a time
+ """
+ # feature_list is either the top-level list of features or a sub_features attribute
+ for feature in feature_list:
+ if test(feature, **test_kwargs):
+ if not subfeatures:
+ feature_copy = copy.deepcopy(feature)
+ feature_copy.sub_features = []
+ yield feature_copy
+ else:
+ yield feature
+
+ if hasattr(feature, 'sub_features'):
+ for x in feature_lambda(feature.sub_features, test, test_kwargs, subfeatures=subfeatures):
+ yield x
+
+
+def feature_test_qual_value(feature, **kwargs):
+ """Test qualifier values.
+
+ For every feature, check that at least one value in
+ feature.qualifiers[kwargs['qualifier']] is in kwargs['attribute_list'].
+ """
+ for attribute_value in feature.qualifiers.get(kwargs['qualifier'], []):
+ if attribute_value in kwargs['attribute_list']:
+ return True
+ return False
+
+
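+ # A minimal usage sketch combining the two helpers above; 'parent.gff3' and
+ # the ID values are illustrative, and GFF is the BCBio.GFF module imported
+ # at the top of this file:
+ #
+ #     wanted_ids = ['bob.42', 'alice.7']
+ #     for rec in GFF.parse(open('parent.gff3')):
+ #         for feature in feature_lambda(
+ #                 rec.features,
+ #                 feature_test_qual_value,
+ #                 {'qualifier': 'ID', 'attribute_list': wanted_ids},
+ #                 subfeatures=False):
+ #             print feature.id
+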
def __get_features(child, interpro=False):
child_features = {}
for rec in GFF.parse(child):
@@ -69,30 +134,35 @@
child_features = __get_features(child, interpro=interpro)
for rec in GFF.parse(parent):
- # TODO, replace with recursion in case it's matched against a
- # non-parent feature. We're cheating a bit here right now...
replacement_features = []
- for feature in rec.features:
- if feature.id in child_features:
- new_subfeatures = child_features[feature.id]
- # TODO: update starts
- fixed_subfeatures = []
- for x in new_subfeatures:
- # Then update the location of the actual feature
- __update_feature_location(x, feature, protein2dna)
+ for feature in feature_lambda(
+ rec.features,
+ feature_test_qual_value,
+ {
+ 'qualifier': 'ID',
+ 'attribute_list': child_features.keys(),
+ },
+ subfeatures=False):
- if interpro:
- for y in ('status', 'Target'):
- try:
- del x.qualifiers[y]
- except:
- pass
+ new_subfeatures = child_features[feature.id]
+ fixed_subfeatures = []
+ for x in new_subfeatures:
+ # Then update the location of the actual feature
+ __update_feature_location(x, feature, protein2dna)
- fixed_subfeatures.append(x)
- replacement_features.extend(fixed_subfeatures)
+ if interpro:
+ for y in ('status', 'Target'):
+ try:
+ del x.qualifiers[y]
+ except:
+ pass
+
+ fixed_subfeatures.append(x)
+ replacement_features.extend(fixed_subfeatures)
# We do this so we don't include the original set of features that we
# were rebasing against in our result.
rec.features = replacement_features
+ rec.annotations = {}
GFF.write([rec], sys.stdout)
diff -r b6a0e126dbee -r 7342f467507b jbrowse-fromdir.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/jbrowse-fromdir.xml Thu Dec 31 13:58:43 2015 -0500
@@ -0,0 +1,40 @@
+
+ upgrades the bare data directory to a full JBrowse instance
+
+ macros.xml
+
+
+
+ python jbrowse.py --version
+
+
+
+
+
+
+
+
+
+
+
+ 10.1101/gr.094607.109
+
+
+
diff -r b6a0e126dbee -r 7342f467507b jbrowse.py
--- a/jbrowse.py Tue Jun 23 12:10:15 2015 -0400
+++ b/jbrowse.py Thu Dec 31 13:58:43 2015 -0500
@@ -1,142 +1,133 @@
#!/usr/bin/env python
-from string import Template
import os
+import copy
import argparse
import subprocess
import hashlib
+import struct
import tempfile
+import shutil
import json
-import yaml
+from Bio.Data import CodonTable
+import xml.etree.ElementTree as ET
import logging
-logging.basicConfig()
-log = logging.getLogger(__name__)
-
-COLOR_FUNCTION_TEMPLATE = Template("""
-function(feature, variableName, glyphObject, track) {
- var score = ${score};
- ${opacity}
- return 'rgba(${red}, ${green}, ${blue}, ' + opacity + ')';
-}
-""")
-
-BLAST_OPACITY_MATH = """
-var opacity = 0;
-if(score == 0.0) {
- opacity = 1;
-} else{
- opacity = (20 - Math.log10(score)) / 180;
-}
-"""
-
-BREWER_COLOUR_IDX = 0
-BREWER_COLOUR_SCHEMES = [
- (228, 26, 28),
- (55, 126, 184),
- (77, 175, 74),
- (152, 78, 163),
- (255, 127, 0),
-]
+from collections import defaultdict
+logging.basicConfig(level=logging.INFO)
+log = logging.getLogger('jbrowse')
-# score comes from feature._parent.get('score') or feature.get('score')
-# Opacity math
+class ColorScaling(object):
+
+ COLOR_FUNCTION_TEMPLATE = """
+ function(feature, variableName, glyphObject, track) {{
+ var score = {score};
+ {opacity}
+ return 'rgba({red}, {green}, {blue}, ' + opacity + ')';
+ }}
+ """
+
+ COLOR_FUNCTION_TEMPLATE_QUAL = """
+ function(feature, variableName, glyphObject, track) {{
+ var search_up = function self(sf, attr){{
+ if(sf.get(attr) !== undefined){{
+ return sf.get(attr);
+ }}
+ if(sf.parent() === undefined) {{
+ return;
+ }}else{{
+ return self(sf.parent(), attr);
+ }}
+ }};
-TN_TABLE = {
- 'gff3': '--gff',
- 'gff': '--gff',
- 'bed': '--bed',
- # 'genbank': '--gbk',
-}
+ var search_down = function self(sf, attr){{
+ if(sf.get(attr) !== undefined){{
+ return sf.get(attr);
+ }}
+ if(sf.children() === undefined) {{
+ return;
+ }}else{{
+ var kids = sf.children();
+ for(var child_idx in kids){{
+ var x = self(kids[child_idx], attr);
+ if(x !== undefined){{
+ return x;
+ }}
+ }}
+ return;
+ }}
+ }};
+
+ var color = ({user_spec_color} || search_up(feature, 'color') || search_down(feature, 'color') || {auto_gen_color});
+ var score = (search_up(feature, 'score') || search_down(feature, 'score'));
+ {opacity}
+ var result = /^#?([a-f\d]{{2}})([a-f\d]{{2}})([a-f\d]{{2}})$/i.exec(color);
+ var red = parseInt(result[1], 16);
+ var green = parseInt(result[2], 16);
+ var blue = parseInt(result[3], 16);
+ if(isNaN(opacity) || opacity < 0){{ opacity = 0; }}
+ return 'rgba(' + red + ',' + green + ',' + blue + ',' + opacity + ')';
+ }}
+ """
-INSTALLED_TO = os.path.dirname(os.path.realpath(__file__))
-
-
-class JbrowseConnector(object):
+ OPACITY_MATH = {
+ 'linear': """
+ var opacity = (score - ({min})) / (({max}) - ({min}));
+ """,
+ 'logarithmic': """
+ var opacity = (score - ({min})) / (({max}) - ({min}));
+ opacity = Math.log10(opacity) + Math.log10({max});
+ """,
+ 'blast': """
+ var opacity = 0;
+ if(score == 0.0) {
+ opacity = 1;
+ } else{
+ opacity = (20 - Math.log10(score)) / 180;
+ }
+ """
+ }
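+
+ # A sketch of how these pieces are combined (mirroring the blast branch of
+ # parse_colours below; the colour values are arbitrary):
+ #
+ #     color_function = self.COLOR_FUNCTION_TEMPLATE.format(**{
+ #         'score': "feature._parent.get('score')",
+ #         'opacity': self.OPACITY_MATH['blast'],
+ #         'red': 228, 'green': 26, 'blue': 28,
+ #     })
+ #
+ # The doubled {{ }} braces in the templates survive .format() as literal
+ # JavaScript braces. For the blast opacity, an e-value of 1e-100 gives
+ # (20 - (-100)) / 180 = 0.67, and anything at or below 1e-160 renders fully
+ # opaque (the browser clamps alpha values above 1).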
- def __init__(self, jbrowse, jbrowse_dir, outdir, genome):
- self.jbrowse = jbrowse
- self.jbrowse_dir = jbrowse_dir
- self.outdir = outdir
- self.genome_path = genome
+ BREWER_COLOUR_IDX = 0
+ BREWER_COLOUR_SCHEMES = [
+ (166, 206, 227),
+ (31, 120, 180),
+ (178, 223, 138),
+ (51, 160, 44),
+ (251, 154, 153),
+ (227, 26, 28),
+ (253, 191, 111),
+ (255, 127, 0),
+ (202, 178, 214),
+ (106, 61, 154),
+ (255, 255, 153),
+ (177, 89, 40),
+ (228, 26, 28),
+ (55, 126, 184),
+ (77, 175, 74),
+ (152, 78, 163),
+ (255, 127, 0),
+ ]
+
+ BREWER_DIVERGING_PALLETES = {
+ 'BrBg': ("#543005", "#003c30"),
+ 'PiYg': ("#8e0152", "#276419"),
+ 'PRGn': ("#40004b", "#00441b"),
+ 'PuOr': ("#7f3b08", "#2d004b"),
+ 'RdBu': ("#67001f", "#053061"),
+ 'RdGy': ("#67001f", "#1a1a1a"),
+ 'RdYlBu': ("#a50026", "#313695"),
+ 'RdYlGn': ("#a50026", "#006837"),
+ 'Spectral': ("#9e0142", "#5e4fa2"),
+ }
+
+ def __init__(self):
self.brewer_colour_idx = 0
- self.clone_jbrowse(self.jbrowse, self.outdir)
- self.process_genome()
-
- def subprocess_check_call(self, command):
- log.debug('cd %s && %s', self.jbrowse_dir, ' '.join(command))
- subprocess.check_call(command, cwd=self.jbrowse_dir)
-
- def _get_colours(self):
- r, g, b = BREWER_COLOUR_SCHEMES[self.brewer_colour_idx]
- self.brewer_colour_idx += 1
- return r, g, b
-
- def process_genome(self):
- self.subprocess_check_call(['perl', 'bin/prepare-refseqs.pl',
- '--fasta', self.genome_path])
-
- def _add_json(self, json_data):
- if len(json_data.keys()) == 0:
- return
-
- tmp = tempfile.NamedTemporaryFile(delete=False)
- tmp.write(json.dumps(json_data))
- tmp.close()
- cmd = ['perl', 'bin/add-track-json.pl', tmp.name,
- os.path.join('data', 'trackList.json')]
- self.subprocess_check_call(cmd)
- os.unlink(tmp.name)
-
- def add_blastxml(self, data, key, **kwargs):
- gff3_unrebased = tempfile.NamedTemporaryFile(delete=False)
- cmd = ['python', os.path.join(INSTALLED_TO, 'blastxml_to_gapped_gff3.py'),
- '--trim_end', '--min_gap', str(kwargs['min_gap']), data]
- subprocess.check_call(cmd, cwd=self.jbrowse_dir, stdout=gff3_unrebased)
- gff3_unrebased.close()
+ def rgb_from_hex(self, hexstr):
+ # http://stackoverflow.com/questions/4296249/how-do-i-convert-a-hex-triplet-to-an-rgb-tuple-and-back
+ return struct.unpack('BBB', hexstr.decode('hex'))
- gff3_rebased = tempfile.NamedTemporaryFile(delete=False)
- cmd = ['python', os.path.join(INSTALLED_TO, 'gff3_rebase.py')]
- if kwargs['protein']:
- cmd.append('--protein2dna')
- cmd.extend([kwargs['parent'], gff3_unrebased.name])
- subprocess.check_call(cmd, cwd=self.jbrowse_dir, stdout=gff3_rebased)
- gff3_rebased.close()
-
- label = hashlib.md5(data).hexdigest()
-
- red, green, blue = self._get_colours()
- color_function = COLOR_FUNCTION_TEMPLATE.substitute({
- 'score': "feature._parent.get('score')",
- 'opacity': BLAST_OPACITY_MATH,
- 'red': red,
- 'green': green,
- 'blue': blue,
- })
-
- clientConfig = {
- 'label': 'description',
- 'color': color_function.replace('\n', ''),
- 'description': 'Hit_titles',
- }
- config = {'glyph': 'JBrowse/View/FeatureGlyph/Segments'}
- if 'category' in kwargs:
- config['category'] = kwargs['category']
-
- cmd = ['perl', 'bin/flatfile-to-json.pl',
- '--gff', gff3_rebased.name,
- '--trackLabel', label,
- '--key', key,
- '--clientConfig', json.dumps(clientConfig),
- '--config', json.dumps(config),
- '--trackType', 'JBrowse/View/Track/CanvasFeatures'
- ]
-
- self.subprocess_check_call(cmd)
- os.unlink(gff3_rebased.name)
- os.unlink(gff3_unrebased.name)
-
- def _min_max_gff(self, gff_file):
+ def min_max_gff(self, gff_file):
min_val = None
max_val = None
with open(gff_file, 'r') as handle:
@@ -155,65 +146,296 @@
pass
return min_val, max_val
- def add_bigwig(self, data, key, **kwargs):
- label = hashlib.md5(data).hexdigest()
- dest = os.path.join('data', 'raw', os.path.basename(data))
- cmd = ['ln', '-s', data, dest]
- self.subprocess_check_call(cmd)
+ def hex_from_rgb(self, r, g, b):
+ return '#%02x%02x%02x' % (r, g, b)
+
+ def _get_colours(self):
+ r, g, b = self.BREWER_COLOUR_SCHEMES[self.brewer_colour_idx]
+ self.brewer_colour_idx += 1
+ return r, g, b
+
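+ # Round-trip sketch for the two colour helpers above, using the first
+ # Brewer colour as an example value (Python 2 str.decode('hex') assumed):
+ #
+ #     cs = ColorScaling()
+ #     cs.hex_from_rgb(166, 206, 227)   # '#a6cee3'
+ #     cs.rgb_from_hex('a6cee3')        # (166, 206, 227)
+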
+ def parse_colours(self, track, trackFormat, gff3=None):
+ # Wiggle tracks have a bicolor palette
+ trackConfig = {'style': {}}
+ if trackFormat == 'wiggle':
+
+ trackConfig['style']['pos_color'] = track['wiggle']['color_pos']
+ trackConfig['style']['neg_color'] = track['wiggle']['color_neg']
+
+ if trackConfig['style']['pos_color'] == '__auto__':
+ trackConfig['style']['neg_color'] = self.hex_from_rgb(*self._get_colours())
+ trackConfig['style']['pos_color'] = self.hex_from_rgb(*self._get_colours())
+
- track_data = {
- "label": label,
- "urlTemplate": os.path.join('..', dest),
- "bicolor_pivot": "zero",
- "storeClass": "JBrowse/Store/SeqFeature/BigWig",
- "type": "JBrowse/View/Track/Wiggle/Density",
- "key": key,
+ # Wiggle tracks can change colour at a specified place
+ bc_pivot = track['wiggle']['bicolor_pivot']
+ if bc_pivot not in ('mean', 'zero'):
+ # The values are either one of those two strings
+ # or a number
+ bc_pivot = float(bc_pivot)
+ trackConfig['bicolor_pivot'] = bc_pivot
+ elif 'scaling' in track:
+ if track['scaling']['method'] == 'ignore':
+ if track['scaling']['scheme']['color'] != '__auto__':
+ trackConfig['style']['color'] = track['scaling']['scheme']['color']
+ else:
+ trackConfig['style']['color'] = self.hex_from_rgb(*self._get_colours())
+ else:
+ # Scored method
+ algo = track['scaling']['algo']
+ # linear, logarithmic, blast
+ scales = track['scaling']['scales']
+ # type __auto__, manual (min, max)
+ scheme = track['scaling']['scheme']
+ # scheme -> (type (opacity), color)
+ # ==================================
+ # GENE CALLS OR BLAST
+ # ==================================
+ if trackFormat == 'blast':
+ red, green, blue = self._get_colours()
+ color_function = self.COLOR_FUNCTION_TEMPLATE.format(**{
+ 'score': "feature._parent.get('score')",
+ 'opacity': self.OPACITY_MATH['blast'],
+ 'red': red,
+ 'green': green,
+ 'blue': blue,
+ })
+ trackConfig['style']['color'] = color_function.replace('\n', '')
+ elif trackFormat == 'gene_calls':
+ # Default values, based on GFF3 spec
+ min_val = 0
+ max_val = 1000
+ # Get min/max and build a scoring function since JBrowse doesn't do this for us
+ if scales['type'] == 'automatic':
+ min_val, max_val = self.min_max_gff(gff3)
+ else:
+ min_val = scales['min']
+ max_val = scales['max']
+
+ if scheme['color'] == '__auto__':
+ user_color = 'undefined'
+ auto_color = "'%s'" % self.hex_from_rgb(*self._get_colours())
+ elif scheme['color'].startswith('#'):
+ user_color = "'%s'" % self.hex_from_rgb(*self.rgb_from_hex(scheme['color'][1:]))
+ auto_color = 'undefined'
+ else:
+ user_color = 'undefined'
+ auto_color = "'%s'" % self.hex_from_rgb(*self._get_colours())
+
+ color_function = self.COLOR_FUNCTION_TEMPLATE_QUAL.format(**{
+ 'opacity': self.OPACITY_MATH[algo].format(**{'max': max_val, 'min': min_val}),
+ 'user_spec_color': user_color,
+ 'auto_gen_color': auto_color,
+ })
+
+ trackConfig['style']['color'] = color_function.replace('\n', '')
+ return trackConfig
+
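+# Example input for the wiggle branch of parse_colours; the keys mirror what
+# the method reads from the track options, the values are illustrative:
+#
+#     cs = ColorScaling()
+#     conf = cs.parse_colours(
+#         {'wiggle': {'color_pos': '__auto__',
+#                     'color_neg': '__auto__',
+#                     'bicolor_pivot': 'zero'}},
+#         'wiggle')
+#     # conf['style'] now holds two auto-picked Brewer hex colours and
+#     # conf['bicolor_pivot'] == 'zero'.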
+
+def etree_to_dict(t):
+ d = {t.tag: {} if t.attrib else None}
+ children = list(t)
+ if children:
+ dd = defaultdict(list)
+ for dc in map(etree_to_dict, children):
+ for k, v in dc.iteritems():
+ dd[k].append(v)
+ d = {t.tag: {k:v[0] if len(v) == 1 else v for k, v in dd.iteritems()}}
+ if t.attrib:
+ d[t.tag].update(('@' + k, v) for k, v in t.attrib.iteritems())
+ if t.text:
+ text = t.text.strip()
+ if children or t.attrib:
+ if text:
+ d[t.tag]['#text'] = text
+ else:
+ d[t.tag] = text
+ return d
+
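+# An illustrative input/output pair for etree_to_dict; the element and
+# attribute names are made up rather than taken from a real track XML:
+#
+#     etree_to_dict(ET.fromstring(
+#         '<wiggle color_pos="__auto__"><type>Density</type></wiggle>'))
+#     # => {'wiggle': {'@color_pos': '__auto__', 'type': 'Density'}}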
+
+# score comes from feature._parent.get('score') or feature.get('score')
+
+INSTALLED_TO = os.path.dirname(os.path.realpath(__file__))
+
+
+class JbrowseConnector(object):
+
+ def __init__(self, jbrowse, outdir, genomes, standalone=False, gencode=1):
+ self.TN_TABLE = {
+ 'gff3': '--gff',
+ 'gff': '--gff',
+ 'bed': '--bed',
+ 'genbank': '--gbk',
}
- track_data.update(kwargs)
+
+ self.cs = ColorScaling()
+ self.jbrowse = jbrowse
+ self.outdir = outdir
+ self.genome_paths = genomes
+ self.standalone = standalone
+ self.gencode = gencode
+
+ if standalone:
+ self.clone_jbrowse(self.jbrowse, self.outdir)
+ else:
+ try:
+ os.makedirs(self.outdir)
+ except OSError:
+ # Ignore if the folder exists
+ pass
+
+ self.process_genomes()
+ self.update_gencode()
+
+ def update_gencode(self):
+ table = CodonTable.unambiguous_dna_by_id[int(self.gencode)]
+ trackList = os.path.join(self.outdir, 'data', 'trackList.json')
+ with open(trackList, 'r') as handle:
+ trackListData = json.load(handle)
- if 'min' not in track_data and 'max' not in track_data \
- and 'autoscale' not in track_data:
- track_data['autoscale'] = 'local'
+ trackListData['tracks'][0].update({
+ 'codonStarts': table.start_codons,
+ 'codonStops': table.stop_codons,
+ 'codonTable': table.forward_table,
+ })
+
+ with open(trackList, 'w') as handle:
+ json.dump(trackListData, handle, indent=2)
+
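+ # What update_gencode pulls from Biopython for, e.g., genetic code 11
+ # (bacterial); the codon lists below are recalled from the NCBI tables and
+ # are a sketch rather than an exhaustive listing:
+ #
+ #     table = CodonTable.unambiguous_dna_by_id[11]
+ #     table.stop_codons    # ['TAA', 'TAG', 'TGA']
+ #     table.start_codons   # ['TTG', 'CTG', 'ATT', 'ATC', 'ATA', 'ATG', 'GTG']
+ #     table.forward_table  # {'TTT': 'F', 'TTC': 'F', ...}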
+
+ def subprocess_check_call(self, command):
+ log.debug('cd %s && %s', self.outdir, ' '.join(command))
+ subprocess.check_call(command, cwd=self.outdir)
+
+ def _jbrowse_bin(self, command):
+ return os.path.realpath(os.path.join(self.jbrowse, 'bin', command))
+
+ def process_genomes(self):
+ for genome_path in self.genome_paths:
+ self.subprocess_check_call([
+ 'perl', self._jbrowse_bin('prepare-refseqs.pl'),
+ '--fasta', genome_path])
+
+ def _add_json(self, json_data):
+ if len(json_data.keys()) == 0:
+ return
+
+ tmp = tempfile.NamedTemporaryFile(delete=False)
+ tmp.write(json.dumps(json_data))
+ tmp.close()
+ cmd = ['perl', self._jbrowse_bin('add-track-json.pl'), tmp.name,
+ os.path.join('data', 'trackList.json')]
+ self.subprocess_check_call(cmd)
+ os.unlink(tmp.name)
- self._add_json(track_data)
+ def _add_track_json(self, json_data):
+ if len(json_data.keys()) == 0:
+ return
+
+ tmp = tempfile.NamedTemporaryFile(delete=False)
+ tmp.write(json.dumps(json_data))
+ tmp.close()
+ cmd = ['perl', self._jbrowse_bin('add-track-json.pl'), tmp.name,
+ os.path.join('data', 'trackList.json')]
+ self.subprocess_check_call(cmd)
+ os.unlink(tmp.name)
+
+
+ def _blastxml_to_gff3(self, xml, min_gap=10):
+ gff3_unrebased = tempfile.NamedTemporaryFile(delete=False)
+ cmd = ['python', os.path.join(INSTALLED_TO, 'blastxml_to_gapped_gff3.py'),
+ '--trim', '--trim_end', '--min_gap', str(min_gap), xml]
+ log.debug('cd %s && %s > %s', self.outdir, ' '.join(cmd), gff3_unrebased.name)
+ subprocess.check_call(cmd, cwd=self.outdir, stdout=gff3_unrebased)
+ gff3_unrebased.close()
+ return gff3_unrebased.name
+
+ def add_blastxml(self, data, trackData, blastOpts, **kwargs):
+ gff3 = self._blastxml_to_gff3(data, min_gap=blastOpts['min_gap'])
- def add_bam(self, data, key, **kwargs):
- label = hashlib.md5(data).hexdigest()
- dest = os.path.join('data', 'raw', os.path.basename(data))
- # ln?
- cmd = ['ln', '-s', data, dest]
+ if 'parent' in blastOpts:
+ gff3_rebased = tempfile.NamedTemporaryFile(delete=False)
+ cmd = ['python', os.path.join(INSTALLED_TO, 'gff3_rebase.py')]
+ if blastOpts.get('protein', 'false') == 'true':
+ cmd.append('--protein2dna')
+ cmd.extend([os.path.realpath(blastOpts['parent']), gff3])
+ log.debug('cd %s && %s > %s', self.outdir, ' '.join(cmd), gff3_rebased.name)
+ subprocess.check_call(cmd, cwd=self.outdir, stdout=gff3_rebased)
+ gff3_rebased.close()
+
+ # Replace original gff3 file
+ shutil.copy(gff3_rebased.name, gff3)
+ os.unlink(gff3_rebased.name)
+
+ config = {
+ 'glyph': 'JBrowse/View/FeatureGlyph/Segments',
+ "category": trackData['category'],
+ }
+
+ clientConfig = trackData['style']
+
+ cmd = ['perl', self._jbrowse_bin('flatfile-to-json.pl'),
+ '--gff', gff3,
+ '--trackLabel', trackData['label'],
+ '--key', trackData['key'],
+ '--clientConfig', json.dumps(clientConfig),
+ '--config', json.dumps(config),
+ '--trackType', 'JBrowse/View/Track/CanvasFeatures'
+ ]
+
+ self.subprocess_check_call(cmd)
+ os.unlink(gff3)
+
+ def add_bigwig(self, data, trackData, wiggleOpts, **kwargs):
+ dest = os.path.join('data', 'raw', trackData['label'] + '.bw')
+ cmd = ['ln', data, dest]
self.subprocess_check_call(cmd)
- bai_source = kwargs['bam_index']
- cmd = ['ln', '-s', bai_source, dest + '.bai']
+ trackData.update({
+ "urlTemplate": os.path.join('..', dest),
+ "storeClass": "JBrowse/Store/SeqFeature/BigWig",
+ "type": "JBrowse/View/Track/Wiggle/Density",
+ })
+
+ trackData['type'] = wiggleOpts['type']
+ trackData['variance_band'] = True if wiggleOpts['variance_band'] == 'true' else False
+
+ if 'min' in wiggleOpts and 'max' in wiggleOpts:
+ trackData['min_score'] = wiggleOpts['min']
+ trackData['max_score'] = wiggleOpts['max']
+ else:
+ trackData['autoscale'] = wiggleOpts.get('autoscale', 'local')
+
+ self._add_track_json(trackData)
+
+ def add_bam(self, data, trackData, bamOpts, bam_index=None, **kwargs):
+ dest = os.path.join('data', 'raw', trackData['label'] + '.bam')
+ cmd = ['ln', '-s', os.path.realpath(data), dest]
self.subprocess_check_call(cmd)
- track_data = {
+ cmd = ['ln', '-s', os.path.realpath(bam_index), dest + '.bai']
+ self.subprocess_check_call(cmd)
+
+ trackData.update({
"urlTemplate": os.path.join('..', dest),
- "key": key,
- "label": label,
"type": "JBrowse/View/Track/Alignments2",
"storeClass": "JBrowse/Store/SeqFeature/BAM",
- }
- if 'category' in kwargs:
- track_data['category'] = kwargs['category']
- self._add_json(track_data)
+ })
+
- if kwargs.get('auto_snp', False):
- track_data = {
- "storeClass": "JBrowse/Store/SeqFeature/BAM",
- "urlTemplate": os.path.join('..', dest),
+ self._add_track_json(trackData)
+
+ if bamOpts.get('auto_snp', 'false') == 'true':
+ trackData2 = copy.copy(trackData)
+ trackData2.update({
"type": "JBrowse/View/Track/SNPCoverage",
- "key": key + " - SNPs/Coverage",
- "label": label + "_autosnp",
- }
- if 'category' in kwargs:
- track_data['category'] = kwargs['category']
- self._add_json(track_data)
+ "key": trackData['key'] + " - SNPs/Coverage",
+ "label": trackData['label'] + "_autosnp",
+ })
+ self._add_track_json(trackData2)
- def add_vcf(self, data, key, **kwargs):
- label = hashlib.md5(data).hexdigest()
- dest = os.path.join('data', 'raw', os.path.basename(data))
+ def add_vcf(self, data, trackData, vcfOpts={}, **kwargs):
+ dest = os.path.join('data', 'raw', trackData['label'] + '.vcf')
# ln?
cmd = ['ln', '-s', data, dest]
self.subprocess_check_call(cmd)
@@ -222,135 +444,146 @@
cmd = ['tabix', '-p', 'vcf', dest + '.gz']
self.subprocess_check_call(cmd)
- track_data = {
- "key": key,
- "label": label,
+ trackData.update({
"urlTemplate": os.path.join('..', dest + '.gz'),
"type": "JBrowse/View/Track/HTMLVariants",
"storeClass": "JBrowse/Store/SeqFeature/VCFTabix",
- }
- track_data.update(kwargs)
- self._add_json(track_data)
-
- def add_features(self, data, key, format, **kwargs):
- label = hashlib.md5(data).hexdigest()
- cmd = ['perl', 'bin/flatfile-to-json.pl',
- TN_TABLE.get(format, 'gff'), data,
- '--trackLabel', label,
- '--key', key]
-
- config = {}
- if 'category' in kwargs:
- config['category'] = kwargs['category']
-
- if kwargs.get('match', False):
- clientConfig = {
- 'label': 'description',
- 'description': 'Hit_titles',
- }
-
- # Get min/max and build a scoring function since JBrowse doesn't
- min_val, max_val = self._min_max_gff(data)
+ })
+ self._add_track_json(trackData)
- if min_val is not None and max_val is not None:
- MIN_MAX_OPACITY_MATH = Template("""
- var opacity = (score - ${min}) * (1/(${max} - ${min}));
- """).substitute({
- 'max': max_val,
- 'min': min_val,
- })
+ def add_features(self, data, format, trackData, gffOpts, **kwargs):
+ cmd = [
+ 'perl', self._jbrowse_bin('flatfile-to-json.pl'),
+ self.TN_TABLE.get(format, 'gff'),
+ data,
+ '--trackLabel', trackData['label'],
+ '--trackType', 'JBrowse/View/Track/CanvasFeatures',
+ '--key', trackData['key']
+ ]
- red, green, blue = self._get_colours()
- color_function = COLOR_FUNCTION_TEMPLATE.substitute({
- 'score': "feature.get('score')",
- 'opacity': MIN_MAX_OPACITY_MATH,
- 'red': red,
- 'green': green,
- 'blue': blue,
- })
+ config = copy.copy(trackData)
+ clientConfig = trackData['style']
+ del config['style']
- clientConfig['color'] = color_function.replace('\n', '')
-
+ if 'match' in gffOpts:
config['glyph'] = 'JBrowse/View/FeatureGlyph/Segments'
+ cmd += ['--type', gffOpts['match']]
- cmd += ['--clientConfig', json.dumps(clientConfig),
- '--trackType', 'JBrowse/View/Track/CanvasFeatures'
- ]
+ cmd += ['--clientConfig', json.dumps(clientConfig),
+ '--trackType', 'JBrowse/View/Track/CanvasFeatures'
+ ]
cmd.extend(['--config', json.dumps(config)])
self.subprocess_check_call(cmd)
- def process_annotations(self, data, key, format, **kwargs):
- if format in ('gff', 'gff3', 'bed'):
- self.add_features(data, key, format, **kwargs)
- elif format == 'bigwig':
- self.add_bigwig(data, key, **kwargs)
- elif format == 'bam':
- self.add_bam(data, key, **kwargs)
- elif format == 'blastxml':
- self.add_blastxml(data, key, **kwargs)
- elif format == 'vcf':
- self.add_vcf(data, key, **kwargs)
+
+ def process_annotations(self, track):
+ outputTrackConfig = {
+ 'style': {
+ 'label': track['style'].get('label', 'description'),
+ 'className': track['style'].get('className', 'feature'),
+ 'description': track['style'].get('description', ''),
+ },
+ 'category': track['category'],
+ }
+
+ for i, (dataset_path, dataset_ext, track_human_label) in enumerate(track['trackfiles']):
+ log.info('Processing %s / %s', track['category'], track_human_label)
+ outputTrackConfig['key'] = track_human_label
+ hashData = [dataset_path, track_human_label, track['category']]
+ outputTrackConfig['label'] = hashlib.md5('|'.join(hashData)).hexdigest() + '_%s' % i
+
+ # Colour parsing is complex due to different track types having
+ # different colour options.
+ colourOptions = self.cs.parse_colours(track['conf']['options'], track['format'], gff3=dataset_path)
+ # This used to be done with a dict.update() call, but that wiped out any previous style settings.
+ for key in colourOptions:
+ if key == 'style':
+ for subkey in colourOptions['style']:
+ outputTrackConfig['style'][subkey] = colourOptions['style'][subkey]
+ else:
+ outputTrackConfig[key] = colourOptions[key]
+
+ if dataset_ext in ('gff', 'gff3', 'bed'):
+ self.add_features(dataset_path, dataset_ext, outputTrackConfig,
+ track['conf']['options']['gff'])
+ elif dataset_ext == 'bigwig':
+ self.add_bigwig(dataset_path, outputTrackConfig,
+ track['conf']['options']['wiggle'])
+ elif dataset_ext == 'bam':
+ real_indexes = track['conf']['options']['pileup']['bam_indices']['bam_index']
+ if not isinstance(real_indexes, list):
+ # <bam_indices>
+ #     <bam_index>/path/to/a.bam.bai</bam_index>
+ # </bam_indices>
+ #
+ # The above will result in the 'bam_index' key containing a
+ # string. If there are two or more indices, the container
+ # becomes a list. Fun!
+ real_indexes = [real_indexes]
+
+ self.add_bam(dataset_path, outputTrackConfig,
+ track['conf']['options']['pileup'],
+ bam_index=real_indexes[i])
+ elif dataset_ext == 'blastxml':
+ self.add_blastxml(dataset_path, outputTrackConfig, track['conf']['options']['blast'])
+ elif dataset_ext == 'vcf':
+ self.add_vcf(dataset_path, outputTrackConfig)
def clone_jbrowse(self, jbrowse_dir, destination):
+ """Clone a JBrowse directory into a destination directory.
+ """
# JBrowse seems to have included some bad symlinks, cp ignores bad symlinks
# unlike copytree
- cmd = ['mkdir', '-p', destination]
+ cmd = ['cp', '-r', os.path.join(jbrowse_dir, '.'), destination]
+ log.debug(' '.join(cmd))
subprocess.check_call(cmd)
- cmd = ['cp', '-r', jbrowse_dir, destination]
- subprocess.check_call(cmd)
- cmd = ['mkdir', '-p', os.path.join(destination, 'JBrowse-1.11.6',
- 'data', 'raw')]
+ cmd = ['mkdir', '-p', os.path.join(destination, 'data', 'raw')]
+ log.debug(' '.join(cmd))
subprocess.check_call(cmd)
# http://unix.stackexchange.com/a/38691/22785
# JBrowse releases come with some broken symlinks
cmd = ['find', destination, '-type', 'l', '-xtype', 'l', '-exec', 'rm', "'{}'", '+']
+ log.debug(' '.join(cmd))
subprocess.check_call(cmd)
if __name__ == '__main__':
parser = argparse.ArgumentParser(description="", epilog="")
- parser.add_argument('genome', type=file, help='Input genome file')
- parser.add_argument('yaml', type=file, help='Track Configuration')
+ parser.add_argument('xml', type=file, help='Track Configuration')
parser.add_argument('--jbrowse', help='Folder containing a jbrowse release')
parser.add_argument('--outdir', help='Output directory', default='out')
+ parser.add_argument('--standalone', help='Standalone mode includes a copy of JBrowse', action='store_true')
args = parser.parse_args()
+ tree = ET.parse(args.xml.name)
+ root = tree.getroot()
+
jc = JbrowseConnector(
jbrowse=args.jbrowse,
- jbrowse_dir=os.path.join(args.outdir, 'JBrowse-1.11.6'),
outdir=args.outdir,
- genome=os.path.realpath(args.genome.name),
+ genomes=[os.path.realpath(x.text) for x in root.findall('metadata/genomes/genome')],
+ standalone=args.standalone,
+ gencode=root.find('metadata/gencode').text
)
- track_data = yaml.load(args.yaml)
- for track in track_data:
- path = os.path.realpath(track['file'])
- extra = track.get('options', {})
- if '__unused__' in extra:
- del extra['__unused__']
-
- for possible_partial_path in ('bam_index', 'parent'):
- if possible_partial_path in extra:
- extra[possible_partial_path] = os.path.realpath(
- extra[possible_partial_path])
- extra['category'] = track.get('category', 'Default')
+ for track in root.findall('tracks/track'):
+ track_conf = {}
+ track_conf['trackfiles'] = [
+ (os.path.realpath(x.attrib['path']), x.attrib['ext'], x.attrib['label'])
+ for x in track.findall('files/trackFile')
+ ]
- jc.process_annotations(path, track['label'], track['ext'], **extra)
-
- print """
-
-
-
- Go to JBrowse
- Please note that JBrowse functions best on production Galaxy
- instances. The paste server used in development instances has issues
- serving the volumes of data regularly involved in JBrowse
-
-
- """
+ track_conf['category'] = track.attrib['cat']
+ track_conf['format'] = track.attrib['format']
+ try:
+ # Only pertains to gff3 + blastxml. TODO?
+ track_conf['style'] = {t.tag: t.text for t in track.find('options/style')}
+ except TypeError, te:
+ track_conf['style'] = {}
+ pass
+ track_conf['conf'] = etree_to_dict(track.find('options'))
+ jc.process_annotations(track_conf)
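+
+# For reference, a minimal track-configuration XML consistent with the
+# element paths read above (metadata/genomes/genome, metadata/gencode,
+# tracks/track, files/trackFile, options/style); the root tag name and the
+# concrete attribute values are assumptions:
+#
+#     <root>
+#         <metadata>
+#             <gencode>11</gencode>
+#             <genomes>
+#                 <genome>/path/to/genome.fasta</genome>
+#             </genomes>
+#         </metadata>
+#         <tracks>
+#             <track cat="Default" format="gene_calls">
+#                 <files>
+#                     <trackFile path="/path/to/genes.gff3" ext="gff3" label="Gene calls"/>
+#                 </files>
+#                 <options>
+#                     <style>
+#                         <label>product</label>
+#                     </style>
+#                     <gff>
+#                         <match>match_part</match>
+#                     </gff>
+#                 </options>
+#             </track>
+#         </tracks>
+#     </root>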
diff -r b6a0e126dbee -r 7342f467507b jbrowse.xml
--- a/jbrowse.xml Tue Jun 23 12:10:15 2015 -0400
+++ b/jbrowse.xml Thu Dec 31 13:58:43 2015 -0500
@@ -1,4 +1,4 @@
-
+
genome browser
macros.xml
@@ -6,106 +6,317 @@
python jbrowse.py --version
- $default]]>
+#if str($standalone) == "Complete":
+ --standalone
+#end if
+--outdir $output.files_path;
+
+#if str($standalone) == "Complete":
+ mv $output.files_path/index.html $output;
+#else:
+ mv $dummyIndex $output;
+#end if
+
+
+## Ugly testing hack since I cannot get to test the files I want to test. Hmph.
+#if str($uglyTestingHack) == "enabled":
+ mv $trackxml $output
+#end if
+]]>
-
----
-#for $track in $data_tracks:
- -
- file: ${track.data_format.annotation}
- ext: ${track.data_format.annotation.ext}
- label: "${track.annotation_label}"
- category: "${track.category}"
- options:
- __unused__: "Not used...just to ensure options has at least one key"
- #if str($track.data_format.data_format_select) == "wiggle":
- type: ${track.data_format.xyplot}
- variance_band: ${track.data_format.var_band}
- #if str($track.data_format.scaling.scale_select) == "auto_local":
- autoscale: local
- #else if str($track.data_format.scaling.scale_select) == "auto_global":
- autoscale: global
- #else:
- min: ${track.data_format.scaling.minimum}
- max: ${track.data_format.scaling.maximum}
- #end if
- #else if str($track.data_format.data_format_select) == "pileup":
- auto_snp: ${track.data_format.auto_snp}
- bam_index: ${track.data_format.annotation.metadata.bam_index}
- #else if str($track.data_format.data_format_select) == "blast":
- parent: ${track.data_format.blast_parent}
- protein: ${track.data_format.is_protein}
- min_gap: ${track.data_format.min_gap}
- match: true
- #else if str($track.data_format.data_format_select) == "gene_calls":
- match: ${track.data_format.match_part}
- #end if
-#end for
+
+
+
+
+
+ JBrowse Data Directory
+
+ Hi! This is not a full JBrowse instance. JBrowse v0.4(+?)
+ started shipping with the ability to produce just the
+ "data" directory from a JBrowse instance, rather than a
+ complete, standalone instance. This was intended to be used
+ with the in-development Apollo integration, but may have other
+ uses as well.
+
+
+ This is not usable on its own. The output dataset may be
+ used with Apollo, or may be passed through the "JBrowse -
+ Convert to Standalone" tool in Galaxy to "upgrade" to a full
+ JBrowse instance.
+
+
+