view jbrowse.py @ 1:497c6bb3b717 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
author iuc
date Thu, 18 Jun 2015 12:10:51 -0400
parents 2c9e5136b416
children 7342f467507b
line wrap: on
line source

#!/usr/bin/env python
from string import Template
import os
import argparse
import subprocess
import hashlib
import tempfile
import json
import yaml
import logging
logging.basicConfig()
log = logging.getLogger(__name__)

COLOR_FUNCTION_TEMPLATE = Template("""
function(feature, variableName, glyphObject, track) {
    var score = ${score};
    ${opacity}
    return 'rgba(${red}, ${green}, ${blue}, ' + opacity + ')';
}
""")

BLAST_OPACITY_MATH = """
var opacity = 0;
if(score == 0.0) {
    opacity = 1;
} else{
    opacity = (20 - Math.log10(score)) / 180;
}
"""

BREWER_COLOUR_IDX = 0
BREWER_COLOUR_SCHEMES = [
    (228, 26, 28),
    (55, 126, 184),
    (77, 175, 74),
    (152, 78, 163),
    (255, 127, 0),
]


# score comes from feature._parent.get('score') or feature.get('score')
# Opacity math

TN_TABLE = {
    'gff3': '--gff',
    'gff': '--gff',
    'bed': '--bed',
    # 'genbank': '--gbk',
}

INSTALLED_TO = os.path.dirname(os.path.realpath(__file__))


class JbrowseConnector(object):

    def __init__(self, jbrowse, jbrowse_dir, outdir, genome):
        self.jbrowse = jbrowse
        self.jbrowse_dir = jbrowse_dir
        self.outdir = outdir
        self.genome_path = genome
        self.brewer_colour_idx = 0

        self.clone_jbrowse(self.jbrowse, self.outdir)
        self.process_genome()

    def subprocess_check_call(self, command):
        log.debug('cd %s && %s', self.jbrowse_dir, ' '.join(command))
        subprocess.check_call(command, cwd=self.jbrowse_dir)

    def _get_colours(self):
        r, g, b = BREWER_COLOUR_SCHEMES[self.brewer_colour_idx]
        self.brewer_colour_idx += 1
        return r, g, b

    def process_genome(self):
        self.subprocess_check_call(['perl', 'bin/prepare-refseqs.pl',
                                    '--fasta', self.genome_path])

    def _add_json(self, json_data):
        if len(json_data.keys()) == 0:
            return

        tmp = tempfile.NamedTemporaryFile(delete=False)
        tmp.write(json.dumps(json_data))
        tmp.close()
        cmd = ['perl', 'bin/add-track-json.pl', tmp.name,
               os.path.join('data', 'trackList.json')]
        self.subprocess_check_call(cmd)
        os.unlink(tmp.name)

    def add_blastxml(self, data, key, **kwargs):
        gff3_unrebased = tempfile.NamedTemporaryFile(delete=False)
        cmd = ['python', os.path.join(INSTALLED_TO, 'blastxml_to_gapped_gff3.py'),
               '--trim_end', '--min_gap', str(kwargs['min_gap']), data]
        subprocess.check_call(cmd, cwd=self.jbrowse_dir, stdout=gff3_unrebased)
        gff3_unrebased.close()

        gff3_rebased = tempfile.NamedTemporaryFile(delete=False)
        cmd = ['python', os.path.join(INSTALLED_TO, 'gff3_rebase.py')]
        if kwargs['protein']:
            cmd.append('--protein2dna')
        cmd.extend([kwargs['parent'], gff3_unrebased.name])
        subprocess.check_call(cmd, cwd=self.jbrowse_dir, stdout=gff3_rebased)
        gff3_rebased.close()

        label = hashlib.md5(data).hexdigest()

        red, green, blue = self._get_colours()
        color_function = COLOR_FUNCTION_TEMPLATE.substitute({
            'score': "feature._parent.get('score')",
            'opacity': BLAST_OPACITY_MATH,
            'red': red,
            'green': green,
            'blue': blue,
        })

        clientConfig = {
            'label': 'description',
            'color': color_function.replace('\n', ''),
            'description': 'Hit_titles',
        }
        config = {'glyph': 'JBrowse/View/FeatureGlyph/Segments'}
        if 'category' in kwargs:
            config['category'] = kwargs['category']

        cmd = ['perl', 'bin/flatfile-to-json.pl',
               '--gff', gff3_rebased.name,
               '--trackLabel', label,
               '--key', key,
               '--clientConfig', json.dumps(clientConfig),
               '--config', json.dumps(config),
               '--trackType', 'JBrowse/View/Track/CanvasFeatures'
               ]

        self.subprocess_check_call(cmd)
        os.unlink(gff3_rebased.name)
        os.unlink(gff3_unrebased.name)

    def _min_max_gff(self, gff_file):
        min_val = None
        max_val = None
        with open(gff_file, 'r') as handle:
            for line in handle:
                try:
                    value = float(line.split('\t')[5])
                    min_val = min(value, (min_val or value))
                    max_val = max(value, (max_val or value))

                    if value < min_val:
                        min_val = value

                    if value > max_val:
                        max_val = value
                except Exception:
                    pass
        return min_val, max_val

    def add_bigwig(self, data, key, **kwargs):
        label = hashlib.md5(data).hexdigest()
        dest = os.path.join('data', 'raw', os.path.basename(data))
        cmd = ['ln', '-s', data, dest]
        self.subprocess_check_call(cmd)

        track_data = {
            "label": label,
            "urlTemplate": os.path.join('..', dest),
            "bicolor_pivot": "zero",
            "storeClass": "JBrowse/Store/SeqFeature/BigWig",
            "type": "JBrowse/View/Track/Wiggle/Density",
            "key": key,
        }
        track_data.update(kwargs)

        if 'min' not in track_data and 'max' not in track_data \
                and 'autoscale' not in track_data:
            track_data['autoscale'] = 'local'

        self._add_json(track_data)

    def add_bam(self, data, key, **kwargs):
        label = hashlib.md5(data).hexdigest()
        dest = os.path.join('data', 'raw', os.path.basename(data))
        # ln?
        cmd = ['ln', '-s', data, dest]
        self.subprocess_check_call(cmd)

        bai_source = kwargs['bam_index']
        cmd = ['ln', '-s', bai_source, dest + '.bai']
        self.subprocess_check_call(cmd)

        track_data = {
            "urlTemplate": os.path.join('..', dest),
            "key": key,
            "label": label,
            "type": "JBrowse/View/Track/Alignments2",
            "storeClass": "JBrowse/Store/SeqFeature/BAM",
        }
        if 'category' in kwargs:
            track_data['category'] = kwargs['category']
        self._add_json(track_data)

        if kwargs.get('auto_snp', False):
            track_data = {
                "storeClass": "JBrowse/Store/SeqFeature/BAM",
                "urlTemplate": os.path.join('..', dest),
                "type": "JBrowse/View/Track/SNPCoverage",
                "key": key + " - SNPs/Coverage",
                "label": label + "_autosnp",
            }
            if 'category' in kwargs:
                track_data['category'] = kwargs['category']
            self._add_json(track_data)

    def add_vcf(self, data, key, **kwargs):
        label = hashlib.md5(data).hexdigest()
        dest = os.path.join('data', 'raw', os.path.basename(data))
        # ln?
        cmd = ['ln', '-s', data, dest]
        self.subprocess_check_call(cmd)
        cmd = ['bgzip', dest]
        self.subprocess_check_call(cmd)
        cmd = ['tabix', '-p', 'vcf', dest + '.gz']
        self.subprocess_check_call(cmd)

        track_data = {
            "key": key,
            "label": label,
            "urlTemplate": os.path.join('..', dest + '.gz'),
            "type": "JBrowse/View/Track/HTMLVariants",
            "storeClass": "JBrowse/Store/SeqFeature/VCFTabix",
        }
        track_data.update(kwargs)
        self._add_json(track_data)

    def add_features(self, data, key, format, **kwargs):
        label = hashlib.md5(data).hexdigest()
        cmd = ['perl', 'bin/flatfile-to-json.pl',
               TN_TABLE.get(format, 'gff'), data,
               '--trackLabel', label,
               '--key', key]

        config = {}
        if 'category' in kwargs:
            config['category'] = kwargs['category']

        if kwargs.get('match', False):
            clientConfig = {
                'label': 'description',
                'description': 'Hit_titles',
            }

            # Get min/max and build a scoring function since JBrowse doesn't
            min_val, max_val = self._min_max_gff(data)

            if min_val is not None and max_val is not None:
                MIN_MAX_OPACITY_MATH = Template("""
                var opacity = (score - ${min}) * (1/(${max} - ${min}));
                """).substitute({
                    'max': max_val,
                    'min': min_val,
                })

                red, green, blue = self._get_colours()
                color_function = COLOR_FUNCTION_TEMPLATE.substitute({
                    'score': "feature.get('score')",
                    'opacity': MIN_MAX_OPACITY_MATH,
                    'red': red,
                    'green': green,
                    'blue': blue,
                })

                clientConfig['color'] = color_function.replace('\n', '')

            config['glyph'] = 'JBrowse/View/FeatureGlyph/Segments'

            cmd += ['--clientConfig', json.dumps(clientConfig),
                    '--trackType', 'JBrowse/View/Track/CanvasFeatures'
                    ]

        cmd.extend(['--config', json.dumps(config)])

        self.subprocess_check_call(cmd)

    def process_annotations(self, data, key, format, **kwargs):
        if format in ('gff', 'gff3', 'bed'):
            self.add_features(data, key, format, **kwargs)
        elif format == 'bigwig':
            self.add_bigwig(data, key, **kwargs)
        elif format == 'bam':
            self.add_bam(data, key, **kwargs)
        elif format == 'blastxml':
            self.add_blastxml(data, key, **kwargs)
        elif format == 'vcf':
            self.add_vcf(data, key, **kwargs)

    def clone_jbrowse(self, jbrowse_dir, destination):
        # JBrowse seems to have included some bad symlinks, cp ignores bad symlinks
        # unlike copytree
        cmd = ['mkdir', '-p', destination]
        subprocess.check_call(cmd)
        cmd = ['cp', '-r', jbrowse_dir, destination]
        subprocess.check_call(cmd)
        cmd = ['mkdir', '-p', os.path.join(destination, 'JBrowse-1.11.6',
                                           'data', 'raw')]
        subprocess.check_call(cmd)

        # http://unix.stackexchange.com/a/38691/22785
        # JBrowse releases come with some broken symlinks
        cmd = ['find', destination, '-type', 'l', '-xtype', 'l', '-exec', 'rm', "'{}'", '+']
        subprocess.check_call(cmd)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description="", epilog="")
    parser.add_argument('genome', type=file, help='Input genome file')
    parser.add_argument('yaml', type=file, help='Track Configuration')

    parser.add_argument('--jbrowse', help='Folder containing a jbrowse release')
    parser.add_argument('--outdir', help='Output directory', default='out')
    args = parser.parse_args()

    jc = JbrowseConnector(
        jbrowse=args.jbrowse,
        jbrowse_dir=os.path.join(args.outdir, 'JBrowse-1.11.6'),
        outdir=args.outdir,
        genome=os.path.realpath(args.genome.name),
    )

    track_data = yaml.load(args.yaml)
    for track in track_data:
        path = os.path.realpath(track['file'])
        extra = track.get('options', {})
        if '__unused__' in extra:
            del extra['__unused__']

        for possible_partial_path in ('bam_index', 'parent'):
            if possible_partial_path in extra:
                extra[possible_partial_path] = os.path.realpath(
                    extra[possible_partial_path])
        extra['category'] = track.get('category', 'Default')

        jc.process_annotations(path, track['label'], track['ext'], **extra)

    print """
    <html>
        <body>
        <script type="text/javascript">
            window.location=JBrowse-1.11.6/index.html
        </script>
        <a href="JBrowse-1.11.6/index.html">Go to JBrowse</a>
        <p>Please note that JBrowse functions best on production Galaxy
        instances. The paste server used in development instances has issues
        serving the volumes of data regularly involved in JBrowse</p>
        </body>
    </html>
    """