# HG changeset patch
# User yating-l
# Date 1492033315 14400
# Node ID 804a93e87cc8ef7355bb9bd567b2f414872705b9
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
diff -r 000000000000 -r 804a93e87cc8 TrackHub.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/TrackHub.py	Wed Apr 12 17:41:55 2017 -0400
@@ -0,0 +1,192 @@
+#!/usr/bin/env python
+
+import os
+import subprocess
+import shutil
+import json
+import utils
+
+
+class TrackHub:
+    def __init__(self, inputFiles, reference, outputDirect, tool_dir, genome, extra_files_path, metaData, jbrowse_host):
+        self.input_files = inputFiles.tracks
+        self.outfile = outputDirect
+        self.outfolder = extra_files_path
+        self.out_path = os.path.join(extra_files_path, genome)
+        self.reference = reference
+        self.tool_dir = tool_dir
+        self.metaData = metaData
+        self.raw = os.path.join(self.out_path, 'raw')
+        self.json = os.path.join(self.out_path, 'json')
+        self.jbrowse_host = jbrowse_host
+        try: 
+            if os.path.exists(self.json):
+                shutil.rmtree(self.json)
+            os.makedirs(self.json)
+        except OSError as e:
+            print "Cannot create json folder error({0}): {1}".format(e.errno, e.strerror)
+        else:
+            print "Create jbrowse folder {}".format(self.out_path)
+    
+    def createHub(self):
+        self.prepareRefseq()
+        for input_file in self.input_files:
+            self.addTrack(input_file)
+        self.indexName()
+        slink = self.makeArchive()
+        self.outHtml(slink)
+        print "Success!\n"
+    
+    def prepareRefseq(self):
+        try:
+            #print os.path.join(self.tool_dir, 'prepare-refseqs.pl') + ", '--fasta', " + self.reference +", '--out', self.json])"
+            subprocess.call(['prepare-refseqs.pl', '--fasta', self.reference, '--out', self.json])
+        except OSError as e:
+            print "Cannot prepare reference error({0}): {1}".format(e.errno, e.strerror)
+    #TODO: hard coded the bam and bigwig tracks. Need to allow users to customize the settings
+    def addTrack(self, track):
+        #print "false_path" , track['false_path']
+        if track['false_path'] in self.metaData.keys():
+            metadata = self.metaData[track['false_path']]
+        else:
+            metadata = {}
+        self.SetMetadata(track, metadata)
+        if track['dataType'] == 'bam':
+            self.Bam(track, metadata)
+           # print "add bam track\n"
+        elif track['dataType'] == 'bigwig':
+            self.BigWig(track, metadata)
+        else: 
+            flat_file = os.path.join(self.raw, track['fileName'])
+            if track['dataType'] == 'bed':
+                subprocess.call(['flatfile-to-json.pl', '--bed', flat_file, '--trackType', metadata['type'], '--trackLabel', metadata['label'], '--Config', '{"category" : "%s"}' % metadata['category'], '--clientConfig', '{"color" : "%s"}' % metadata['color'], '--out', self.json])
+            elif track['dataType'] == 'bedSpliceJunctions' or track['dataType'] == 'gtf' or track['dataType'] == 'blastxml':
+                subprocess.call(['flatfile-to-json.pl', '--gff', flat_file, '--trackType', metadata['type'], '--trackLabel', metadata['label'], '--Config', '{"glyph": "JBrowse/View/FeatureGlyph/Segments", "category" : "%s"}' % metadata['category'], '--clientConfig', '{"color" : "%s"}' % metadata['color'], '--out', self.json])
+            elif track['dataType'] == 'gff3_transcript':
+                subprocess.call(['flatfile-to-json.pl', '--gff', flat_file, '--trackType', metadata['type'], '--trackLabel', metadata['label'], '--Config', '{"transcriptType": "transcript", "category" : "%s"}' % metadata['category'], '--clientConfig', '{"color" : "%s"}' % metadata['color'], '--out', self.json])
+            else:
+                subprocess.call(['flatfile-to-json.pl', '--gff', flat_file, '--trackType', metadata['type'], '--trackLabel', metadata['label'], '--Config', '{"category" : "%s"}' % metadata['category'], '--clientConfig', '{"color" : "%s"}' % metadata['color'], '--out', self.json])
+            
+    def indexName(self):
+        subprocess.call(['generate-names.pl', '-v', '--out', self.json])
+        print "finished name index \n"
+
+    def makeArchive(self):
+        shutil.make_archive(self.out_path, 'zip', self.out_path)
+        file_dir = os.path.abspath(self.outfile)
+        source_dir = os.path.dirname(file_dir)
+        folder_name = os.path.basename(self.outfolder)
+        source_name = os.path.basename(self.out_path)
+        source = os.path.join(source_dir, folder_name, source_name)
+        slink = source.replace('/', '_')
+        slink = os.path.join('/var/www/html/JBrowse-1.12.1/data', slink)
+        try:
+            if os.path.islink(slink):
+                os.unlink(slink)
+        except OSError as oserror:
+            print "Cannot create symlink to the data({0}): {1}".format(oserror.errno, oserror.strerror)
+        os.symlink(source, slink)
+        return slink
+        '''
+        data_folder = '/gonramp/static/JBrowse-1.12.1/jbrowse_hub'
+        try:
+            if os.path.exists(data_folder):
+                if os.path.isdir(data_folder):
+                    shutil.rmtree(data_folder)
+                else:
+                    os.remove(data_folder)
+        except OSError as oserror:
+            print "Cannot create data folder({0}): {1}".format(oserror.errno, oserror.strerror)
+        shutil.copytree(self.out_path, data_folder)
+        subprocess.call(['chmod', '-R', 'o+rx', '/var/www/html/JBrowse-1.12.1/jbrowse_hub'])
+        shutil.rmtree(self.out_path)
+        '''
+    
+    #TODO: this will list all zip files in the filedir and sub-dirs. worked in Galaxy but all list zip files in test-data when
+    #run it locally. May need modify
+    def outHtml(self, slink):
+        with open(self.outfile, 'w') as htmlfile:
+            htmlstr = 'The JBrowse Hub is created: 
'
+            zipfiles = '
Download'
+            url = self.jbrowse_host + "/JBrowse-1.12.1/index.html?data=%s"
+            jbrowse_hub = 'View JBrowse Hub' % url
+            filedir_abs = os.path.abspath(self.outfile)
+            filedir = os.path.dirname(filedir_abs)
+            filedir = os.path.join(filedir, self.outfolder)
+            for root, dirs, files in os.walk(filedir):
+                for file in files:
+                    if file.endswith('.zip'):   
+                        relative_directory = os.path.relpath(root, filedir)
+                        relative_file_path = os.path.join(relative_directory, file)
+                        htmlstr += zipfiles % relative_file_path
+            link_name = os.path.basename(slink)
+            relative_path = os.path.join('data', link_name + '/json')
+            htmlstr += jbrowse_hub % relative_path
+            htmlfile.write(htmlstr)
+
+    def createTrackList(self):
+        trackList = os.path.join(self.json, "trackList.json")
+        if not os.path.exists(trackList):
+            os.mknod(trackList)
+    
+    def Bam(self, track, metadata):
+        #create trackList.json if not exist
+        self.createTrackList()
+        json_file = os.path.join(self.json, "trackList.json")
+        bam_track = dict()
+        bam_track['type'] = 'JBrowse/View/Track/Alignments2'
+        bam_track['storeClass'] = 'JBrowse/Store/SeqFeature/BAM'
+        bam_track['urlTemplate'] = os.path.join('../raw', track['fileName'])
+        bam_track['baiUrlTemplate'] = os.path.join('../raw', track['index'])
+        bam_track['label'] = metadata['label']
+        bam_track['category'] = metadata['category']
+        bam_track = json.dumps(bam_track)
+        #Use add-track-json.pl to add bam track to json file
+        new_track = subprocess.Popen(['echo', bam_track], stdout=subprocess.PIPE)
+        subprocess.call(['add-track-json.pl', json_file], stdin=new_track.stdout)
+    
+    def BigWig(self, track, metadata):
+        #create trackList.json if not exist
+        self.createTrackList()
+        json_file = os.path.join(self.json, "trackList.json")
+        bigwig_track = dict()
+        bigwig_track['urlTemplate'] = os.path.join('../raw', track['fileName'])
+        bigwig_track['type'] = 'JBrowse/View/Track/Wiggle/XYPlot'
+        bigwig_track['storeClass'] = 'JBrowse/Store/SeqFeature/BigWig'
+        bigwig_track['label'] = metadata['label']
+        bigwig_track['style'] = metadata['style']
+        bigwig_track['category'] = metadata['category']
+        bigwig_track = json.dumps(bigwig_track)
+        #Use add-track-json.pl to add bigwig track to json file
+        new_track = subprocess.Popen(['echo', bigwig_track], stdout=subprocess.PIPE)
+        #output = new_track.communicate()[0]
+        subprocess.call(['add-track-json.pl', json_file], stdin=new_track.stdout)
+
+    #If the metadata is not set, use the default value
+    def SetMetadata(self, track, metadata):
+        if 'label' not in metadata.keys() or metadata['label'] == '':
+            metadata['label'] = track['fileName']
+        if 'color' not in metadata.keys() or metadata['color'] == '':
+            metadata['color'] = "#daa520"
+        if track['dataType'] == 'bigwig':
+            if 'style' not in metadata.keys():
+                metadata['style'] = {}
+            if 'pos_color' not in metadata['style'] or metadata['style']['pos_color'] == '':
+                metadata['style']['pos_color'] = "#FFA600"
+            if 'neg_color' not in metadata['style'] or metadata['style']['neg_color'] == '':
+                metadata['style']['neg_color'] = "#005EFF"
+        if 'category' not in metadata.keys() or metadata['category'] == '':
+            metadata['category'] = "Default group"
+        if track['dataType'] == 'blastxml':
+            metadata['type'] = "G-OnRamp_plugin/BlastAlignment"
+        elif track['dataType'] == 'gff3_transcript' or track['dataType'] == 'gff3_mrna':
+            metadata['type'] = "G-OnRamp_plugin/GenePred"
+        else:
+            metadata['type'] = "CanvasFeatures"
+
+
+
+   
+
+
+
diff -r 000000000000 -r 804a93e87cc8 bedToGff3.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bedToGff3.py	Wed Apr 12 17:41:55 2017 -0400
@@ -0,0 +1,92 @@
+#!/usr/bin/env python
+
+'''
+Convert BED format to gff3
+'''
+import os
+from collections import OrderedDict
+import utils
+
+class bedToGff3():
+    def __init__(self, inputBedFile, chrom_sizes, bed_type, output):
+        self.input = inputBedFile
+        #file_dir = os.path.basename(inputBedFile)
+        #print file_dir + "\n\n"
+        self.output = output
+        self.chrom_sizes = chrom_sizes
+        self.type = bed_type
+        if self.type == "trfbig":
+            self.trfbig_to_gff3()
+        if self.type == "regtools":
+            self.splicejunctions_to_gff3()
+
+    def trfbig_to_gff3(self):
+        gff3 = open(self.output, 'w')
+        gff3.write("##gff-version 3\n")
+        sizes_dict = utils.sequence_region(self.chrom_sizes)
+        seq_regions = dict()
+        with open(self.input, 'r') as bed:
+            for line in bed:
+                field = OrderedDict()
+                attribute = OrderedDict()
+                li = line.rstrip().split("\t")
+                field['seqid'] = li[0]
+                if field['seqid'] not in seq_regions:
+                    end_region = sizes_dict[field['seqid']]
+                    gff3.write("##sequence-region " + field['seqid'] + ' 1 ' + str(end_region) + '\n')
+                    seq_regions[field['seqid']] = end_region
+                field['source'] = li[3]
+                field['type'] = 'tandem_repeat'
+                # The first base in a chromosome is numbered 0 in BED format
+                field['start'] = str(int(li[1]) + 1)
+                field['end'] = li[2]
+                field['score'] = li[9]
+                field['strand'] = '+'
+                field['phase'] = '.'
+                attribute['length of repeat unit'] = li[4]
+                attribute['mean number of copies of repeat'] = li[5]
+                attribute['length of consensus sequence'] = li[6]
+                attribute['percentage match'] = li[7]
+                attribute['percentage indel'] = li[8]
+                attribute['percent of a\'s in repeat unit'] = li[10]
+                attribute['percent of c\'s in repeat unit'] = li[11]
+                attribute['percent of g\'s in repeat unit'] = li[12]
+                attribute['percent of t\'s in repeat unit'] = li[13]
+                attribute['entropy'] = li[14]
+                attribute['sequence of repeat unit element'] = li[15]
+                utils.write_features(field, attribute, gff3)
+        gff3.close()
+
+
+    def splicejunctions_to_gff3(self):
+        gff3 = open(self.output, 'w')
+        gff3.write("##gff-version 3\n")
+        sizes_dict = utils.sequence_region(self.chrom_sizes)
+        seq_regions = dict()
+        with open(self.input, 'r') as bed:
+            for line in bed:
+                field = OrderedDict()
+                attribute = OrderedDict()
+                li = line.rstrip().split("\t")
+                field['seqid'] = li[0]
+                if field['seqid'] not in seq_regions:
+                    end_region = sizes_dict[field['seqid']]
+                    gff3.write("##sequence-region " + field['seqid'] + ' 1 ' + str(end_region) + '\n')
+                    seq_regions[field['seqid']] = end_region
+                field['source'] = li[3]
+                field['type'] = 'junction'
+                # The first base in a chromosome is numbered 0 in BED format
+                field['start'] = int(li[1]) + 1
+                field['end'] = li[2]
+                field['score'] = li[12]
+                field['strand'] = li[5]
+                field['phase'] = '.'
+                attribute['ID'] = li[3]
+                attribute['Name'] = li[3]
+                attribute['blockcount'] = li[9]
+                attribute['blocksizes'] = li[10]
+                attribute['chromstarts'] = li[11]
+                utils.write_features(field, attribute, gff3)
+                utils.child_blocks(field, attribute, gff3)
+        gff3.close()
+        
\ No newline at end of file
diff -r 000000000000 -r 804a93e87cc8 blastxmlToGff3.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/blastxmlToGff3.py	Wed Apr 12 17:41:55 2017 -0400
@@ -0,0 +1,159 @@
+#!/usr/bin/env python
+
+
+from Bio.Blast import NCBIXML
+from collections import OrderedDict
+import utils
+
+
+def align2cigar(hsp_query, hsp_reference):
+    """
+        Build CIGAR representation from an hsp_query
+        input:
+            hsp_query
+            hsp_sbjct
+        output:
+            CIGAR string
+    """
+    query = hsp_query
+    ref = hsp_reference
+    # preType, curType:
+    # 'M' represents match,
+    # 'I' represents insert a gap into the reference sequence,
+    # 'D' represents insert a gap into the target (delete from reference)
+    # some ideas of this algin2cigar function are coming from
+    # https://gist.github.com/ozagordi/099bdb796507da8d9426
+    prevType = 'M'
+    curType = 'M'
+    count = 0
+    cigar = []
+    num = len(query)
+    for i in range(num):
+        if query[i] == '-':
+            curType = 'D'
+        elif ref[i] == '-':
+            curType = 'I'
+        else:
+            curType = 'M'
+        if curType == prevType:
+            count += 1
+        else:
+            cigar.append('%s%d' % (prevType, count))
+            prevType = curType
+            count = 1
+    cigar.append('%s%d' % (curType, count))
+    return ' '.join(cigar)
+
+def gff3_writer(blast_records, gff3_file):
+    gff3 = open(gff3_file, 'a')
+    gff3.write("##gff-version 3\n")
+    seq_regions = dict()
+    for blast_record in blast_records:
+        query_name = blast_record.query.split(" ")[0]
+        source = blast_record.application
+        method = blast_record.matrix
+        for alignment in blast_record.alignments:
+            group = {
+            "parent_field" : OrderedDict(),
+            "parent_attribute" : OrderedDict(),
+            "alignments" : []
+            }
+            title = alignment.title.split(" ")
+            contig_name = title[len(title) - 1]
+            length = alignment.length
+            group['parent_field']['seqid'] = contig_name
+            group['parent_field']['source'] = source
+            group['parent_field']['type'] = 'match'
+            group['parent_attribute']['ID'] = contig_name + '_' + query_name
+            group['parent_attribute']['method'] = method
+            group['parent_attribute']['length'] = length
+            if contig_name not in seq_regions:
+                gff3.write("##sequence-region " + contig_name + ' 1 ' + str(length) + '\n')
+                seq_regions[contig_name] = length
+            match_num = 0
+            coords = [length, 0]
+            for hsp in alignment.hsps:
+                hsp_align = {}
+                field = OrderedDict()
+                attribute = OrderedDict()
+                ref = hsp.sbjct
+                query = hsp.query
+                field['seqid'] = contig_name
+                field['source'] = source
+                field['type'] = 'match_part'
+                
+                field['start'] = hsp.sbjct_start
+                if field['start'] < coords[0]:
+                    coords[0] = field['start']
+                ref_length = len(ref.replace('-', ''))
+                # if run tblastn, the actual length of reference should be multiplied by 3
+                if source.lower() == "tblastn":
+                    ref_length *= 3
+                field['end'] = field['start'] + ref_length - 1
+                if field['end'] > coords[1]:
+                    coords[1] = field['end']
+                field['score'] = hsp.score
+                #decide if the alignment in the same strand or reverse strand
+                #reading frame
+                # (+, +), (0, 0), (-, -) => +
+                # (+, -), (-, +) => -
+                if hsp.frame[1] * hsp.frame[0] > 0:
+                    field['strand'] = '+'
+                elif hsp.frame[1] * hsp.frame[0] < 0:
+                    field['strand'] = '-'
+                else:
+                    if hsp.frame[0] + hsp.frame[1] >= 0:
+                        field['strand'] = '+'
+                    else:
+                        field['strand'] = '-'
+                field['phase'] = '.'
+
+                target_start = hsp.query_start
+                target_len = len(query.replace('-', ''))
+                # if run blastx, the actual length of query should be multiplied by 3
+                if source.lower() == "blastx":
+                    target_len *= 3
+                target_end = target_start + target_len -1
+                attribute['ID'] = group['parent_attribute']['ID'] + '_match_' + str(match_num)
+                attribute['Parent'] = group['parent_attribute']['ID']
+                attribute['Target'] = query_name + " " + str(target_start) + " " + str(target_end)
+                attribute['Gap'] = align2cigar(query, ref)
+                #store the query sequence and match string in the file in order to display alignment with BlastAlignment plugin
+                attribute['subject'] = hsp.sbjct
+                attribute['query'] = hsp.query
+                attribute['match'] = hsp.match
+                attribute['gaps'] = attribute['match'].count(' ')
+                similar = attribute['match'].count('+')
+                attribute['identities'] = len(attribute['match']) - similar - attribute['gaps']
+                attribute['positives'] = attribute['identities'] + similar
+                attribute['expect'] = hsp.expect
+                # show reading frame attribute only if the frame is not (0, 0)
+                attribute['frame'] = hsp.frame[1]
+                match_num += 1
+                hsp_align['field'] = field
+                hsp_align['attribute'] = attribute
+                group['alignments'].append(hsp_align)
+            group['parent_field']['start'] = coords[0]
+            group['parent_field']['end'] = coords[1]
+            group['parent_field']['score'] = group['parent_field']['strand'] = group['parent_field']['phase'] = '.'
+            group['parent_attribute']['match_num'] = match_num
+            group['alignments'].sort(key=lambda x: (x['field']['start'], x['field']['end']))
+            utils.write_features(group['parent_field'], group['parent_attribute'], gff3)
+            prev_end = -1
+            for align in group['alignments']:
+                overlap = ''
+                if align['field']['start'] <= prev_end:
+                    overlap += str(align['field']['start']) + ',' + str(prev_end)
+                prev_end = align['field']['end']
+                align['attribute']['overlap'] = overlap
+                utils.write_features(align['field'], align['attribute'], gff3)
+    gff3.close()
+
+def blastxml2gff3(xml_file, gff3_file):
+    result_handle = open(xml_file)
+    blast_records = NCBIXML.parse(result_handle)
+    gff3_writer(blast_records, gff3_file)
+
+if __name__ == "__main__":
+    blastxml2gff3("../dbia3/raw/tblastn_dmel-hits-translation-r6.11.fa_vs_nucleotide_BLAST_database_from_data_3.blastxml", "gff3.txt")
+
diff -r 000000000000 -r 804a93e87cc8 jbrowse_hub.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jbrowse_hub.py	Wed Apr 12 17:41:55 2017 -0400
@@ -0,0 +1,171 @@
+#!/usr/bin/env python
+
+import sys
+import argparse
+import json
+import utils
+import trackObject
+import TrackHub
+
+
+
+def main(argv):
+    parser = argparse.ArgumentParser(description='Create a hub to display in jbrowse.')
+
+    # Reference genome mandatory
+    parser.add_argument('-f', '--fasta', help='Fasta file of the reference genome (Required)')
+
+    # Genome name
+    parser.add_argument('-g', '--genome_name', help='Name of reference genome')
+
+    # Output folder
+    parser.add_argument('-o', '--out', help='output html')
+
+    # Output folder
+    parser.add_argument('-e', '--extra_files_path', help='Directory of JBrowse Hub folder')
+
+    #Tool Directory
+    parser.add_argument('-d', '--tool_directory', help='The directory of JBrowse file convertion scripts and UCSC tools')
+
+    #GFF3
+    parser.add_argument('--gff3', action='append', help='GFF3 format')
+
+    # GFF3 structure: gene->transcription->CDS
+    parser.add_argument('--gff3_transcript', action='append', help='GFF3 format for gene prediction, structure: gene->transcription->CDS')
+
+    # GFF3 structure: gene->mRNA->CDS
+    parser.add_argument('--gff3_mrna', action='append', help='GFF3 format for gene prediction, structure: gene->mRNA->CDS')
+
+    # generic BED 
+    parser.add_argument('--bed', action='append', help='BED format')
+
+    # trfBig simple repeats (BED 4+12)
+    parser.add_argument('--bedSimpleRepeats', action='append', help='BED 4+12 format, using simpleRepeats.as')
+
+    # regtools (BED 12+1)
+    parser.add_argument('--bedSpliceJunctions', action='append', help='BED 12+1 format, using spliceJunctions.as')
+
+    # tblastn alignment (blastxml)
+    parser.add_argument('--blastxml', action='append', help='blastxml format from tblastn')
+
+    # BAM format
+    parser.add_argument('--bam', action='append', help='BAM format from HISAT')
+
+    # BIGWIG format
+    parser.add_argument('--bigwig', action='append', help='BIGWIG format to show rnaseq coverage')
+
+    # GTF format
+    parser.add_argument('--gtf', action='append', help='GTF format from StringTie')
+
+    # Metadata json format
+    parser.add_argument('-j', '--data_json', help='Json containing the metadata of the inputs')
+
+    #JBrowse host
+    parser.add_argument('--jbrowse_host', help="JBrowse Host")
+
+    args = parser.parse_args()
+    all_datatype_dictionary = dict()
+    
+
+    if not args.fasta:
+        parser.print_help()
+        raise RuntimeError("No reference genome\n")
+    reference = args.fasta
+    genome = 'unknown'
+    out_path = 'unknown.html'
+    extra_files_path = '.'
+    tool_directory = '.'
+    jbrowse_host = ''
+    if args.jbrowse_host:
+        jbrowse_host = args.jbrowse_host
+    if args.genome_name:
+        genome = args.genome_name
+    if args.out:
+        out_path = args.out
+    if args.extra_files_path:
+        extra_files_path = args.extra_files_path
+
+    #tool_directory not work for Galaxy tool, all tools need to exist in the current PATH, deal with it with tool dependencies
+    if args.tool_directory:
+        tool_directory = args.tool_directory
+
+    #Calculate chromsome sizes using genome reference and uscs tools
+    chrom_size = utils.getChromSizes(reference, tool_directory)
+
+    #get metadata from json file
+    json_inputs_data = args.data_json
+    if json_inputs_data:
+        inputs_data = json.loads(json_inputs_data)
+    else:
+        inputs_data = {}
+    
+    #print inputs_data
+
+    #Initate trackObject
+    all_tracks = trackObject.trackObject(chrom_size.name, genome, extra_files_path) 
+    
+    array_inputs_bam = args.bam
+    array_inputs_bed = args.bed
+    array_inputs_bed_simple_repeats = args.bedSimpleRepeats
+    array_inputs_bed_splice_junctions = args.bedSpliceJunctions
+    array_inputs_bigwig = args.bigwig
+    array_inputs_gff3 = args.gff3
+    array_inputs_gff3_transcript = args.gff3_transcript
+    array_inputs_gff3_mrna = args.gff3_mrna
+    array_inputs_gtf = args.gtf
+    array_inputs_blastxml = args.blastxml
+
+    if array_inputs_bam:
+        all_datatype_dictionary['bam'] = array_inputs_bam
+    if array_inputs_bed:
+        all_datatype_dictionary['bed'] = array_inputs_bed
+    if array_inputs_bed_simple_repeats:
+        all_datatype_dictionary['bedSimpleRepeats'] = array_inputs_bed_simple_repeats
+    if array_inputs_bed_splice_junctions:
+        all_datatype_dictionary['bedSpliceJunctions'] = array_inputs_bed_splice_junctions
+    if array_inputs_bigwig:
+        all_datatype_dictionary['bigwig'] = array_inputs_bigwig
+    if array_inputs_gff3:
+        all_datatype_dictionary['gff3'] = array_inputs_gff3
+    if array_inputs_gff3_transcript:
+        all_datatype_dictionary['gff3_transcript'] = array_inputs_gff3_transcript
+    if array_inputs_gff3_mrna:
+        all_datatype_dictionary['gff3_mrna'] = array_inputs_gff3_mrna
+    if array_inputs_gtf:
+        all_datatype_dictionary['gtf'] = array_inputs_gtf
+    if array_inputs_blastxml:
+        all_datatype_dictionary['blastxml'] = array_inputs_blastxml
+    
+    print "input tracks: \n", all_datatype_dictionary
+
+    for datatype, inputfiles in all_datatype_dictionary.items():
+        try:
+            if not inputfiles:
+                raise ValueError('empty input, must provide track files!\n')
+        except IOError:
+            print 'Cannot open', datatype
+        else:
+            for f in inputfiles:
+                #metadata = {}
+                #print f
+                #if f in inputs_data.keys():
+                   # metadata = inputs_data[f]
+                    #print metadata
+                #Convert tracks into gff3 format
+                all_tracks.addToRaw(f, datatype)
+
+    jbrowseHub = TrackHub.TrackHub(all_tracks, reference, out_path, tool_directory, genome, extra_files_path, inputs_data, jbrowse_host)
+    jbrowseHub.createHub()
+
+"""        
+def extractMetadata(array_inputs, inputs_data):
+    metadata_dict = {}
+    for input_false_path in array_inputs:
+        for key, data_value in inputs_data.items():
+            if key == input_false_path:
+                metadata_dict[input_false_path]
+"""
+
+if __name__ == "__main__":
+    main(sys.argv)
+
diff -r 000000000000 -r 804a93e87cc8 jbrowse_hub.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jbrowse_hub.xml	Wed Apr 12 17:41:55 2017 -0400
@@ -0,0 +1,293 @@
+
+    
+        This Galaxy tool is used to prepare your files to be ready for displaying on JBrowse
+    
+
+    
+        samtools
+        numpy
+        biopython
+        ucsc_tools_340
+        jbrowse_tools
+    
+
+    
+    
+
+    
+
+    
+        
+        
+        
+        
+            
+            
+                
+                    
+                        
+                        
+                        
+                        
+                        
+                        
+                    
+
+                    
+                        
+                        
+                         
+                    
+                        
+                            
+                                
+                                
+                                
+                            
+                            
+                                
+                            
+                            
+                                
+                            
+                            
+                                
+                            
+                        
+                        
+                        
+                            
+                                
+                                    
+                                
+                            
+                        
+                    
+                    
+                        
+                        
+                    
+                    
+                        
+                        
+                        
+                            
+                                
+                                    
+                                
+                            
+                        
+                        
+                            
+                                
+                                    
+                                
+                            
+                        
+                    
+                    
+                        
+                            
+                                
+                                
+                                
+                            
+                            
+                                
+                            
+                            
+                                
+                            
+                            
+                                
+                            
+                        
+                        
+                        
+                            
+                                
+                                    
+                                
+                            
+                        
+                    
+                    
+                        
+                        
+                        
+                            
+                                
+                                    
+                                
+                            
+                        
+                      
+                
+            
+        
+    
+
+    
+        
+    
+    
+        
+            
+            
+            
+                
+                
+                    
+                        
+                        
+                    
+                
+            
+            
+        
+    
+    
+        This Galaxy tool will create a tar file which including raw datasets and json datasets that can be used for
+        JBrowse visualization.
+    
+    
+    
+
\ No newline at end of file
diff -r 000000000000 -r 804a93e87cc8 tool_dependencies.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml	Wed Apr 12 17:41:55 2017 -0400
@@ -0,0 +1,94 @@
+
+
+    
+        
+    
+
+    
+        
+    
+
+    
+        
+This package is based on package_biopython_1_67 owned by biopython. 
+https://toolshed.g2.bx.psu.edu/repository?user_id=fd5c6d0f82f315d8
+
+This Galaxy Tool Shed package installs Biopython from source, having
+first installed NumPy which is a build time depencency. This requires
+and assumes a standard C compiler is already installed, along with
+the Python header files.
+
+Development of this dependency definition is being done here on GitHub:
+https://github.com/biopython/galaxy_packages
+
+The PYTHONPATH for biopython can be accessed through PYTHONPATH_BIOPYTHON.
+        
+         
+            
+                http://biopython.org/DIST/biopython-1.68.tar.gz
+                
+                    
+                        
+                    
+                
+                $INSTALL_DIR/lib/python
+                
+                    export PYTHONPATH=$PYTHONPATH:$INSTALL_DIR/lib/python && 
+                    export PATH=$PATH:$PATH_NUMPY && 
+                    export PYTHONPATH=$PYTHONPATH:$PYTHONPATH_NUMPY && 
+                    python setup.py install --install-lib $INSTALL_DIR/lib/python
+                
+                
+                    $INSTALL_DIR/lib/python
+                    $ENV[PYTHONPATH_NUMPY]
+                    $ENV[PATH_NUMPY]
+                    $INSTALL_DIR/lib/python
+                
+            
+        
+    
+    
+     
+        
+            
+                
+                    http://old-gep.wustl.edu/~galaxy/ucsc_tools_340.tar.gz
+                    
+                        .                       
+                        $INSTALL_DIR/bin
+                        
+                
+                
+                    $INSTALL_DIR/bin
+                
+            
+         
+         The well known UCSC tools from Jim Kent.
+    
+    
+        
+            
+                
+                    http://jbrowse.org/wordpress/wp-content/plugins/download-monitor/download.php?id=105
+                    $INSTALL_DIR/jbrowse
+                    
+                        ./setup.sh
+                    
+                    
+                        .                       
+                        $INSTALL_DIR/jbrowse
+                        
+                    
+                        $INSTALL_DIR/jbrowse
+                        $INSTALL_DIR/jbrowse/bin
+                        $INSTALL_DIR/jbrowse/src
+                        $INSTALL_DIR/jbrowse/extlib
+                    
+                
+            
+         
+         The perl scripts for converting flat files to json.
+    
+
+    
+
diff -r 000000000000 -r 804a93e87cc8 trackObject.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/trackObject.py	Wed Apr 12 17:41:55 2017 -0400
@@ -0,0 +1,69 @@
+#!/usr/bin/env python
+
+import os
+import shutil
+import utils
+import bedToGff3
+import blastxmlToGff3
+
+
+class trackObject:
+    def __init__(self, chrom_size, genome, extra_files_path):
+        self.chrom_size = chrom_size
+        outputDirect = os.path.join(extra_files_path, genome)
+        self.raw_folder = os.path.join(outputDirect, 'raw')
+        #Store metadata of the tracks
+        self.tracks = []
+        try:
+            if os.path.exists(self.raw_folder):
+                if os.path.isdir(self.raw_folder):
+                    shutil.rmtree(self.raw_folder)
+                else:
+                    os.remove(self.raw_folder)
+            os.makedirs(self.raw_folder)
+        except OSError as oserror:
+            print "Cannot create raw folder error({0}): {1}".format(oserror.errno, oserror.strerror)
+
+    def addToRaw(self, dataFile, dataType):
+        """
+        Convert gff3, BED, blastxml and gtf files into gff3 files 
+        and store converted files in folder 'raw'
+        """
+        false_path = os.path.abspath(dataFile)
+        fileName = os.path.basename(dataFile)
+        des_path = os.path.join(self.raw_folder, fileName)
+        track = {}
+        if dataType == 'bed' or dataType == 'gff3' or dataType == 'gff3_mrna' or dataType == 'gff3_transcript' or dataType == 'fasta' or dataType == 'bam' or dataType == 'bigwig':
+            if dataType == 'bam':
+                # JBrowse will raise error: not a BAM file if the filename hasn't .bam extension
+                extension = os.path.splitext(fileName)[1]
+                if extension != '.bam':
+                    fileName = fileName + '.bam'
+                des_path = os.path.join(self.raw_folder, fileName)
+                bam_index = utils.createBamIndex(dataFile)
+                indexname = os.path.basename(bam_index)
+                des_path_for_index = os.path.join(self.raw_folder, indexname)
+                shutil.copyfile(bam_index, des_path_for_index)  
+                track['index'] = indexname
+
+            try:
+                shutil.copyfile(dataFile, des_path)
+            except shutil.Error as err1:
+                print "Cannot move file, error({0}: {1})".format(err1.errno, err1.strerror)
+            except IOError as err2:
+                print "Cannot move file, error({0}: {1})".format(err2.errno, err2.strerror)
+        elif dataType == 'bedSimpleRepeats':
+            bedToGff3.bedToGff3(dataFile, self.chrom_size, 'trfbig', des_path)
+        elif dataType == 'bedSpliceJunctions':
+            bedToGff3.bedToGff3(dataFile, self.chrom_size, 'regtools', des_path)
+        elif dataType == 'blastxml':
+            blastxmlToGff3.blastxml2gff3(dataFile, des_path)
+        elif dataType == 'gtf':
+            utils.gtfToGff3(dataFile, des_path, self.chrom_size)
+        track['fileName'] = fileName
+        track['dataType'] = dataType
+        track['false_path'] = false_path
+        #self.SetMetadata(track, metaData)
+        self.tracks.append(track)
+
+    
\ No newline at end of file
diff -r 000000000000 -r 804a93e87cc8 utils.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/utils.py	Wed Apr 12 17:41:55 2017 -0400
@@ -0,0 +1,161 @@
+#!/usr/bin/env python
+
+"""
+This file include common used functions for converting file format to gff3
+"""
+from collections import OrderedDict
+import json
+import subprocess
+import os
+import tempfile
+import string
+
+def write_features(field, attribute, gff3):
+    """
+    The function write the features to gff3 format (defined in https://github.com/The-Sequence-Ontology/Specifications/blob/master/gff3.md)
+    field, attribute are ordered dictionary 
+    gff3 is the file handler
+    """
+    attr = []
+    for v in field.values():
+        gff3.write(str(v) + '\t')
+    for k, v in attribute.items():
+        s = str(k) + '=' + str(v)
+        attr.append(s)
+    gff3.write(';'.join(attr))
+    gff3.write('\n')
+
+def getChromSizes(reference, tool_dir):
+    #TODO: find a better way instead of shipping the two exec files with the tool
+    faToTwoBit = os.path.join(tool_dir, 'faToTwoBit')
+    twoBitInfo = os.path.join(tool_dir, 'twoBitInfo')
+    try:
+        twoBitFile = tempfile.NamedTemporaryFile(bufsize=0)
+        chrom_sizes = tempfile.NamedTemporaryFile(bufsize=0, suffix='.chrom.sizes', delete=False)
+    except IOError as err:
+        print "Cannot create tempfile err({0}): {1}".format(err.errno, err.strerror)
+    try:
+        subprocess.call(['faToTwoBit', reference, twoBitFile.name])
+    except OSError as err:
+        print "Cannot generate twoBitFile from faToTwoBit err({0}): {1}".format(err.errno, err.strerror)
+    try:
+        subprocess.call(['twoBitInfo', twoBitFile.name, chrom_sizes.name])
+    except OSError as err:
+        print "Cannot generate chrom_sizes from twoBitInfo err({0}): {1}".format(err.errno, err.strerror)
+    return chrom_sizes
+
+def sequence_region(chrom_sizes):
+    """
+    This function read from a chromatin size file generated by twoBitInfo and write the information to dict
+    return a dict
+    """
+    f = open(chrom_sizes, 'r')
+    sizes = f.readlines()
+    sizes_dict = {}
+    for line in sizes:
+        chrom_info = line.rstrip().split('\t')
+        sizes_dict[chrom_info[0]] = chrom_info[1]
+    return sizes_dict
+
+def child_blocks(parent_field, parent_attr, gff3):
+    num = 0
+    blockcount = int(parent_attr['blockcount'])
+    chromstart = parent_attr['chromstarts'].split(',')
+    blocksize = parent_attr['blocksizes'].split(',')
+    while num < blockcount:
+        child_attr = OrderedDict()
+        child_field = parent_field
+        child_field['type'] = 'exon_junction'
+        child_field['start'] = int(chromstart[num]) + int(parent_field['start'])
+        child_field['end'] = int(child_field['start']) + int(blocksize[num]) - 1
+        child_attr['ID'] = parent_attr['ID'] + '_exon_' + str(num+1)
+        child_attr['Parent'] = parent_attr['ID']
+        write_features(child_field, child_attr, gff3)
+        num = num + 1
+
+def add_tracks_to_json(trackList_json, new_tracks, modify_type):
+    """
+    Add to track configuration (trackList.json)
+    # modify_type =  'add_tracks': add a new track like bam or bigwig, new_track = dict()
+    # modify_type = 'add_attr': add configuration to the existing track, new_track = dict(track_name: dict())
+    """
+    with open(trackList_json, 'r+') as f:
+        data = json.load(f)
+        if modify_type == 'add_tracks':
+            data['tracks'].append(new_tracks)
+        elif modify_type == 'add_attr':
+            for k in new_tracks:
+                for track in data['tracks']:
+                    if k.lower() in track['urlTemplate'].lower():
+                        attr = new_tracks[k]
+                        for k, v in attr.items():
+                            track[k] = v
+        f.seek(0, 0)
+        f.write(json.dumps(data, separators=(',' , ':'), indent=4))
+        f.truncate()
+        f.close()
+
+def gtfToGff3(gtf_file, gff3_file, chrom_sizes):
+    """
+    Covert gtf file output from StringTie to gff3 format
+    """
+    gff3 = open(gff3_file, 'w')
+    gff3.write("##gff-version 3\n")
+    sizes_dict = sequence_region(chrom_sizes)
+    seq_regions = dict()
+    parents = dict()
+    with open(gtf_file, 'r') as gtf:
+        for line in gtf:
+            if line.startswith('#'):
+                continue
+            field = OrderedDict()
+            attribute = OrderedDict()
+            li = line.rstrip().split("\t")
+            #print li
+            field['seqid'] = li[0]
+            #print field['seqid']
+            if field['seqid'] not in seq_regions:
+                end_region = sizes_dict[field['seqid']]
+                gff3.write("##sequence-region " + field['seqid'] + ' 1 ' + str(end_region) + '\n')
+                seq_regions[field['seqid']] = end_region
+            field['source'] = li[1]
+            field['type'] = li[2]
+                # The first base in a chromosome is numbered 0 in BED format
+            field['start'] = li[3]
+            field['end'] = li[4]
+            field['score'] = li[5]
+            field['strand'] = li[6]
+            field['phase'] = li[7]
+            attr_li = li[8].split(';')
+            gene_id = attr_li[0].split()[1].strip('"')
+            attribute['ID'] = gene_id + '_' + field['type'] + '_' + str(field['start']) + '_' + str(field['end'])
+            if field['type'] == 'transcript':
+                parents[gene_id] = attribute['ID']
+                attribute['transcript_id'] = attr_li[1].split()[1].strip('"')
+                attribute['coverage'] = attr_li[2].split()[1].strip('"')
+                attribute['fpkm'] = attr_li[3].split()[1].strip('"')
+                attribute['tpm'] = attr_li[4].split()[1].strip('"')
+            elif field['type'] == 'exon':
+                attribute['Parent'] = parents[gene_id]
+                attribute['transcript_id'] = attr_li[1].split()[1].strip('"')
+                attribute['coverage'] = attr_li[3].split()[1].strip('"')
+            write_features(field, attribute, gff3)
+    gff3.close()
+
+
+def sanitize_name(input_name):
+    """
+    Galaxy will name all the files and dirs as *.dat, 
+    the function can replace '.' to '_' for the dirs
+    """
+    validChars = "_-%s%s" % (string.ascii_letters, string.digits)
+    sanitized_name = ''.join([c if c in validChars else '_' for c in input_name])
+    return "gonramp_" + sanitized_name
+
+def createBamIndex(bamfile):
+    subprocess.call(['samtools', 'index', bamfile])
+    filename = bamfile + '.bai'
+    if os.path.exists(filename):
+        return filename
+    else:
+        raise ValueError('Did not find bai file')