Mercurial > repos > yating-l > jbrowsearchivecreator
changeset 4:7e471cdd9e71 draft
planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
author | yating-l |
---|---|
date | Fri, 07 Jul 2017 16:17:57 -0400 |
parents | eda851e52060 |
children | e762f4b9e4bd |
files | TrackHub.py TrackHub.pyc bedToGff3.py bedToGff3.pyc datatypes_conf.xml jbrowse_hub.py jbrowse_hub.xml trackObject.py trackObject.pyc utils.py utils.pyc |
diffstat | 11 files changed, 92 insertions(+), 13 deletions(-) [+] |
line wrap: on
line diff
--- a/TrackHub.py Wed May 31 15:45:47 2017 -0400 +++ b/TrackHub.py Fri Jul 07 16:17:57 2017 -0400 @@ -13,7 +13,7 @@ self.input_files = inputFiles.tracks self.outfile = outputDirect self.outfolder = extra_files_path - self.out_path = os.path.join(extra_files_path, genome) + self.out_path = os.path.join(extra_files_path, 'myHub') self.reference = reference self.tool_dir = tool_dir self.metaData = metaData @@ -153,6 +153,8 @@ metadata['category'] = "Default group" if track['dataType'] == 'blastxml': metadata['type'] = "G-OnRamp_plugin/BlastAlignment" + elif track['dataType'] == 'bigpsl': + metadata['type'] = "G-OnRamp_plugin/BlatAlignment" elif track['dataType'] == 'gff3_transcript' or track['dataType'] == 'gff3_mrna': metadata['type'] = "G-OnRamp_plugin/GenePred" else:
--- a/bedToGff3.py Wed May 31 15:45:47 2017 -0400 +++ b/bedToGff3.py Fri Jul 07 16:17:57 2017 -0400 @@ -2,6 +2,7 @@ ''' Convert BED format to gff3 +reference for gff3: https://github.com/The-Sequence-Ontology/Specifications/blob/master/gff3.md ''' import os from collections import OrderedDict @@ -19,6 +20,8 @@ self.trfbig_to_gff3() if self.type == "regtools": self.splicejunctions_to_gff3() + if self.type == "blat": + self.bigpsl_to_gff3() def trfbig_to_gff3(self): gff3 = open(self.output, 'w') @@ -81,12 +84,56 @@ field['score'] = li[12] field['strand'] = li[5] field['phase'] = '.' - attribute['ID'] = li[3] + attribute['ID'] = li[0] + '_' + li[3] attribute['Name'] = li[3] attribute['blockcount'] = li[9] attribute['blocksizes'] = li[10] attribute['chromstarts'] = li[11] utils.write_features(field, attribute, gff3) - utils.child_blocks(field, attribute, gff3) + utils.child_blocks(field, attribute, gff3, 'exon_junction') + gff3.close() + + def bigpsl_to_gff3(self): + gff3 = open(self.output, 'w') + gff3.write("##gff-version 3\n") + sizes_dict = utils.sequence_region(self.chrom_sizes) + seq_regions = dict() + with open(self.input, 'r') as bed: + for line in bed: + field = OrderedDict() + attribute = OrderedDict() + li = line.rstrip().split("\t") + field['seqid'] = li[0] + if field['seqid'] not in seq_regions: + end_region = sizes_dict[field['seqid']] + gff3.write("##sequence-region " + field['seqid'] + ' 1 ' + str(end_region) + '\n') + seq_regions[field['seqid']] = end_region + field['source'] = 'UCSC BLAT alignment tool' + field['type'] = 'match' + # The first base in a chromosome is numbered 0 in BED format + field['start'] = str(int(li[1]) + 1) + field['end'] = li[2] + field['score'] = li[4] + field['strand'] = li[5] + field['phase'] = '.' + attribute['ID'] = li[0] + '_' + li[3] + attribute['Name'] = li[3] + attribute['blockcount'] = li[9] + attribute['blocksizes'] = li[10] + attribute['chromstarts'] = li[11] + attribute['ochrom_start'] = li[12] + attribute['ochrom_end'] = li[13] + attribute['ochrom_strand'] = li[14] + attribute['ochrom_size'] = li[15] + attribute['ochrom_starts'] = li[16] + attribute['sequence on other chromosome'] = li[17] + attribute['cds in ncbi format'] = li[18] + attribute['size of target chromosome'] = li[19] + attribute['number of bases matched'] = li[20] + attribute['number of bases that don\'t match'] = li[21] + attribute['number of bases that match but are part of repeats'] = li[22] + attribute['number of \'N\' bases'] = li[23] + utils.write_features(field, attribute, gff3) + utils.child_blocks(field, attribute, gff3, 'match_part') gff3.close() \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/datatypes_conf.xml Fri Jul 07 16:17:57 2017 -0400 @@ -0,0 +1,8 @@ +<?xml version="1.0"?> +<datatypes> + <registration converters_path="lib/galaxy/datatypes/converters" display_path="display_applications"> + <datatype extension="jbrowsehub" type="galaxy.datatypes.tracks:UCSCTrackHub" display_in_upload="true"> + <display file="jbrowse/jbrowsehub.xml" /> + </datatype> + </registration> +</datatypes> \ No newline at end of file
--- a/jbrowse_hub.py Wed May 31 15:45:47 2017 -0400 +++ b/jbrowse_hub.py Fri Jul 07 16:17:57 2017 -0400 @@ -48,6 +48,9 @@ # tblastn alignment (blastxml) parser.add_argument('--blastxml', action='append', help='blastxml format from tblastn') + # blat alignment (bigpsl 12+12) + parser.add_argument('--bigpsl', action='append', help='bigpsl format from blat alignment') + # BAM format parser.add_argument('--bam', action='append', help='BAM format from HISAT') @@ -114,6 +117,7 @@ array_inputs_gff3_mrna = args.gff3_mrna array_inputs_gtf = args.gtf array_inputs_blastxml = args.blastxml + array_inputs_bigpsl = args.bigpsl if array_inputs_bam: all_datatype_dictionary['bam'] = array_inputs_bam @@ -135,7 +139,8 @@ all_datatype_dictionary['gtf'] = array_inputs_gtf if array_inputs_blastxml: all_datatype_dictionary['blastxml'] = array_inputs_blastxml - + if array_inputs_bigpsl: + all_datatype_dictionary['bigpsl'] = array_inputs_bigpsl print "input tracks: \n", all_datatype_dictionary for datatype, inputfiles in all_datatype_dictionary.items():
--- a/jbrowse_hub.xml Wed May 31 15:45:47 2017 -0400 +++ b/jbrowse_hub.xml Fri Jul 07 16:17:57 2017 -0400 @@ -1,4 +1,4 @@ -<tool id="jbrowse_hub" name="JBrowse Archive Creator" version="1.0"> +<tool id="jbrowse_hub" name="JBrowse Archive Creator" version="1.0.0"> <description> This Galaxy tool is used to prepare your files to be ready for displaying on JBrowse </description> @@ -80,6 +80,10 @@ --blastxml $f.formatChoice.BlastXML #silent $prepare_json($f.formatChoice.BlastXML, extra_data_dict) #end if + #if $f.formatChoice.format_select == 'bigpsl' + --bigpsl $f.formatChoice.BigPsl + #silent $prepare_json($f.formatChoice.BigPsl, extra_data_dict) + #end if #if $f.formatChoice.format_select == 'gtf' --gtf $f.formatChoice.GTF #set track_color = str($f.formatChoice.track_color) @@ -114,8 +118,9 @@ <param name="format_select" type="select" label="Format"> <option value="bam" selected="true">BAM</option> <option value="bed">BED</option> - <option value="blastxml">BLASTXML</option> - <option value="bigwig">BIGWIG</option> + <option value="blastxml">BlastXML</option> + <option value="bigpsl">BigPsl</option> + <option value="bigwig">BigWig</option> <option value="gff3">GFF3</option> <option value="gtf">GTF</option> </param> @@ -179,6 +184,15 @@ /> <param name="label" type="text" size="30" value="Blast Alignment" label="Track name" /> </when> + <when value="bigpsl"> + <param + format="bigpsl" + name="BigPsl" + type="data" + label="Blat Alignments File" + /> + <param name="label" type="text" size="30" value="Blat Alignment" label="Track name" /> + </when> <when value="bigwig"> <param format="bigwig" @@ -265,7 +279,7 @@ </inputs> <outputs> - <data format="html" name="output" label="${tool.name}" /> + <data format="jbrowsehub" name="output" label="${tool.name}" /> </outputs> <tests> <test>
--- a/trackObject.py Wed May 31 15:45:47 2017 -0400 +++ b/trackObject.py Fri Jul 07 16:17:57 2017 -0400 @@ -10,7 +10,7 @@ class trackObject: def __init__(self, chrom_size, genome, extra_files_path): self.chrom_size = chrom_size - outputDirect = os.path.join(extra_files_path, genome) + outputDirect = os.path.join(extra_files_path, 'myHub') self.raw_folder = os.path.join(outputDirect, 'raw') #Store metadata of the tracks self.tracks = [] @@ -56,6 +56,8 @@ bedToGff3.bedToGff3(dataFile, self.chrom_size, 'trfbig', des_path) elif dataType == 'bedSpliceJunctions': bedToGff3.bedToGff3(dataFile, self.chrom_size, 'regtools', des_path) + elif dataType == 'bigpsl': + bedToGff3.bedToGff3(dataFile, self.chrom_size, 'blat', des_path) elif dataType == 'blastxml': blastxmlToGff3.blastxml2gff3(dataFile, des_path) elif dataType == 'gtf':
--- a/utils.py Wed May 31 15:45:47 2017 -0400 +++ b/utils.py Fri Jul 07 16:17:57 2017 -0400 @@ -57,18 +57,19 @@ sizes_dict[chrom_info[0]] = chrom_info[1] return sizes_dict -def child_blocks(parent_field, parent_attr, gff3): +def child_blocks(parent_field, parent_attr, gff3, child_type): num = 0 blockcount = int(parent_attr['blockcount']) chromstart = parent_attr['chromstarts'].split(',') blocksize = parent_attr['blocksizes'].split(',') + parent_start = parent_field['start'] while num < blockcount: child_attr = OrderedDict() child_field = parent_field - child_field['type'] = 'exon_junction' - child_field['start'] = int(chromstart[num]) + int(parent_field['start']) + child_field['type'] = child_type + child_field['start'] = int(chromstart[num]) + int(parent_start) child_field['end'] = int(child_field['start']) + int(blocksize[num]) - 1 - child_attr['ID'] = parent_attr['ID'] + '_exon_' + str(num+1) + child_attr['ID'] = parent_attr['ID'] + '_part_' + str(num+1) child_attr['Parent'] = parent_attr['ID'] write_features(child_field, child_attr, gff3) num = num + 1