annotate bedToGff3.py @ 5:e762f4b9e4bd draft

planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit a3a8cdf5c9b100db56b2534e8718b5fc976435ff
author yating-l
date Fri, 07 Jul 2017 17:21:23 -0400
parents 7e471cdd9e71
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
1 #!/usr/bin/env python
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
2
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
3 '''
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
4 Convert BED format to gff3
4
7e471cdd9e71 planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents: 0
diff changeset
5 reference for gff3: https://github.com/The-Sequence-Ontology/Specifications/blob/master/gff3.md
0
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
6 '''
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
7 import os
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
8 from collections import OrderedDict
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
9 import utils
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
10
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
11 class bedToGff3():
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
12 def __init__(self, inputBedFile, chrom_sizes, bed_type, output):
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
13 self.input = inputBedFile
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
14 #file_dir = os.path.basename(inputBedFile)
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
15 #print file_dir + "\n\n"
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
16 self.output = output
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
17 self.chrom_sizes = chrom_sizes
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
18 self.type = bed_type
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
19 if self.type == "trfbig":
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
20 self.trfbig_to_gff3()
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
21 if self.type == "regtools":
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
22 self.splicejunctions_to_gff3()
4
7e471cdd9e71 planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents: 0
diff changeset
23 if self.type == "blat":
7e471cdd9e71 planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents: 0
diff changeset
24 self.bigpsl_to_gff3()
0
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
25
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
26 def trfbig_to_gff3(self):
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
27 gff3 = open(self.output, 'w')
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
28 gff3.write("##gff-version 3\n")
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
29 sizes_dict = utils.sequence_region(self.chrom_sizes)
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
30 seq_regions = dict()
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
31 with open(self.input, 'r') as bed:
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
32 for line in bed:
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
33 field = OrderedDict()
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
34 attribute = OrderedDict()
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
35 li = line.rstrip().split("\t")
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
36 field['seqid'] = li[0]
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
37 if field['seqid'] not in seq_regions:
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
38 end_region = sizes_dict[field['seqid']]
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
39 gff3.write("##sequence-region " + field['seqid'] + ' 1 ' + str(end_region) + '\n')
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
40 seq_regions[field['seqid']] = end_region
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
41 field['source'] = li[3]
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
42 field['type'] = 'tandem_repeat'
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
43 # The first base in a chromosome is numbered 0 in BED format
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
44 field['start'] = str(int(li[1]) + 1)
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
45 field['end'] = li[2]
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
46 field['score'] = li[9]
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
47 field['strand'] = '+'
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
48 field['phase'] = '.'
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
49 attribute['length of repeat unit'] = li[4]
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
50 attribute['mean number of copies of repeat'] = li[5]
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
51 attribute['length of consensus sequence'] = li[6]
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
52 attribute['percentage match'] = li[7]
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
53 attribute['percentage indel'] = li[8]
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
54 attribute['percent of a\'s in repeat unit'] = li[10]
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
55 attribute['percent of c\'s in repeat unit'] = li[11]
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
56 attribute['percent of g\'s in repeat unit'] = li[12]
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
57 attribute['percent of t\'s in repeat unit'] = li[13]
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
58 attribute['entropy'] = li[14]
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
59 attribute['sequence of repeat unit element'] = li[15]
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
60 utils.write_features(field, attribute, gff3)
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
61 gff3.close()
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
62
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
63
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
64 def splicejunctions_to_gff3(self):
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
65 gff3 = open(self.output, 'w')
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
66 gff3.write("##gff-version 3\n")
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
67 sizes_dict = utils.sequence_region(self.chrom_sizes)
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
68 seq_regions = dict()
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
69 with open(self.input, 'r') as bed:
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
70 for line in bed:
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
71 field = OrderedDict()
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
72 attribute = OrderedDict()
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
73 li = line.rstrip().split("\t")
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
74 field['seqid'] = li[0]
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
75 if field['seqid'] not in seq_regions:
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
76 end_region = sizes_dict[field['seqid']]
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
77 gff3.write("##sequence-region " + field['seqid'] + ' 1 ' + str(end_region) + '\n')
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
78 seq_regions[field['seqid']] = end_region
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
79 field['source'] = li[3]
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
80 field['type'] = 'junction'
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
81 # The first base in a chromosome is numbered 0 in BED format
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
82 field['start'] = int(li[1]) + 1
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
83 field['end'] = li[2]
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
84 field['score'] = li[12]
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
85 field['strand'] = li[5]
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
86 field['phase'] = '.'
4
7e471cdd9e71 planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents: 0
diff changeset
87 attribute['ID'] = li[0] + '_' + li[3]
0
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
88 attribute['Name'] = li[3]
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
89 attribute['blockcount'] = li[9]
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
90 attribute['blocksizes'] = li[10]
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
91 attribute['chromstarts'] = li[11]
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
92 utils.write_features(field, attribute, gff3)
4
7e471cdd9e71 planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents: 0
diff changeset
93 utils.child_blocks(field, attribute, gff3, 'exon_junction')
7e471cdd9e71 planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents: 0
diff changeset
94 gff3.close()
7e471cdd9e71 planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents: 0
diff changeset
95
7e471cdd9e71 planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents: 0
diff changeset
96 def bigpsl_to_gff3(self):
7e471cdd9e71 planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents: 0
diff changeset
97 gff3 = open(self.output, 'w')
7e471cdd9e71 planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents: 0
diff changeset
98 gff3.write("##gff-version 3\n")
7e471cdd9e71 planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents: 0
diff changeset
99 sizes_dict = utils.sequence_region(self.chrom_sizes)
7e471cdd9e71 planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents: 0
diff changeset
100 seq_regions = dict()
7e471cdd9e71 planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents: 0
diff changeset
101 with open(self.input, 'r') as bed:
7e471cdd9e71 planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents: 0
diff changeset
102 for line in bed:
7e471cdd9e71 planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents: 0
diff changeset
103 field = OrderedDict()
7e471cdd9e71 planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents: 0
diff changeset
104 attribute = OrderedDict()
7e471cdd9e71 planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents: 0
diff changeset
105 li = line.rstrip().split("\t")
7e471cdd9e71 planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents: 0
diff changeset
106 field['seqid'] = li[0]
7e471cdd9e71 planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents: 0
diff changeset
107 if field['seqid'] not in seq_regions:
7e471cdd9e71 planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents: 0
diff changeset
108 end_region = sizes_dict[field['seqid']]
7e471cdd9e71 planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents: 0
diff changeset
109 gff3.write("##sequence-region " + field['seqid'] + ' 1 ' + str(end_region) + '\n')
7e471cdd9e71 planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents: 0
diff changeset
110 seq_regions[field['seqid']] = end_region
7e471cdd9e71 planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents: 0
diff changeset
111 field['source'] = 'UCSC BLAT alignment tool'
7e471cdd9e71 planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents: 0
diff changeset
112 field['type'] = 'match'
7e471cdd9e71 planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents: 0
diff changeset
113 # The first base in a chromosome is numbered 0 in BED format
7e471cdd9e71 planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents: 0
diff changeset
114 field['start'] = str(int(li[1]) + 1)
7e471cdd9e71 planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents: 0
diff changeset
115 field['end'] = li[2]
7e471cdd9e71 planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents: 0
diff changeset
116 field['score'] = li[4]
7e471cdd9e71 planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents: 0
diff changeset
117 field['strand'] = li[5]
7e471cdd9e71 planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents: 0
diff changeset
118 field['phase'] = '.'
7e471cdd9e71 planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents: 0
diff changeset
119 attribute['ID'] = li[0] + '_' + li[3]
7e471cdd9e71 planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents: 0
diff changeset
120 attribute['Name'] = li[3]
7e471cdd9e71 planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents: 0
diff changeset
121 attribute['blockcount'] = li[9]
7e471cdd9e71 planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents: 0
diff changeset
122 attribute['blocksizes'] = li[10]
7e471cdd9e71 planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents: 0
diff changeset
123 attribute['chromstarts'] = li[11]
7e471cdd9e71 planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents: 0
diff changeset
124 attribute['ochrom_start'] = li[12]
7e471cdd9e71 planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents: 0
diff changeset
125 attribute['ochrom_end'] = li[13]
7e471cdd9e71 planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents: 0
diff changeset
126 attribute['ochrom_strand'] = li[14]
7e471cdd9e71 planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents: 0
diff changeset
127 attribute['ochrom_size'] = li[15]
7e471cdd9e71 planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents: 0
diff changeset
128 attribute['ochrom_starts'] = li[16]
7e471cdd9e71 planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents: 0
diff changeset
129 attribute['sequence on other chromosome'] = li[17]
7e471cdd9e71 planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents: 0
diff changeset
130 attribute['cds in ncbi format'] = li[18]
7e471cdd9e71 planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents: 0
diff changeset
131 attribute['size of target chromosome'] = li[19]
7e471cdd9e71 planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents: 0
diff changeset
132 attribute['number of bases matched'] = li[20]
7e471cdd9e71 planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents: 0
diff changeset
133 attribute['number of bases that don\'t match'] = li[21]
7e471cdd9e71 planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents: 0
diff changeset
134 attribute['number of bases that match but are part of repeats'] = li[22]
7e471cdd9e71 planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents: 0
diff changeset
135 attribute['number of \'N\' bases'] = li[23]
7e471cdd9e71 planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents: 0
diff changeset
136 utils.write_features(field, attribute, gff3)
7e471cdd9e71 planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents: 0
diff changeset
137 utils.child_blocks(field, attribute, gff3, 'match_part')
0
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
138 gff3.close()
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
139