Mercurial > repos > yating-l > jbrowsearchivecreator
annotate bedToGff3.py @ 5:e762f4b9e4bd draft
planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit a3a8cdf5c9b100db56b2534e8718b5fc976435ff
author | yating-l |
---|---|
date | Fri, 07 Jul 2017 17:21:23 -0400 |
parents | 7e471cdd9e71 |
children |
rev | line source |
---|---|
0
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
1 #!/usr/bin/env python |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
2 |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
3 ''' |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
4 Convert BED format to gff3 |
4
7e471cdd9e71
planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents:
0
diff
changeset
|
5 reference for gff3: https://github.com/The-Sequence-Ontology/Specifications/blob/master/gff3.md |
0
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
6 ''' |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
7 import os |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
8 from collections import OrderedDict |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
9 import utils |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
10 |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
11 class bedToGff3(): |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
12 def __init__(self, inputBedFile, chrom_sizes, bed_type, output): |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
13 self.input = inputBedFile |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
14 #file_dir = os.path.basename(inputBedFile) |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
15 #print file_dir + "\n\n" |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
16 self.output = output |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
17 self.chrom_sizes = chrom_sizes |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
18 self.type = bed_type |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
19 if self.type == "trfbig": |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
20 self.trfbig_to_gff3() |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
21 if self.type == "regtools": |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
22 self.splicejunctions_to_gff3() |
4
7e471cdd9e71
planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents:
0
diff
changeset
|
23 if self.type == "blat": |
7e471cdd9e71
planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents:
0
diff
changeset
|
24 self.bigpsl_to_gff3() |
0
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
25 |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
26 def trfbig_to_gff3(self): |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
27 gff3 = open(self.output, 'w') |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
28 gff3.write("##gff-version 3\n") |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
29 sizes_dict = utils.sequence_region(self.chrom_sizes) |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
30 seq_regions = dict() |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
31 with open(self.input, 'r') as bed: |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
32 for line in bed: |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
33 field = OrderedDict() |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
34 attribute = OrderedDict() |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
35 li = line.rstrip().split("\t") |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
36 field['seqid'] = li[0] |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
37 if field['seqid'] not in seq_regions: |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
38 end_region = sizes_dict[field['seqid']] |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
39 gff3.write("##sequence-region " + field['seqid'] + ' 1 ' + str(end_region) + '\n') |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
40 seq_regions[field['seqid']] = end_region |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
41 field['source'] = li[3] |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
42 field['type'] = 'tandem_repeat' |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
43 # The first base in a chromosome is numbered 0 in BED format |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
44 field['start'] = str(int(li[1]) + 1) |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
45 field['end'] = li[2] |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
46 field['score'] = li[9] |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
47 field['strand'] = '+' |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
48 field['phase'] = '.' |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
49 attribute['length of repeat unit'] = li[4] |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
50 attribute['mean number of copies of repeat'] = li[5] |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
51 attribute['length of consensus sequence'] = li[6] |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
52 attribute['percentage match'] = li[7] |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
53 attribute['percentage indel'] = li[8] |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
54 attribute['percent of a\'s in repeat unit'] = li[10] |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
55 attribute['percent of c\'s in repeat unit'] = li[11] |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
56 attribute['percent of g\'s in repeat unit'] = li[12] |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
57 attribute['percent of t\'s in repeat unit'] = li[13] |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
58 attribute['entropy'] = li[14] |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
59 attribute['sequence of repeat unit element'] = li[15] |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
60 utils.write_features(field, attribute, gff3) |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
61 gff3.close() |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
62 |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
63 |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
64 def splicejunctions_to_gff3(self): |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
65 gff3 = open(self.output, 'w') |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
66 gff3.write("##gff-version 3\n") |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
67 sizes_dict = utils.sequence_region(self.chrom_sizes) |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
68 seq_regions = dict() |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
69 with open(self.input, 'r') as bed: |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
70 for line in bed: |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
71 field = OrderedDict() |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
72 attribute = OrderedDict() |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
73 li = line.rstrip().split("\t") |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
74 field['seqid'] = li[0] |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
75 if field['seqid'] not in seq_regions: |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
76 end_region = sizes_dict[field['seqid']] |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
77 gff3.write("##sequence-region " + field['seqid'] + ' 1 ' + str(end_region) + '\n') |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
78 seq_regions[field['seqid']] = end_region |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
79 field['source'] = li[3] |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
80 field['type'] = 'junction' |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
81 # The first base in a chromosome is numbered 0 in BED format |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
82 field['start'] = int(li[1]) + 1 |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
83 field['end'] = li[2] |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
84 field['score'] = li[12] |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
85 field['strand'] = li[5] |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
86 field['phase'] = '.' |
4
7e471cdd9e71
planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents:
0
diff
changeset
|
87 attribute['ID'] = li[0] + '_' + li[3] |
0
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
88 attribute['Name'] = li[3] |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
89 attribute['blockcount'] = li[9] |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
90 attribute['blocksizes'] = li[10] |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
91 attribute['chromstarts'] = li[11] |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
92 utils.write_features(field, attribute, gff3) |
4
7e471cdd9e71
planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents:
0
diff
changeset
|
93 utils.child_blocks(field, attribute, gff3, 'exon_junction') |
7e471cdd9e71
planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents:
0
diff
changeset
|
94 gff3.close() |
7e471cdd9e71
planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents:
0
diff
changeset
|
95 |
7e471cdd9e71
planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents:
0
diff
changeset
|
96 def bigpsl_to_gff3(self): |
7e471cdd9e71
planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents:
0
diff
changeset
|
97 gff3 = open(self.output, 'w') |
7e471cdd9e71
planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents:
0
diff
changeset
|
98 gff3.write("##gff-version 3\n") |
7e471cdd9e71
planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents:
0
diff
changeset
|
99 sizes_dict = utils.sequence_region(self.chrom_sizes) |
7e471cdd9e71
planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents:
0
diff
changeset
|
100 seq_regions = dict() |
7e471cdd9e71
planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents:
0
diff
changeset
|
101 with open(self.input, 'r') as bed: |
7e471cdd9e71
planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents:
0
diff
changeset
|
102 for line in bed: |
7e471cdd9e71
planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents:
0
diff
changeset
|
103 field = OrderedDict() |
7e471cdd9e71
planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents:
0
diff
changeset
|
104 attribute = OrderedDict() |
7e471cdd9e71
planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents:
0
diff
changeset
|
105 li = line.rstrip().split("\t") |
7e471cdd9e71
planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents:
0
diff
changeset
|
106 field['seqid'] = li[0] |
7e471cdd9e71
planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents:
0
diff
changeset
|
107 if field['seqid'] not in seq_regions: |
7e471cdd9e71
planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents:
0
diff
changeset
|
108 end_region = sizes_dict[field['seqid']] |
7e471cdd9e71
planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents:
0
diff
changeset
|
109 gff3.write("##sequence-region " + field['seqid'] + ' 1 ' + str(end_region) + '\n') |
7e471cdd9e71
planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents:
0
diff
changeset
|
110 seq_regions[field['seqid']] = end_region |
7e471cdd9e71
planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents:
0
diff
changeset
|
111 field['source'] = 'UCSC BLAT alignment tool' |
7e471cdd9e71
planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents:
0
diff
changeset
|
112 field['type'] = 'match' |
7e471cdd9e71
planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents:
0
diff
changeset
|
113 # The first base in a chromosome is numbered 0 in BED format |
7e471cdd9e71
planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents:
0
diff
changeset
|
114 field['start'] = str(int(li[1]) + 1) |
7e471cdd9e71
planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents:
0
diff
changeset
|
115 field['end'] = li[2] |
7e471cdd9e71
planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents:
0
diff
changeset
|
116 field['score'] = li[4] |
7e471cdd9e71
planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents:
0
diff
changeset
|
117 field['strand'] = li[5] |
7e471cdd9e71
planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents:
0
diff
changeset
|
118 field['phase'] = '.' |
7e471cdd9e71
planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents:
0
diff
changeset
|
119 attribute['ID'] = li[0] + '_' + li[3] |
7e471cdd9e71
planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents:
0
diff
changeset
|
120 attribute['Name'] = li[3] |
7e471cdd9e71
planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents:
0
diff
changeset
|
121 attribute['blockcount'] = li[9] |
7e471cdd9e71
planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents:
0
diff
changeset
|
122 attribute['blocksizes'] = li[10] |
7e471cdd9e71
planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents:
0
diff
changeset
|
123 attribute['chromstarts'] = li[11] |
7e471cdd9e71
planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents:
0
diff
changeset
|
124 attribute['ochrom_start'] = li[12] |
7e471cdd9e71
planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents:
0
diff
changeset
|
125 attribute['ochrom_end'] = li[13] |
7e471cdd9e71
planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents:
0
diff
changeset
|
126 attribute['ochrom_strand'] = li[14] |
7e471cdd9e71
planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents:
0
diff
changeset
|
127 attribute['ochrom_size'] = li[15] |
7e471cdd9e71
planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents:
0
diff
changeset
|
128 attribute['ochrom_starts'] = li[16] |
7e471cdd9e71
planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents:
0
diff
changeset
|
129 attribute['sequence on other chromosome'] = li[17] |
7e471cdd9e71
planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents:
0
diff
changeset
|
130 attribute['cds in ncbi format'] = li[18] |
7e471cdd9e71
planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents:
0
diff
changeset
|
131 attribute['size of target chromosome'] = li[19] |
7e471cdd9e71
planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents:
0
diff
changeset
|
132 attribute['number of bases matched'] = li[20] |
7e471cdd9e71
planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents:
0
diff
changeset
|
133 attribute['number of bases that don\'t match'] = li[21] |
7e471cdd9e71
planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents:
0
diff
changeset
|
134 attribute['number of bases that match but are part of repeats'] = li[22] |
7e471cdd9e71
planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents:
0
diff
changeset
|
135 attribute['number of \'N\' bases'] = li[23] |
7e471cdd9e71
planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents:
0
diff
changeset
|
136 utils.write_features(field, attribute, gff3) |
7e471cdd9e71
planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents:
0
diff
changeset
|
137 utils.child_blocks(field, attribute, gff3, 'match_part') |
0
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
138 gff3.close() |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
139 |