annotate fml_gff_groomer/scripts/gff_loci_merge.py @ 0:79726c328621 default tip

Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
author vipints
date Tue, 07 Jun 2011 17:29:24 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
1 #!/usr/bin/env python
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
2 #
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
3 # This program is free software; you can redistribute it and/or modify
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
4 # it under the terms of the GNU General Public License as published by
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
5 # the Free Software Foundation; either version 3 of the License, or
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
6 # (at your option) any later version.
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
7 #
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
8 # Written (W) 2010 Vipin T Sreedharan, Friedrich Miescher Laboratory of the Max Planck Society
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
9 # Copyright (C) 2010 Friedrich Miescher Laboratory of the Max Planck Society
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
10 #
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
11 # Description : merge transcripts that share a locus under a single gene and record them as alternatively spliced forms of that gene.
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
12
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
def display_content(final_dict):
    """Print the number of 'gene' features found for each annotation source.

    Args:
        final_dict: mapping that must contain the key 'gff_source_type',
            whose value maps (source, feature type) tuples to their
            occurrence count (the shape built by available_limits()).

    Only entries whose feature type is exactly 'gene' are reported, one per
    line, in sorted key order.
    """
    # Single-argument print(...) is valid under both the Python 2 print
    # statement and the Python 3 print function, and emits the same text.
    print("\tUnique combination of Source(s), Feature type(s) and corresponding count:")
    for sftype, cnt in sorted(final_dict['gff_source_type'].items()):
        if sftype[1] == 'gene':
            print('\t' + str(cnt) + '\t' + str(sftype[0]) + ', ' + str(sftype[1]))
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
19
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
def available_limits(gff_file):
    """Count the sequence ids, sources and feature types used in a GFF file.

    Args:
        gff_file: path of the tab-delimited GFF file to scan.

    Returns:
        A dict with the keys 'gff_id', 'gff_source_type', 'gff_source' and
        'gff_type'. Each value is a plain dict mapping a tuple of column
        values (column 1; columns 2 and 3; column 2; column 3 respectively)
        to the number of feature lines carrying that combination.

    Raises:
        AssertionError: a feature line does not have exactly nine columns.
    """
    # Zero-based column indexes that make up each summary key.
    filter_info = dict(gff_id=[0], gff_source_type=[1, 2],
                       gff_source=[1], gff_type=[2])
    cur_limits = dict()
    for filter_key in filter_info:
        cur_limits[filter_key] = collections.defaultdict(int)

    try:
        gff_handle = open(gff_file, 'rU')
    except ValueError:
        # The 'U' flag was removed in Python 3.11; plain text mode already
        # applies universal newlines on Python 3.
        gff_handle = open(gff_file, 'r')

    # The context manager closes the handle even when the assert below fires.
    with gff_handle:
        for line in gff_handle:
            record = line.strip('\n\r')
            # Blank lines used to raise IndexError on record[0]; skip them
            # together with '#' comment/directive lines.
            if not record or record[0] == '#':
                continue
            parts = [p.strip() for p in line.split('\t')]
            # GFF files with FASTA sequence together
            if len(parts) == 1 and re.search(r'\w+', parts[0]):
                continue
            assert len(parts) == 9, line
            for filter_key, cur_indexes in filter_info.items():
                cur_id = tuple([parts[i] for i in cur_indexes])
                cur_limits[filter_key][cur_id] += 1

    # get rid of the default dicts
    final_dict = dict()
    for key, value_dict in cur_limits.items():
        final_dict[key] = dict(value_dict)
    return final_dict
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
44
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
def _gff_line(ginfo, ftype, start, stop, attributes):
    """Build one nine-column GFF3 line for a feature of the locus group *ginfo*."""
    columns = [ginfo[0], ginfo[1], ftype, str(start), str(stop),
               '.', ginfo[2], '.', attributes]
    return '\t'.join(columns) + '\n'


def GFFWriter(merged_info, genes, transcripts, exons, utr5, cds, utr3, out_file):
    """Write GFF3 file with merged feature description.

    Args:
        merged_info: mapping of (contig, source, strand) to a dict that maps
            (start, stop) intervals to the list of original gene ids merged
            into that interval (the structure produced by UniqLoci()).
        genes: not used by the writer; kept for call compatibility.
        transcripts: mapping of original gene id to a list of transcript
            dicts providing 'type', 'start', 'stop' and 'ID'.
        exons, utr5, cds, utr3: mappings of transcript ID to a list of dicts
            providing 'start' and 'stop'.
        out_file: path of the GFF3 file to create (overwritten if present).

    One 'gene' line is written per merged interval, followed by each of its
    transcripts and, per transcript, its five_prime_UTR, CDS, three_prime_UTR
    and exon lines in that order.
    """
    # Sub-feature tables in the order they are emitted below each transcript.
    child_tables = (('five_prime_UTR', utr5), ('CDS', cds),
                    ('three_prime_UTR', utr3), ('exon', exons))
    # Gene IDs only embed the contig name, so the running number has to be
    # kept per contig; restarting it for every (contig, source, strand)
    # group produced duplicate IDs such as Gene_chr1_00001 on both strands.
    next_serial = dict()

    with open(out_file, 'w') as out_fh:
        # Sorted so the numbering and the line order are reproducible.
        for ginfo, regions in sorted(merged_info.items()):
            for interval, features in sorted(regions.items()):  # master gene feature
                serial = next_serial.get(ginfo[0], 0) + 1
                next_serial[ginfo[0]] = serial
                gene_id = 'Gene_' + ginfo[0] + '_' + str(serial).zfill(5)
                out_fh.write(_gff_line(ginfo, 'gene', interval[0], interval[1],
                                       'ID=' + gene_id + ';Name=' + gene_id))
                for geneid in features:  # corresponding transcript info
                    if geneid not in transcripts:
                        continue
                    for tinfo in transcripts[geneid]:  # transcript feature line
                        out_fh.write(_gff_line(ginfo, tinfo['type'], tinfo['start'], tinfo['stop'],
                                               'ID=' + tinfo['ID'] + ';Parent=' + gene_id))
                        for ftype, table in child_tables:
                            if tinfo['ID'] in table:
                                for part in table[tinfo['ID']]:
                                    out_fh.write(_gff_line(ginfo, ftype, part['start'], part['stop'],
                                                           'Parent=' + tinfo['ID']))
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
67
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
def _keeps_own_locus(gene_ids, transcripts, exons):
    """Tell whether any gene in *gene_ids* is single-transcript/single-exon or lacks annotation."""
    try:
        for gene_id in gene_ids:
            models = transcripts[gene_id]
            if len(models) == 1 and len(exons[models[0]['ID']]) == 1:
                return True
    except KeyError:  # dont have any transcripts or exons defined.
        return True
    return False


def _absorb(loci, old_span, new_span, gene_id):
    """Re-key the locus *old_span* as *new_span* and add *gene_id* to it."""
    members = loci.pop(old_span)
    members.append(gene_id)
    if new_span in loci:
        # Another locus already owns the stretched interval: join the two
        # member lists instead of overwriting (and losing) the existing one.
        loci[new_span].extend(members)
    else:
        loci[new_span] = members


def UniqLoci(genes, transcripts, exons, min_overlap=150):
    """Determine unique locations where features are annotated multiple times.

    Genes are grouped by (contig, source, strand). Within a group a gene is
    attached to the first already known interval it overlaps by at least
    *min_overlap* bases, stretching that interval when the gene reaches
    beyond it; otherwise it opens an interval of its own. A gene with exactly
    one transcript that has exactly one exon, or a gene without transcript or
    exon records, is never merged and always gets its own interval.

    The result depends on the iteration order of *genes*.

    Args:
        genes: mapping of gene id to a dict with 'chr', 'source', 'strand',
            'start' and 'stop'.
        transcripts: mapping of gene id to a list of transcript dicts, each
            carrying an 'ID'.
        exons: mapping of transcript ID to the list of its exons.
        min_overlap: minimum number of shared bases required before two
            regions are considered for merging (default 150, the value that
            was previously hard-coded).

    Returns:
        dict mapping (contig, source, strand) to a dict that maps
        (start, stop) intervals to the list of gene ids placed there.
    """
    uniq_loci = dict()
    for gid, parts in genes.items():
        gene_info = (parts['chr'], parts['source'], parts['strand'])
        start, stop = int(parts['start']), int(parts['stop'])
        # same contig, orientation, source: look for merging transcripts
        # based on the nearby location
        loci = uniq_loci.setdefault(gene_info, dict())

        if (start, stop) in loci:
            # identical boundaries; exon, CDS or intron content may vary
            loci[(start, stop)].append(gid)
            continue

        # heuristic approach to include closely related regions in a single
        # master locus
        placed = False
        for floc in list(loci):  # snapshot: the dict is modified below
            if (floc[1] - start) < min_overlap or (stop - floc[0]) < min_overlap:
                continue
            if floc[0] <= start < floc[1]:
                # candidate starts inside the known interval
                if _keeps_own_locus([gid], transcripts, exons):
                    break
                if stop > floc[1]:
                    # candidate reaches further downstream: stretch the interval
                    _absorb(loci, floc, (floc[0], stop), gid)
                else:
                    loci[floc].append(gid)
            elif floc[0] < stop <= floc[1]:
                # candidate ends inside the known interval, starting upstream
                if _keeps_own_locus([gid], transcripts, exons):
                    break
                _absorb(loci, floc, (start, floc[1]), gid)
            elif floc[0] > start and floc[1] < stop:
                # known interval lies completely inside the candidate; keep
                # them apart when any member of the interval is a small
                # single-exon model
                if _keeps_own_locus(loci[floc], transcripts, exons):
                    break
                _absorb(loci, floc, (start, stop), gid)
            else:
                continue
            placed = True
            break

        if not placed:
            loci[(start, stop)] = [gid]

    return uniq_loci
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
139
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
140 def ParseGFF(gff_file):
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
141 """feature extraction from provided GFF file"""
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
142
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
143 gff_handle = open(gff_file, 'rU')
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
144 genes, transcripts, exons, utr5, cds, utr3 = dict(), dict(), dict(), dict(), dict(), dict()
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
145 for gff_line in gff_handle:
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
146 parts = gff_line.strip('\n\r').split('\t')
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
147 if gff_line[0] == '#' or gff_line[0] == '>':continue
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
148 if len(parts) == 1:continue ## Some centers in the world create GFF files with FASTA sequence together
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
149 if len(parts) != 9:sys.stdout.write('Warning: Found invalid GFF line\n' + gff_line.strip('\n\r') + '\n');continue
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
150 if parts[3] == '' or parts[4] == '':sys.stdout.write('Warning: Found missing coordinate in GFF line\n' + gff_line.strip('\n\r') + '\n');continue
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
151 if parts[2] == 'gene':
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
152 gene_info = dict()
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
153 gene_info['start'] = int(parts[3])
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
154 gene_info['stop'] = int(parts[4])
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
155 gene_info['chr'] = parts[0]
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
156 gene_info['source'] = parts[1]
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
157 gene_info['strand'] = parts[6]
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
158 gid = ''
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
159 for attr in parts[-1].split(';'):
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
160 if attr == '':continue ## GFF line may end with a ';' symbol
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
161 attr = attr.split('=')
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
162 if attr[0] == 'ID':gid=attr[1];continue
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
163 gene_info[attr[0]] = attr[1]
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
164 if gid != '': genes[gid] = gene_info
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
165 if parts[2] == 'mRNA' or parts[2] == 'transcript' or parts[2] == 'ncRNA' or parts[2] == 'tRNA' or parts[2] == 'snRNA' or parts[2] == 'scRNA' or parts[2] == 'snoRNA' or parts[2] == 'snlRNA' or parts[2] == 'rRNA' or parts[2] == 'miRNA':
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
166 mrna_info = dict()
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
167 mrna_info['start'] = int(parts[3])
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
168 mrna_info['stop'] = int(parts[4])
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
169 mrna_info['chr'] = parts[0]
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
170 mrna_info['strand'] = parts[6]
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
171 mrna_info['type'] = parts[2]
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
172 gid = ''
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
173 for attr in parts[-1].split(';'):
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
174 if attr == '':continue ## GFF line may end with a ';' symbol
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
175 attr = attr.split('=')
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
176 if attr[0] == 'Parent':gid=attr[1];continue
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
177 mrna_info[attr[0]] = attr[1]
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
178 if gid in transcripts:
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
179 transcripts[gid].append(mrna_info)
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
180 else:
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
181 transcripts[gid] = [mrna_info]
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
182 if parts[2] == 'exon':
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
183 exon_info = dict()
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
184 exon_info['start'] = int(parts[3])
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
185 exon_info['stop'] = int(parts[4])
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
186 exon_info['chr'] = parts[0]
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
187 exon_info['strand'] = parts[6]
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
188 tid = ''
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
189 for attr in parts[-1].split(';'):
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
190 if attr == '':continue ## GFF line may end with a ';' symbol
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
191 attr = attr.split('=')
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
192 if attr[0] == 'Parent':tid=attr[1];continue
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
193 exon_info[attr[0]] = attr[1]
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
194 if tid in exons:
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
195 exons[tid].append(exon_info)
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
196 else:
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
197 exons[tid] = [exon_info]
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
198 if parts[2] == 'five_prime_UTR':
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
199 utr5_info = dict()
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
200 utr5_info['start'] = int(parts[3])
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
201 utr5_info['stop'] = int(parts[4])
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
202 utr5_info['chr'] = parts[0]
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
203 utr5_info['strand'] = parts[6]
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
204 tid = ''
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
205 for attr in parts[-1].split(';'):
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
206 if attr == '':continue ## GFF line may end with a ';' symbol
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
207 attr = attr.split('=')
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
208 if attr[0] == 'Parent':tid=attr[1];continue
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
209 utr5_info[attr[0]] = attr[1]
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
210 if tid in utr5:
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
211 utr5[tid].append(utr5_info)
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
212 else:
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
213 utr5[tid] = [utr5_info]
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
214 if parts[2] == 'CDS':
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
215 cds_info = dict()
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
216 cds_info['start'] = int(parts[3])
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
217 cds_info['stop'] = int(parts[4])
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
218 cds_info['chr'] = parts[0]
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
219 cds_info['strand'] = parts[6]
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
220 tid = ''
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
221 for attr in parts[-1].split(';'):
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
222 if attr == '':continue
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
223 attr = attr.split('=')
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
224 if attr[0] == 'Parent':tid=attr[1];continue
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
225 cds_info[attr[0]] = attr[1]
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
226 if tid in cds:
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
227 cds[tid].append(cds_info)
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
228 else:
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
229 cds[tid] = [cds_info]
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
230 if parts[2] == 'three_prime_UTR':
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
231 utr3_info = dict()
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
232 utr3_info['start'] = int(parts[3])
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
233 utr3_info['stop'] = int(parts[4])
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
234 utr3_info['chr'] = parts[0]
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
235 utr3_info['strand'] = parts[6]
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
236 tid = ''
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
237 for attr in parts[-1].split(';'):
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
238 if attr == '':continue
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
239 attr = attr.split('=')
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
240 if attr[0] == 'Parent':tid=attr[1];continue
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
241 utr3_info[attr[0]] = attr[1]
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
242 if tid in utr3:
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
243 utr3[tid].append(utr3_info)
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
244 else:
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
245 utr3[tid] = [utr3_info]
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
246 gff_handle.close()
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
247 return genes, transcripts, exons, utr5, cds, utr3
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
248
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
249 import re, sys
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
250 import time
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
251 import collections
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
252
79726c328621 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
if __name__ == '__main__':
    # Command-line driver.
    #
    # Usage: gff_loci_merge.py <gff file> <result file>
    #
    # Steps, in order:
    #   1. read the two positional arguments (input GFF, result file);
    #   2. print the feature distribution of the input file;
    #   3. parse the input file with ParseGFF;
    #   4. merge loci with UniqLoci;
    #   5. write the merged annotation with GFFWriter;
    #   6. print the feature distribution of the result file.
    #
    # Every print call below takes exactly one argument so that the output is
    # the same whether the script is run by Python 2 (print statement applied
    # to a parenthesised expression) or Python 3 (print function).

    stime = time.asctime(time.localtime(time.time()))
    print('-------------------------------------------------------')
    print('MergeLoci started on ' + stime)
    print('-------------------------------------------------------')
    try:
        gff_file = sys.argv[1]
        out_file = sys.argv[2]
    except IndexError:
        # A missing positional argument is the only failure expected here;
        # catching IndexError alone keeps KeyboardInterrupt/SystemExit from
        # being reported as a usage error.
        sys.stderr.write("Missing GFF3 file, result file. terminating...\n")
        sys.stderr.write("USAGE: gff_loci_merge.py <gff file> <result file>\n")
        sys.exit(-1)
    print('--------')
    # The installation prefix is replaced by the literal GALAXYDIR in the
    # file names that get printed.
    print('Level: 1- ' + 'Reading GFF file: ' + re.sub(r'/home/galaxy/galaxy-2.1.2009', r'GALAXYDIR', gff_file))
    print('--------')
    print('--------')
    print('Level: 2- ' + 'BEFORE processing, Merging feature distribution in GFF file')
    print('--------')
    # initial feature distribution in file
    final_dict = available_limits(gff_file)
    display_content(final_dict)
    # determine the whole content from GFF file
    genes, transcripts, exons, utr5, cds, utr3 = ParseGFF(gff_file)
    print('--------')
    print('Level: 3- ' + 'Start merging feature(s) from similar locations...')
    print('--------')
    # determine the same gene loci on specific chromosome based on the same source
    merged_regions = UniqLoci(genes, transcripts, exons)
    print('\tDone.')
    print('--------')
    print('Level: 4- ' + 'Writing merged feature annotation to GFF format...')
    print('--------')
    # write new GFF file with merged loci information for gene feature
    GFFWriter(merged_regions, genes, transcripts, exons, utr5, cds, utr3, out_file)
    print('\tDone.')
    # after processing display the feature distribution in the result file
    print('--------')
    print('Level: 5- ' + 'Merged feature(s) summary from GFF file')
    print('--------')
    final_dict = available_limits(out_file)
    display_content(final_dict)
    # Blank separator line; print('') rather than print() because the latter
    # would output "()" under Python 2.
    print('')
    print('\tMerged result file: ' + re.sub(r'/home/galaxy/galaxy-2.1.2009', r'GALAXYDIR', out_file))
    stime = time.asctime(time.localtime(time.time()))
    print('-------------------------------------------------------')
    print('MergeLoci finished at ' + stime)
    print('-------------------------------------------------------')