Mercurial > repos > alenail > chipsequtil
comparison chipsequtil/pieplot_macs/pieplots_macs.py @ 22:63dace20719b draft
Uploaded
author | alenail |
---|---|
date | Wed, 13 Apr 2016 17:36:07 -0400 |
parents | |
children | 5c1e5d771216 |
comparison
equal
deleted
inserted
replaced
21:248c4538e4ce | 22:63dace20719b |
---|---|
1 ''' | |
2 NAME | |
3 | |
4 pieplots_macs.py | |
5 | |
6 SYNOPSIS | |
7 | |
8 python pieplots_macs.py --genefile MACSoutfile_genes.txt --peakfile MACSoutfile_peaks.bed --outfile MACSdirectory/piechart.pdf | |
9 | |
10 | |
11 DESCRIPTION | |
12 | |
13 Peaks are assigned to the closest gene and then categorized according to their location at different genomic regions (promoter, intron, exon, or after the gene). Sites >10kb away from any gene are considered intergenic. (from Pamela) | |
14 | |
15 ''' | |
16 | |
17 __author__='Renan Escalante' | |
18 __email__='renanec@mit.edu' | |
19 | |
20 import matplotlib | |
21 matplotlib.use('pdf') | |
22 from matplotlib import pyplot as plt | |
23 matplotlib.rcParams['pdf.fonttype']=42 | |
24 matplotlib.rcParams['font.size']=14 | |
25 import sys | |
26 from argparse import ArgumentParser | |
27 import pandas as pd | |
28 | |
29 def map_peaks(gene,peak,outfile,macsFlag): | |
30 genefile = open(gene, 'r') | |
31 peakfile = open(peak, 'r') | |
32 | |
33 types = {'promoter':0, 'after':0, 'intron':0, 'exon': 0} | |
34 | |
35 #read mapped gene file, store closest map for each peak | |
36 peaks={} #{chrom:{peakStart:[dist, type]}} | |
37 for line in genefile: | |
38 words = line.strip().split('\t') | |
39 #ignore first line | |
40 if words[0] == 'knownGeneID': continue | |
41 chrom = words[2] | |
42 | |
43 | |
44 if not macsFlag: | |
45 try: | |
46 start = int(words[3]) | |
47 dist = abs(int(words[15])) | |
48 maptype = words[16] | |
49 if maptype == 'gene': | |
50 maptype = words[17] | |
51 except: | |
52 pass | |
53 | |
54 else: | |
55 start = int(words[3])-1 | |
56 dist = abs(int(words[12])) | |
57 maptype = words[14] | |
58 if maptype == 'gene': | |
59 maptype = words[15] | |
60 | |
61 | |
62 if chrom not in peaks: | |
63 #new chrom | |
64 peaks[chrom] = {start:[dist,maptype]} | |
65 else: | |
66 if start in peaks[chrom].keys(): | |
67 #account for duplicate entries - choose closest gene and store type | |
68 if dist < peaks[chrom][start][0]: | |
69 #closer gene | |
70 peaks[chrom][start] = [dist, maptype] | |
71 else: peaks[chrom][start] = [dist, maptype] | |
72 | |
73 #count types - 1 per peak in peak file | |
74 types = {'promoter':0, 'after':0, 'intron':0, 'exon': 0, 'inter': 0} | |
75 totalpks = 0 | |
76 #Read peak file in bed format | |
77 for line in peakfile: | |
78 words = line.strip().split('\t') | |
79 chrom = words[0] | |
80 start = int(words[1]) | |
81 if chrom in peaks: | |
82 if start in peaks[chrom]: | |
83 types[peaks[chrom][start][1]] += 1 | |
84 else: | |
85 types['inter'] += 1 | |
86 else: | |
87 types['inter'] += 1 | |
88 totalpks += 1 | |
89 | |
90 | |
91 #-------------------------------------------- | |
92 # make a square figure and axes | |
93 #-------------------------------------------- | |
94 | |
95 fig = plt.figure(figsize=(6,6)) | |
96 pie_ax = fig.add_axes((0.3,0.2,0.4,0.4)) | |
97 | |
98 # The slices will be ordered and plotted counter-clockwise. | |
99 labels = ['exon: %i'%types['exon'],'intron: %i'%types['intron'],'promoter: %i'%types['promoter'],'intergenic: %i'%types['inter'], 'after: %i'%types['after']] | |
100 fracs = [types['exon'], types['intron'], types['promoter'], types['inter'], types['after']] | |
101 | |
102 plt.pie(fracs, labels=labels) #, autopct='%1.1f%%') | |
103 | |
104 #Export data frame with all the counts | |
105 indexDataFrame = ['exon','intron','promoter','intergenic','after'] | |
106 df = pd.DataFrame(data=fracs, index=indexDataFrame) | |
107 dfFileName = outfile.replace("pdf","csv") | |
108 df.to_csv(dfFileName, sep=',') | |
109 #plt.title('MACS peaks in %s'%(name)) | |
110 plt.figtext(.5, .1, 'Total: %i'%totalpks, ha='center') | |
111 fig.savefig(outfile) | |
112 | |
113 def main(): | |
114 usage = "usage: %prog --genefile MACSoutfile_genes.txt --peakfile MACSoutfile_peaks.bed --outfile MACSdirectory/piechart.pdf" | |
115 parser = ArgumentParser(usage) | |
116 parser.add_argument("--genefile", dest="genefile", help="Path to file MACS_mfold10,30_pval1e-5_genes.txt") | |
117 parser.add_argument("--peakfile", dest="peakfile", help="Path to file MACS_mfold10,30_pval1e-5_peaks.bed") | |
118 parser.add_argument("--outfile", dest="outfile", default="MACS_piechart.pdf", help="Path to pdf file where you want to store the piechart") | |
119 parser.add_argument('--MACS',action='store_true',default=False,help='Set this value if you have MACS peaks') | |
120 | |
121 args=parser.parse_args() | |
122 | |
123 map_peaks(args.genefile, args.peakfile, args.outfile, args.MACS) | |
124 | |
125 | |
126 if __name__=='__main__': | |
127 main() |