comparison chipsequtil/pieplot_macs/pieplots_macs.py @ 22:63dace20719b draft

Uploaded
author alenail
date Wed, 13 Apr 2016 17:36:07 -0400
parents
children 5c1e5d771216
comparison
equal deleted inserted replaced
21:248c4538e4ce 22:63dace20719b
1 '''
2 NAME
3
4 pieplots_macs.py
5
6 SYNOPSIS
7
8 python pieplots_macs.py --genefile MACSoutfile_genes.txt --peakfile MACSoutfile_peaks.bed --outfile MACSdirectory/piechart.pdf
9
10
11 DESCRIPTION
12
13 Peaks are assigned to the closest gene and then categorized according to their location at different genomic regions (promoter, intron, exon, or after the gene). Sites >10kb away from any gene are considered intergenic. (from Pamela)
14
15 '''
16
17 __author__='Renan Escalante'
18 __email__='renanec@mit.edu'
19
20 import matplotlib
21 matplotlib.use('pdf')
22 from matplotlib import pyplot as plt
23 matplotlib.rcParams['pdf.fonttype']=42
24 matplotlib.rcParams['font.size']=14
25 import sys
26 from argparse import ArgumentParser
27 import pandas as pd
28
29 def map_peaks(gene,peak,outfile,macsFlag):
30 genefile = open(gene, 'r')
31 peakfile = open(peak, 'r')
32
33 types = {'promoter':0, 'after':0, 'intron':0, 'exon': 0}
34
35 #read mapped gene file, store closest map for each peak
36 peaks={} #{chrom:{peakStart:[dist, type]}}
37 for line in genefile:
38 words = line.strip().split('\t')
39 #ignore first line
40 if words[0] == 'knownGeneID': continue
41 chrom = words[2]
42
43
44 if not macsFlag:
45 try:
46 start = int(words[3])
47 dist = abs(int(words[15]))
48 maptype = words[16]
49 if maptype == 'gene':
50 maptype = words[17]
51 except:
52 pass
53
54 else:
55 start = int(words[3])-1
56 dist = abs(int(words[12]))
57 maptype = words[14]
58 if maptype == 'gene':
59 maptype = words[15]
60
61
62 if chrom not in peaks:
63 #new chrom
64 peaks[chrom] = {start:[dist,maptype]}
65 else:
66 if start in peaks[chrom].keys():
67 #account for duplicate entries - choose closest gene and store type
68 if dist < peaks[chrom][start][0]:
69 #closer gene
70 peaks[chrom][start] = [dist, maptype]
71 else: peaks[chrom][start] = [dist, maptype]
72
73 #count types - 1 per peak in peak file
74 types = {'promoter':0, 'after':0, 'intron':0, 'exon': 0, 'inter': 0}
75 totalpks = 0
76 #Read peak file in bed format
77 for line in peakfile:
78 words = line.strip().split('\t')
79 chrom = words[0]
80 start = int(words[1])
81 if chrom in peaks:
82 if start in peaks[chrom]:
83 types[peaks[chrom][start][1]] += 1
84 else:
85 types['inter'] += 1
86 else:
87 types['inter'] += 1
88 totalpks += 1
89
90
91 #--------------------------------------------
92 # make a square figure and axes
93 #--------------------------------------------
94
95 fig = plt.figure(figsize=(6,6))
96 pie_ax = fig.add_axes((0.3,0.2,0.4,0.4))
97
98 # The slices will be ordered and plotted counter-clockwise.
99 labels = ['exon: %i'%types['exon'],'intron: %i'%types['intron'],'promoter: %i'%types['promoter'],'intergenic: %i'%types['inter'], 'after: %i'%types['after']]
100 fracs = [types['exon'], types['intron'], types['promoter'], types['inter'], types['after']]
101
102 plt.pie(fracs, labels=labels) #, autopct='%1.1f%%')
103
104 #Export data frame with all the counts
105 indexDataFrame = ['exon','intron','promoter','intergenic','after']
106 df = pd.DataFrame(data=fracs, index=indexDataFrame)
107 dfFileName = outfile.replace("pdf","csv")
108 df.to_csv(dfFileName, sep=',')
109 #plt.title('MACS peaks in %s'%(name))
110 plt.figtext(.5, .1, 'Total: %i'%totalpks, ha='center')
111 fig.savefig(outfile)
112
113 def main():
114 usage = "usage: %prog --genefile MACSoutfile_genes.txt --peakfile MACSoutfile_peaks.bed --outfile MACSdirectory/piechart.pdf"
115 parser = ArgumentParser(usage)
116 parser.add_argument("--genefile", dest="genefile", help="Path to file MACS_mfold10,30_pval1e-5_genes.txt")
117 parser.add_argument("--peakfile", dest="peakfile", help="Path to file MACS_mfold10,30_pval1e-5_peaks.bed")
118 parser.add_argument("--outfile", dest="outfile", default="MACS_piechart.pdf", help="Path to pdf file where you want to store the piechart")
119 parser.add_argument('--MACS',action='store_true',default=False,help='Set this value if you have MACS peaks')
120
121 args=parser.parse_args()
122
123 map_peaks(args.genefile, args.peakfile, args.outfile, args.MACS)
124
125
126 if __name__=='__main__':
127 main()