annotate data_manager_build_alfa_indexes/data_manager/ALFA.py @ 4:6f0be85be8fb draft

Uploaded
author charles-bernard
date Thu, 27 Oct 2016 06:49:58 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
4
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1 #!/usr/bin/python
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
2 #-*- coding: utf-8 -*-
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
3
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
4 __author__ = 'noel & bahin'
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
5 ''' <decription> '''
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
6
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
7 import argparse
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
8 import os
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
9 import numpy
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
10 import sys
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
11 import subprocess
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
12 import matplotlib.pyplot as plt
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
13 import matplotlib.cm as cmx
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
14 import matplotlib.colors as colors
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
15 import matplotlib.patheffects as PathEffects
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
16 import re
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
17 from matplotlib.backends.backend_pdf import PdfPages
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
18 # To correctly embbed the texts when saving plots in svg format
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
19 import matplotlib
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
20 matplotlib.rcParams['svg.fonttype'] = 'none'
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
21
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
22 ##########################################################################
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
23 # FUNCTIONS #
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
24 ##########################################################################
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
25
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
26 def init_dict(d, key, init):
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
27 if key not in d:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
28 d[key] = init
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
29
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
30 def get_chromosome_lengths(args):
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
31 """
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
32 Parse the file containing the chromosomes lengths.
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
33 If no length file is provided, browse the annotation file (GTF) to estimate the chromosome sizes (
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
34 """
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
35 lengths={}
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
36 gtf_chrom_names=set()
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
37 force_get_lengths = False
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
38 # If the user provided the chromosome length file
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
39 if args.chr_len:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
40 with open(args.chr_len, 'r') as chr_len_file:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
41 for line in chr_len_file:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
42 lengths[line.split('\t')[0]] = int(line.rstrip().split('\t')[1])
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
43 with open(args.annotation,'r') as gtf_file:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
44 for line in gtf_file:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
45 if not line.startswith('#'):
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
46 chrom = line.split('\t')[0]
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
47 if chrom not in gtf_chrom_names:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
48 gtf_chrom_names.add(chrom)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
49 for chrom in lengths.keys():
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
50 if chrom not in gtf_chrom_names:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
51 print "Warning: at least one chromosome name ('"+chrom+"') of the file '"+args.chr_len+"'does not match any chromosome name if GTF and was ignored."
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
52 #del lengths[chrom]
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
53 break
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
54 for chrom in gtf_chrom_names:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
55 if force_get_lengths: break
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
56 if chrom not in lengths.keys():
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
57 print "WARNING: chromosome name '"+chrom+"' was found in gtf and does not match any chromosome name provided in",args.chr_len+". "
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
58 print "\t=> The chromosome lenghts will be approximated using annotations in the GTF file."
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
59 continue_value =""
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
60 while continue_value not in {"yes","y","no","n"}:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
61 continue_value = raw_input("\tDo you want to continue ('yes' or 'y')?\n\tElse write 'no' or 'n' to exit the script and check your file of lengths.\n")
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
62 if continue_value == "no" or continue_value == "n":
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
63 sys.exit("Exiting")
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
64 elif continue_value == "yes" or continue_value == "y":
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
65 force_get_lengths = True
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
66 break
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
67 print "Error: use 'yes/y/no/n' only"
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
68 if not force_get_lengths:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
69 return lengths
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
70 # Otherwise, (or if at least one chromosome was missing in chromosome lengths file) we consider the end of the last annotation of the chromosome in the GTF file as the chromosome length
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
71 with open(args.annotation, 'r') as gtf_file:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
72 for line in gtf_file:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
73 if not line.startswith('#'):
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
74 chrom = line.split('\t')[0]
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
75 end = int(line.split('\t')[4])
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
76 init_dict(lengths, chrom, 0)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
77 lengths[chrom] = max(lengths[chrom], end)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
78 if force_get_lengths:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
79 print "The chromosome lenghts have been approximated using the last annotations in the GTF file."
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
80 return lengths
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
81
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
82 def write_feature_on_index(feat,chrom, start, stop, sign, stranded_genome_index, unstranded_genome_index=None):
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
83 grouped_by_biotype_features = []
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
84 for biotype,categs in feat.iteritems():
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
85 categ_list=[]
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
86 for cat in set(categs):
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
87 categ_list.append(cat)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
88 grouped_by_biotype_features.append(":".join((str(biotype),",".join(categ_list))))
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
89 stranded_genome_index.write('\t'.join((chrom, start, stop, sign,''))+'\t'.join(grouped_by_biotype_features)+'\n')
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
90 if unstranded_genome_index :
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
91 unstranded_genome_index.write('\t'.join((chrom, start, stop, '.',''))+'\t'.join(grouped_by_biotype_features)+'\n')
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
92
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
93
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
94 def add_info(cpt, feat_values, start, stop, chrom=None, unstranded_genome_index=None, stranded_genome_index = None , biotype_prios=None, coverage=1, categ_prios=None):
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
95 """
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
96 From an annotated genomic interval (start/stop positions and associated feature : one or more category(ies)/biotype pair(s) )
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
97 - If a file is provided: write the information at the end of the index file being generated;
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
98 - else : browse the features and update the counts of categories/biotypes found in the genome.
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
99 """
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
100 ## Writing in the file is it was provided
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
101 if stranded_genome_index :
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
102 unstranded_features = None
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
103 # If only one strand has a feature, this feature will directly be written on the unstranded index
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
104 if feat_values[0] == {} :
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
105 # A interval with no feature corresponds to a region annotated only on the reverse strand : update 'antisense' counts
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
106 stranded_genome_index.write('\t'.join((chrom, start, stop, '+','antisense\n')))
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
107 write_feature_on_index(feat_values[1], chrom ,start, stop, '-', stranded_genome_index, unstranded_genome_index)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
108 else :
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
109 if feat_values[1] == {} :
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
110 write_feature_on_index(feat_values[0], chrom ,start, stop, '+', stranded_genome_index, unstranded_genome_index)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
111 stranded_genome_index.write('\t'.join((chrom, start, stop, '-','antisense\n')))
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
112
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
113 # Else, the unstranded index should contain the union of plus and minus features
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
114 else :
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
115 write_feature_on_index(feat_values[0], chrom ,start, stop, '+', stranded_genome_index)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
116 write_feature_on_index(feat_values[1], chrom ,start, stop, '-', stranded_genome_index)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
117 unstranded_feat = dict(feat_values[0], **feat_values[1])
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
118 for name in set(feat_values[0]) & set(feat_values[1]):
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
119 unstranded_feat[name]+=feat_values[0][name]
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
120 write_feature_on_index(unstranded_feat, chrom ,start, stop, '.', unstranded_genome_index)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
121
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
122 ## Increasing category counter(s)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
123 else :
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
124 # Default behavior if no biotype priorities : category with the highest priority for each found biotype has the same weight (1/n_biotypes)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
125 if not biotype_prios:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
126 nb_feat = len(feat_values)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
127 # For every categ(s)/biotype pairs
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
128 for feat in feat_values:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
129 cur_prio = 0
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
130 selected_categ = []
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
131 # Separate categorie(s) and biotype
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
132 try:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
133 biot,cats = feat.split(":")
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
134 # Error if feature equal "antisense" : update the 'antisense/antisense' counts
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
135 except ValueError :
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
136 try :
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
137 cpt[(feat,feat)] += (int(stop) - int(start)) * coverage / float(nb_feat)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
138 except :
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
139 cpt[(feat,feat)] = (int(stop) - int(start)) * coverage / float(nb_feat)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
140 return
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
141 # Browse the categories and get only the one(s) with highest priority
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
142 for cat in cats.split(','):
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
143 try: prio = prios[cat]
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
144 except:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
145 #TODO Find a way to add unknown categories
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
146 if cat not in unknown_feature:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
147 print >> sys.stderr, "Warning: Unknown categorie %s found and ignored\n...\r" %cat,
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
148 unknown_feature.append(cat)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
149 continue
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
150 if prio > cur_prio :
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
151 cur_prio = prio
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
152 selected_categ = [cat]
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
153 if prio == cur_prio :
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
154 if cat != selected_categ :
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
155 try:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
156 if cat not in selected_categ :
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
157 selected_categ.append(cat)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
158 except TypeError :
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
159 selected_categ = [selected_categ,cat]
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
160 # Increase each counts by the coverage divided by the number of categories and biotypes
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
161 nb_cats = len(selected_categ)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
162 for cat in selected_categ :
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
163 try:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
164 cpt[(cat,biot)] += (int(stop) - int(start)) * coverage / (float(nb_feat * nb_cats))
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
165 except KeyError:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
166 cpt[(cat,biot)] = (int(stop) - int(start)) * coverage / (float(nb_feat * nb_cats))
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
167 #else :
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
168 #cpt[(cats,biot)] = (int(stop) - int(start)) / float(nb_feat) * coverage
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
169 # Else, apply biotype selection according to the priority set
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
170 else:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
171 #TODO Add an option to pass biotype priorities and handle it
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
172 pass
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
173
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
174 def print_chrom(features_dict, chrom, stranded_index_file, unstranded_index_file, cpt_genome):
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
175 with open(unstranded_index_file,'a') as findex, open(stranded_index_file,'a') as fstrandedindex:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
176 # Initialization of variables : start position of the first interval and associated features for +/- strands
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
177 start = ""
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
178 for pos in sorted(features_dict['+'].keys()):
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
179 if start != "":
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
180 add_info(cpt_genome, [features_plus,features_minus], str(start), str(pos), chrom, stranded_genome_index = fstrandedindex, unstranded_genome_index = findex)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
181 start = pos
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
182 features_plus = features_dict['+'][pos]
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
183 features_minus = features_dict['-'][pos]
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
184
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
185
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
186 def create_genome_index(annotation, unstranded_genome_index, stranded_genome_index,cpt_genome,prios,biotypes,chrom_sizes):
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
187 ''' Create an index of the genome annotations and save it in a file'''
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
188 print '\n### Generating genome indexes\n',
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
189 sys.stdout.flush()
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
190 # Initializations
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
191 intervals_dict = 0
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
192 max_value = -1
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
193 prev_chrom = ''
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
194 reverse_strand = {'+':'-','-':'+'}
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
195 i = 0 # Line counter
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
196 # Write the chromosomes lengths as comment lines before the genome index
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
197 with open(unstranded_genome_index,'w') as findex, open(stranded_genome_index,'w') as fstrandedindex:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
198 for key,value in chrom_sizes.items():
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
199 findex.write("#%s\t%s\n" %(key, value))
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
200 fstrandedindex.write("#%s\t%s\n" %(key, value))
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
201 # Running through the GTF file and writing into genome index files
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
202 with open(annotation, 'r') as gtf_file:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
203 for line in gtf_file:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
204 # Print messages after X processed lines
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
205 i += 1
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
206 if i % 100000 == 0:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
207 print >> sys.stderr, '\r%s line processed...' %str(i)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
208 print '\r \r. . .',
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
209 sys.stdout.flush()
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
210 elif i % 20000 == 0:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
211 print '\r \r. . .',
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
212 sys.stdout.flush()
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
213 elif i % 2000 == 0:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
214 print '.',
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
215 sys.stdout.flush()
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
216 # Processing lines except comment ones
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
217 if not line.startswith('#'):
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
218 # Getting the line infos
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
219 line_split=line[:-1].split('\t')
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
220 chrom = line_split[0]
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
221 cat=line_split[2]
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
222 start = int(line_split[3]) - 1
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
223 stop = int(line_split[4])
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
224 strand = line_split[6]
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
225 antisense = reverse_strand[strand]
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
226 biotype=line_split[8].split('biotype')[1].split(';')[0].strip('" ')
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
227 feat = [(cat,biotype)]
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
228 # Registering chromosome info in the genome index file if this is a new chromosome or a annotation not overlapping previously recorded features
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
229 if start > max_value or chrom != prev_chrom:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
230 # Write the previous features
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
231 if intervals_dict != 0:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
232 if chrom != prev_chrom :
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
233 print_chrom(intervals_dict, prev_chrom, stranded_genome_index, unstranded_genome_index, cpt_genome)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
234 print "\rChromosome '" + prev_chrom + "' registered."
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
235 else:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
236 print_chrom(intervals_dict, chrom, stranded_genome_index, unstranded_genome_index, cpt_genome)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
237 prev_chrom = chrom
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
238 # (Re)Initializing the chromosome lists
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
239 intervals_dict = {strand:{start:{biotype:[cat]}, stop:{}},antisense:{start:{},stop:{}}}
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
240 max_value = stop
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
241
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
242 # Update the dictionary which represents intervals for every disctinct annotation
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
243 else:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
244 # Get intervals on the same strand as the current feature
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
245 stranded_intervals = intervals_dict[strand]
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
246 start_added = False
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
247 for pos in sorted(stranded_intervals.keys()):
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
248 #print pos
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
249 #print stranded_intervals[pos]
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
250 #print
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
251 # While the position is below the feature's interval, store the features
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
252 if pos < start :
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
253 cur_cat_dict = dict(stranded_intervals[pos])
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
254 cur_antisense_dict = dict(intervals_dict[antisense][pos])
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
255
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
256 # If the start position already exists: update it by addind the new feature
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
257 elif pos == start :
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
258 cur_cat_dict = dict(stranded_intervals[pos])
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
259 cur_antisense_dict = dict(intervals_dict[antisense][pos])
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
260 #print "cur",cur_cat_dict
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
261 try : stranded_intervals[pos][biotype] = stranded_intervals[pos][biotype]+[cat]
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
262 except KeyError: stranded_intervals[pos][biotype] = [cat]
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
263 start_added = True
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
264 #print "cur",cur_cat_dict
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
265
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
266 elif pos > start :
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
267 # Create a new entry for the start position if necessary
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
268 if not start_added :
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
269 #print "cur",cur_cat_dict
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
270 stranded_intervals[start] = dict(cur_cat_dict)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
271 stranded_intervals[start][biotype] = [cat]
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
272 #stranded_intervals[pos][biotype].append(cat)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
273 intervals_dict[antisense][start] = cur_antisense_dict
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
274 start_added = True
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
275 #print "cur",cur_cat_dict
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
276 # While the position is below the stop, just add the new feature
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
277 if pos < stop :
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
278 cur_cat_dict = dict(stranded_intervals[pos])
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
279 cur_antisense_dict = dict(intervals_dict[antisense][pos])
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
280 try: stranded_intervals[pos][biotype] = stranded_intervals[pos][biotype] + [cat]
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
281 except KeyError: stranded_intervals[pos][biotype] = [cat]
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
282 # Close the created interval : create an entry at the stop position and restore the features
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
283 elif pos > stop :
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
284 stranded_intervals[stop] = dict(cur_cat_dict)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
285 intervals_dict[antisense][stop] = cur_antisense_dict
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
286 break
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
287 else :
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
288 break
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
289 #try:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
290 #cur_cat_dict = list(stranded_intervals[pos][biotype])
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
291 #except KeyError: cur_cat_dict = list()
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
292 #print stranded_intervals[pos]
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
293 #print
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
294 # Extend the dictionary if necessary
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
295 if stop > max_value:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
296 max_value = stop
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
297 stranded_intervals[stop] = {}
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
298 intervals_dict[antisense][stop] = {}
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
299 #except KeyError:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
300 #print intervals_dict
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
301 #quit()
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
302 #intervals_dict[strand] = {strand:{start:{biotype:[cat]}, stop:{}},antisense:{start:{},stop:{}}}
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
303 #continue
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
304 #for sign in ['-','+']:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
305 #print sign
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
306 #for key,val in sorted(intervals_dict[sign].iteritems()):
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
307 #print key,'\t',val
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
308 #print
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
309 #print "-------\n"
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
310
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
311
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
312 #Store the categories of the last chromosome
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
313 print_chrom(intervals_dict, chrom, stranded_genome_index, unstranded_genome_index, cpt_genome)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
314 print "\rChromosome '" + prev_chrom + "' registered.\nDone!"
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
315
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
316
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
317 def create_bedgraph_files(bams,strandness):
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
318 samples_files = []
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
319 labels = []
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
320 print "\n### Generating the bedgraph files"
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
321 for n in range(0, len(bams), 2):
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
322 print "\rProcessing '%s'\n..." %bams[n],
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
323 sys.stdout.flush()
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
324 #Get the label for this sample
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
325 label = bams[n+1]
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
326 #Modify it to contain only alphanumeric caracters (avoid files generation with dangerous names)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
327 modified_label = "_".join(re.findall(r"[\w']+", label))
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
328 if strandness in ["reverse","fr-secondstrand"]:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
329 subprocess.call('bedtools genomecov -bg -split -strand - -ibam ' + bams[n] + ' > ' + modified_label + '.plus.bedgraph', shell=True)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
330 subprocess.call('bedtools genomecov -bg -split -strand + -ibam ' + bams[n] + ' > ' + modified_label + '.minus.bedgraph', shell=True)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
331 elif strandness in ["forward","fr-firststrand"]:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
332 subprocess.call('bedtools genomecov -bg -split -strand + -ibam ' + bams[n] + ' > ' + modified_label + '.plus.bedgraph', shell=True)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
333 subprocess.call('bedtools genomecov -bg -split -strand - -ibam ' + bams[n] + ' > ' + modified_label + '.minus.bedgraph', shell=True)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
334 else :
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
335 subprocess.call('bedtools genomecov -bg -split -ibam ' + bams[n] + ' > ' + modified_label + '.bedgraph', shell=True)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
336 samples_files.append(modified_label)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
337 labels.append(label)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
338 print "\rDone!"
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
339 return samples_files, labels
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
340
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
341 def read_gtf(gtf_index_file, sign):
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
342 global gtf_line, gtf_chrom, gtf_start, gtf_stop, gtf_features, endGTF
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
343 strand = ""
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
344 while strand != sign :
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
345 gtf_line = gtf_index_file.readline()
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
346 # If the GTF file is finished
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
347 if not gtf_line:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
348 endGTF = True
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
349 return endGTF
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
350 splitline = gtf_line.rstrip().split('\t')
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
351 try: strand = splitline[3]
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
352 # strand information can not be found in the file file header
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
353 except IndexError: pass
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
354 gtf_chrom = splitline[0]
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
355 gtf_features = splitline[4:]
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
356 gtf_start, gtf_stop = map(int, splitline[1:3])
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
357 return endGTF
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
358
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
359 def read_counts_files(counts_files):
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
360 cpt={}
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
361 cpt_genome={}
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
362 labels=[]
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
363 for fcounts in counts_files:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
364 label=os.path.splitext(os.path.basename(fcounts))[0]
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
365 labels.append(label)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
366 cpt[label]={}
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
367 with open (fcounts,"r") as f:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
368 for line in f:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
369 if line[0]=="#":
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
370 continue
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
371 line_split=line[:-1].split('\t')
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
372 feature=tuple(line_split[0].split(','))
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
373 cpt[label][feature]=float(line_split[1])
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
374 cpt_genome[feature]=float(line_split[2])
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
375 return cpt,cpt_genome,labels
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
376
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
377
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
378 def get_chromosome_names_in_index(genome_index):
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
379 chrom_list = []
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
380 with open(genome_index, 'r') as findex:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
381 chrom = ""
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
382 for line in findex:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
383 cur_chrom = line.split('\t')[0]
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
384 if cur_chrom == chrom:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
385 pass
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
386 else:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
387 chrom = cur_chrom
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
388 if chrom not in chrom_list:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
389 chrom_list.append(chrom)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
390 return set(chrom_list)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
391
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
392
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
393 def intersect_bedgraphs_and_index_to_counts_categories(samples_files,samples_names,prios,genome_index, strandness, biotype_prios = None):
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
394 global gtf_line, gtf_chrom, gtf_start, gtf_stop, gtf_cat, endGTF
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
395 print "\n### Intersecting files with indexes"
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
396 unknown_chrom = []
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
397 cpt = {} # Counter for the nucleotides in the BAM input file(s)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
398 for n in range(len(samples_files)):
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
399 sample_file=samples_files[n]
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
400 sample_name=samples_names[n]
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
401 # Initializing the category counter dict for this sample
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
402 init_dict(cpt, sample_name, {})
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
403 if strandness == "unstranded":
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
404 strands = [("",".")]
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
405 else:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
406 strands = [('.plus','+'), ('.minus','-')]
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
407
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
408 # Intersecting the BEDGRAPH and genome index files
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
409 print "\rProcessing '%s'\n. . ." %sample_file,
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
410 sys.stdout.flush()
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
411
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
412 for strand,sign in strands:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
413 prev_chrom = ''
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
414 endGTF = False # Reaching the next chr or the end of the GTF index
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
415 intergenic_adds = 0.0
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
416 i = 0
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
417 i_chgt = 0
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
418 with open(sample_file + strand + '.bedgraph', 'r') as bam_count_file:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
419 # Running through the BEDGRAPH file
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
420 for bam_line in bam_count_file:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
421 i += 1
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
422 if i % 10000 == 0:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
423 print ".",
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
424 sys.stdout.flush()
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
425 if i % 100000 == 0:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
426 print "\r \r. . .",
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
427 sys.stdout.flush()
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
428 # Getting the BAM line info
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
429 bam_chrom = bam_line.split('\t')[0]
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
430 bam_start, bam_stop, bam_cpt = map(float, bam_line.split('\t')[1:4])
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
431 # Skip the line if the chromosome is not in the index
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
432 if bam_chrom not in chrom_list:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
433 if bam_chrom not in unknown_chrom:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
434 unknown_chrom.append(bam_chrom)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
435 print "\r \r Chromosome '" + bam_chrom + "' not found in index."
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
436 continue
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
437 # If this is a new chromosome (or the first one)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
438 if bam_chrom != prev_chrom:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
439 i_chgt = i
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
440 intergenic_adds = 0.0
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
441 # (Re)opening the GTF index and looking for the first line of the matching chr
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
442 try: gtf_index_file.close()
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
443 except UnboundLocalError: pass
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
444 gtf_index_file = open(genome_index, 'r')
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
445 endGTF = False
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
446 read_gtf(gtf_index_file, sign)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
447 while bam_chrom != gtf_chrom:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
448 read_gtf(gtf_index_file, sign)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
449 if endGTF:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
450 break
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
451 prev_chrom = bam_chrom
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
452
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
453 # Looking for the first matching annotation in the GTF index
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
454 while (not endGTF) and (gtf_chrom == bam_chrom) and (bam_start >= gtf_stop):
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
455 read_gtf(gtf_index_file, sign)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
456 if gtf_chrom != bam_chrom:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
457 endGTF = True
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
458 # Processing BAM lines before the first GTF annotation if there are
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
459 if bam_start < gtf_start:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
460 # Increase the 'intergenic' category counter with all or part of the BAM interval
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
461 try:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
462 intergenic_adds += min(bam_stop,gtf_start)-bam_start
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
463 cpt[sample_name][('intergenic','intergenic')] += (min(bam_stop, gtf_start) - bam_start) * bam_cpt
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
464 except KeyError:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
465 cpt[sample_name][('intergenic','intergenic')] = (min(bam_stop, gtf_start) - bam_start) * bam_cpt
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
466 # Go to next line if the BAM interval was totally upstream the first GTF annotation, carry on with the remaining part otherwise
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
467 if endGTF or (bam_stop <= gtf_start):
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
468 continue
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
469 else:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
470 bam_start = gtf_start
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
471
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
472 # We can start the crossover
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
473 while not endGTF:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
474 # Update category counter
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
475 add_info(cpt[sample_name], gtf_features, bam_start, min(bam_stop,gtf_stop), coverage = bam_cpt, categ_prios = prios)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
476 # Read the next GTF file line if the BAM line is not entirely covered
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
477 if bam_stop > gtf_stop:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
478 # Update the BAM start pointer
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
479 bam_start = gtf_stop
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
480 endGTF = read_gtf(gtf_index_file, sign)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
481 # If we read a new chromosome in the GTF file then it is considered finished
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
482 if bam_chrom != gtf_chrom:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
483 endGTF = True
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
484 if endGTF:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
485 break
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
486 else:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
487 # Next if the BAM line is entirely covered
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
488 bam_start = bam_stop
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
489 break
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
490
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
491 # Processing the end of the BAM line if necessary
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
492 if endGTF and (bam_stop > bam_start):
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
493 try:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
494 cpt[sample_name][('intergenic','intergenic')] += (bam_stop - bam_start) * bam_cpt
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
495 except KeyError:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
496 cpt[sample_name][('intergenic','intergenic')] = (bam_stop - bam_start) * bam_cpt
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
497 gtf_index_file.close()
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
498 print "\r \rDone!"
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
499 return cpt
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
500
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
501 def write_counts_in_files(cpt,genome_counts):
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
502 for label,dico in cpt.items():
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
503 label = "_".join(re.findall(r"[\w']+", label))
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
504 with open(label+".categories_counts","w") as fout:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
505 fout.write("#Category,biotype\tCounts_in_bam\tSize_in_genome\n" )
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
506 for feature,counts in dico.items():
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
507 fout.write("%s\t%s\t%s\n" %(','.join(feature),counts,genome_counts[feature]))
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
508
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
509 def recategorize_the_counts(cpt,cpt_genome,final):
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
510 final_cat_cpt={}
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
511 final_genome_cpt={}
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
512 for f in cpt:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
513 #print "\nFinal categories for",f,"sample"
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
514 final_cat_cpt[f]={}
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
515 for final_cat in final:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
516 tot = 0
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
517 tot_genome=0
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
518 for cat in final[final_cat]:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
519 tot += cpt[f][cat]
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
520 tot_genome+=cpt_genome[cat]
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
521 #output_file.write('\t'.join((final_cat, str(tot))) + '\n')
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
522 #print '\t'.join((final_cat, str(tot)))
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
523 final_cat_cpt[f][final_cat]=tot
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
524 final_genome_cpt[final_cat]=tot_genome
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
525 return final_cat_cpt,final_genome_cpt
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
526
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
527 def group_counts_by_categ(cpt,cpt_genome,final,selected_biotype):
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
528 final_cat_cpt={}
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
529 final_genome_cpt={}
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
530 filtered_cat_cpt = {}
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
531 for f in cpt:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
532 final_cat_cpt[f]={}
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
533 filtered_cat_cpt[f] = {}
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
534 for final_cat in final:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
535 tot = 0
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
536 tot_filter = 0
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
537 tot_genome=0
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
538 for cat in final[final_cat]:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
539 for key,value in cpt[f].items():
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
540 if key[0] == cat :
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
541 tot += value
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
542 tot_genome+=cpt_genome[key]
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
543 if key[1] == selected_biotype :
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
544 tot_filter += value
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
545 #output_file.write('\t'.join((final_cat, str(tot))) + '\n')
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
546 #print '\t'.join((final_cat, str(tot)))
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
547 final_cat_cpt[f][final_cat]=tot
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
548 if tot_genome == 0:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
549 final_genome_cpt[final_cat]= 1e-100
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
550 else:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
551 final_genome_cpt[final_cat]=tot_genome
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
552 filtered_cat_cpt[f][final_cat]=tot_filter
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
553 #if 'antisense' in final_genome_cpt: final_genome_cpt['antisense'] = 0
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
554 return final_cat_cpt,final_genome_cpt,filtered_cat_cpt
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
555
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
556 def group_counts_by_biotype(cpt,cpt_genome,biotypes):
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
557 final_cpt={}
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
558 final_genome_cpt={}
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
559 for f in cpt:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
560 final_cpt[f]={}
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
561 for biot in biotypes:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
562 tot = 0
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
563 tot_genome=0
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
564 try:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
565 for final_biot in biotypes[biot]:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
566 for key,value in cpt[f].items():
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
567 if key[1] == final_biot :
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
568 tot += value
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
569 #if key[1] != 'antisense':
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
570 tot_genome+=cpt_genome[key]
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
571 except:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
572 for key,value in cpt[f].items():
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
573 if key[1] == biot :
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
574 tot += value
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
575 tot_genome+=cpt_genome[key]
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
576 if tot != 0:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
577 final_cpt[f][biot]=tot
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
578 final_genome_cpt[biot]=tot_genome
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
579 return final_cpt,final_genome_cpt
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
580
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
581 #def get_cmap(N):
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
582 #'''Returns a function that maps each index in 0, 1, ... N-1 to a distinct
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
583 #RGB color.'''
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
584 #color_norm = colors.Normalize(vmin=0, vmax=N-1)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
585 #scalar_map = cmx.ScalarMappable(norm=color_norm, cmap='hsv')
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
586 #def map_index_to_rgb_color(index):
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
587 #return scalar_map.to_rgba(index)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
588 #return map_index_to_rgb_color
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
589
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
590 def one_sample_plot(ordered_categs, percentages, enrichment, n_cat, index, index_enrichment, bar_width, counts_type, title) :
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
591 ### Initialization
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
592 fig = plt.figure(figsize=(13,9))
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
593 ax1 = plt.subplot2grid((2,4),(0,0),colspan=2)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
594 ax2 = plt.subplot2grid((2,4),(1,0),colspan=2)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
595 cmap= plt.get_cmap('Spectral')
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
596 cols=[cmap(x) for x in xrange(0,256,256/n_cat)]
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
597 if title:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
598 ax1.set_title(title+"in: %s" %samples_names[0])
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
599 else :
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
600 ax1.set_title(counts_type+" distribution in mapped reads in: %s" %samples_names[0])
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
601 ax2.set_title('Normalized counts of '+counts_type)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
602
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
603
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
604 ### Barplots
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
605 #First barplot: percentage of reads in each categorie
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
606 ax1.bar(index, percentages, bar_width,
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
607 color=cols)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
608 #Second barplot: enrichment relative to the genome for each categ
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
609 # (the reads count in a categ is divided by the categ size in the genome)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
610 ax2.bar(index_enrichment, enrichment, bar_width,
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
611 color=cols,)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
612 ### Piecharts
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
613 pielabels = [ordered_categs[i] if percentages[i]>0.025 else '' for i in xrange(n_cat)]
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
614 sum_enrichment = numpy.sum(enrichment)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
615 pielabels_enrichment = [ordered_categs[i] if enrichment[i]/sum_enrichment>0.025 else '' for i in xrange(n_cat)]
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
616 # Categories piechart
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
617 ax3 = plt.subplot2grid((2,4),(0,2))
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
618 pie_wedge_collection, texts = ax3.pie(percentages,labels=pielabels, shadow=True, colors=cols)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
619 # Enrichment piechart
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
620 ax4 = plt.subplot2grid((2,4),(1,2))
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
621 pie_wedge_collection, texts = ax4.pie(enrichment,labels=pielabels_enrichment, shadow=True, colors=cols)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
622 # Add legends (append percentages to labels)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
623 labels = [" ".join((ordered_categs[i],"({:.1%})".format(percentages[i]))) for i in range(len(ordered_categs))]
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
624 ax3.legend(pie_wedge_collection,labels,loc='center',fancybox=True, shadow=True,prop={'size':'medium'}, bbox_to_anchor=(1.7,0.5))
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
625 labels = [" ".join((ordered_categs[i],"({:.1%})".format(enrichment[i]/sum_enrichment))) for i in range(len(ordered_categs))]# if ordered_categs[i] != 'antisense']
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
626 ax4.legend(pie_wedge_collection,labels,loc='center',fancybox=True, shadow=True,prop={'size':'medium'},bbox_to_anchor=(1.7,0.5))
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
627 # Set aspect ratio to be equal so that pie is drawn as a circle
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
628 ax3.set_aspect('equal')
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
629 ax4.set_aspect('equal')
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
630 return fig, ax1, ax2
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
631
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
632 def make_plot(ordered_categs,samples_names,categ_counts,genome_counts,pdf, counts_type, threshold, title = None ,svg = None, png = None):
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
633 # From ordered_categs, keep only the features (categs or biotypes) that we can find in at least one sample.
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
634 existing_categs = set()
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
635 for sample in categ_counts.values():
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
636 existing_categs |= set(sample.keys())
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
637 ordered_categs = filter(existing_categs.__contains__, ordered_categs)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
638 n_cat = len(ordered_categs)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
639 n_exp=len(samples_names)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
640 ##Initialization of the matrix of counts (nrow=nb_experiements, ncol=nb_categorie)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
641 counts=numpy.matrix(numpy.zeros(shape=(n_exp,n_cat)))
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
642 for exp in xrange(len(samples_names)):
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
643 for cat in xrange(len(ordered_categs)):
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
644 try: counts[exp,cat]=categ_counts[samples_names[exp]][ordered_categs[cat]]
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
645 except: pass
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
646
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
647 ##Normalize the categorie sizes by the total size to get percentages
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
648 sizes=[]
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
649 sizes_sum=0
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
650 for cat in ordered_categs:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
651 sizes.append(genome_counts[cat])
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
652 sizes_sum+=genome_counts[cat]
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
653 if 'antisense' in ordered_categs:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
654 antisense_pos = ordered_categs.index('antisense')
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
655 sizes[antisense_pos] = 1e-100
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
656 for cpt in xrange(len(sizes)):
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
657 sizes[cpt]/=float(sizes_sum)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
658
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
659 ## Create array which contains the percentage of reads in each categ for every sample
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
660 percentages=numpy.array(counts/numpy.sum(counts,axis=1))
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
661 ## Create the enrichment array (counts divided by the categorie sizes in the genome)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
662 enrichment=numpy.array(percentages/sizes)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
663 if 'antisense_pos' in locals():
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
664 for i in xrange(len(samples_names)):
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
665 enrichment[i][antisense_pos] = 0
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
666 #enrichment=numpy.log(numpy.array(percentages/sizes))
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
667 for exp in xrange(n_exp):
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
668 for i in xrange(n_cat):
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
669 val = enrichment[exp][i]
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
670 if val > 1:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
671 enrichment[exp][i] = val-1
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
672 elif val == 1 or val == 0:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
673 enrichment[exp][i] = 0
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
674 else:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
675 enrichment[exp][i] = -1/val+1
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
676
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
677 #### Finally, produce the plot
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
678
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
679 ##Get the colors from the colormap
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
680 ncolor=16
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
681 cmap = ["#e47878", "#68b4e5", "#a3ea9b", "#ea9cf3", "#e5c957", "#a3ecd1", "#e97ca0", "#66d985", "#8e7ae5", "#b3e04b", "#b884e4", "#e4e758", "#738ee3", "#e76688", "#70dddd", "#e49261"]
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
682 if n_exp > ncolor:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
683 cmap = plt.get_cmap('Set3',n_exp)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
684 cmap = [cmap(i) for i in xrange(n_exp)]
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
685
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
686 ## Parameters for the plot
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
687 opacity = 1
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
688 #Create a vector which contains the position of each bar
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
689 index = numpy.arange(n_cat)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
690 #Size of the bars (depends on the categs number)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
691 bar_width = 0.9/n_exp
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
692
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
693
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
694 ##Initialise the subplot
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
695 # if there is only one sample, also plot piecharts
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
696 #if n_exp == 1 and counts_type.lower() == 'categories':
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
697 #fig, ax1, ax2 = one_sample_plot(ordered_categs, percentages[0], enrichment[0], n_cat, index, bar_width, counts_type, title)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
698 ## If more than one sample
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
699 #else:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
700 fig, (ax1,ax2) = plt.subplots(2,figsize=(5+(n_cat+2*n_exp)/3,10))
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
701 # Store the bars objects for enrichment plot
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
702 rects = []
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
703 #For each sample/experiment
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
704 for i in range(n_exp):
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
705 #First barplot: percentage of reads in each categorie
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
706 ax1.bar(index+i*bar_width, percentages[i], bar_width,
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
707 alpha=opacity,
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
708 color=cmap[i],
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
709 label=samples_names[i], edgecolor='#FFFFFF', lw=0)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
710 #Second barplot: enrichment relative to the genome for each categ
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
711 # (the reads count in a categ is divided by the categ size in the genome)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
712 rects.append( ax2.bar(index+i*bar_width, enrichment[i], bar_width,
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
713 alpha=opacity,
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
714 color=cmap[i],
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
715 label=samples_names[i], edgecolor=cmap[i], lw=0))
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
716
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
717 ## Graphical options for the plot
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
718 # Adding of the legend
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
719 ax1.legend(loc='best',frameon=False)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
720 #ax2.legend(loc='upper center',bbox_to_anchor=(0.5,-0.1), fancybox=True, shadow=True)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
721 # Main titles
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
722 if title:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
723 ax1.set_title(title)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
724 else :
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
725 ax1.set_title(counts_type+" distribution in mapped reads")
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
726 ax2.set_title('Normalized counts of '+counts_type)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
727
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
728 # Adding enrichment baseline
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
729 #ax2.axhline(y=0,color='black',linestyle='dashed',linewidth='1.5')
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
730 # Axes limits
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
731 ax1.set_xlim(-0.1,len(ordered_categs)+0.1)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
732 if len(sizes) == 1: ax1.set_xlim(-2,3)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
733 ax2.set_xlim(ax1.get_xlim())
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
734 # Set axis limits (max/min values + 5% margin)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
735 ax2_ymin = []
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
736 ax2_ymax = []
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
737 for sample_values in enrichment:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
738 ax2_ymin.append(min(sample_values))
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
739 ax2_ymax.append(max(sample_values))
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
740 ax2_ymax = max(ax2_ymax)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
741 ax2_ymin = min(ax2_ymin)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
742 margin_top, margin_bottom = (abs(0.05*ax2_ymax), abs(0.05*ax2_ymin))
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
743 ax1.set_ylim(0,ax1.get_ylim()[1]*1.05)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
744 if threshold:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
745 threshold_bottom = -abs(float(threshold[0]))+1
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
746 threshold_top = float(threshold[1])-1
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
747
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
748 for i in xrange(n_exp):
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
749 for y in xrange(n_cat):
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
750 val = enrichment[i][y]
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
751 if not numpy.isnan(val) and not (threshold_bottom < val < threshold_top):
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
752 rect = rects[i][y]
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
753 rect_height = rect.get_height()
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
754 if rect.get_y() < 0:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
755 diff = rect_height + threshold_bottom
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
756 rect.set_y(threshold_bottom)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
757 ax2_ymin = threshold_bottom
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
758 margin_bottom = 0
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
759 else:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
760 diff = rect_height - threshold_top
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
761 ax2_ymax = threshold_top
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
762 margin_top = 0
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
763 rect.set_height(rect.get_height()-diff)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
764 if margin_top != 0 and margin_bottom != 0:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
765 margin_top, margin_bottom = [max(margin_top, margin_bottom) for i in xrange(2)]
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
766 ax2.set_ylim(ax2_ymin-margin_bottom,ax2_ymax+margin_top)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
767 # Y axis title
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
768 ax1.set_ylabel('Proportion of reads (%)')
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
769 ax2.set_ylabel('Enrichment relative to genome')
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
770 # X axis title
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
771 ax1.set_xlabel(counts_type)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
772 ax2.set_xlabel(counts_type)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
773 # Add the categories on the x-axis
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
774 ax1.set_xticks(index + bar_width*n_exp/2)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
775 ax2.set_xticks(index + bar_width*n_exp/2)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
776 if counts_type.lower() != 'categories':
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
777 ax1.set_xticklabels(ordered_categs,rotation='30',ha='right')
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
778 ax2.set_xticklabels(ordered_categs,rotation='30',ha='right')
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
779 else:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
780 ax1.set_xticklabels(ordered_categs)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
781 ax2.set_xticklabels(ordered_categs)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
782 # Display fractions values in percentages
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
783 ax1.set_yticklabels([str(int(i*100)) for i in ax1.get_yticks()])
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
784 # Correct y-axis ticks labels for enrichment subplot
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
785 #ax2.set_yticklabels([str(i+1)+"$^{+1}$" if i>0 else 1 if i==0 else str(-(i-1))+"$^{-1}$" for i in ax2.get_yticks()])
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
786 yticks = list(ax2.get_yticks())
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
787 yticks = [ yticks[i]-1 if yticks[i]>9 else yticks[i]+1 if yticks[i]<-9 else yticks[i] for i in xrange(len(yticks))]
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
788 ax2.set_yticks(yticks)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
789 ax2.set_yticklabels([str(int(i+1))+"$^{+1}$" if i>0 and i%1==0 else str(i+1)+"$^{+1}$" if i>0 else 1 if i==0 else str(int(-(i-1)))+"$^{-1}$" if i<0 and i%1==0 else str(-(i-1))+"$^{-1}$" for i in ax2.get_yticks()])
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
790 #ax2.set_yticklabels([i+1 if i>0 else 1 if i==0 else "$\\frac{1}{%s}$" %-(i-1) for i in ax2.get_yticks()])
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
791 # Change appearance of 'antisense' bars on enrichment plot since we cannot calculate an enrichment for this artificial category
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
792 if 'antisense_pos' in locals(): #ax2.text(antisense_pos+bar_width/2,ax2.get_ylim()[1]/10,'NA')
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
793 for i in xrange(n_exp) :
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
794 rect = rects[i][antisense_pos]
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
795 rect.set_y(ax2.get_ylim()[0])
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
796 rect.set_height(ax2.get_ylim()[1] - ax2.get_ylim()[0])
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
797 rect.set_hatch('/')
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
798 rect.set_fill(False)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
799 rect.set_linewidth(0)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
800 #rect.set_color('lightgrey')
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
801 #rect.set_edgecolor('#EDEDED')
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
802 rect.set_color('#EDEDED')
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
803 ax2.text(index[antisense_pos] + bar_width*n_exp/2 - 0.1, (ax2_ymax+ax2_ymin)/2, 'NA')
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
804 # Add text for features absent in sample
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
805 for i in xrange(n_exp):
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
806 for y in xrange(n_cat):
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
807 if percentages[i][y] == 0:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
808 txt = ax1.text(y + bar_width*(i+0.5), 0.02, 'Absent in sample', rotation = 'vertical', color = cmap[i], horizontalalignment ='center', verticalalignment = 'bottom')
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
809 txt.set_path_effects([PathEffects.Stroke(linewidth=0.5),PathEffects.Normal()])
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
810 elif enrichment[i][y] == 0:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
811 rects[i][y].set_linewidth(1)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
812
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
813 # Remove top/right/bottom axes
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
814 for ax in [ax1,ax2]:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
815 ax.spines['top'].set_visible(False)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
816 ax.spines['right'].set_visible(False)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
817 ax.spines['bottom'].set_visible(False)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
818 ax.tick_params(axis='x', which='both', bottom='on', top='off', labelbottom='on')
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
819 ax.tick_params(axis='y', which='both', left='on', right='off', labelleft='on')
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
820
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
821
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
822
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
823 #ax1.legend(loc='center right', bbox_to_anchor=(1.2, 0),fancybox=True, shadow=True)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
824 ## Showing the plot
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
825 plt.tight_layout()
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
826 fig.subplots_adjust(wspace=0.2)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
827 if pdf:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
828 pdf.savefig()
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
829 plt.close()
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
830 elif svg:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
831 if svg == True:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
832 plt.savefig(counts_type+'.svg')
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
833 else :
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
834 if os.path.splitext(svg)[1] == '.svg':
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
835 plt.savefig('.'.join((os.path.splitext(svg)[0],counts_type,'svg')))
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
836 else:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
837 plt.savefig('.'.join((svg,counts_type,'svg')))
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
838 elif png:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
839 if png == True:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
840 plt.savefig(counts_type+'.png')
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
841 else :
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
842 if os.path.splitext(png)[1] == '.png':
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
843 plt.savefig('.'.join((os.path.splitext(png)[0],counts_type,'png')))
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
844 else:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
845 plt.savefig('.'.join((png,counts_type,'png')))
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
846 else:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
847 plt.show()
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
848 ## Save on disk it as a png image
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
849 #fig.savefig('image_output.png', dpi=300, format='png', bbox_extra_artists=(lgd,), bbox_inches='tight')
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
850
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
851
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
852 def filter_categs_on_biotype(selected_biotype,cpt) :
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
853 filtered_cpt = {}
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
854 for sample in cpt:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
855 filtered_cpt[sample] = {}
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
856 for feature,count in cpt[sample].items():
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
857 if feature[1] == selected_biotype:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
858 filtered_cpt[sample][feature[0]] = count
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
859 return filtered_cpt
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
860
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
861
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
862 ##########################################################################
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
863 # MAIN #
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
864 ##########################################################################
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
865
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
866 def usage_message(name=None):
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
867 return '''
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
868 Generate genome indexes:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
869 python ALFA.py -a GTF_FILE [-g GENOME_INDEX]
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
870 [--chr_len CHR_LENGTHS_FILE]
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
871 Process BAM file(s):
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
872 python ALFA.py -g GENOME_INDEX -i BAM1 LABEL1 [BAM2 LABEL2 ...]
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
873 [--bedgraph] [-s STRAND]
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
874 [-n] [--pdf output.pdf]
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
875 [-d {1,2,3,4}] [-t YMIN YMAX]
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
876 Index genome + process BAM:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
877 python ALFA.py -a GTF_FILE [-g GENOME_INDEX]
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
878 -i BAM1 LABEL1 [BAM2 LABEL2 ...]
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
879 [--chr_len CHR_LENGTHS_FILE]
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
880 [--bedgraph][-s STRAND]
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
881 [-n] [--pdf output.pdf]
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
882 [-d {1,2,3,4}] [-t YMIN YMAX]
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
883
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
884 Process previously created ALFA counts file(s):
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
885 python ALFA.py -c COUNTS1 [COUNTS2 ...]
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
886 [-s STRAND]
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
887 [-n] [--pdf output.pdf]
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
888 [-d {1,2,3,4}] [-t YMIN YMAX]
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
889
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
890 '''
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
891
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
892
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
893 #### Parse command line arguments and store them in 'options'
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
894 parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter, usage=usage_message())
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
895 parser.add_argument('--version', action='version', version='version 1.0', help="show program's version number and exit\n\n-----------\n\n")
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
896 # Options concernant l'index
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
897 parser.add_argument('-g','--genome_index', help="Genome index files path and basename for existing index, or path and basename for new index creation\n\n")
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
898 parser.add_argument('-a','--annotation', metavar = "GTF_FILE", help='Genomic annotations file (GTF format)\n\n')
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
899 parser.add_argument('--chr_len', help='Tabulated file containing chromosome names and lengths\n\n-----------\n\n')
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
900
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
901 # Options pour l'étape d'intersection
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
902 parser.add_argument('-i','--input','--bam', dest='input', metavar=('BAM_FILE1 LABEL1',""), nargs='+', help='Input BAM file(s) and label(s). The BAM files must be sorted by position.\n\n')
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
903 parser.add_argument('--bedgraph', action='store_const',default = False, const = True, help="Use this options if your input file(s) is(are) already in bedgraph format\n\n")
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
904 parser.add_argument('-c','--counts',metavar=('COUNTS_FILE',""), nargs='+', help="Use this options instead of '-i/--input' to provide ALFA counts files as input \ninstead of bam/bedgraph files.\n\n")
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
905 parser.add_argument('-s','--strandness', dest="strandness", nargs=1, action = 'store', default = ['unstranded'], choices = ['unstranded','forward','reverse','fr-firststrand','fr-secondstrand'], metavar="", help ="Library orientation. Choose within: 'unstranded', 'forward'/'fr-firststrand' \nor 'reverse'/'fr-secondstrand'. (Default: 'unstranded')\n\n-----------\n\n")
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
906
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
907 # Options concernant le plot
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
908 parser.add_argument('-biotype_filter',nargs=1,help=argparse.SUPPRESS)#"Make an extra plot of categories distribution using only counts of the specified biotype.")
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
909 parser.add_argument('-d','--categories_depth', type=int, default='3', choices=range(1,5), help = "Use this option to set the hierarchical level that will be considered in the GTF file (default=3): \n(1) gene,intergenic; \n(2) intron,exon,intergenic; \n(3) 5'UTR,CDS,3'UTR,intron,intergenic; \n(4) start_codon,5'UTR,CDS,3'UTR,stop_codon,intron,intergenic. \n\n")
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
910 parser.add_argument('--pdf', nargs='?', default=False, help="Save produced plots in PDF format at specified path ('categories_plots.pdf' if no argument provided)\n\n")
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
911 parser.add_argument('--png', nargs='?', default=False, const=True, help="Save produced plots in PNG format with provided argument as basename \nor 'categories.png' and 'biotypes.png' if no argument provided\n\n")
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
912 parser.add_argument('--svg', nargs='?', default=False, const=True, help="Save produced plots in SVG format with provided argument as basename \nor 'categories.svg' and 'biotypes.svg' if no argument provided\n\n")
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
913 parser.add_argument('-n','--no_plot', dest='quiet', action='store_const', default=False, const=True, help="Do not show plots\n\n")
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
914 parser.add_argument('-t','--threshold', dest='threshold', nargs = 2, metavar=("ymin","ymax"), type=float , help="Set axis limits for enrichment plots\n\n")
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
915
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
916 if len(sys.argv)==1:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
917 parser.print_usage()
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
918 sys.exit(1)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
919
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
920 options = parser.parse_args()
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
921
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
922 def required_arg(arg, aliases):
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
923 if not arg:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
924 print >> sys.stderr, "\nError: %s argument is missing.\n" %aliases
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
925 parser.print_usage()
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
926 sys.exit()
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
927
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
928 def check_files_enxtension(files):
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
929 return
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
930
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
931 # Booleans for steps to be executed
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
932 make_index = False
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
933 intersect_reads = False
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
934 process_counts = False
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
935
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
936 #### Check arguments conformity and define which steps have to be perfomed
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
937 if options.counts :
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
938 # Aucun autre argument requis
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
939 # Vérifier extension input
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
940
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
941 # Action : Faire le plot uniquement
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
942 process_counts = True
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
943 else:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
944 if options.annotation :
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
945 # Vérifier si présence -gi
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
946 if options.genome_index :
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
947 genome_index_basename = options.genome_index
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
948 else:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
949 genome_index_basename = options.annotation.split("/")[-1].split(".gtf")[0]
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
950 # Vérifier si un fichier existe déjà:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
951 if os.path.isfile(genome_index_basename+".stranded.index") :
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
952 if options.input:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
953 print >> sys.stderr, "\nWarning: a index file named '%s' already exists and will be used. If you want to create a new index, please delete this file or specify an other path." %(genome_index_basename+".stranded.index")
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
954 else:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
955 sys.exit("\nError: a index file named %s already exists. If you want to create a new index, please delete this file or specify an other path.\n" %(genome_index_basename+".stranded.index"))
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
956 # sinon -> action : index à faire
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
957 else :
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
958 make_index = True
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
959 # si l'index n'est pas à faire :
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
960 if options.input:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
961 # Arguments requis: input, genome_index
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
962 if 'genome_index_basename' not in locals():
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
963 required_arg(options.genome_index, "-g/--genome_index")
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
964 genome_index_basename = options.genome_index
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
965 required_arg(options.input, "-i/--input/--bam")
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
966 for i in xrange(0, len(options.input), 2):
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
967 try :
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
968 extension = os.path.splitext(options.input[i+1])[1]
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
969 if extension == ".bam" or extension == '.bedgraph' or extension == '.bg':
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
970 sys.exit("Error: it seems input files and associated labels are not correctly provided.\n\
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
971 Make sure to follow the expected format : -i Input_file1 Label1 [Input_file2 Label2 ...].")
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
972 except:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
973 sys.exit("Error: it seems input files and associated labels are not correctly provided.\nMake sure to follow the expected format : -i Input_file1 Label1 [Input_file2 Label2 ...].")
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
974
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
975 intersect_reads = True
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
976 # Vérifier input's extension
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
977 #TODO
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
978 if not (options.counts or options.input or options.annotation):
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
979 sys.exit("\nError : some arguments are missing At least '-a', '-c' or '-i' is required. Please refer to help (-h/--help) and usage cases for more details.\n")
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
980 if not options.counts:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
981 # Declare genome_index variables
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
982 stranded_genome_index = genome_index_basename+".stranded.index"
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
983 unstranded_genome_index = genome_index_basename+".unstranded.index"
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
984 if options.strandness[0] == "unstranded":
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
985 genome_index = unstranded_genome_index
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
986 else:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
987 genome_index = stranded_genome_index
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
988
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
989
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
990
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
991 #### Initialization of some variables
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
992
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
993 # Initializing the category priority order, coding biotypes and the final list
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
994 prios = {'start_codon': 7, 'stop_codon': 7, 'five_prime_utr': 6, 'three_prime_utr': 6, 'UTR': 6, 'CDS': 5, 'exon': 4, 'transcript': 3, 'gene': 2, 'antisense': 1,'intergenic': 0}
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
995
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
996 biotype_prios = None
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
997 #biotype_prios = {"protein_coding":1, "miRNA":2}
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
998
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
999 # Possibles groups of categories to plot
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1000 categs_group1={'start': ['start_codon'], '5UTR': ['five_prime_utr','UTR'], 'CDS': ['CDS', 'exon'], '3UTR': ['three_prime_utr'], 'stop': ['stop_codon'], 'introns': ['transcript', 'gene'], 'intergenic': ['intergenic'], 'antisense': ['antisense']}
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1001 categs_group2={'5UTR': ['five_prime_utr', 'UTR'], 'CDS': ['CDS', 'exon','start_codon','stop_codon'], '3UTR': ['three_prime_utr'], 'introns': ['transcript', 'gene'], 'intergenic': ['intergenic'], 'antisense': ['antisense']}
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1002 categs_group3={'exons': ['five_prime_utr', 'three_prime_utr', 'UTR', 'CDS', 'exon','start_codon','stop_codon'], 'introns': ['transcript', 'gene'], 'intergenic': ['intergenic'], 'antisense': ['antisense']}
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1003 categs_group4={'gene': ['five_prime_utr', 'three_prime_utr', 'UTR', 'CDS', 'exon','start_codon','stop_codon', 'transcript', 'gene'], 'intergenic': ['intergenic'], 'antisense': ['antisense']}
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1004 categs_groups = [categs_group4,categs_group3,categs_group2,categs_group1] # Order and merging for the final plot
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1005 cat_list = ['5UTR', 'start', 'CDS', 'stop', '3UTR', 'exons', 'introns', 'gene', 'intergenic', 'antisense']
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1006
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1007
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1008 # biotypes list
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1009 biotypes ={'protein_coding','polymorphic_pseudogene','TR_C_gene','TR_D_gene','TR_J_gene','TR_V_gene','IG_C_gene','IG_D_gene','IG_J_gene','IG_V_gene',"3prime_overlapping_ncrna","lincRNA","macro_lncRNA","miRNA","misc_RNA","Mt_rRNA","Mt_tRNA","processed_transcript","ribozyme","rRNA","scaRNA","sense_intronic","sense_overlapping","snoRNA","snRNA","sRNA","TEC","vaultRNA","antisense","transcribed_processed_pseudogene","transcribed_unitary_pseudogene","transcribed_unprocessed_pseudogene","translated_unprocessed_pseudogene","TR_J_pseudogene","TR_V_pseudogene","unitary_pseudogene","unprocessed_pseudogene","processed_pseudogene","IG_C_pseudogene","IG_J_pseudogene","IG_V_pseudogene","pseudogene","ncRNA","tRNA"} # Type: set (to access quickly)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1010
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1011 # Grouping of biotypes:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1012 biotypes_group1={'protein_coding':['protein_coding'],'pseudogenes':['polymorphic_pseudogene',"transcribed_processed_pseudogene","transcribed_unitary_pseudogene","transcribed_unprocessed_pseudogene","translated_unprocessed_pseudogene","TR_J_pseudogene","TR_V_pseudogene","unitary_pseudogene","unprocessed_pseudogene","processed_pseudogene","IG_C_pseudogene","IG_J_pseudogene","IG_V_pseudogene","pseudogene"],'TR':['TR_C_gene','TR_D_gene','TR_J_gene','TR_V_gene'],'IG':['IG_C_gene','IG_D_gene','IG_J_gene','IG_V_gene'],\
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1013 'MT_RNA':["Mt_rRNA","Mt_tRNA"],\
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1014 'ncRNA':["lincRNA","macro_lncRNA","3prime_overlapping_ncrna","ncRNA"],\
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1015 "others":["misc_RNA","processed_transcript","ribozyme","scaRNA","sense_intronic","sense_overlapping","TEC","vaultRNA"],
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1016 "antisense":["antisense"]}
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1017 for biot in ["miRNA","snoRNA","snRNA","rRNA","sRNA","tRNA"]:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1018 biotypes_group1[biot]=[biot]
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1019
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1020 # # Initializing the unkown features lits
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1021 unknown_feature = []
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1022
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1023 # Initializing the genome category counter dict
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1024 cpt_genome = {}
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1025
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1026
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1027 if process_counts :
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1028 #### If input files are the categories counts, just load them and continue to recategorization step
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1029 cpt,cpt_genome,samples_names = read_counts_files(options.counts)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1030 else:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1031 #### Create genome index if needed and get the sizes of categories
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1032 if make_index :
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1033 #### Get the chromosome lengths
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1034 lengths = get_chromosome_lengths(options)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1035 # Generating the genome index files if the user didn't provide them
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1036 create_genome_index(options.annotation, unstranded_genome_index, stranded_genome_index, cpt_genome, prios, biotypes, lengths)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1037
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1038
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1039 #print '\nChr lengths:', lengths
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1040
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1041 if intersect_reads:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1042 # If the indexes already exist, read them to compute the sizes of the categories in the genome and retrieve the chromosome lengths
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1043 if not make_index :
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1044 print "\n### Reading genome indexes\n...\r",
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1045 sys.stdout.flush()
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1046 lengths={}
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1047 with open(genome_index, 'r') as genome_index_file:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1048 for line in genome_index_file:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1049 if line[0] == "#":
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1050 lengths[line.split('\t')[0][1:]] = int(line.split('\t')[1])
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1051 else :
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1052 add_info(cpt_genome, line.rstrip().split('\t')[4:], line.split('\t')[1], line.split('\t')[2], biotype_prios = None, categ_prios = prios)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1053
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1054 #### Computing the genome intergenic count: sum of the chr lengths minus sum of the genome annotated intervals
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1055 cpt_genome[('intergenic','intergenic')] = sum(lengths.itervalues()) - sum([v for x,v in cpt_genome.iteritems() if x != ('antisense','antisense')])
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1056 if not make_index :
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1057 print "Done!"
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1058 #print '\nGenome category counts:'
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1059 #for key,val in cpt_genome.iteritems():
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1060 #print key,"\t",val
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1061
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1062
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1063 #### Create the Bedgraph files if needed and get the files list
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1064
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1065 if not options.bedgraph:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1066 # Generating the BEDGRAPH files is the user provided BAM file(s) and get the samples labels (this names will be used in the plot legend)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1067 samples_files, samples_names = create_bedgraph_files(options.input,options.strandness[0])
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1068 else:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1069 # Just initialize the files list with the bedgraph paths
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1070 samples_files = [options.input[i] for i in range(0,len(options.input),2)]
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1071 # and get the labels
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1072 samples_names = [options.input[i] for i in range(1,len(options.input),2)]
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1073 #### Retrieving chromosome names saved in index
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1074 chrom_list = get_chromosome_names_in_index(genome_index)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1075 #### Processing the BEDGRAPH files: intersecting the bedgraph with the genome index and count the number of aligned positions in each category
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1076 cpt = intersect_bedgraphs_and_index_to_counts_categories(samples_files,samples_names,prios,genome_index, options.strandness[0], biotype_prios = None)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1077
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1078 #### Write the counts on disk
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1079 write_counts_in_files(cpt,cpt_genome)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1080
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1081 if not (intersect_reads or process_counts) or (options.quiet and options.pdf == False):
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1082 quit("\n### End of program")
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1083 print "\n### Generating plots"
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1084 # Updating the biotypes lists (biotypes and 'biotype_group1'): adding the 'unknow biotypes' found in gtf/index
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1085 if unknown_feature == []: # 'unknown_feature' is define only during the index generation
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1086 # Browse the feature to determine whether some biotypes are 'unknown'
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1087 for sample,counts in cpt.items():
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1088 for (cat,biot) in counts:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1089 if biot not in biotypes and cat not in unknown_feature:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1090 unknown_feature.append(biot)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1091 for new_biot in unknown_feature:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1092 biotypes.add(new_biot)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1093 biotypes_group1["others"].append(new_biot)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1094 biotypes = sorted(biotypes)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1095 # move antisense categ to the end of the list
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1096 biotypes.remove('antisense')
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1097 biotypes.append('antisense')
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1098 biotypes_group1 = sorted(biotypes_group1)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1099
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1100
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1101 #print '\nCounts for every category/biotype pair: ',cpt
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1102
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1103 # Generating plots
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1104 if options.pdf != False:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1105 if options.pdf == None:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1106 options.pdf = "categories_plots.pdf"
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1107 pdf = PdfPages(options.pdf)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1108 else:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1109 pdf = False
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1110
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1111 selected_biotype = None
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1112 if options.biotype_filter:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1113 options.biotype_filter = options.biotype_filter[0]
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1114 for sample in cpt:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1115 for feature in cpt[sample]:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1116 biotype = feature[1]
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1117 if options.biotype_filter.lower() == biotype.lower():
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1118 selected_biotype=biotype
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1119 break
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1120 if selected_biotype == None :
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1121 print "\nError: biotype '"+options.biotype_filter+"' not found. Please check the biotype name and that this biotype exists in your sample(s)."
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1122 sys.exit()
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1123
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1124 #Print a warning message if the UTRs are not specified as 5' or 3' (they will be ploted as 5'UTR)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1125 if 'UTR' in [categ[0] for counts in cpt.values() for categ in counts.keys()]:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1126 print '''\nWARNING: (some) 5'UTR/3'UTR are not precisely defined. Consequently, positions annotated as "UTR" will be counted as "5'UTR"\n'''
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1127
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1128 #### Make the plot by categories
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1129 #### Recategorizing with the final categories
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1130 final_cats=categs_groups[options.categories_depth-1]
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1131 final_cat_cpt,final_genome_cpt, filtered_cat_cpt = group_counts_by_categ(cpt,cpt_genome,final_cats,selected_biotype)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1132 #### Display the distribution of specified categories (or biotypes) in samples on a barplot
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1133 # Remove the 'antisense' category if the library type is 'unstranded'
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1134 for dic in cpt.values():
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1135 if ('antisense','antisense') in dic.keys(): break
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1136 else:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1137 cat_list.remove('antisense')
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1138 make_plot(cat_list,samples_names,final_cat_cpt,final_genome_cpt,pdf, "categories",options.threshold, svg = options.svg, png = options.png)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1139 if selected_biotype :
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1140 make_plot(cat_list,samples_names,filtered_cat_cpt,final_genome_cpt,pdf, "categories",options.threshold,title="Categories distribution for '"+selected_biotype+"' biotype", svg = options.svg, png = options.png)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1141
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1142 #### Make the plot by biotypes
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1143 #### Recategorizing with the final categories
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1144 final_cat_cpt,final_genome_cpt = group_counts_by_biotype(cpt,cpt_genome,biotypes)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1145 #### Display the distribution of specified categories (or biotypes) in samples on a barplot
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1146 make_plot(biotypes,samples_names,final_cat_cpt,final_genome_cpt,pdf, "biotypes",options.threshold, svg = options.svg, png = options.png)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1147
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1148
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1149
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1150 ##### Recategorizing with the final categories
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1151 #final_cat_cpt,final_genome_cpt = group_counts_by_biotype(cpt,cpt_genome,biotypes_group1)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1152 ##### Display the distribution of specified categories (or biotypes) in samples on a barplot
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1153 #make_plot(biotypes_group1,samples_names,final_cat_cpt,final_genome_cpt,pdf,"Biotype groups", options.threshold, title="Biotypes distribution in mapped reads \n(biotypes are grouped by 'family')", svg = options.svg, png = options.png)
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1154
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1155
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1156 if options.pdf:
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1157 pdf.close()
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1158 print "\n### Plots saved in pdf file: %s" %options.pdf
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1159
6f0be85be8fb Uploaded
charles-bernard
parents:
diff changeset
1160 print "\n### End of program"