| 0 | 1 #!/usr/bin/env python | 
|  | 2 | 
|  | 3 ''' | 
|  | 4 Author: Silvia Chang | 
|  | 5 Metagenomic Final Project | 
|  | 6 Fall 2014 | 
|  | 7 | 
|  | 8 Script to create fragment recruitment plots, after read-reference alignment using Bowtie (SAM output) and FR-HIT (fr-hit output) programs | 
|  | 9 Simplified/revised version of the fragrecruit project from https://github.com/mstrosaker/fragrecruit | 
|  | 10 | 
|  | 11 Usage: ./frp_tool.py [options] --input --output | 
|  | 12 | 
|  | 13 ''' | 
|  | 14 import matplotlib.pyplot as plt | 
|  | 15 import os, sys, getopt, imp, errno | 
|  | 16 # import shutil | 
|  | 17 from refseq_parser import SequenceFile | 
|  | 18 from SAM_parser import SAMFile | 
|  | 19 from FRHIT_parser import FRHITFile | 
|  | 20 | 
|  | 21 class FragmentPlotter: | 
|  | 22     '''Contains all functions to create fragment recruitment plots''' | 
|  | 23 | 
|  | 24     # Color list. Can only handle 5 samples. Add more colors for more samples. | 
|  | 25     colors = [ | 
|  | 26         (0.78, 0.59, 0.17, 1.0),   #dark gold | 
|  | 27         (0.99, 0.60, 0.17, 1.0),   #light orange | 
|  | 28         (0.60, 0.82, 0.47, 1.0),   #medium/light green | 
|  | 29         (0.95, 0.40, 0.40, 1.0),   #dark pink/fuschia | 
|  | 30         (0.17, 0.50, 0.69, 1.0),   #medium teal/blue | 
|  | 31         (0.85, 0.64, 0.60, 1.0),   #light plum | 
|  | 32     ] | 
|  | 33 | 
|  | 34     def __init__(self, microbiome, pt_color, ymin=50): | 
|  | 35         self.microbiome = microbiome | 
|  | 36         self.decorated = False | 
|  | 37         self.pt_color = pt_color | 
|  | 38         self.ymin = ymin | 
|  | 39 | 
|  | 40     def new_plot(self, ref_name, contigs, begin, length): | 
|  | 41         ''' Clears previous data and start a blank plot''' | 
|  | 42         # refgenome - string, organism name of the refgenome | 
|  | 43         # contigs - a list containing a [name,length] pair of the refgenome | 
|  | 44         # begin, length - integers indicate beginning/length of plot | 
|  | 45 | 
|  | 46         plt.close() | 
|  | 47         # Initialize variables that will contain all the fragments information | 
|  | 48         self.refgenome = ref_name | 
|  | 49         self.contigs = contigs | 
|  | 50         self.begin = begin | 
|  | 51         self.samples = [] | 
|  | 52         self.n_points = 0 | 
|  | 53         self.x_points = [] | 
|  | 54         self.y_points = [] | 
|  | 55         self.clr_points = [] | 
|  | 56         self.decorated = False | 
|  | 57 | 
|  | 58         self.full_length = 0 | 
|  | 59 | 
|  | 60         # Builds the full length of the reference genome, or partial based on begin and length variables | 
|  | 61         for contig in self.contigs: | 
|  | 62             self.full_length += contig[1] | 
|  | 63 | 
|  | 64         # Sets the range of the x axis | 
|  | 65         if length != 0: | 
|  | 66             self.xextent = begin + length | 
|  | 67         else: | 
|  | 68             self.xextent = self.full_length | 
|  | 69 | 
|  | 70     def decorate(self): | 
|  | 71         ''' Decorates the plot by adding axes, axes labels, title, tickmarks, etc.''' | 
|  | 72         if not self.decorated: | 
|  | 73             self.decorated = True | 
|  | 74 | 
|  | 75             # Sets the rc parameters, add x-axis tickmarks and labels them | 
|  | 76             plt.rc('xtick', labelsize=8) | 
|  | 77             ticks = [self.begin] # beginning tick | 
|  | 78             ticks.append(self.xextent/2) # mid tick | 
|  | 79             ticks.append(self.xextent) # last tick | 
|  | 80 | 
|  | 81             labels = [int(self.begin)] # beginning nucleotide | 
|  | 82             labels.append(str(self.xextent/2)) # mid nucleotide | 
|  | 83             labels.append(str(self.xextent)) # last nucleotide | 
|  | 84 | 
|  | 85             plt.xticks(ticks, labels) | 
|  | 86 | 
|  | 87             # Sets x and y axes boundaries and labels | 
|  | 88             plt.xlim(self.begin, self.xextent) | 
|  | 89             plt.ylim(self.ymin, 100) | 
|  | 90             plt.ylabel("% Identity") | 
|  | 91 | 
|  | 92             # Set plot titles | 
|  | 93             plt.title(self.refgenome) | 
|  | 94             plt.suptitle(self.microbiome) | 
|  | 95 | 
|  | 96     def draw(self, filename): | 
|  | 97         '''Creates a scatter plot of fragments after they're added in add_points() and save the plots in the specified file name''' | 
|  | 98 | 
|  | 99         # Sets points size and shape based on number of points recruited | 
|  | 100         if self.n_points > 10000: | 
|  | 101             pt_shape = 'o' | 
|  | 102             pt_size = 1 | 
|  | 103         else: | 
|  | 104             pt_shape ='s' | 
|  | 105             pt_size = 3 | 
|  | 106 | 
|  | 107         # Generate the scatter plot | 
|  | 108 #         plt.scatter(self.x_points, self.y_points, s=pt_size, c=self.clr_points, marker=pt_shape, alpha = 0.1, edgecolors='none') | 
|  | 109 | 
|  | 110         # Adds the legend on the plot. Currently, can only add one. Uncomment the plt.legend method. | 
|  | 111         plt.scatter(self.x_points, self.y_points, s=pt_size, c=self.clr_points, marker=pt_shape, alpha = 0.1, edgecolors='none', label=self.samples[0]) | 
|  | 112         plt.legend(loc='lower left', ncol=3, scatterpoints=1, markerscale=8, prop={'size':10}) | 
|  | 113 | 
|  | 114         # Adds titles, axes, etc | 
|  | 115         self.decorate() | 
|  | 116 | 
|  | 117         # Save the images | 
|  | 118         plt.savefig(filename, dpi=150, bbox_inches=0) | 
|  | 119 | 
|  | 120     def legend(self, filename, width=4): | 
|  | 121         '''Create a legend for the plot''' | 
|  | 122         plt.figure(figsize=(width, 0.45*len(self.samples))) | 
|  | 123         plt.axis('off') | 
|  | 124         plt.ylim(-1,0.1) | 
|  | 125 | 
|  | 126         i = 0 | 
|  | 127         ystep = 0 | 
|  | 128         for s in self.samples: | 
|  | 129             plt.text(0.00, -1*ystep-0.1, 'text', | 
|  | 130                        backgroundcolor=self.colors[i], color=self.colors[i]) | 
|  | 131             plt.text(0.27, -1*ystep-0.1, s) | 
|  | 132             i += 1 | 
|  | 133             ystep += 0.2 | 
|  | 134 | 
|  | 135         plt.title("Samples") | 
|  | 136         plt.savefig(filename, bbox_inches=0) | 
|  | 137 | 
|  | 138     def show(self): | 
|  | 139         self.decorate() | 
|  | 140         plt.show() | 
|  | 141 | 
|  | 142     def add_sample(self, name): | 
|  | 143         self.samples.append(name) | 
|  | 144 | 
|  | 145     def add_points(self, sample, ref_contig, coordinate, identity): | 
|  | 146         x= 0 | 
|  | 147 | 
|  | 148         # Builds the reference length | 
|  | 149         for c  in self.contigs: | 
|  | 150             if (ref_contig == c[0]): | 
|  | 151                 break | 
|  | 152             x += c[1] | 
|  | 153 | 
|  | 154         # Applies new color to different samples | 
|  | 155         clr = self.pt_color | 
|  | 156         for s in self.samples: | 
|  | 157             if(sample == s): | 
|  | 158                 break | 
|  | 159             clr += 1 | 
|  | 160 | 
|  | 161         if x + coordinate >= self.begin and x + coordinate <= self.xextent: | 
|  | 162             self.n_points += 1 | 
|  | 163             self.x_points.append(x + coordinate) | 
|  | 164             self.y_points.append(identity) | 
|  | 165             self.clr_points.append(self.colors[clr]) | 
|  | 166 | 
|  | 167 def usage(): | 
|  | 168     print ('''Usage: | 
|  | 169     %s [options] <aligned_input> <ref_genome> | 
|  | 170       Options | 
|  | 171         -h: display this help message | 
|  | 172         -i <int>: minimum identity to include in plot (y-axis extent) | 
|  | 173         -b <int>: begin the plot at the nth nucleotide in the ref genome | 
|  | 174         -l <int>: include n nucleotides in the x-axis of the plot | 
|  | 175            the default behavior is to plot the entirety of the genome | 
|  | 176         -t <str>: title of the plot (e.g. name of reference genome). Put in quotes. | 
|  | 177         -m <str>: Name of the microbiome. Put in quotes. | 
|  | 178         -c <int>: number indicating color for plot points. | 
|  | 179            ''' % sys.argv[0]) | 
|  | 180 | 
|  | 181 if __name__ == '__main__': | 
|  | 182     # Parse all command-line arguments | 
|  | 183     # variable opts contains a list of the key-value pair of (option, value); variable args contains a list of the configuration file | 
|  | 184     try: | 
|  | 185         opts, args = getopt.getopt(sys.argv[1:], 'hi:b:l:t:m:c:', ['help', 'identity=', 'begin=', 'length=', 'title=', 'microbiome=', 'color=']) | 
|  | 186     except getopt.GetoptError as err: | 
|  | 187         print(str(err)) | 
|  | 188         usage() | 
|  | 189         sys.exit(2) | 
|  | 190 | 
|  | 191     # Set some default values | 
|  | 192     identity = 50.0 | 
|  | 193     begin = 0 | 
|  | 194     length = 0 | 
|  | 195     title = '' | 
|  | 196     microbiome='' | 
|  | 197     color = 0 | 
|  | 198 | 
|  | 199     # Parse options and set user-defined values | 
|  | 200     for opt, value in opts: | 
|  | 201         if opt in ('-h', '--help'): | 
|  | 202             usage() | 
|  | 203             sys.exit(0) | 
|  | 204         elif opt in ('-i', '--identity'): | 
|  | 205             identity = float(value) | 
|  | 206         elif opt in ('-b', '--begin'): | 
|  | 207             begin = int(value) | 
|  | 208         elif opt in ('-l', '--length'): | 
|  | 209             length = int(length) | 
|  | 210         elif opt in ('-t', '--title'): | 
|  | 211             title = str(value) | 
|  | 212         elif opt in ('-m', '--microbiome'): | 
|  | 213             microbiome = str(value) | 
|  | 214         elif opt in ('-c', '--color'): | 
|  | 215             color = int(value) | 
|  | 216         else: | 
|  | 217             assert False, "invalid option" | 
|  | 218 | 
|  | 219     # Verify there are input files: input and ref_genome | 
|  | 220     if len(args) == 0: | 
|  | 221         print ('Need input files.') | 
|  | 222         usage() | 
|  | 223         sys.exit | 
|  | 224 | 
|  | 225     # Load and read the configuration file. Treats the file as module object | 
|  | 226 #     config = imp.load_source('*', args[0]) | 
|  | 227 | 
|  | 228     aligned_input= args[0] | 
|  | 229     ref_genome = args[1] | 
|  | 230 | 
|  | 231     # Instantiate plotting object | 
|  | 232     plotter = FragmentPlotter(microbiome, color, ymin=identity) | 
|  | 233 | 
|  | 234 | 
|  | 235     # Begin the plotting commands, starting each with the reference genome | 
|  | 236 #     for refgenome in ref_genome: | 
|  | 237     '''refgenome = [(name, taxonID, file_path), ..., ]''' | 
|  | 238     contigs = [] | 
|  | 239 | 
|  | 240     # Parse the reference genome's FASTA file. | 
|  | 241     fasta = open(ref_genome, 'r') | 
|  | 242     for refseq in SequenceFile(fasta): | 
|  | 243         contigs.append([refseq.name.split('|')[3], len(refseq.seq)]) | 
|  | 244 | 
|  | 245     fasta.close() | 
|  | 246 | 
|  | 247     # Sets the ref genome's accession # as the title if title is not set (i.e. empty) | 
|  | 248     if title == '': | 
|  | 249         title = contigs[0][0] | 
|  | 250 | 
|  | 251     # Initialize the new plot | 
|  | 252     plotter.new_plot(title, contigs, begin, length) | 
|  | 253 | 
|  | 254     count_samp = 0 | 
|  | 255 #     for sample in aligned_input: | 
|  | 256     '''sample = [(name, {taxonID:path}), ...,]''' | 
|  | 257 #     plotter.add_sample("test") | 
|  | 258     count_samp += 1 | 
|  | 259 | 
|  | 260     # Call the appropriate parser based on sample file extension | 
|  | 261 #             file = '%s%s' % (config.basepath, sample[1][refgenome[1]]) | 
|  | 262 | 
|  | 263 #     if aligned_input.endswith('.sam'): | 
|  | 264 #         parser_func = SAMFile | 
|  | 265 #     elif aligned_input.endswith('.fr-hit'): | 
|  | 266 #         parser_func = FRHITFile | 
|  | 267 #     else: | 
|  | 268 #         print('Invalid file type') | 
|  | 269 | 
|  | 270     parser_func = SAMFile | 
|  | 271 | 
|  | 272     sample_name ="" | 
|  | 273     for frag in parser_func(aligned_input): | 
|  | 274         if frag.identity >= identity: | 
|  | 275             sample_name = frag.name.split('.')[0] | 
|  | 276             plotter.add_points(frag.name, frag.refseq, frag.location, frag.identity) | 
|  | 277 | 
|  | 278     plotter.add_sample(sample_name) | 
|  | 279         # Put this inside the sample for-loop to create individual plot per sample | 
|  | 280     plotter.draw('%s.png' % ("frp_image")) | 
|  | 281 | 
|  | 282 | 
|  | 283 #     plotter.show() | 
|  | 284 | 
|  | 285         # Draw the points and save file with taxonID as base name. Put this outside the sample for-loop to create an all-samples plot. | 
|  | 286         # Put this inside the sample for-loop to create individual plot per sample | 
|  | 287 #         plotter.draw('%s/%s.png' % (config.shortname, refgenome[1])) | 
|  | 288 | 
|  | 289 #     plotter.legend('%s/legend.png' % (config.shortname)) | 
|  | 290 | 
|  | 291     # Make a tarball of the resulting PNGs | 
|  | 292 #     os.system('tar -cvf %s.tar %s >/dev/null' % | 
|  | 293 #               ( config.shortname, config.shortname)) | 
|  | 294 #     os.system('gzip %s.tar' % (config.shortname)) | 
|  | 295 #     shutil.rmtree(config.shortname) | 
|  | 296 | 
|  | 297 | 
|  | 298 | 
|  | 299 | 
|  | 300 |