# HG changeset patch # User schang # Date 1417567174 18000 # Node ID cbdcc962173097b13bec76e2cdb74027b01da31f # Parent e223b827a13ef05c7798a27c3882117854d7b8ad Deleted selected files diff -r e223b827a13e -r cbdcc9621730 frp_tool.py --- a/frp_tool.py Tue Dec 02 19:38:14 2014 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,300 +0,0 @@ -#!/usr/bin/env python - -''' -Author: Silvia Chang -Metagenomic Final Project -Fall 2014 - -Script to create fragment recruitment plots, after read-reference alignment using Bowtie (SAM output) and FR-HIT (fr-hit output) programs -Simplified/revised version of the fragrecruit project from https://github.com/mstrosaker/fragrecruit - -Usage: ./frp_tool.py [options] --input --output - -''' -import matplotlib.pyplot as plt -import os, sys, getopt, imp, errno -# import shutil -from refseq_parser import SequenceFile -from SAM_parser import SAMFile -from FRHIT_parser import FRHITFile - -class FragmentPlotter: - '''Contains all functions to create fragment recruitment plots''' - - # Color list. Can only handle 5 samples. Add more colors for more samples. - colors = [ - (0.78, 0.59, 0.17, 1.0), #dark gold - (0.99, 0.60, 0.17, 1.0), #light orange - (0.60, 0.82, 0.47, 1.0), #medium/light green - (0.95, 0.40, 0.40, 1.0), #dark pink/fuschia - (0.17, 0.50, 0.69, 1.0), #medium teal/blue - (0.85, 0.64, 0.60, 1.0), #light plum - ] - - def __init__(self, microbiome, pt_color, ymin=50): - self.microbiome = microbiome - self.decorated = False - self.pt_color = pt_color - self.ymin = ymin - - def new_plot(self, ref_name, contigs, begin, length): - ''' Clears previous data and start a blank plot''' - # refgenome - string, organism name of the refgenome - # contigs - a list containing a [name,length] pair of the refgenome - # begin, length - integers indicate beginning/length of plot - - plt.close() - # Initialize variables that will contain all the fragments information - self.refgenome = ref_name - self.contigs = contigs - self.begin = begin - self.samples = [] - self.n_points = 0 - self.x_points = [] - self.y_points = [] - self.clr_points = [] - self.decorated = False - - self.full_length = 0 - - # Builds the full length of the reference genome, or partial based on begin and length variables - for contig in self.contigs: - self.full_length += contig[1] - - # Sets the range of the x axis - if length != 0: - self.xextent = begin + length - else: - self.xextent = self.full_length - - def decorate(self): - ''' Decorates the plot by adding axes, axes labels, title, tickmarks, etc.''' - if not self.decorated: - self.decorated = True - - # Sets the rc parameters, add x-axis tickmarks and labels them - plt.rc('xtick', labelsize=8) - ticks = [self.begin] # beginning tick - ticks.append(self.xextent/2) # mid tick - ticks.append(self.xextent) # last tick - - labels = [int(self.begin)] # beginning nucleotide - labels.append(str(self.xextent/2)) # mid nucleotide - labels.append(str(self.xextent)) # last nucleotide - - plt.xticks(ticks, labels) - - # Sets x and y axes boundaries and labels - plt.xlim(self.begin, self.xextent) - plt.ylim(self.ymin, 100) - plt.ylabel("% Identity") - - # Set plot titles - plt.title(self.refgenome) - plt.suptitle(self.microbiome) - - def draw(self, filename): - '''Creates a scatter plot of fragments after they're added in add_points() and save the plots in the specified file name''' - - # Sets points size and shape based on number of points recruited - if self.n_points > 10000: - pt_shape = 'o' - pt_size = 1 - else: - pt_shape ='s' - pt_size = 3 - - # Generate the scatter plot -# plt.scatter(self.x_points, self.y_points, s=pt_size, c=self.clr_points, marker=pt_shape, alpha = 0.1, edgecolors='none') - - # Adds the legend on the plot. Currently, can only add one. Uncomment the plt.legend method. - plt.scatter(self.x_points, self.y_points, s=pt_size, c=self.clr_points, marker=pt_shape, alpha = 0.1, edgecolors='none', label=self.samples[0]) - plt.legend(loc='lower left', ncol=3, scatterpoints=1, markerscale=8, prop={'size':10}) - - # Adds titles, axes, etc - self.decorate() - - # Save the images - plt.savefig(filename, dpi=150, bbox_inches=0) - - def legend(self, filename, width=4): - '''Create a legend for the plot''' - plt.figure(figsize=(width, 0.45*len(self.samples))) - plt.axis('off') - plt.ylim(-1,0.1) - - i = 0 - ystep = 0 - for s in self.samples: - plt.text(0.00, -1*ystep-0.1, 'text', - backgroundcolor=self.colors[i], color=self.colors[i]) - plt.text(0.27, -1*ystep-0.1, s) - i += 1 - ystep += 0.2 - - plt.title("Samples") - plt.savefig(filename, bbox_inches=0) - - def show(self): - self.decorate() - plt.show() - - def add_sample(self, name): - self.samples.append(name) - - def add_points(self, sample, ref_contig, coordinate, identity): - x= 0 - - # Builds the reference length - for c in self.contigs: - if (ref_contig == c[0]): - break - x += c[1] - - # Applies new color to different samples - clr = self.pt_color - for s in self.samples: - if(sample == s): - break - clr += 1 - - if x + coordinate >= self.begin and x + coordinate <= self.xextent: - self.n_points += 1 - self.x_points.append(x + coordinate) - self.y_points.append(identity) - self.clr_points.append(self.colors[clr]) - -def usage(): - print ('''Usage: - %s [options] - Options - -h: display this help message - -i : minimum identity to include in plot (y-axis extent) - -b : begin the plot at the nth nucleotide in the ref genome - -l : include n nucleotides in the x-axis of the plot - the default behavior is to plot the entirety of the genome - -t : title of the plot (e.g. name of reference genome). Put in quotes. - -m : Name of the microbiome. Put in quotes. - -c : number indicating color for plot points. - ''' % sys.argv[0]) - -if __name__ == '__main__': - # Parse all command-line arguments - # variable opts contains a list of the key-value pair of (option, value); variable args contains a list of the configuration file - try: - opts, args = getopt.getopt(sys.argv[1:], 'hi:b:l:t:m:c:', ['help', 'identity=', 'begin=', 'length=', 'title=', 'microbiome=', 'color=']) - except getopt.GetoptError as err: - print(str(err)) - usage() - sys.exit(2) - - # Set some default values - identity = 50.0 - begin = 0 - length = 0 - title = '' - microbiome='' - color = 0 - - # Parse options and set user-defined values - for opt, value in opts: - if opt in ('-h', '--help'): - usage() - sys.exit(0) - elif opt in ('-i', '--identity'): - identity = float(value) - elif opt in ('-b', '--begin'): - begin = int(value) - elif opt in ('-l', '--length'): - length = int(length) - elif opt in ('-t', '--title'): - title = str(value) - elif opt in ('-m', '--microbiome'): - microbiome = str(value) - elif opt in ('-c', '--color'): - color = int(value) - else: - assert False, "invalid option" - - # Verify there are input files: input and ref_genome - if len(args) == 0: - print ('Need input files.') - usage() - sys.exit - - # Load and read the configuration file. Treats the file as module object -# config = imp.load_source('*', args[0]) - - aligned_input= args[0] - ref_genome = args[1] - - # Instantiate plotting object - plotter = FragmentPlotter(microbiome, color, ymin=identity) - - - # Begin the plotting commands, starting each with the reference genome -# for refgenome in ref_genome: - '''refgenome = [(name, taxonID, file_path), ..., ]''' - contigs = [] - - # Parse the reference genome's FASTA file. - fasta = open(ref_genome, 'r') - for refseq in SequenceFile(fasta): - contigs.append([refseq.name.split('|')[3], len(refseq.seq)]) - - fasta.close() - - # Sets the ref genome's accession # as the title if title is not set (i.e. empty) - if title == '': - title = contigs[0][0] - - # Initialize the new plot - plotter.new_plot(title, contigs, begin, length) - - count_samp = 0 -# for sample in aligned_input: - '''sample = [(name, {taxonID:path}), ...,]''' -# plotter.add_sample("test") - count_samp += 1 - - # Call the appropriate parser based on sample file extension -# file = '%s%s' % (config.basepath, sample[1][refgenome[1]]) - -# if aligned_input.endswith('.sam'): -# parser_func = SAMFile -# elif aligned_input.endswith('.fr-hit'): -# parser_func = FRHITFile -# else: -# print('Invalid file type') - - parser_func = SAMFile - - sample_name ="" - for frag in parser_func(aligned_input): - if frag.identity >= identity: - sample_name = frag.name.split('.')[0] - plotter.add_points(frag.name, frag.refseq, frag.location, frag.identity) - - plotter.add_sample(sample_name) - # Put this inside the sample for-loop to create individual plot per sample - plotter.draw('%s.png' % ("frp_image")) - - -# plotter.show() - - # Draw the points and save file with taxonID as base name. Put this outside the sample for-loop to create an all-samples plot. - # Put this inside the sample for-loop to create individual plot per sample -# plotter.draw('%s/%s.png' % (config.shortname, refgenome[1])) - -# plotter.legend('%s/legend.png' % (config.shortname)) - - # Make a tarball of the resulting PNGs -# os.system('tar -cvf %s.tar %s >/dev/null' % -# ( config.shortname, config.shortname)) -# os.system('gzip %s.tar' % (config.shortname)) -# shutil.rmtree(config.shortname) - - - - - \ No newline at end of file