comparison fsd.py @ 10:f7136e93604b draft

planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd commit b9403b3ce2b7a41fa8ee1aa47909152de78cf641
author mheinzl
date Tue, 15 May 2018 13:49:34 -0400
parents 43b16247c35a
children b615c7e51f40
comparison
equal deleted inserted replaced
9:2c5fc522ded4 10:f7136e93604b
9 # The program produces a plot which shows the distribution of family sizes of all SSCSs from the input files and 9 # The program produces a plot which shows the distribution of family sizes of all SSCSs from the input files and
10 # a CSV file with the data of the plot, as well as a TXT file with all tags of the DCS and their family sizes. 10 # a CSV file with the data of the plot, as well as a TXT file with all tags of the DCS and their family sizes.
11 # If only one file is provided, then a family size distribution, which is separated into SSCSs without a partner and DCSs, is produced. 11 # If only one file is provided, then a family size distribution, which is separated into SSCSs without a partner and DCSs, is produced.
12 # When more than one file (up to 4) is given, a family size distribution with the data of all files in one plot is produced. 12 # When more than one file (up to 4) is given, a family size distribution with the data of all files in one plot is produced.
13 13
14 # USAGE: python FSD_Galaxy_1.4_commandLine_FINAL.py filename --inputFile2 filename2 --inputFile3 filename3 --inputFile4 filename4 / 14 # USAGE: python FSD_Galaxy_1.4_commandLine_FINAL.py --inputFile1 filename --inputName1 filename --inputFile2 filename2 --inputName2 filename2 --inputFile3 filename3 --inputName3 filename3 --inputFile4 filename4 --inputName4 filename4 --sep "characterWhichSeparatesCSVFile" --output_csv outputfile_name_csv --output_pdf outputfile_name_pdf
15 # --title_file outputFileName --sep "characterWhichSeparatesCSVFile"
16 15
17 import numpy 16 import numpy
18 import matplotlib.pyplot as plt 17 import matplotlib.pyplot as plt
19 from matplotlib.backends.backend_pdf import PdfPages 18 from matplotlib.backends.backend_pdf import PdfPages
20 import argparse 19 import argparse
53 parser = make_argparser() 52 parser = make_argparser()
54 args=parser.parse_args(argv[1:]) 53 args=parser.parse_args(argv[1:])
55 54
56 firstFile = args.inputFile1 55 firstFile = args.inputFile1
57 name1 = args.inputName1 56 name1 = args.inputName1
58 #firstFile = args.inputName1
59 57
60 secondFile = args.inputFile2 58 secondFile = args.inputFile2
61 name2 = args.inputName2 59 name2 = args.inputName2
62 thirdFile = args.inputFile3 60 thirdFile = args.inputFile3
63 name3 = args.inputName3 61 name3 = args.inputName3
73 exit(4) 71 exit(4)
74 72
75 plt.rc('figure', figsize=(11.69, 8.27)) # A4 format 73 plt.rc('figure', figsize=(11.69, 8.27)) # A4 format
76 plt.rcParams['patch.edgecolor'] = "black" 74 plt.rcParams['patch.edgecolor'] = "black"
77 plt.rcParams['axes.facecolor'] = "E0E0E0" # grey background color 75 plt.rcParams['axes.facecolor'] = "E0E0E0" # grey background color
78 plt.rcParams['xtick.labelsize'] = 12 76 plt.rcParams['xtick.labelsize'] = 14
79 plt.rcParams['ytick.labelsize'] = 12 77 plt.rcParams['ytick.labelsize'] = 14
80 78
81 list_to_plot = [] 79 list_to_plot = []
82 label = [] 80 label = []
83 data_array_list = [] 81 data_array_list = []
84 82
85 with open(title_file, "w") as output_file, PdfPages(title_file2) as pdf: 83 with open(title_file, "w") as output_file, PdfPages(title_file2) as pdf:
86 fig = plt.figure() 84 fig = plt.figure()
87 plt.subplots_adjust(bottom=0.25) 85 plt.subplots_adjust(bottom=0.25)
88 if firstFile != str(None): 86 if firstFile != str(None):
89 print(firstFile)
90
91 file1 = readFileReferenceFree(firstFile) 87 file1 = readFileReferenceFree(firstFile)
92 integers = numpy.array(file1[:, 0]).astype(int) ## keep original family sizes 88 integers = numpy.array(file1[:, 0]).astype(int) ## keep original family sizes
93 89
94 # for plot: replace all big family sizes by 22 90 # for plot: replace all big family sizes by 22
95 data1 = numpy.array(file1[:, 0]).astype(int) 91 data1 = numpy.array(file1[:, 0]).astype(int)
215 ticks1 = map(str, ticks) 211 ticks1 = map(str, ticks)
216 ticks1[len(ticks1) - 1] = ">20" 212 ticks1[len(ticks1) - 1] = ">20"
217 plt.xticks(numpy.array(ticks), ticks1) 213 plt.xticks(numpy.array(ticks), ticks1)
218 214
219 plt.legend(loc='upper right', fontsize=14, frameon=True, bbox_to_anchor=(0.9, 1)) 215 plt.legend(loc='upper right', fontsize=14, frameon=True, bbox_to_anchor=(0.9, 1))
220 plt.title("Family Size Distribution", fontsize=14) 216 # plt.title("Family Size Distribution", fontsize=14)
221 plt.xlabel("No. of Family Members", fontsize=14) 217 plt.xlabel("Family size", fontsize=14)
222 plt.ylabel("Absolute Frequency", fontsize=14) 218 plt.ylabel("Absolute Frequency", fontsize=14)
223 plt.margins(0.01, None) 219 plt.margins(0.01, None)
224 plt.grid(b=True, which="major", color="#424242", linestyle=":") 220 plt.grid(b=True, which="major", color="#424242", linestyle=":")
225 pdf.savefig(fig) 221 pdf.savefig(fig)
226 plt.close() 222 plt.close()
323 plt.xticks(numpy.array(ticks), ticks1) 319 plt.xticks(numpy.array(ticks), ticks1)
324 singl = counts[0][2][0] # singletons 320 singl = counts[0][2][0] # singletons
325 last = counts[0][2][len(counts[0][0]) - 1] # large families 321 last = counts[0][2][len(counts[0][0]) - 1] # large families
326 322
327 plt.legend(loc='upper right', fontsize=14, bbox_to_anchor=(0.9, 1), frameon=True) 323 plt.legend(loc='upper right', fontsize=14, bbox_to_anchor=(0.9, 1), frameon=True)
328 plt.title(name1, fontsize=14) 324 # plt.title(name1, fontsize=14)
329 plt.xlabel("No. of Family Members", fontsize=14) 325 plt.xlabel("Family size", fontsize=14)
330 plt.ylabel("Absolute Frequency", fontsize=14) 326 plt.ylabel("Absolute Frequency", fontsize=14)
331 plt.margins(0.01, None) 327 plt.margins(0.01, None)
332 plt.grid(b=True, which="major", color="#424242", linestyle=":") 328 plt.grid(b=True, which="major", color="#424242", linestyle=":")
333 329
334 ## extra information beneath the plot 330 ## extra information beneath the plot