annotate fsd_regions.py @ 4:b202c97deabe draft

planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit dfaab79252a858e8df16bbea3607ebf1b6962e5a
author mheinzl
date Mon, 08 Oct 2018 05:53:50 -0400
parents 2631864873d7
children 52454637bc45
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
1 #!/usr/bin/env python
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
2
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
3 # Family size distribution of tags which were aligned to the reference genome
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
4 #
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
5 # Author: Monika Heinzl, Johannes-Kepler University Linz (Austria)
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
6 # Contact: monika.heinzl@edumail.at
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
7 #
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
8 # Takes at least one TABULAR file with tags before the alignment to the SSCS
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
9 # and a TXT with tags of reads that overlap the regions of the reference genome as input.
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
10 # The program produces a plot which shows the distribution of family sizes of the tags from the input files and
4
b202c97deabe planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit dfaab79252a858e8df16bbea3607ebf1b6962e5a
mheinzl
parents: 2
diff changeset
11 # a tabular file with the data of the plot.
0
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
12
4
b202c97deabe planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit dfaab79252a858e8df16bbea3607ebf1b6962e5a
mheinzl
parents: 2
diff changeset
13 # USAGE: python FSD_regions_1.6_FINAL.py --inputFile filenameSSCS --inputName1 filenameSSCS --ref_genome filenameRefGenome --output_tabular outptufile_name_tabular --output_pdf outptufile_name_pdf
0
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
14
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
15 import argparse
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
16 import sys
4
b202c97deabe planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit dfaab79252a858e8df16bbea3607ebf1b6962e5a
mheinzl
parents: 2
diff changeset
17
b202c97deabe planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit dfaab79252a858e8df16bbea3607ebf1b6962e5a
mheinzl
parents: 2
diff changeset
18 import matplotlib.pyplot as plt
b202c97deabe planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit dfaab79252a858e8df16bbea3607ebf1b6962e5a
mheinzl
parents: 2
diff changeset
19 import numpy
0
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
20 from matplotlib.backends.backend_pdf import PdfPages
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
21
4
b202c97deabe planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit dfaab79252a858e8df16bbea3607ebf1b6962e5a
mheinzl
parents: 2
diff changeset
22 plt.switch_backend('agg')
b202c97deabe planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit dfaab79252a858e8df16bbea3607ebf1b6962e5a
mheinzl
parents: 2
diff changeset
23
b202c97deabe planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit dfaab79252a858e8df16bbea3607ebf1b6962e5a
mheinzl
parents: 2
diff changeset
24
0
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
25 def readFileReferenceFree(file, delim):
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
26 with open(file, 'r') as dest_f:
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
27 data_array = numpy.genfromtxt(dest_f, skip_header=0, delimiter=delim, comments='#', dtype='string')
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
28 return(data_array)
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
29
4
b202c97deabe planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit dfaab79252a858e8df16bbea3607ebf1b6962e5a
mheinzl
parents: 2
diff changeset
30
0
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
31 def make_argparser():
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
32 parser = argparse.ArgumentParser(description='Family Size Distribution of tags which were aligned to regions of the reference genome')
4
b202c97deabe planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit dfaab79252a858e8df16bbea3607ebf1b6962e5a
mheinzl
parents: 2
diff changeset
33 parser.add_argument('--inputFile', help='Tabular File with three columns: ab or ba, tag and family size.')
0
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
34 parser.add_argument('--inputName1')
4
b202c97deabe planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit dfaab79252a858e8df16bbea3607ebf1b6962e5a
mheinzl
parents: 2
diff changeset
35 parser.add_argument('--ref_genome', help='TXT File with tags of reads that overlap the region.')
b202c97deabe planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit dfaab79252a858e8df16bbea3607ebf1b6962e5a
mheinzl
parents: 2
diff changeset
36 parser.add_argument('--output_pdf', default="data.pdf", type=str, help='Name of the pdf and tabular file.')
b202c97deabe planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit dfaab79252a858e8df16bbea3607ebf1b6962e5a
mheinzl
parents: 2
diff changeset
37 parser.add_argument('--output_tabular', default="data.tabular", type=str, help='Name of the pdf and tabular file.')
0
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
38 return parser
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
39
4
b202c97deabe planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit dfaab79252a858e8df16bbea3607ebf1b6962e5a
mheinzl
parents: 2
diff changeset
40
0
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
41 def compare_read_families_refGenome(argv):
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
42 parser = make_argparser()
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
43 args = parser.parse_args(argv[1:])
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
44
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
45 firstFile = args.inputFile
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
46 name1 = args.inputName1
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
47 name1 = name1.split(".tabular")[0]
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
48 refGenome = args.ref_genome
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
49 title_file = args.output_pdf
4
b202c97deabe planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit dfaab79252a858e8df16bbea3607ebf1b6962e5a
mheinzl
parents: 2
diff changeset
50 title_file2 = args.output_tabular
b202c97deabe planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit dfaab79252a858e8df16bbea3607ebf1b6962e5a
mheinzl
parents: 2
diff changeset
51 sep = "\t"
0
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
52
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
53 with open(title_file2, "w") as output_file, PdfPages(title_file) as pdf:
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
54 data_array = readFileReferenceFree(firstFile, "\t")
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
55
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
56 mut_array = readFileReferenceFree(refGenome, " ")
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
57 length_regions = len(mut_array)
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
58
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
59 seq = numpy.array(data_array[:, 1])
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
60 tags = numpy.array(data_array[:, 2])
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
61 quant = numpy.array(data_array[:, 0]).astype(int)
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
62 group = numpy.array(mut_array[:, 0])
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
63
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
64 all_ab = seq[numpy.where(tags == "ab")[0]]
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
65 all_ba = seq[numpy.where(tags == "ba")[0]]
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
66 quant_ab = quant[numpy.where(tags == "ab")[0]]
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
67 quant_ba = quant[numpy.where(tags == "ba")[0]]
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
68
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
69 seqDic_ab = dict(zip(all_ab, quant_ab))
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
70 seqDic_ba = dict(zip(all_ba, quant_ba))
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
71
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
72 seq_mut = numpy.array(mut_array[:, 1])
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
73
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
74 groupUnique, group_index = numpy.unique(group, return_index=True)
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
75 groupUnique = groupUnique[numpy.argsort(group_index)]
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
76
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
77 lst_ab = []
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
78 for i in seq_mut:
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
79 lst_ab.append(seqDic_ab.get(i))
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
80
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
81 lst_ba = []
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
82 for i in seq_mut:
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
83 lst_ba.append(seqDic_ba.get(i))
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
84
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
85 quant_ab = numpy.array(lst_ab)
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
86 quant_ba = numpy.array(lst_ba)
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
87
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
88 quantAfterRegion = []
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
89 for i in groupUnique:
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
90 dataAB = quant_ab[numpy.where(group == i)[0]]
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
91 dataBA = quant_ba[numpy.where(group == i)[0]]
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
92 bigFamilies = numpy.where(dataAB > 20)[0]
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
93 dataAB[bigFamilies] = 22
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
94 bigFamilies = numpy.where(dataBA > 20)[0]
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
95 dataBA[bigFamilies] = 22
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
96
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
97 quantAll = numpy.concatenate((dataAB, dataBA))
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
98 quantAfterRegion.append(quantAll)
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
99
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
100 maximumX = numpy.amax(numpy.concatenate(quantAfterRegion))
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
101 minimumX = numpy.amin(numpy.concatenate(quantAfterRegion))
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
102
4
b202c97deabe planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit dfaab79252a858e8df16bbea3607ebf1b6962e5a
mheinzl
parents: 2
diff changeset
103 # PLOT
0
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
104 plt.rc('figure', figsize=(11.69, 8.27)) # A4 format
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
105 plt.rcParams['axes.facecolor'] = "E0E0E0" # grey background color
1
9ce2b4089c1b planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit b9403b3ce2b7a41fa8ee1aa47909152de78cf641
mheinzl
parents: 0
diff changeset
106 plt.rcParams['xtick.labelsize'] = 14
9ce2b4089c1b planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit b9403b3ce2b7a41fa8ee1aa47909152de78cf641
mheinzl
parents: 0
diff changeset
107 plt.rcParams['ytick.labelsize'] = 14
0
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
108 plt.rcParams['patch.edgecolor'] = "black"
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
109 fig = plt.figure()
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
110 plt.subplots_adjust(bottom=0.3)
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
111
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
112 colors = ["#6E6E6E", "#0431B4", "#5FB404", "#B40431", "#F4FA58", "#DF7401", "#81DAF5"]
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
113
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
114 col = []
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
115 for i in range(0, len(groupUnique)):
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
116 col.append(colors[i])
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
117
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
118 counts = plt.hist(quantAfterRegion, bins=range(minimumX, maximumX + 1), stacked=False, label=groupUnique,
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
119 align="left", alpha=1, color=col, edgecolor="black", linewidth=1)
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
120 ticks = numpy.arange(minimumX - 1, maximumX, 1)
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
121
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
122 ticks1 = map(str, ticks)
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
123 ticks1[len(ticks1) - 1] = ">20"
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
124 plt.xticks(numpy.array(ticks), ticks1)
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
125 count = numpy.bincount(map(int, quant_ab)) # original counts
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
126
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
127 legend = "max. family size =\nabsolute frequency=\nrelative frequency=\n\ntotal nr. of reads="
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
128 plt.text(0.15, 0.105, legend, size=11, transform=plt.gcf().transFigure)
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
129
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
130 legend = "AB\n{}\n{}\n{:.5f}\n\n{:,}" \
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
131 .format(max(map(int, quant_ab)), count[len(count) - 1], float(count[len(count) - 1]) / sum(count),
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
132 sum(numpy.array(data_array[:, 0]).astype(int)))
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
133 plt.text(0.35, 0.105, legend, size=11, transform=plt.gcf().transFigure)
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
134
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
135 count2 = numpy.bincount(map(int, quant_ba)) # original counts
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
136
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
137 legend = "BA\n{}\n{}\n{:.5f}" \
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
138 .format(max(map(int, quant_ba)), count2[len(count2) - 1], float(count2[len(count2) - 1]) / sum(count2))
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
139 plt.text(0.45, 0.15, legend, size=11, transform=plt.gcf().transFigure)
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
140
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
141 legend1 = "total nr. of tags="
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
142 legend2 = "total numbers * \n{:,}".format(length_regions)
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
143 plt.text(0.6, 0.2, legend1, size=11, transform=plt.gcf().transFigure)
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
144 plt.text(0.75, 0.2, legend2, size=11, transform=plt.gcf().transFigure)
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
145 legend4 = "* In the plot, both family sizes of the ab and ba strands were used.\nWhereas the total numbers indicate only the single count of the tags per region.\n"
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
146 plt.text(0.1, 0.02, legend4, size=11, transform=plt.gcf().transFigure)
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
147
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
148 space = numpy.arange(0, len(groupUnique), 0.02)
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
149 for i, s, count in zip(groupUnique, space, quantAfterRegion):
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
150 plt.text(0.6, 0.05 + s, "{}=\n".format(i), size=11, transform=plt.gcf().transFigure)
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
151 plt.text(0.75, 0.05 + s, "{:,}\n".format(len(count) / 2), size=11, transform=plt.gcf().transFigure)
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
152
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
153 plt.legend(loc='upper right', fontsize=14, bbox_to_anchor=(0.9, 1), frameon=True)
4
b202c97deabe planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit dfaab79252a858e8df16bbea3607ebf1b6962e5a
mheinzl
parents: 2
diff changeset
154 # plt.title(name1, fontsize=14)
1
9ce2b4089c1b planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit b9403b3ce2b7a41fa8ee1aa47909152de78cf641
mheinzl
parents: 0
diff changeset
155 plt.xlabel("Family size", fontsize=14)
9ce2b4089c1b planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit b9403b3ce2b7a41fa8ee1aa47909152de78cf641
mheinzl
parents: 0
diff changeset
156 plt.ylabel("Absolute Frequency", fontsize=14)
0
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
157 plt.grid(b=True, which="major", color="#424242", linestyle=":")
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
158 plt.margins(0.01, None)
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
159
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
160 # plt.savefig("{}_regions.pdf".format(title_file), bbox_inch="tight")
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
161 pdf.savefig(fig, bbox_inch="tight")
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
162 plt.close()
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
163
1
9ce2b4089c1b planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit b9403b3ce2b7a41fa8ee1aa47909152de78cf641
mheinzl
parents: 0
diff changeset
164 output_file.write("Dataset:{}{}\n".format(sep, name1))
0
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
165 output_file.write("{}AB{}BA\n".format(sep, sep))
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
166 output_file.write("max. family size:{}{}{}{}\n".format(sep, max(map(int, quant_ab)), sep, max(map(int, quant_ba))))
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
167 output_file.write("absolute frequency:{}{}{}{}\n".format(sep, count[len(count) - 1], sep, count2[len(count2) - 1]))
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
168 output_file.write("relative frequency:{}{:.3f}{}{:.3f}\n\n".format(sep, float(count[len(count) - 1]) / sum(count), sep, float(count2[len(count2) - 1]) / sum(count2)))
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
169 output_file.write("total nr. of reads{}{}\n".format(sep, sum(numpy.array(data_array[:, 0]).astype(int))))
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
170 output_file.write("\n\nValues from family size distribution\n")
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
171 output_file.write("{}".format(sep))
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
172 for i in groupUnique:
4
b202c97deabe planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit dfaab79252a858e8df16bbea3607ebf1b6962e5a
mheinzl
parents: 2
diff changeset
173 output_file.write("{}{}".format(i, sep))
0
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
174 output_file.write("\n")
4
b202c97deabe planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit dfaab79252a858e8df16bbea3607ebf1b6962e5a
mheinzl
parents: 2
diff changeset
175 j = 0
b202c97deabe planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit dfaab79252a858e8df16bbea3607ebf1b6962e5a
mheinzl
parents: 2
diff changeset
176 for fs in counts[1][0:len(counts[1]) - 1]:
0
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
177 if fs == 21:
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
178 fs = ">20"
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
179 else:
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
180 fs = "={}".format(fs)
4
b202c97deabe planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit dfaab79252a858e8df16bbea3607ebf1b6962e5a
mheinzl
parents: 2
diff changeset
181 output_file.write("FS{}{}".format(fs, sep))
0
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
182 for n in range(len(groupUnique)):
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
183 output_file.write("{}{}".format(int(counts[0][n][j]), sep))
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
184 output_file.write("\n")
4
b202c97deabe planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit dfaab79252a858e8df16bbea3607ebf1b6962e5a
mheinzl
parents: 2
diff changeset
185 j += 1
0
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
186 output_file.write("sum{}".format(sep))
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
187 for i in counts[0]:
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
188 output_file.write("{}{}".format(int(sum(i)), sep))
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
189 output_file.write("\n")
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
190 output_file.write("\n\nIn the plot, both family sizes of the ab and ba strands were used.\nWhereas the total numbers indicate only the single count of the tags per region.\n")
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
191 output_file.write("Region{}total nr. of tags per region\n".format(sep))
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
192 for i, count in zip(groupUnique, quantAfterRegion):
4
b202c97deabe planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit dfaab79252a858e8df16bbea3607ebf1b6962e5a
mheinzl
parents: 2
diff changeset
193 output_file.write("{}{}{}\n".format(i, sep, len(count) / 2))
b202c97deabe planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit dfaab79252a858e8df16bbea3607ebf1b6962e5a
mheinzl
parents: 2
diff changeset
194 output_file.write("sum of tags{}{}\n".format(sep, length_regions))
0
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
195
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
196 print("Files successfully created!")
4
b202c97deabe planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit dfaab79252a858e8df16bbea3607ebf1b6962e5a
mheinzl
parents: 2
diff changeset
197 # print("Files saved under {}.pdf and {}.csv in {}!".format(title_file, title_file, os.getcwd()))
b202c97deabe planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit dfaab79252a858e8df16bbea3607ebf1b6962e5a
mheinzl
parents: 2
diff changeset
198
0
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
199
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
200 if __name__ == '__main__':
4
b202c97deabe planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit dfaab79252a858e8df16bbea3607ebf1b6962e5a
mheinzl
parents: 2
diff changeset
201 sys.exit(compare_read_families_refGenome(sys.argv))