annotate fsd_regions.py @ 5:52454637bc45 draft

planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
author mheinzl
date Wed, 17 Oct 2018 05:23:33 -0400
parents b202c97deabe
children 26014c24323a
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
1 #!/usr/bin/env python
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
2
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
3 # Family size distribution of tags which were aligned to the reference genome
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
4 #
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
5 # Author: Monika Heinzl, Johannes-Kepler University Linz (Austria)
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
6 # Contact: monika.heinzl@edumail.at
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
7 #
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
8 # Takes at least one TABULAR file with tags before the alignment to the SSCS
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
9 # and a TXT with tags of reads that overlap the regions of the reference genome as input.
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
10 # The program produces a plot which shows the distribution of family sizes of the tags from the input files and
4
b202c97deabe planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit dfaab79252a858e8df16bbea3607ebf1b6962e5a
mheinzl
parents: 2
diff changeset
11 # a tabular file with the data of the plot.
0
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
12
4
b202c97deabe planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit dfaab79252a858e8df16bbea3607ebf1b6962e5a
mheinzl
parents: 2
diff changeset
13 # USAGE: python FSD_regions_1.6_FINAL.py --inputFile filenameSSCS --inputName1 filenameSSCS --ref_genome filenameRefGenome --output_tabular outptufile_name_tabular --output_pdf outptufile_name_pdf
0
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
14
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
15 import argparse
5
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
16 import re
0
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
17 import sys
5
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
18 from collections import OrderedDict
4
b202c97deabe planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit dfaab79252a858e8df16bbea3607ebf1b6962e5a
mheinzl
parents: 2
diff changeset
19
b202c97deabe planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit dfaab79252a858e8df16bbea3607ebf1b6962e5a
mheinzl
parents: 2
diff changeset
20 import matplotlib.pyplot as plt
b202c97deabe planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit dfaab79252a858e8df16bbea3607ebf1b6962e5a
mheinzl
parents: 2
diff changeset
21 import numpy
0
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
22 from matplotlib.backends.backend_pdf import PdfPages
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
23
4
b202c97deabe planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit dfaab79252a858e8df16bbea3607ebf1b6962e5a
mheinzl
parents: 2
diff changeset
24 plt.switch_backend('agg')
b202c97deabe planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit dfaab79252a858e8df16bbea3607ebf1b6962e5a
mheinzl
parents: 2
diff changeset
25
b202c97deabe planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit dfaab79252a858e8df16bbea3607ebf1b6962e5a
mheinzl
parents: 2
diff changeset
26
0
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
27 def readFileReferenceFree(file, delim):
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
28 with open(file, 'r') as dest_f:
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
29 data_array = numpy.genfromtxt(dest_f, skip_header=0, delimiter=delim, comments='#', dtype='string')
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
30 return(data_array)
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
31
4
b202c97deabe planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit dfaab79252a858e8df16bbea3607ebf1b6962e5a
mheinzl
parents: 2
diff changeset
32
0
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
33 def make_argparser():
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
34 parser = argparse.ArgumentParser(description='Family Size Distribution of tags which were aligned to regions of the reference genome')
4
b202c97deabe planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit dfaab79252a858e8df16bbea3607ebf1b6962e5a
mheinzl
parents: 2
diff changeset
35 parser.add_argument('--inputFile', help='Tabular File with three columns: ab or ba, tag and family size.')
0
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
36 parser.add_argument('--inputName1')
4
b202c97deabe planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit dfaab79252a858e8df16bbea3607ebf1b6962e5a
mheinzl
parents: 2
diff changeset
37 parser.add_argument('--ref_genome', help='TXT File with tags of reads that overlap the region.')
b202c97deabe planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit dfaab79252a858e8df16bbea3607ebf1b6962e5a
mheinzl
parents: 2
diff changeset
38 parser.add_argument('--output_pdf', default="data.pdf", type=str, help='Name of the pdf and tabular file.')
b202c97deabe planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit dfaab79252a858e8df16bbea3607ebf1b6962e5a
mheinzl
parents: 2
diff changeset
39 parser.add_argument('--output_tabular', default="data.tabular", type=str, help='Name of the pdf and tabular file.')
0
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
40 return parser
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
41
4
b202c97deabe planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit dfaab79252a858e8df16bbea3607ebf1b6962e5a
mheinzl
parents: 2
diff changeset
42
0
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
43 def compare_read_families_refGenome(argv):
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
44 parser = make_argparser()
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
45 args = parser.parse_args(argv[1:])
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
46
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
47 firstFile = args.inputFile
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
48 name1 = args.inputName1
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
49 name1 = name1.split(".tabular")[0]
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
50 refGenome = args.ref_genome
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
51 title_file = args.output_pdf
4
b202c97deabe planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit dfaab79252a858e8df16bbea3607ebf1b6962e5a
mheinzl
parents: 2
diff changeset
52 title_file2 = args.output_tabular
b202c97deabe planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit dfaab79252a858e8df16bbea3607ebf1b6962e5a
mheinzl
parents: 2
diff changeset
53 sep = "\t"
0
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
54
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
55 with open(title_file2, "w") as output_file, PdfPages(title_file) as pdf:
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
56 data_array = readFileReferenceFree(firstFile, "\t")
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
57
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
58 mut_array = readFileReferenceFree(refGenome, " ")
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
59 length_regions = len(mut_array)
5
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
60 group = numpy.array(mut_array[:, 0])
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
61 seq_mut = numpy.array(mut_array[:, 1])
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
62 alt_group = numpy.array(mut_array[:, 2])
0
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
63
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
64 seq = numpy.array(data_array[:, 1])
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
65 tags = numpy.array(data_array[:, 2])
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
66 quant = numpy.array(data_array[:, 0]).astype(int)
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
67
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
68 all_ab = seq[numpy.where(tags == "ab")[0]]
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
69 all_ba = seq[numpy.where(tags == "ba")[0]]
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
70 quant_ab = quant[numpy.where(tags == "ab")[0]]
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
71 quant_ba = quant[numpy.where(tags == "ba")[0]]
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
72
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
73 seqDic_ab = dict(zip(all_ab, quant_ab))
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
74 seqDic_ba = dict(zip(all_ba, quant_ba))
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
75
5
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
76 if re.search('^(\d)+_(\d)+', str(mut_array[0,0])) is None:
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
77 seq_mut, seqMut_index = numpy.unique(numpy.array(mut_array[:, 1]), return_index=True)
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
78 group = mut_array[seqMut_index,0]
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
79 alt_group = mut_array[seqMut_index,2]
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
80 mut_array = mut_array[seqMut_index,:]
0
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
81
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
82 groupUnique, group_index = numpy.unique(group, return_index=True)
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
83 groupUnique = groupUnique[numpy.argsort(group_index)]
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
84 lst_ab = []
5
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
85 lst_ba = []
0
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
86 for i in seq_mut:
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
87 lst_ab.append(seqDic_ab.get(i))
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
88 lst_ba.append(seqDic_ba.get(i))
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
89
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
90 quant_ab = numpy.array(lst_ab)
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
91 quant_ba = numpy.array(lst_ba)
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
92
5
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
93 quantAfterRegion = OrderedDict()
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
94 for key in groupUnique:
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
95 quantAfterRegion[key] = []
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
96
0
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
97 for i in groupUnique:
5
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
98 index_of_current_region = numpy.where(group == i)[0]
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
99 quant_ba_i = quant_ba[index_of_current_region]
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
100 alt_group_i = alt_group[index_of_current_region]
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
101 index_alternative_refs = numpy.where(alt_group_i != "=")[0]
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
102
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
103 dataAB = quant_ab[index_of_current_region]
0
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
104 bigFamilies = numpy.where(dataAB > 20)[0]
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
105 dataAB[bigFamilies] = 22
5
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
106 for el in dataAB:
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
107 quantAfterRegion[i].append(el)
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
108
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
109 if len(index_alternative_refs) == 0:
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
110 dataBA = quant_ba_i
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
111 bigFamilies = numpy.where(dataBA > 20)[0]
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
112 dataBA[bigFamilies] = 22
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
113 for el2 in dataBA:
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
114 quantAfterRegion[i].append(el2)
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
115 else: # get tags where 2nd mate is aligned to a different ref genome
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
116 unique_alt = numpy.unique(alt_group_i[index_alternative_refs])
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
117 for alt in unique_alt:
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
118 ind_alt_tags = numpy.where(alt_group_i == alt)[0]
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
119 dataBA = quant_ba_i[ind_alt_tags]
0
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
120
5
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
121 bigFamilies = numpy.where(dataBA > 20)[0]
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
122 if len(bigFamilies) != 0:
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
123 if len(bigFamilies) == 1 and type(dataBA) != list:
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
124 dataBA = 22
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
125 quantAfterRegion[alt].append(dataBA)
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
126 else:
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
127 dataBA[bigFamilies] = 22
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
128 for el3 in dataBA:
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
129 quantAfterRegion[alt].append(el3)
0
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
130
5
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
131 index_inverse = [x for x in range(0, len(index_of_current_region)) if x not in index_alternative_refs]
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
132 data_BA_other = quant_ba_i[index_inverse]
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
133 bigFamilies_other = numpy.where(data_BA_other > 20)[0]
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
134
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
135 if len(bigFamilies_other) != 0:
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
136 if len(bigFamilies_other) == 1 and type(data_BA_other) != list:
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
137 data_BA_other = 22
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
138 quantAfterRegion[i].append(data_BA_other)
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
139 else:
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
140 data_BA_other[bigFamilies_other] = 22
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
141 for el3 in data_BA_other:
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
142 quantAfterRegion[i].append(el3)
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
143
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
144 quantAfterRegion = quantAfterRegion.values()
0
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
145 maximumX = numpy.amax(numpy.concatenate(quantAfterRegion))
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
146 minimumX = numpy.amin(numpy.concatenate(quantAfterRegion))
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
147
4
b202c97deabe planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit dfaab79252a858e8df16bbea3607ebf1b6962e5a
mheinzl
parents: 2
diff changeset
148 # PLOT
0
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
149 plt.rc('figure', figsize=(11.69, 8.27)) # A4 format
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
150 plt.rcParams['axes.facecolor'] = "E0E0E0" # grey background color
1
9ce2b4089c1b planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit b9403b3ce2b7a41fa8ee1aa47909152de78cf641
mheinzl
parents: 0
diff changeset
151 plt.rcParams['xtick.labelsize'] = 14
9ce2b4089c1b planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit b9403b3ce2b7a41fa8ee1aa47909152de78cf641
mheinzl
parents: 0
diff changeset
152 plt.rcParams['ytick.labelsize'] = 14
0
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
153 plt.rcParams['patch.edgecolor'] = "black"
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
154 fig = plt.figure()
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
155 plt.subplots_adjust(bottom=0.3)
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
156
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
157 colors = ["#6E6E6E", "#0431B4", "#5FB404", "#B40431", "#F4FA58", "#DF7401", "#81DAF5"]
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
158
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
159 col = []
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
160 for i in range(0, len(groupUnique)):
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
161 col.append(colors[i])
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
162
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
163 counts = plt.hist(quantAfterRegion, bins=range(minimumX, maximumX + 1), stacked=False, label=groupUnique,
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
164 align="left", alpha=1, color=col, edgecolor="black", linewidth=1)
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
165 ticks = numpy.arange(minimumX - 1, maximumX, 1)
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
166
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
167 ticks1 = map(str, ticks)
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
168 ticks1[len(ticks1) - 1] = ">20"
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
169 plt.xticks(numpy.array(ticks), ticks1)
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
170 count = numpy.bincount(map(int, quant_ab)) # original counts
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
171
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
172 legend = "max. family size =\nabsolute frequency=\nrelative frequency=\n\ntotal nr. of reads="
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
173 plt.text(0.15, 0.105, legend, size=11, transform=plt.gcf().transFigure)
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
174
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
175 legend = "AB\n{}\n{}\n{:.5f}\n\n{:,}" \
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
176 .format(max(map(int, quant_ab)), count[len(count) - 1], float(count[len(count) - 1]) / sum(count),
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
177 sum(numpy.array(data_array[:, 0]).astype(int)))
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
178 plt.text(0.35, 0.105, legend, size=11, transform=plt.gcf().transFigure)
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
179
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
180 count2 = numpy.bincount(map(int, quant_ba)) # original counts
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
181
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
182 legend = "BA\n{}\n{}\n{:.5f}" \
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
183 .format(max(map(int, quant_ba)), count2[len(count2) - 1], float(count2[len(count2) - 1]) / sum(count2))
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
184 plt.text(0.45, 0.15, legend, size=11, transform=plt.gcf().transFigure)
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
185
5
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
186 plt.text(0.55, 0.22, "total nr. of tags=", size=11, transform=plt.gcf().transFigure)
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
187 plt.text(0.7, 0.22, "{:,}".format(length_regions), size=11, transform=plt.gcf().transFigure)
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
188
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
189 legend4 = '* The total numbers indicate the count of the ab and ba tags per region.\nAn equal sign ("=") is used in the column ba tags, if the counts and the region are identical to the ab tags.'
0
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
190 plt.text(0.1, 0.02, legend4, size=11, transform=plt.gcf().transFigure)
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
191
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
192 space = numpy.arange(0, len(groupUnique), 0.02)
5
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
193 plt.text(0.7, 0.18, "total number of *\nab", size=11, transform=plt.gcf().transFigure)
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
194 plt.text(0.78, 0.18, "ba tags", size=11, transform=plt.gcf().transFigure)
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
195 lengths_array_ab = []
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
196 lengths_array_ba = []
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
197
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
198 index_array = 0
0
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
199 for i, s, count in zip(groupUnique, space, quantAfterRegion):
5
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
200 index_of_current_region = numpy.where(group == i)[0]
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
201
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
202 plt.text(0.55, 0.14 - s, "{}=\n".format(i), size=11, transform=plt.gcf().transFigure)
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
203 if re.search('^(\d)+_(\d)+', str(mut_array[0, 0])) is None:
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
204 nr_tags_ab = len(numpy.unique(mut_array[index_of_current_region, 1]))
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
205 else:
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
206 nr_tags_ab = len(mut_array[index_of_current_region, 1])
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
207
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
208 plt.text(0.7, 0.14 - s, "{:,}\n".format(nr_tags_ab), size=11, transform=plt.gcf().transFigure)
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
209
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
210 alt_group_i = alt_group[index_of_current_region]
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
211 alternative = numpy.where(alt_group_i != "=")[0]
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
212 unique_alt = numpy.unique(alt_group_i[alternative])
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
213 lengths_of_alt_aligned_tags = []
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
214 if len(alternative) != 0:
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
215 for alt in unique_alt:
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
216 ind_alt_tags = numpy.where(alt_group_i == alt)[0]
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
217 name = "{:,} to {}".format(len(ind_alt_tags), alt)
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
218 lengths_of_alt_aligned_tags.append(name)
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
219 ind_alt_tags_inverse = numpy.where(alt_group_i != alt)[0]
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
220 name_inverse = "{:,} to {}".format(len(ind_alt_tags_inverse), i)
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
221 lengths_of_alt_aligned_tags.append(name_inverse)
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
222 plt.text(0.78, 0.14 - s, "{}\n".format(", ".join(lengths_of_alt_aligned_tags)), size=10, transform=plt.gcf().transFigure)
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
223 lengths_array_ab.append(nr_tags_ab)
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
224 lengths_array_ba.append(",".join(lengths_of_alt_aligned_tags))
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
225 else:
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
226 plt.text(0.78, 0.14 - s, "=\n", size=11,transform=plt.gcf().transFigure)
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
227 lengths_array_ab.append(nr_tags_ab)
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
228 lengths_array_ba.append(nr_tags_ab)
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
229 index_array += 1
0
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
230
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
231 plt.legend(loc='upper right', fontsize=14, bbox_to_anchor=(0.9, 1), frameon=True)
1
9ce2b4089c1b planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit b9403b3ce2b7a41fa8ee1aa47909152de78cf641
mheinzl
parents: 0
diff changeset
232 plt.xlabel("Family size", fontsize=14)
9ce2b4089c1b planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit b9403b3ce2b7a41fa8ee1aa47909152de78cf641
mheinzl
parents: 0
diff changeset
233 plt.ylabel("Absolute Frequency", fontsize=14)
0
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
234 plt.grid(b=True, which="major", color="#424242", linestyle=":")
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
235 plt.margins(0.01, None)
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
236
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
237 pdf.savefig(fig, bbox_inch="tight")
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
238 plt.close()
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
239
1
9ce2b4089c1b planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit b9403b3ce2b7a41fa8ee1aa47909152de78cf641
mheinzl
parents: 0
diff changeset
240 output_file.write("Dataset:{}{}\n".format(sep, name1))
0
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
241 output_file.write("{}AB{}BA\n".format(sep, sep))
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
242 output_file.write("max. family size:{}{}{}{}\n".format(sep, max(map(int, quant_ab)), sep, max(map(int, quant_ba))))
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
243 output_file.write("absolute frequency:{}{}{}{}\n".format(sep, count[len(count) - 1], sep, count2[len(count2) - 1]))
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
244 output_file.write("relative frequency:{}{:.3f}{}{:.3f}\n\n".format(sep, float(count[len(count) - 1]) / sum(count), sep, float(count2[len(count2) - 1]) / sum(count2)))
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
245 output_file.write("total nr. of reads{}{}\n".format(sep, sum(numpy.array(data_array[:, 0]).astype(int))))
5
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
246 output_file.write("total nr. of tags{}{}\n".format(sep, length_regions))
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
247
0
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
248 output_file.write("\n\nValues from family size distribution\n")
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
249 output_file.write("{}".format(sep))
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
250 for i in groupUnique:
4
b202c97deabe planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit dfaab79252a858e8df16bbea3607ebf1b6962e5a
mheinzl
parents: 2
diff changeset
251 output_file.write("{}{}".format(i, sep))
0
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
252 output_file.write("\n")
4
b202c97deabe planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit dfaab79252a858e8df16bbea3607ebf1b6962e5a
mheinzl
parents: 2
diff changeset
253 j = 0
b202c97deabe planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit dfaab79252a858e8df16bbea3607ebf1b6962e5a
mheinzl
parents: 2
diff changeset
254 for fs in counts[1][0:len(counts[1]) - 1]:
0
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
255 if fs == 21:
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
256 fs = ">20"
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
257 else:
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
258 fs = "={}".format(fs)
4
b202c97deabe planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit dfaab79252a858e8df16bbea3607ebf1b6962e5a
mheinzl
parents: 2
diff changeset
259 output_file.write("FS{}{}".format(fs, sep))
5
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
260
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
261 if len(groupUnique) == 1:
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
262 output_file.write("{}{}".format(int(counts[0][j]), sep))
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
263 else:
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
264 for n in range(len(groupUnique)):
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
265 output_file.write("{}{}".format(int(counts[0][n][j]), sep))
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
266
0
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
267 output_file.write("\n")
4
b202c97deabe planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit dfaab79252a858e8df16bbea3607ebf1b6962e5a
mheinzl
parents: 2
diff changeset
268 j += 1
0
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
269 output_file.write("sum{}".format(sep))
5
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
270 if len(groupUnique) == 1:
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
271 output_file.write("{}{}".format(int(sum(counts[0])), sep))
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
272 else:
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
273 for i in counts[0]:
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
274 output_file.write("{}{}".format(int(sum(i)), sep))
0
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
275 output_file.write("\n")
5
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
276 output_file.write("\n\nRegion{}total nr. of ab{}ba tags\n".format(sep, sep))
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
277
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
278 for ab, ba, i in zip(lengths_array_ab, lengths_array_ba, groupUnique):
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
279 output_file.write("{}{}{}{}{}\n".format(i, sep, ab, sep, ba))
0
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
280
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
281 print("Files successfully created!")
4
b202c97deabe planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit dfaab79252a858e8df16bbea3607ebf1b6962e5a
mheinzl
parents: 2
diff changeset
282
0
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
283
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
284 if __name__ == '__main__':
5
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
285 sys.exit(compare_read_families_refGenome(sys.argv))