2
|
1 #!/usr/bin/env python
|
|
2 # -*- coding: utf-8 -*-
|
|
3
|
|
4 import sys
|
|
5 #from galaxy.tools.read_file import *
|
|
6 from Bio import SeqIO
|
|
7 import os
|
|
8 from read_file import *
|
|
9
|
|
10 fasta_file = sys.argv[1]
|
|
11 map_file = sys.argv[2]
|
|
12 result_file = sys.argv[3]
|
|
13
|
|
14 os.system("samtools view -F 0xfff "+map_file+"|cut -f 3,4 > map_info.txt")
|
|
15
|
|
16 fasta_sequences = SeqIO.parse(open(fasta_file),'fasta');
|
|
17 length_seq = {};
|
|
18 for seq in fasta_sequences:
|
|
19 nuc = seq.id;
|
|
20 length_seq[nuc] = len(seq.seq.tostring());
|
|
21
|
|
22
|
|
23
|
|
24 mapping = {}
|
|
25 transcripts = []
|
|
26
|
|
27 f = open("map_info.txt");
|
|
28 for aline in f.readlines():
|
|
29 tline = aline.strip();
|
|
30 tl = tline.split('\t');
|
|
31 if tl[0].strip() not in transcripts:
|
|
32 transcripts.append(tl[0].strip());
|
|
33 mapping[tl[0].strip()] = [];
|
|
34
|
|
35 mapping[tl[0].strip()].append(tl[1].strip());
|
|
36
|
|
37 distribution = {};
|
|
38 coverage = {};
|
|
39 for transcript in length_seq:
|
|
40 distribution[transcript] = [];
|
|
41 for i in range(0, length_seq[transcript]):
|
|
42 distribution[transcript].append(0);
|
|
43 sum_count = float(0);
|
|
44 if transcript in mapping:
|
|
45 for j in range(0, len(mapping[transcript])):
|
|
46 index = mapping[transcript][j];
|
|
47 #count = reads[mapping[transcript][j][0]];
|
|
48 sum_count = sum_count + 1;
|
|
49 distribution[transcript][int(index)-1] = distribution[transcript][int(index)-1] + 1;
|
|
50 coverage[transcript] = float(sum_count)/float(length_seq[transcript]);
|
|
51 else:
|
|
52 coverage[transcript] = 0
|
|
53
|
|
54
|
|
55
|
|
56
|
|
57
|
|
58 h = file(result_file, 'w')
|
|
59 for transcript in length_seq:
|
|
60 h.write(transcript);
|
|
61 h.write('\n')
|
|
62 for i in range(0, length_seq[transcript]):
|
|
63 h.write(str(distribution[transcript][i]))
|
|
64 h.write('\t')
|
|
65 h.write('\n')
|
|
66 h.write('\n')
|
|
67
|
|
68
|
|
69
|
|
70
|
|
71
|
|
72 f.close();
|
|
73 h.close()
|
|
74
|
|
75
|
|
76
|
|
77
|