57
|
1 #!/usr/bin/env python
|
|
2 # -*- coding: utf-8 -*-
|
|
3
|
|
4 import sys
|
|
5 #from galaxy.tools.read_file import *
|
|
6 from Bio import SeqIO
|
|
7 import os
|
|
8 from read_file import *
|
|
9 import random
|
|
10 import string
|
|
11
|
|
12 fasta_file = sys.argv[1]
|
|
13 map_file = sys.argv[2]
|
|
14 result_file = sys.argv[3]
|
|
15
|
|
16 ospath = os.path.realpath(sys.argv[0])
|
|
17 ost = ospath.split('/')
|
|
18 syspath = ""
|
|
19 for i in range(len(ost)-1):
|
|
20 syspath = syspath+ost[i].strip()
|
|
21 syspath = syspath+'/'
|
|
22
|
|
23 rs = ''.join(random.sample(string.ascii_letters + string.digits, 8))
|
|
24 os.system("mkdir "+syspath+"output_"+rs)
|
|
25
|
|
26 syspathrs = syspath+"output_"+rs+'/'
|
|
27
|
|
28 os.system("samtools view -F 0xfff "+map_file+"|cut -f 3,4 > "+syspathrs+"map_info.txt")
|
|
29
|
|
30 fasta_sequences = SeqIO.parse(open(fasta_file),'fasta');
|
|
31 length_seq = {};
|
|
32 for seq in fasta_sequences:
|
|
33 nuc = seq.id;
|
|
34 length_seq[nuc] = len(seq.seq.tostring());
|
|
35
|
|
36
|
|
37
|
|
38 mapping = {}
|
|
39 transcripts = []
|
|
40
|
|
41 f = open(syspathrs+"map_info.txt");
|
|
42 for aline in f.readlines():
|
|
43 tline = aline.strip();
|
|
44 tl = tline.split('\t');
|
|
45 if tl[0].strip() not in transcripts:
|
|
46 transcripts.append(tl[0].strip());
|
|
47 mapping[tl[0].strip()] = [];
|
|
48
|
|
49 mapping[tl[0].strip()].append(tl[1].strip());
|
|
50
|
|
51 distribution = {};
|
|
52 coverage = {};
|
|
53 for transcript in length_seq:
|
|
54 distribution[transcript] = [];
|
|
55 for i in range(0, length_seq[transcript]):
|
|
56 distribution[transcript].append(0);
|
|
57 sum_count = float(0);
|
|
58 if transcript in mapping:
|
|
59 for j in range(0, len(mapping[transcript])):
|
|
60 index = mapping[transcript][j];
|
|
61 #count = reads[mapping[transcript][j][0]];
|
|
62 sum_count = sum_count + 1;
|
|
63 distribution[transcript][int(index)-1] = distribution[transcript][int(index)-1] + 1;
|
|
64 coverage[transcript] = float(sum_count)/float(length_seq[transcript]);
|
|
65 else:
|
|
66 coverage[transcript] = 0
|
|
67
|
|
68
|
|
69
|
|
70
|
|
71
|
|
72 h = file(result_file, 'w')
|
|
73 for transcript in length_seq:
|
|
74 h.write(transcript);
|
|
75 h.write('\n')
|
|
76 for i in range(0, length_seq[transcript]):
|
|
77 h.write(str(distribution[transcript][i]))
|
|
78 h.write('\t')
|
|
79 h.write('\n')
|
|
80 h.write('\n')
|
|
81
|
|
82 os.system("rm -r "+syspathrs)
|
|
83
|
|
84
|
|
85
|
|
86 f.close();
|
|
87 h.close()
|
|
88
|
|
89
|
|
90
|
|
91
|