comparison structurefold/get_reads/get_read.py @ 113:aedb21527abd draft

Uploaded
author tyty
date Tue, 14 Apr 2015 14:09:42 -0400
parents
children
comparison
equal deleted inserted replaced
112:87ec0ecdc2af 113:aedb21527abd
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 import sys
5 from Bio import SeqIO
6 import os
7 from read_file import *
8 import random
9 import string
10
11 fasta_file = sys.argv[1]
12 map_file = sys.argv[2]
13 result_file = sys.argv[3]
14
15 syspathrs = os.getcwd()
16
17 os.system("samtools view -F 0xfff "+map_file+"|cut -f 3,4 > "+syspathrs+"map_info.txt")
18
19 fasta_sequences = SeqIO.parse(open(fasta_file),'fasta');
20 length_seq = {};
21 for seq in fasta_sequences:
22 nuc = seq.id;
23 length_seq[nuc] = len(seq.seq.tostring());
24
25
26
27 mapping = {}
28 transcripts = []
29
30 f = open(syspathrs+"map_info.txt");
31 for aline in f.readlines():
32 tline = aline.strip();
33 tl = tline.split('\t');
34 if tl[0].strip() not in transcripts:
35 transcripts.append(tl[0].strip());
36 mapping[tl[0].strip()] = [];
37
38 mapping[tl[0].strip()].append(tl[1].strip());
39
40 distribution = {};
41 coverage = {};
42 for transcript in length_seq:
43 distribution[transcript] = [];
44 for i in range(0, length_seq[transcript]):
45 distribution[transcript].append(0);
46 sum_count = float(0);
47 if transcript in mapping:
48 for j in range(0, len(mapping[transcript])):
49 index = mapping[transcript][j];
50 #count = reads[mapping[transcript][j][0]];
51 sum_count = sum_count + 1;
52 distribution[transcript][int(index)-1] = distribution[transcript][int(index)-1] + 1;
53 coverage[transcript] = float(sum_count)/float(length_seq[transcript]);
54 else:
55 coverage[transcript] = 0
56
57
58
59
60
61 h = file(result_file, 'w')
62 for transcript in length_seq:
63 h.write(transcript);
64 h.write('\n')
65 for i in range(0, length_seq[transcript]):
66 h.write(str(distribution[transcript][i]))
67 h.write('\t')
68 h.write('\n')
69 h.write('\n')
70
71 #os.system("rm -r "+syspathrs)
72
73
74
75 f.close();
76 h.close()
77
78
79
80