comparison get_reads/get_read.py @ 39:c5f80e82efb7 draft

Uploaded
author tyty
date Mon, 20 Oct 2014 14:57:26 -0400
parents 297cdb01d656
children
comparison
equal deleted inserted replaced
38:b35dc7b728e5 39:c5f80e82efb7
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 import sys
5 #from galaxy.tools.read_file import *
6 from Bio import SeqIO
7 import os
8 from read_file import *
9
10 fasta_file = sys.argv[1]
11 map_file = sys.argv[2]
12 result_file = sys.argv[3]
13
14 os.system("samtools view -F 0xfff "+map_file+"|cut -f 3,4 > map_info.txt")
15
16 fasta_sequences = SeqIO.parse(open(fasta_file),'fasta');
17 length_seq = {};
18 for seq in fasta_sequences:
19 nuc = seq.id;
20 length_seq[nuc] = len(seq.seq.tostring());
21
22
23
24 mapping = {}
25 transcripts = []
26
27 f = open("map_info.txt");
28 for aline in f.readlines():
29 tline = aline.strip();
30 tl = tline.split('\t');
31 if tl[0].strip() not in transcripts:
32 transcripts.append(tl[0].strip());
33 mapping[tl[0].strip()] = [];
34
35 mapping[tl[0].strip()].append(tl[1].strip());
36
37 distribution = {};
38 coverage = {};
39 for transcript in length_seq:
40 distribution[transcript] = [];
41 for i in range(0, length_seq[transcript]):
42 distribution[transcript].append(0);
43 sum_count = float(0);
44 if transcript in mapping:
45 for j in range(0, len(mapping[transcript])):
46 index = mapping[transcript][j];
47 #count = reads[mapping[transcript][j][0]];
48 sum_count = sum_count + 1;
49 distribution[transcript][int(index)-1] = distribution[transcript][int(index)-1] + 1;
50 coverage[transcript] = float(sum_count)/float(length_seq[transcript]);
51 else:
52 coverage[transcript] = 0
53
54
55
56
57
58 h = file(result_file, 'w')
59 for transcript in length_seq:
60 h.write(transcript);
61 h.write('\n')
62 for i in range(0, length_seq[transcript]):
63 h.write(str(distribution[transcript][i]))
64 h.write('\t')
65 h.write('\n')
66 h.write('\n')
67
68
69
70
71
72 f.close();
73 h.close()
74
75
76
77