annotate get_reads/get_read.py~ @ 31:76e51d19a53a draft

Uploaded
author tyty
date Mon, 20 Oct 2014 14:44:19 -0400
parents 90bc1ead3abd
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
25
90bc1ead3abd Uploaded
tyty
parents:
diff changeset
1 #!/usr/bin/env python
90bc1ead3abd Uploaded
tyty
parents:
diff changeset
2 # -*- coding: utf-8 -*-
90bc1ead3abd Uploaded
tyty
parents:
diff changeset
3
90bc1ead3abd Uploaded
tyty
parents:
diff changeset
4 import sys
90bc1ead3abd Uploaded
tyty
parents:
diff changeset
5 #from galaxy.tools.read_file import *
90bc1ead3abd Uploaded
tyty
parents:
diff changeset
6 from Bio import SeqIO
90bc1ead3abd Uploaded
tyty
parents:
diff changeset
7 import os
90bc1ead3abd Uploaded
tyty
parents:
diff changeset
8 from read_file import *
90bc1ead3abd Uploaded
tyty
parents:
diff changeset
9
90bc1ead3abd Uploaded
tyty
parents:
diff changeset
10 fasta_file = sys.argv[1]
90bc1ead3abd Uploaded
tyty
parents:
diff changeset
11 map_file = sys.argv[2]
90bc1ead3abd Uploaded
tyty
parents:
diff changeset
12 result_file = sys.argv[3]
90bc1ead3abd Uploaded
tyty
parents:
diff changeset
13
90bc1ead3abd Uploaded
tyty
parents:
diff changeset
14 os.system("samtools view -F 0xfff "+map_file+"|cut -f 3,4 > map_info.txt")
90bc1ead3abd Uploaded
tyty
parents:
diff changeset
15
90bc1ead3abd Uploaded
tyty
parents:
diff changeset
16 fasta_sequences = SeqIO.parse(open(fasta_file),'fasta');
90bc1ead3abd Uploaded
tyty
parents:
diff changeset
17 length_seq = {};
90bc1ead3abd Uploaded
tyty
parents:
diff changeset
18 for seq in fasta_sequences:
90bc1ead3abd Uploaded
tyty
parents:
diff changeset
19 nuc = seq.id;
90bc1ead3abd Uploaded
tyty
parents:
diff changeset
20 length_seq[nuc] = len(seq.seq.tostring());
90bc1ead3abd Uploaded
tyty
parents:
diff changeset
21
90bc1ead3abd Uploaded
tyty
parents:
diff changeset
22
90bc1ead3abd Uploaded
tyty
parents:
diff changeset
23
90bc1ead3abd Uploaded
tyty
parents:
diff changeset
24 mapping = {}
90bc1ead3abd Uploaded
tyty
parents:
diff changeset
25 transcripts = []
90bc1ead3abd Uploaded
tyty
parents:
diff changeset
26
90bc1ead3abd Uploaded
tyty
parents:
diff changeset
27 f = open("map_info.txt");
90bc1ead3abd Uploaded
tyty
parents:
diff changeset
28 for aline in f.readlines():
90bc1ead3abd Uploaded
tyty
parents:
diff changeset
29 tline = aline.strip();
90bc1ead3abd Uploaded
tyty
parents:
diff changeset
30 tl = tline.split('\t');
90bc1ead3abd Uploaded
tyty
parents:
diff changeset
31 if tl[0].strip() not in transcripts:
90bc1ead3abd Uploaded
tyty
parents:
diff changeset
32 transcripts.append(tl[0].strip());
90bc1ead3abd Uploaded
tyty
parents:
diff changeset
33 mapping[tl[0].strip()] = [];
90bc1ead3abd Uploaded
tyty
parents:
diff changeset
34
90bc1ead3abd Uploaded
tyty
parents:
diff changeset
35 mapping[tl[0].strip()].append(tl[1].strip());
90bc1ead3abd Uploaded
tyty
parents:
diff changeset
36
90bc1ead3abd Uploaded
tyty
parents:
diff changeset
37 distribution = {};
90bc1ead3abd Uploaded
tyty
parents:
diff changeset
38 coverage = {};
90bc1ead3abd Uploaded
tyty
parents:
diff changeset
39 for transcript in length_seq:
90bc1ead3abd Uploaded
tyty
parents:
diff changeset
40 distribution[transcript] = [];
90bc1ead3abd Uploaded
tyty
parents:
diff changeset
41 for i in range(0, length_seq[transcript]):
90bc1ead3abd Uploaded
tyty
parents:
diff changeset
42 distribution[transcript].append(0);
90bc1ead3abd Uploaded
tyty
parents:
diff changeset
43 sum_count = float(0);
90bc1ead3abd Uploaded
tyty
parents:
diff changeset
44 if transcript in mapping:
90bc1ead3abd Uploaded
tyty
parents:
diff changeset
45 for j in range(0, len(mapping[transcript])):
90bc1ead3abd Uploaded
tyty
parents:
diff changeset
46 index = mapping[transcript][j];
90bc1ead3abd Uploaded
tyty
parents:
diff changeset
47 #count = reads[mapping[transcript][j][0]];
90bc1ead3abd Uploaded
tyty
parents:
diff changeset
48 sum_count = sum_count + 1;
90bc1ead3abd Uploaded
tyty
parents:
diff changeset
49 distribution[transcript][int(index)-1] = distribution[transcript][int(index)-1] + 1;
90bc1ead3abd Uploaded
tyty
parents:
diff changeset
50 coverage[transcript] = float(sum_count)/float(length_seq[transcript]);
90bc1ead3abd Uploaded
tyty
parents:
diff changeset
51 else:
90bc1ead3abd Uploaded
tyty
parents:
diff changeset
52 coverage[transcript] = 0
90bc1ead3abd Uploaded
tyty
parents:
diff changeset
53
90bc1ead3abd Uploaded
tyty
parents:
diff changeset
54
90bc1ead3abd Uploaded
tyty
parents:
diff changeset
55
90bc1ead3abd Uploaded
tyty
parents:
diff changeset
56
90bc1ead3abd Uploaded
tyty
parents:
diff changeset
57
90bc1ead3abd Uploaded
tyty
parents:
diff changeset
58 h = file(result_file, 'w')
90bc1ead3abd Uploaded
tyty
parents:
diff changeset
59 for transcript in length_seq:
90bc1ead3abd Uploaded
tyty
parents:
diff changeset
60 h.write(transcript);
90bc1ead3abd Uploaded
tyty
parents:
diff changeset
61 h.write('\n')
90bc1ead3abd Uploaded
tyty
parents:
diff changeset
62 for i in range(0, length_seq[transcript]):
90bc1ead3abd Uploaded
tyty
parents:
diff changeset
63 h.write(str(distribution[transcript][i]))
90bc1ead3abd Uploaded
tyty
parents:
diff changeset
64 h.write('\t')
90bc1ead3abd Uploaded
tyty
parents:
diff changeset
65 h.write('\n')
90bc1ead3abd Uploaded
tyty
parents:
diff changeset
66 h.write('\n')
90bc1ead3abd Uploaded
tyty
parents:
diff changeset
67
90bc1ead3abd Uploaded
tyty
parents:
diff changeset
68
90bc1ead3abd Uploaded
tyty
parents:
diff changeset
69
90bc1ead3abd Uploaded
tyty
parents:
diff changeset
70
90bc1ead3abd Uploaded
tyty
parents:
diff changeset
71
90bc1ead3abd Uploaded
tyty
parents:
diff changeset
72 f.close();
90bc1ead3abd Uploaded
tyty
parents:
diff changeset
73 h.close()
90bc1ead3abd Uploaded
tyty
parents:
diff changeset
74
90bc1ead3abd Uploaded
tyty
parents:
diff changeset
75
90bc1ead3abd Uploaded
tyty
parents:
diff changeset
76
90bc1ead3abd Uploaded
tyty
parents:
diff changeset
77