Mercurial > repos > tyty > structurefold
comparison get_reads/get_read.py @ 105:abe41dc167f1 draft
Uploaded
author | tyty |
---|---|
date | Thu, 19 Mar 2015 17:43:24 -0400 |
parents | 58df1060fea1 |
children |
comparison
equal
deleted
inserted
replaced
104:60278e5df493 | 105:abe41dc167f1 |
---|---|
1 #!/usr/bin/env python | |
2 # -*- coding: utf-8 -*- | |
3 | |
4 import sys | |
5 from Bio import SeqIO | |
6 import os | |
7 from read_file import * | |
8 import random | |
9 import string | |
10 | |
11 fasta_file = sys.argv[1] | |
12 map_file = sys.argv[2] | |
13 result_file = sys.argv[3] | |
14 | |
15 syspathrs = os.getcwd() | |
16 | |
17 os.system("samtools view -F 0xfff "+map_file+"|cut -f 3,4 > "+syspathrs+"map_info.txt") | |
18 | |
19 fasta_sequences = SeqIO.parse(open(fasta_file),'fasta'); | |
20 length_seq = {}; | |
21 for seq in fasta_sequences: | |
22 nuc = seq.id; | |
23 length_seq[nuc] = len(seq.seq.tostring()); | |
24 | |
25 | |
26 | |
27 mapping = {} | |
28 transcripts = [] | |
29 | |
30 f = open(syspathrs+"map_info.txt"); | |
31 for aline in f.readlines(): | |
32 tline = aline.strip(); | |
33 tl = tline.split('\t'); | |
34 if tl[0].strip() not in transcripts: | |
35 transcripts.append(tl[0].strip()); | |
36 mapping[tl[0].strip()] = []; | |
37 | |
38 mapping[tl[0].strip()].append(tl[1].strip()); | |
39 | |
40 distribution = {}; | |
41 coverage = {}; | |
42 for transcript in length_seq: | |
43 distribution[transcript] = []; | |
44 for i in range(0, length_seq[transcript]): | |
45 distribution[transcript].append(0); | |
46 sum_count = float(0); | |
47 if transcript in mapping: | |
48 for j in range(0, len(mapping[transcript])): | |
49 index = mapping[transcript][j]; | |
50 #count = reads[mapping[transcript][j][0]]; | |
51 sum_count = sum_count + 1; | |
52 distribution[transcript][int(index)-1] = distribution[transcript][int(index)-1] + 1; | |
53 coverage[transcript] = float(sum_count)/float(length_seq[transcript]); | |
54 else: | |
55 coverage[transcript] = 0 | |
56 | |
57 | |
58 | |
59 | |
60 | |
61 h = file(result_file, 'w') | |
62 for transcript in length_seq: | |
63 h.write(transcript); | |
64 h.write('\n') | |
65 for i in range(0, length_seq[transcript]): | |
66 h.write(str(distribution[transcript][i])) | |
67 h.write('\t') | |
68 h.write('\n') | |
69 h.write('\n') | |
70 | |
71 #os.system("rm -r "+syspathrs) | |
72 | |
73 | |
74 | |
75 f.close(); | |
76 h.close() | |
77 | |
78 | |
79 | |
80 |