annotate get_reads/get_read.py @ 58:971ef91f6b68 draft

Uploaded
author tyty
date Tue, 18 Nov 2014 01:01:38 -0500
parents 31d688722e7a
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
57
31d688722e7a Uploaded
tyty
parents:
diff changeset
1 #!/usr/bin/env python
31d688722e7a Uploaded
tyty
parents:
diff changeset
2 # -*- coding: utf-8 -*-
31d688722e7a Uploaded
tyty
parents:
diff changeset
3
31d688722e7a Uploaded
tyty
parents:
diff changeset
4 import sys
31d688722e7a Uploaded
tyty
parents:
diff changeset
5 #from galaxy.tools.read_file import *
31d688722e7a Uploaded
tyty
parents:
diff changeset
6 from Bio import SeqIO
31d688722e7a Uploaded
tyty
parents:
diff changeset
7 import os
31d688722e7a Uploaded
tyty
parents:
diff changeset
8 from read_file import *
31d688722e7a Uploaded
tyty
parents:
diff changeset
9 import random
31d688722e7a Uploaded
tyty
parents:
diff changeset
10 import string
31d688722e7a Uploaded
tyty
parents:
diff changeset
11
31d688722e7a Uploaded
tyty
parents:
diff changeset
12 fasta_file = sys.argv[1]
31d688722e7a Uploaded
tyty
parents:
diff changeset
13 map_file = sys.argv[2]
31d688722e7a Uploaded
tyty
parents:
diff changeset
14 result_file = sys.argv[3]
31d688722e7a Uploaded
tyty
parents:
diff changeset
15
31d688722e7a Uploaded
tyty
parents:
diff changeset
16 ospath = os.path.realpath(sys.argv[0])
31d688722e7a Uploaded
tyty
parents:
diff changeset
17 ost = ospath.split('/')
31d688722e7a Uploaded
tyty
parents:
diff changeset
18 syspath = ""
31d688722e7a Uploaded
tyty
parents:
diff changeset
19 for i in range(len(ost)-1):
31d688722e7a Uploaded
tyty
parents:
diff changeset
20 syspath = syspath+ost[i].strip()
31d688722e7a Uploaded
tyty
parents:
diff changeset
21 syspath = syspath+'/'
31d688722e7a Uploaded
tyty
parents:
diff changeset
22
31d688722e7a Uploaded
tyty
parents:
diff changeset
23 rs = ''.join(random.sample(string.ascii_letters + string.digits, 8))
31d688722e7a Uploaded
tyty
parents:
diff changeset
24 os.system("mkdir "+syspath+"output_"+rs)
31d688722e7a Uploaded
tyty
parents:
diff changeset
25
31d688722e7a Uploaded
tyty
parents:
diff changeset
26 syspathrs = syspath+"output_"+rs+'/'
31d688722e7a Uploaded
tyty
parents:
diff changeset
27
31d688722e7a Uploaded
tyty
parents:
diff changeset
28 os.system("samtools view -F 0xfff "+map_file+"|cut -f 3,4 > "+syspathrs+"map_info.txt")
31d688722e7a Uploaded
tyty
parents:
diff changeset
29
31d688722e7a Uploaded
tyty
parents:
diff changeset
30 fasta_sequences = SeqIO.parse(open(fasta_file),'fasta');
31d688722e7a Uploaded
tyty
parents:
diff changeset
31 length_seq = {};
31d688722e7a Uploaded
tyty
parents:
diff changeset
32 for seq in fasta_sequences:
31d688722e7a Uploaded
tyty
parents:
diff changeset
33 nuc = seq.id;
31d688722e7a Uploaded
tyty
parents:
diff changeset
34 length_seq[nuc] = len(seq.seq.tostring());
31d688722e7a Uploaded
tyty
parents:
diff changeset
35
31d688722e7a Uploaded
tyty
parents:
diff changeset
36
31d688722e7a Uploaded
tyty
parents:
diff changeset
37
31d688722e7a Uploaded
tyty
parents:
diff changeset
38 mapping = {}
31d688722e7a Uploaded
tyty
parents:
diff changeset
39 transcripts = []
31d688722e7a Uploaded
tyty
parents:
diff changeset
40
31d688722e7a Uploaded
tyty
parents:
diff changeset
41 f = open(syspathrs+"map_info.txt");
31d688722e7a Uploaded
tyty
parents:
diff changeset
42 for aline in f.readlines():
31d688722e7a Uploaded
tyty
parents:
diff changeset
43 tline = aline.strip();
31d688722e7a Uploaded
tyty
parents:
diff changeset
44 tl = tline.split('\t');
31d688722e7a Uploaded
tyty
parents:
diff changeset
45 if tl[0].strip() not in transcripts:
31d688722e7a Uploaded
tyty
parents:
diff changeset
46 transcripts.append(tl[0].strip());
31d688722e7a Uploaded
tyty
parents:
diff changeset
47 mapping[tl[0].strip()] = [];
31d688722e7a Uploaded
tyty
parents:
diff changeset
48
31d688722e7a Uploaded
tyty
parents:
diff changeset
49 mapping[tl[0].strip()].append(tl[1].strip());
31d688722e7a Uploaded
tyty
parents:
diff changeset
50
31d688722e7a Uploaded
tyty
parents:
diff changeset
51 distribution = {};
31d688722e7a Uploaded
tyty
parents:
diff changeset
52 coverage = {};
31d688722e7a Uploaded
tyty
parents:
diff changeset
53 for transcript in length_seq:
31d688722e7a Uploaded
tyty
parents:
diff changeset
54 distribution[transcript] = [];
31d688722e7a Uploaded
tyty
parents:
diff changeset
55 for i in range(0, length_seq[transcript]):
31d688722e7a Uploaded
tyty
parents:
diff changeset
56 distribution[transcript].append(0);
31d688722e7a Uploaded
tyty
parents:
diff changeset
57 sum_count = float(0);
31d688722e7a Uploaded
tyty
parents:
diff changeset
58 if transcript in mapping:
31d688722e7a Uploaded
tyty
parents:
diff changeset
59 for j in range(0, len(mapping[transcript])):
31d688722e7a Uploaded
tyty
parents:
diff changeset
60 index = mapping[transcript][j];
31d688722e7a Uploaded
tyty
parents:
diff changeset
61 #count = reads[mapping[transcript][j][0]];
31d688722e7a Uploaded
tyty
parents:
diff changeset
62 sum_count = sum_count + 1;
31d688722e7a Uploaded
tyty
parents:
diff changeset
63 distribution[transcript][int(index)-1] = distribution[transcript][int(index)-1] + 1;
31d688722e7a Uploaded
tyty
parents:
diff changeset
64 coverage[transcript] = float(sum_count)/float(length_seq[transcript]);
31d688722e7a Uploaded
tyty
parents:
diff changeset
65 else:
31d688722e7a Uploaded
tyty
parents:
diff changeset
66 coverage[transcript] = 0
31d688722e7a Uploaded
tyty
parents:
diff changeset
67
31d688722e7a Uploaded
tyty
parents:
diff changeset
68
31d688722e7a Uploaded
tyty
parents:
diff changeset
69
31d688722e7a Uploaded
tyty
parents:
diff changeset
70
31d688722e7a Uploaded
tyty
parents:
diff changeset
71
31d688722e7a Uploaded
tyty
parents:
diff changeset
72 h = file(result_file, 'w')
31d688722e7a Uploaded
tyty
parents:
diff changeset
73 for transcript in length_seq:
31d688722e7a Uploaded
tyty
parents:
diff changeset
74 h.write(transcript);
31d688722e7a Uploaded
tyty
parents:
diff changeset
75 h.write('\n')
31d688722e7a Uploaded
tyty
parents:
diff changeset
76 for i in range(0, length_seq[transcript]):
31d688722e7a Uploaded
tyty
parents:
diff changeset
77 h.write(str(distribution[transcript][i]))
31d688722e7a Uploaded
tyty
parents:
diff changeset
78 h.write('\t')
31d688722e7a Uploaded
tyty
parents:
diff changeset
79 h.write('\n')
31d688722e7a Uploaded
tyty
parents:
diff changeset
80 h.write('\n')
31d688722e7a Uploaded
tyty
parents:
diff changeset
81
31d688722e7a Uploaded
tyty
parents:
diff changeset
82 os.system("rm -r "+syspathrs)
31d688722e7a Uploaded
tyty
parents:
diff changeset
83
31d688722e7a Uploaded
tyty
parents:
diff changeset
84
31d688722e7a Uploaded
tyty
parents:
diff changeset
85
31d688722e7a Uploaded
tyty
parents:
diff changeset
86 f.close();
31d688722e7a Uploaded
tyty
parents:
diff changeset
87 h.close()
31d688722e7a Uploaded
tyty
parents:
diff changeset
88
31d688722e7a Uploaded
tyty
parents:
diff changeset
89
31d688722e7a Uploaded
tyty
parents:
diff changeset
90
31d688722e7a Uploaded
tyty
parents:
diff changeset
91