annotate profilegenerator.py @ 0:70f8259b0b30 draft

Uploaded
author arkarachai-fungtammasan
date Wed, 01 Apr 2015 16:48:58 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
70f8259b0b30 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
import collections
import itertools
import sys

# Generate allele-combination profile lines from a per-major-allele count table.
#
# Usage: profilegenerator.py <count_file> <motif> <max_depth> <min_prob>
#
# The input file has one whitespace-separated record per line with at least
# three integer columns.  Column 1 is the grouping key (called the "major
# allele" in the original variable names), column 2 is the value collected
# for that key, and column 3 is a count.
# NOTE(review): the biological meaning of the columns is inferred from the
# variable names only -- confirm against the tool that produces this file.

# Rows with a count below this value are never selected.  The (misspelled)
# name is kept because it is the script's historical module-level constant.
MININUMCOUNT = 1


def read_counts(path):
    """Read the count table at *path*.

    Returns a list of ``(major, allele_field, count)`` tuples.  ``major`` and
    ``count`` are converted to ``int`` immediately; the second column is kept
    as text and only converted for rows that pass the filters, so a malformed
    value on an ignored row does not abort the run.  Blank lines are skipped.

    Raises ``IndexError``/``ValueError`` on rows with fewer than three
    columns or non-integer key/count columns.
    """
    rows = []
    with open(path) as handle:
        for line in handle:
            fields = line.split()
            if not fields:
                continue
            rows.append((int(fields[0]), fields[1], int(fields[2])))
    return rows


def select_alleles(rows, motif_size, min_prob, min_count=MININUMCOUNT):
    """Group the second-column values of the rows that pass all filters.

    A row is kept when its key is a multiple of *motif_size*, its count is at
    least *min_count*, and its count divided by the summed count of every row
    sharing the same key is at least *min_prob*.

    Returns a dict mapping each key to the list of kept values (as ints), in
    input order.  Keys whose counts sum to zero are skipped rather than
    dividing by zero.
    """
    totals = collections.defaultdict(int)
    for major, _allele, count in rows:
        totals[major] += count

    selected = collections.defaultdict(list)
    for major, allele, count in rows:
        if major % motif_size != 0:
            continue
        total = totals[major]
        if count < min_count or total == 0:
            continue
        # float() keeps this a true division on Python 2 as well.
        if count / float(total) >= min_prob:
            selected[major].append(int(allele))
    return selected


def merge_adjacent(selected):
    """Union the value lists of each pair of neighbouring keys.

    Keys are taken in ascending order; for every consecutive pair the two
    value lists are merged, de-duplicated and sorted into a tuple.  Duplicate
    tuples are collapsed.  The input lists are not modified.  With fewer than
    two keys there are no pairs and the result is empty.
    """
    keys = sorted(selected)
    merged = []
    for left, right in zip(keys, keys[1:]):
        combined = set(selected[left]) | set(selected[right])
        merged.append(tuple(sorted(combined)))
    # list(set(...)) mirrors the original de-duplication, including its order.
    return list(set(merged))


def drop_subsumed(groups):
    """Return *groups* without any tuple that is a subset of a different one.

    The relative order of the surviving tuples is unchanged.
    """
    with_sets = [(group, set(group)) for group in groups]
    return [
        group
        for group, members in with_sets
        if not any(
            group != other and members.issubset(other_members)
            for other, other_members in with_sets
        )
    ]


def iter_profile_lines(groups, motif, max_depth):
    """Yield one output line per combination (without the trailing newline).

    For every depth from 2 to *max_depth* inclusive, and every group, each
    combination-with-replacement of that many group members is emitted as
    ``chr<TAB>v1,v2,...<TAB>motif``.
    """
    for depth in range(2, max_depth + 1):
        for group in groups:
            for combo in itertools.combinations_with_replacement(group, depth):
                yield 'chr' + '\t' + ','.join(map(str, combo)) + '\t' + motif


def main(argv=None):
    """Run the script; *argv* defaults to ``sys.argv[1:]``."""
    args = sys.argv[1:] if argv is None else list(argv)
    if len(args) < 4:
        sys.exit('usage: profilegenerator.py <count_file> <motif> '
                 '<max_depth> <min_prob>')
    filename, motif = args[0], args[1]
    max_depth = int(args[2])
    min_prob = float(args[3])

    selected = select_alleles(read_counts(filename), len(motif), min_prob)
    groups = drop_subsumed(merge_adjacent(selected))
    for line in iter_profile_lines(groups, motif, max_depth):
        sys.stdout.write(line + '\n')


if __name__ == '__main__':
    main()