annotate substitution_rates.py @ 0:d1b35bcdaacc draft

Imported from capsule None
author devteam
date Tue, 01 Apr 2014 10:49:05 -0400
parents
children 3d9544198582
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
1 #!/usr/bin/env python
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
2 #guruprasad Ananda
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
3 """
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
4 Estimates substitution rates from pairwise alignments using JC69 model.
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
5 """
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
6
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
7 from galaxy import eggs
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
8 from galaxy.tools.util.galaxyops import *
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
9 from galaxy.tools.util import maf_utilities
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
10 import bx.align.maf
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
11 import fileinput
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
12 import sys
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
13
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
14 def stop_err(msg):
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
15 sys.stderr.write(msg)
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
16 sys.exit()
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
17
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
18
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
19 if len(sys.argv) < 3:
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
20 stop_err("Incorrect number of arguments.")
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
21
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
22 inp_file = sys.argv[1]
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
23 out_file = sys.argv[2]
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
24 fout = open(out_file, 'w')
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
25 int_file = sys.argv[3]
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
26 if int_file != "None": #The user has specified an interval file
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
27 dbkey_i = sys.argv[4]
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
28 chr_col_i, start_col_i, end_col_i, strand_col_i = parse_cols_arg( sys.argv[5] )
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
29
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
30
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
31 def rateEstimator(block):
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
32 global alignlen, mismatches
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
33
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
34 src1 = block.components[0].src
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
35 sequence1 = block.components[0].text
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
36 start1 = block.components[0].start
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
37 end1 = block.components[0].end
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
38 len1 = int(end1)-int(start1)
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
39 len1_withgap = len(sequence1)
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
40 mismatch = 0.0
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
41
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
42 for seq in range (1, len(block.components)):
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
43 src2 = block.components[seq].src
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
44 sequence2 = block.components[seq].text
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
45 start2 = block.components[seq].start
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
46 end2 = block.components[seq].end
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
47 len2 = int(end2)-int(start2)
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
48 for nt in range(len1_withgap):
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
49 if sequence1[nt] not in '-#$^*?' and sequence2[nt] not in '-#$^*?': # Not a gap or masked character
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
50 if sequence1[nt].upper() != sequence2[nt].upper():
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
51 mismatch += 1
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
52
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
53 if int_file == "None":
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
54 p = mismatch/min(len1, len2)
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
55 print >> fout, "%s\t%s\t%s\t%s\t%s\t%s\t%d\t%d\t%.4f" % ( src1, start1, end1, src2, start2, end2, min(len1, len2), mismatch, p )
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
56 else:
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
57 mismatches += mismatch
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
58 alignlen += min(len1, len2)
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
59
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
60
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
61 def main():
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
62 skipped = 0
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
63 not_pairwise = 0
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
64
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
65 if int_file == "None":
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
66 try:
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
67 maf_reader = bx.align.maf.Reader( open(inp_file, 'r') )
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
68 except:
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
69 stop_err("Your MAF file appears to be malformed.")
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
70 print >> fout, "#Seq1\tStart1\tEnd1\tSeq2\tStart2\tEnd2\tL\tN\tp"
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
71 for block in maf_reader:
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
72 if len(block.components) != 2:
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
73 not_pairwise += 1
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
74 continue
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
75 try:
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
76 rateEstimator(block)
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
77 except:
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
78 skipped += 1
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
79 else:
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
80 index, index_filename = maf_utilities.build_maf_index( inp_file, species = [dbkey_i] )
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
81 if index is None:
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
82 print >> sys.stderr, "Your MAF file appears to be malformed."
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
83 sys.exit()
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
84 win = NiceReaderWrapper( fileinput.FileInput( int_file ),
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
85 chrom_col=chr_col_i,
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
86 start_col=start_col_i,
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
87 end_col=end_col_i,
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
88 strand_col=strand_col_i,
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
89 fix_strand=True)
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
90 species = None
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
91 mincols = 0
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
92 global alignlen, mismatches
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
93
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
94 for interval in win:
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
95 alignlen = 0
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
96 mismatches = 0.0
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
97 src = "%s.%s" % ( dbkey_i, interval.chrom )
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
98 for block in maf_utilities.get_chopped_blocks_for_region( index, src, interval, species, mincols ):
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
99 if len(block.components) != 2:
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
100 not_pairwise += 1
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
101 continue
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
102 try:
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
103 rateEstimator(block)
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
104 except:
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
105 skipped += 1
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
106 if alignlen:
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
107 p = mismatches/alignlen
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
108 else:
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
109 p = 'NA'
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
110 interval.fields.append(str(alignlen))
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
111 interval.fields.append(str(mismatches))
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
112 interval.fields.append(str(p))
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
113 print >> fout, "\t".join(interval.fields)
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
114 #num_blocks += 1
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
115
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
116 if not_pairwise:
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
117 print "Skipped %d non-pairwise blocks" % (not_pairwise)
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
118 if skipped:
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
119 print "Skipped %d blocks as invalid" % (skipped)
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
120
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
121
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
122 if __name__ == "__main__":
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
123 main()