annotate substitution_rates.py @ 1:3d9544198582 draft default tip

planemo upload commit fa9a2569c9ece4b03f622d0b85b0e5376a25084c-dirty
author devteam
date Tue, 29 Sep 2015 11:09:15 -0400
parents d1b35bcdaacc
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
1 #!/usr/bin/env python
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
2 #guruprasad Ananda
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
3 """
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
4 Estimates substitution rates from pairwise alignments using JC69 model.
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
5 """
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
6
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
7 from galaxy import eggs
1
3d9544198582 planemo upload commit fa9a2569c9ece4b03f622d0b85b0e5376a25084c-dirty
devteam
parents: 0
diff changeset
8 from galaxy.tools.util.galaxyops import parse_cols_arg
0
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
9 from galaxy.tools.util import maf_utilities
1
3d9544198582 planemo upload commit fa9a2569c9ece4b03f622d0b85b0e5376a25084c-dirty
devteam
parents: 0
diff changeset
10 from bx.intervals.io import NiceReaderWrapper
0
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
11 import bx.align.maf
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
12 import fileinput
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
13 import sys
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
14
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
15 def stop_err(msg):
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
16 sys.stderr.write(msg)
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
17 sys.exit()
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
18
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
19
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
20 if len(sys.argv) < 3:
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
21 stop_err("Incorrect number of arguments.")
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
22
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
23 inp_file = sys.argv[1]
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
24 out_file = sys.argv[2]
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
25 fout = open(out_file, 'w')
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
26 int_file = sys.argv[3]
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
27 if int_file != "None": #The user has specified an interval file
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
28 dbkey_i = sys.argv[4]
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
29 chr_col_i, start_col_i, end_col_i, strand_col_i = parse_cols_arg( sys.argv[5] )
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
30
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
31
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
32 def rateEstimator(block):
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
33 global alignlen, mismatches
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
34
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
35 src1 = block.components[0].src
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
36 sequence1 = block.components[0].text
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
37 start1 = block.components[0].start
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
38 end1 = block.components[0].end
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
39 len1 = int(end1)-int(start1)
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
40 len1_withgap = len(sequence1)
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
41 mismatch = 0.0
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
42
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
43 for seq in range (1, len(block.components)):
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
44 src2 = block.components[seq].src
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
45 sequence2 = block.components[seq].text
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
46 start2 = block.components[seq].start
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
47 end2 = block.components[seq].end
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
48 len2 = int(end2)-int(start2)
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
49 for nt in range(len1_withgap):
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
50 if sequence1[nt] not in '-#$^*?' and sequence2[nt] not in '-#$^*?': # Not a gap or masked character
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
51 if sequence1[nt].upper() != sequence2[nt].upper():
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
52 mismatch += 1
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
53
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
54 if int_file == "None":
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
55 p = mismatch/min(len1, len2)
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
56 print >> fout, "%s\t%s\t%s\t%s\t%s\t%s\t%d\t%d\t%.4f" % ( src1, start1, end1, src2, start2, end2, min(len1, len2), mismatch, p )
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
57 else:
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
58 mismatches += mismatch
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
59 alignlen += min(len1, len2)
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
60
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
61
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
62 def main():
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
63 skipped = 0
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
64 not_pairwise = 0
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
65
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
66 if int_file == "None":
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
67 try:
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
68 maf_reader = bx.align.maf.Reader( open(inp_file, 'r') )
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
69 except:
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
70 stop_err("Your MAF file appears to be malformed.")
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
71 print >> fout, "#Seq1\tStart1\tEnd1\tSeq2\tStart2\tEnd2\tL\tN\tp"
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
72 for block in maf_reader:
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
73 if len(block.components) != 2:
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
74 not_pairwise += 1
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
75 continue
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
76 try:
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
77 rateEstimator(block)
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
78 except:
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
79 skipped += 1
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
80 else:
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
81 index, index_filename = maf_utilities.build_maf_index( inp_file, species = [dbkey_i] )
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
82 if index is None:
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
83 print >> sys.stderr, "Your MAF file appears to be malformed."
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
84 sys.exit()
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
85 win = NiceReaderWrapper( fileinput.FileInput( int_file ),
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
86 chrom_col=chr_col_i,
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
87 start_col=start_col_i,
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
88 end_col=end_col_i,
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
89 strand_col=strand_col_i,
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
90 fix_strand=True)
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
91 species = None
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
92 mincols = 0
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
93 global alignlen, mismatches
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
94
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
95 for interval in win:
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
96 alignlen = 0
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
97 mismatches = 0.0
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
98 src = "%s.%s" % ( dbkey_i, interval.chrom )
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
99 for block in maf_utilities.get_chopped_blocks_for_region( index, src, interval, species, mincols ):
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
100 if len(block.components) != 2:
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
101 not_pairwise += 1
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
102 continue
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
103 try:
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
104 rateEstimator(block)
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
105 except:
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
106 skipped += 1
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
107 if alignlen:
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
108 p = mismatches/alignlen
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
109 else:
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
110 p = 'NA'
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
111 interval.fields.append(str(alignlen))
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
112 interval.fields.append(str(mismatches))
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
113 interval.fields.append(str(p))
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
114 print >> fout, "\t".join(interval.fields)
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
115 #num_blocks += 1
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
116
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
117 if not_pairwise:
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
118 print "Skipped %d non-pairwise blocks" % (not_pairwise)
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
119 if skipped:
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
120 print "Skipped %d blocks as invalid" % (skipped)
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
121
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
122
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
123 if __name__ == "__main__":
d1b35bcdaacc Imported from capsule None
devteam
parents:
diff changeset
124 main()