annotate pileup_interval.py @ 4:9c1c0b947e46 draft default tip

"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/pileup_interval commit 8b2095c59ecc2e94c58a42e2e04dbcecdc823dbf"
author devteam
date Fri, 15 Jan 2021 11:38:56 +0000
parents a110f9d6ae24
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
a110f9d6ae24 Uploaded tool tarball.
devteam
parents:
diff changeset
1 #!/usr/bin/env python
a110f9d6ae24 Uploaded tool tarball.
devteam
parents:
diff changeset
2
a110f9d6ae24 Uploaded tool tarball.
devteam
parents:
diff changeset
3 """
a110f9d6ae24 Uploaded tool tarball.
devteam
parents:
diff changeset
4 Condenses pileup format into ranges of bases.
a110f9d6ae24 Uploaded tool tarball.
devteam
parents:
diff changeset
5
a110f9d6ae24 Uploaded tool tarball.
devteam
parents:
diff changeset
6 usage: %prog [options]
a110f9d6ae24 Uploaded tool tarball.
devteam
parents:
diff changeset
7 -i, --input=i: Input pileup file
a110f9d6ae24 Uploaded tool tarball.
devteam
parents:
diff changeset
8 -o, --output=o: Output pileup
a110f9d6ae24 Uploaded tool tarball.
devteam
parents:
diff changeset
9 -c, --coverage=c: Coverage
a110f9d6ae24 Uploaded tool tarball.
devteam
parents:
diff changeset
10 -f, --format=f: Pileup format
a110f9d6ae24 Uploaded tool tarball.
devteam
parents:
diff changeset
11 -b, --base=b: Base to select
a110f9d6ae24 Uploaded tool tarball.
devteam
parents:
diff changeset
12 -s, --seq_column=s: Sequence column
a110f9d6ae24 Uploaded tool tarball.
devteam
parents:
diff changeset
13 -l, --loc_column=l: Base location column
a110f9d6ae24 Uploaded tool tarball.
devteam
parents:
diff changeset
14 -r, --base_column=r: Reference base column
a110f9d6ae24 Uploaded tool tarball.
devteam
parents:
diff changeset
15 -C, --cvrg_column=C: Coverage column
a110f9d6ae24 Uploaded tool tarball.
devteam
parents:
diff changeset
16 """
a110f9d6ae24 Uploaded tool tarball.
devteam
parents:
diff changeset
17 import sys
a110f9d6ae24 Uploaded tool tarball.
devteam
parents:
diff changeset
18
4
9c1c0b947e46 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/pileup_interval commit 8b2095c59ecc2e94c58a42e2e04dbcecdc823dbf"
devteam
parents: 0
diff changeset
19 from bx.cookbook import doc_optparse
9c1c0b947e46 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/pileup_interval commit 8b2095c59ecc2e94c58a42e2e04dbcecdc823dbf"
devteam
parents: 0
diff changeset
20
0
a110f9d6ae24 Uploaded tool tarball.
devteam
parents:
diff changeset
21
a110f9d6ae24 Uploaded tool tarball.
devteam
parents:
diff changeset
def __main__():
    """Condense a pileup file into intervals of consecutive covered bases.

    Options are parsed from the module docstring by ``doc_optparse``.  Each
    input line is split on tabs and four fields are read from it: sequence
    name, location, base and coverage.  Which columns hold those fields is
    fixed for the ``six`` and ``ten`` formats and taken from the
    ``*_column`` options (1-based) for any other format value.

    A run is extended while successive lines are on the same sequence, at
    consecutive locations, and have coverage of at least ``--coverage``.
    When a run ends, one tab-separated line is written to the output:
    sequence name, first location minus one, last location, and the
    concatenated bases of the run.

    Reading stops at end of file or at the first blank line.

    Exits via ``sys.exit`` with an explanatory message when a line has too
    few columns (``IndexError``) or when the location or coverage field is
    not an integer (``ValueError``).
    """
    # Parse Command Line
    options, args = doc_optparse.parse(__doc__)
    coverage = int(options.coverage)

    # Resolve the 0-based column indexes before touching any file.
    if options.format == 'six':
        seqIndex, locIndex, baseIndex, covIndex = 0, 1, 2, 3
    elif options.format == 'ten':
        seqIndex, locIndex, covIndex = 0, 1, 7
        baseIndex = 2 if options.base == 'first' else 3
    else:
        seqIndex = int(options.seq_column) - 1
        locIndex = int(options.loc_column) - 1
        baseIndex = int(options.base_column) - 1
        covIndex = int(options.cvrg_column) - 1

    intervalFormat = '%s\t%s\t%s\t%s\n'
    lastSeq = None   # sequence name of the previous line (None before the first line)
    lastLoc = None   # location of the previous line (None before the first line)
    startLoc = -1    # first location of the run being accumulated
    bases = []       # bases of the run being accumulated; empty means no open run

    # The context manager closes both files even when sys.exit() is raised below.
    with open(options.input, 'r') as fin, open(options.output, 'w') as fout:
        for inLine in fin:
            if inLine.strip() == '':
                # A blank line terminates processing.
                break
            # Drop the line terminator first so it cannot end up inside the
            # base (or sequence) field when that field is the last column.
            lineParts = inLine.rstrip('\r\n').split('\t')
            try:
                seq = lineParts[seqIndex]
                loc = int(lineParts[locIndex])
                base = lineParts[baseIndex]
                cov = int(lineParts[covIndex])
            except IndexError as ei:
                if options.format == 'ten':
                    sys.exit('It appears that you have selected 10 columns while your file has 6. Make sure that the number of columns you specify matches the number in your file.\n' + str(ei))
                else:
                    sys.exit('There appears to be something wrong with your column index values.\n' + str(ei))
            except ValueError as ev:
                if options.format == 'six':
                    sys.exit('It appears that you have selected 6 columns while your file has 10. Make sure that the number of columns you specify matches the number in your file.\n' + str(ev))
                else:
                    sys.exit('There appears to be something wrong with your column index values.\n' + str(ev))

            covered = cov >= coverage
            adjacent = lastLoc is None or loc == lastLoc + 1
            sameSeq = lastSeq is None or seq == lastSeq

            if covered and adjacent and sameSeq:
                # Extend the current run, opening one if none is in progress.
                if not bases:
                    startLoc = loc
                bases.append(base)
            else:
                # The run (if any) ends on the previous line: emit it.
                if bases:
                    fout.write(intervalFormat % (lastSeq, startLoc - 1, lastLoc, ''.join(bases)))
                if covered:
                    # This line starts a new run on its own.
                    startLoc = loc
                    bases = [base]
                else:
                    startLoc = -1
                    bases = []
            lastSeq = seq
            lastLoc = loc

        # Emit the run still open when the input ended.
        if bases:
            fout.write(intervalFormat % (lastSeq, startLoc - 1, lastLoc, ''.join(bases)))
a110f9d6ae24 Uploaded tool tarball.
devteam
parents:
diff changeset
109
4
9c1c0b947e46 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/pileup_interval commit 8b2095c59ecc2e94c58a42e2e04dbcecdc823dbf"
devteam
parents: 0
diff changeset
110
9c1c0b947e46 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/pileup_interval commit 8b2095c59ecc2e94c58a42e2e04dbcecdc823dbf"
devteam
parents: 0
diff changeset
# Script entry point: run the tool only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    __main__()