Mercurial > repos > xuebing > sharplabtool
comparison tools/filters/trimmer.py @ 0:9071e359b9a3
Uploaded
author | xuebing |
---|---|
date | Fri, 09 Mar 2012 19:37:19 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:9071e359b9a3 |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 import sys | |
4 import optparse | |
5 | |
def stop_err(msg):
    """Report a fatal error and terminate the script.

    Writes *msg* verbatim to standard error (no newline is appended, so the
    caller supplies one) and exits with status 1 so that shells and workflow
    engines see the run as failed.

    Raises:
        SystemExit: always, with exit code 1.
    """
    sys.stderr.write(msg)
    # A bare sys.exit() would report success (status 0) even though we are
    # aborting because of an error.
    sys.exit(1)
9 | |
def main():
    """Trim each input line, or one tab-separated column of it, to a range.

    Command-line options (read from ``sys.argv``):
      -a/--ascii   interpret the --ignore list as ASCII codes
      -q/--fastq   pass through lines whose 0-based index is even unchanged
      -i/--ignore  comma separated list of leading characters; lines that
                   start with one of them are printed untrimmed
      -s/--start   1-based first position to keep (0 or 1 = from the start)
      -e/--end     1-based last position to keep (0 = to the end of the line)
      -f/--file    input file name; standard input when omitted
      -c/--column  1-based tab-separated column to trim (0 = the whole line)

    Empty lines are dropped. Every other line is written to standard output,
    trimmed unless it was skipped by --fastq or --ignore.

    Exits through ``stop_err`` when the requested column does not exist in
    a line.
    """
    usage = (
        "%prog [options]\n"
        "\n"
        "options (listed below) default to 'None' if omitted\n"
    )
    parser = optparse.OptionParser(usage=usage)

    parser.add_option(
        '-a', '--ascii',
        dest='ascii',
        action='store_true',
        default=False,
        help='Use ascii codes to define ignored beginnings instead of raw characters')

    parser.add_option(
        '-q', '--fastq',
        dest='fastq',
        action='store_true',
        default=False,
        help='The input data in fastq format. If selected the script skips every even line since they contain sequence ids')

    parser.add_option(
        '-i', '--ignore',
        dest='ignore',
        help='A comma separated list of ignored beginnings (e.g., ">,@"), or its ascii codes (e.g., "62,64") if option -a is enabled')

    parser.add_option(
        '-s', '--start',
        dest='start',
        default='0',
        help='Trim from beginning to here (1-based)')

    parser.add_option(
        '-e', '--end',
        dest='end',
        default='0',
        help='Trim from here to the end (1-based)')

    parser.add_option(
        '-f', '--file',
        dest='input_txt',
        default=False,
        help='Name of file to be chopped. STDIN is default')

    parser.add_option(
        '-c', '--column',
        dest='col',
        default='0',
        help='Column to chop. If 0 = chop the whole line')

    options, args = parser.parse_args()

    # The literal string "None" is treated the same as an omitted option.
    invalid_starts = []
    if options.ignore and options.ignore != "None":
        invalid_starts = options.ignore.split(',')
        if options.ascii:
            invalid_starts = [chr(int(code)) for code in invalid_starts]

    # Convert the numeric options once instead of on every line.
    col = int(options.col)
    end = int(options.end)
    # Clamp to 0: with the default start of 0 an unclamped "start - 1" would
    # be -1 and the slice would keep only the last character.
    first = max(int(options.start) - 1, 0)
    # None as the slice stop means "through the end of the string".
    last = end if end > 0 else None

    if options.input_txt:
        infile = open(options.input_txt, 'r')
    else:
        infile = sys.stdin

    try:
        for i, line in enumerate(infile):
            line = line.rstrip('\r\n')
            if not line:
                continue

            # In fastq mode the lines at even 0-based indexes (sequence id
            # and '+' separator lines) are echoed untouched.
            if options.fastq and i % 2 == 0:
                sys.stdout.write(line + '\n')
                continue

            if line[0] not in invalid_starts:
                if col == 0:
                    line = line[first:last]
                else:
                    fields = line.split('\t')
                    # col is 1-based, so it must lie in 1..len(fields).
                    if col < 1 or col > len(fields):
                        stop_err('Column %d does not exist. Check input parameters\n' % col)
                    fields[col - 1] = fields[col - 1][first:last]
                    line = '\t'.join(fields)
            sys.stdout.write(line + '\n')
    finally:
        # Only close what we opened; stdin belongs to the caller.
        if infile is not sys.stdin:
            infile.close()
104 | |
# Run the trimmer only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
106 |