comparison tools/filters/trimmer.py @ 0:9071e359b9a3

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:37:19 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:9071e359b9a3
1 #!/usr/bin/env python
2
3 import sys
4 import optparse
5
def stop_err( msg ):
    """Report a fatal error on stderr and terminate the script.

    Args:
        msg: Text written verbatim to standard error; no newline is
            appended, so callers include their own.

    Raises:
        SystemExit: Always. The exit status is 1 so that shells and
            workflow engines see the failure; a bare ``sys.exit()`` would
            report success (status 0) even though an error was printed.
    """
    sys.stderr.write( msg )
    sys.exit( 1 )
9
def _trim( text, start, end ):
    """Return the 1-based, inclusive ``start``..``end`` portion of *text*.

    A ``start`` below 1 means "from the first character" and an ``end`` of
    0 or less means "through the last character".
    """
    # Clamp at 0: an unclamped ``start - 1`` of -1 would slice from the
    # last character instead of from the beginning.
    first = max( start - 1, 0 )
    if end > 0:
        return text[ first : end ]
    return text[ first : ]


def main():
    """Trim every input line (or one tab-separated column of it) and print it.

    Command-line options (parsed with optparse from ``sys.argv``):
        -f/--file    input file; standard input is read when omitted
        -s/--start   1-based first position to keep (0 keeps from the start)
        -e/--end     1-based last position to keep (0 keeps to the end)
        -c/--column  1-based tab-separated column to trim; 0 trims the
                     whole line
        -i/--ignore  comma separated first characters marking lines that
                     are printed untrimmed; the literal "None" disables it
        -a/--ascii   interpret the --ignore items as decimal character codes
        -q/--fastq   print every line with an even 0-based index untrimmed

    Empty lines are dropped from the output.

    Raises:
        SystemExit: via ``stop_err`` when the requested column does not
            exist in a line.
        ValueError: when --start, --end, --column or an --ignore code
            (with -a) is not an integer.
    """
    usage = """%prog [options]

options (listed below) default to 'None' if omitted
"""
    parser = optparse.OptionParser(usage=usage)

    parser.add_option(
        '-a','--ascii',
        dest='ascii',
        action='store_true',
        default = False,
        help='Use ascii codes to defined ignored beginnings instead of raw characters')

    parser.add_option(
        '-q','--fastq',
        dest='fastq',
        action='store_true',
        default = False,
        help='The input data in fastq format. It selected the script skips every even line since they contain sequence ids')

    # NOTE(review): the example codes in this help text (60, 42) are '<'
    # and '*'; the codes for '>' and '@' are 62 and 64. The text is left
    # untouched here because it is user-facing output.
    parser.add_option(
        '-i','--ignore',
        dest='ignore',
        help='A comma separated list on ignored beginnings (e.g., ">,@"), or its ascii codes (e.g., "60,42") if option -a is enabled')

    parser.add_option(
        '-s','--start',
        dest='start',
        default = '0',
        help='Trim from beginning to here (1-based)')

    parser.add_option(
        '-e','--end',
        dest='end',
        default = '0',
        help='Trim from here to the ned (1-based)')

    parser.add_option(
        '-f','--file',
        dest='input_txt',
        default = False,
        help='Name of file to be chopped. STDIN is default')

    parser.add_option(
        '-c','--column',
        dest='col',
        default = '0',
        help='Column to chop. If 0 = chop the whole line')

    options, args = parser.parse_args()

    # The literal string "None" is treated the same as an omitted option.
    invalid_starts = []
    if options.ignore and options.ignore != "None":
        invalid_starts = options.ignore.split( ',' )
        if options.ascii:
            invalid_starts = [ chr( int( code ) ) for code in invalid_starts ]

    # Parse the numeric options once instead of once per input line.
    col = int( options.col )
    start = int( options.start )
    end = int( options.end )

    if options.input_txt:
        infile = open( options.input_txt, 'r' )
    else:
        infile = sys.stdin

    try:
        for i, line in enumerate( infile ):
            line = line.rstrip( '\r\n' )
            # NOTE(review): the original indentation was not available;
            # empty lines are assumed to be skipped without output -
            # confirm against the upstream file.
            if not line:
                continue

            # In fastq mode lines with an even 0-based index pass through
            # unchanged.
            if options.fastq and i % 2 == 0:
                sys.stdout.write( line + '\n' )
                continue

            if line[0] not in invalid_starts:
                if col == 0:
                    line = _trim( line, start, end )
                else:
                    fields = line.split( '\t' )
                    # Column numbers are 1-based, so the last valid value
                    # is len( fields ).
                    if col > len( fields ):
                        stop_err('Column %d does not exist. Check input parameters\n' % col)
                    fields[col - 1] = _trim( fields[col - 1], start, end )
                    line = '\t'.join( fields )
            sys.stdout.write( line + '\n' )
    finally:
        # Only close what this function opened; stdin belongs to the caller.
        if infile is not sys.stdin:
            infile.close()
104
# Run the trimmer only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
106