0
|
1 #!/usr/bin/env python
|
|
2
|
|
3 import sys
|
|
4 import optparse
|
|
5
|
|
def stop_err( msg ):
    """Report a fatal error and terminate the program.

    msg is written verbatim to standard error (no newline is appended), then
    SystemExit is raised with status 1 so that calling processes can tell the
    run failed.
    """
    sys.stderr.write( msg )
    # A bare sys.exit() would report success (status 0) to the caller.
    sys.exit( 1 )
|
|
9
|
|
def main():
    """Trim every input line, or one tab-separated column of it, to a character range.

    Input comes from the file named by -f/--file, or from standard input when
    that option is omitted. Every non-empty line is printed to standard output:

    * with -q/--fastq, lines at even 0-based positions are echoed unchanged;
    * lines whose first character is listed in -i/--ignore are echoed unchanged
      (with -a/--ascii the list holds decimal character codes instead);
    * any other line is cut to the 1-based, inclusive range -s/--start to
      -e/--end, applied to the whole line (-c 0) or only to column -c.

    Empty lines are dropped. A start of 0 means "from the first character" and
    an end of 0 means "to the end of the line". Requesting a column beyond the
    last field of a line aborts the run through stop_err().
    """
    usage = """%prog [options]

options (listed below) default to 'None' if omitted
"""
    parser = optparse.OptionParser(usage=usage)

    parser.add_option(
        '-a','--ascii',
        dest='ascii',
        action='store_true',
        default = False,
        help='Use ascii codes to defined ignored beginnings instead of raw characters')

    parser.add_option(
        '-q','--fastq',
        dest='fastq',
        action='store_true',
        default = False,
        help='The input data in fastq format. It selected the script skips every even line since they contain sequence ids')

    parser.add_option(
        '-i','--ignore',
        dest='ignore',
        help='A comma separated list on ignored beginnings (e.g., ">,@"), or its ascii codes (e.g., "60,42") if option -a is enabled')

    parser.add_option(
        '-s','--start',
        dest='start',
        default = '0',
        help='Trim from beginning to here (1-based)')

    parser.add_option(
        '-e','--end',
        dest='end',
        default = '0',
        help='Trim from here to the ned (1-based)')

    parser.add_option(
        '-f','--file',
        dest='input_txt',
        default = False,
        help='Name of file to be chopped. STDIN is default')

    parser.add_option(
        '-c','--column',
        dest='col',
        default = '0',
        help='Column to chop. If 0 = chop the whole line')

    options, _ = parser.parse_args()

    # The literal string "None" is treated the same as an omitted option.
    invalid_starts = []
    if options.ignore and options.ignore != "None":
        invalid_starts = options.ignore.split(',')
        if options.ascii:
            invalid_starts = [chr( int( code ) ) for code in invalid_starts]

    col = int( options.col )

    # Convert the 1-based bounds to slice indices once, outside the loop.
    # Clamping matters: the default start of 0 would otherwise become -1 and
    # the slice would keep only the last character of each line.
    first = max( int( options.start ) - 1, 0 )
    end = int( options.end )
    last = end if end > 0 else None   # None slices through to the end

    if options.input_txt:
        infile = open( options.input_txt, 'r' )
    else:
        infile = sys.stdin

    try:
        for i, line in enumerate( infile ):
            line = line.rstrip( '\r\n' )
            if not line:
                continue

            # Single-argument print() behaves the same on Python 2 and 3.
            if options.fastq and i % 2 == 0:
                print( line )
                continue

            if line[0] not in invalid_starts:
                if col == 0:
                    line = line[ first : last ]
                else:
                    fields = line.split( '\t' )
                    # col is 1-based, so the last valid value is len(fields).
                    if col > len( fields ):
                        stop_err('Column %d does not exist. Check input parameters\n' % col)
                    fields[col - 1] = fields[col - 1][ first : last ]
                    line = '\t'.join( fields )
            print( line )
    finally:
        # Close files we opened ourselves; never close the caller's stdin.
        if infile is not sys.stdin:
            infile.close()
|
|
104
|
|
# Run the trimmer only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
|
106
|