annotate tools/filters/randomlines.py @ 0:9071e359b9a3

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:37:19 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
1 #!/usr/bin/env python
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
2 # Kanwei Li, 2010
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
3 # Selects N random lines from a file and outputs to another file
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
4
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
5 import random, sys
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
6
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
def main():
    """Select N random lines from a file and write them to another file.

    Command-line arguments (read from ``sys.argv``):
        1. path of the input file
        2. number of lines to select (an integer, at least 1)
        3. path of the output file

    The input is read once and sampled with reservoir sampling, so only the
    selected lines are held in memory.  The selected lines are written in
    the same relative order they had in the input, joined by newlines with
    no trailing newline.

    Exits with status 1 (after writing a message to stderr) when fewer than
    one line is requested or when the input has fewer lines than requested;
    the output file is not written in either case.
    """
    total_lines = int(sys.argv[2])

    if total_lines < 1:
        sys.stderr.write("Must select at least one line.")
        # Non-zero status so callers checking the exit code see the failure.
        sys.exit(1)

    kept = []
    n = 0
    # The context manager closes the input even if sampling raises.
    with open(sys.argv[1], 'r') as infile:
        for n, line in enumerate(infile, 1):
            line = line.rstrip("\n")
            if n <= total_lines:
                # Fill the reservoir with the first total_lines lines.
                kept.append(line)
            elif random.randint(1, n) <= total_lines:
                # Line n enters the reservoir with probability total_lines/n.
                # Pop-then-append (rather than overwriting in place) keeps
                # the reservoir in input order.
                kept.pop(random.randint(0, total_lines - 1))
                kept.append(line)

    if n < total_lines:
        sys.stderr.write("Error: asked to select more lines than there were in the file.")
        sys.exit(1)

    # Explicitly close the output so the data is flushed deterministically.
    with open(sys.argv[3], 'w') as outfile:
        outfile.write("\n".join(kept))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
31
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
# Run the line sampler only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()