Mercurial > repos > xuebing > sharplabtool
diff tools/filters/randomlines.py @ 0:9071e359b9a3
Uploaded
author | xuebing |
---|---|
date | Fri, 09 Mar 2012 19:37:19 -0500 |
parents | |
children |
line wrap: on
line diff
#!/usr/bin/env python
# Kanwei Li, 2010
# Selects N random lines from a file and outputs to another file

import random
import sys


def main():
    """Select N random lines from an input file and write them to an output file.

    Command-line arguments:
        sys.argv[1] -- path of the input file to sample from
        sys.argv[2] -- number of lines to select (integer, must be >= 1)
        sys.argv[3] -- path of the output file

    The input is streamed once and at most N lines are held in memory
    (reservoir sampling).  The selected lines are written joined by "\n",
    with no trailing newline, in the same relative order they had in the
    input file.

    Exits with status 1 (after writing a message to stderr) when N < 1 or
    when the input file has fewer than N lines; in both cases the output
    file is not written.  Raises ValueError if sys.argv[2] is not an integer.
    """
    total_lines = int(sys.argv[2])

    if total_lines < 1:
        sys.stderr.write( "Must select at least one line." )
        # Non-zero status so callers can detect the failure.
        sys.exit(1)

    kept = []
    n = 0
    # 'with' guarantees the input handle is closed even if reading fails.
    with open(sys.argv[1], 'r') as infile:
        for line in infile:
            line = line.rstrip("\n")
            n += 1
            if n <= total_lines:
                # Fill the reservoir with the first N lines.
                kept.append(line)
            elif random.randint(1, n) <= total_lines:
                # Keep the n-th line with probability N/n, evicting a
                # uniformly chosen current member.  Pop-then-append (rather
                # than overwriting the slot) keeps 'kept' in file order.
                kept.pop(random.randint(0, total_lines - 1))
                kept.append(line)

    if n < total_lines:
        sys.stderr.write( "Error: asked to select more lines than there were in the file." )
        sys.exit(1)

    # 'with' ensures the output is flushed and closed deterministically.
    with open(sys.argv[3], 'w') as outfile:
        outfile.write("\n".join(kept))


if __name__ == "__main__":
    main()