comparison hcluster_sg_parser.py @ 1:17aa68582a05 draft

planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/hcluster_sg_parser commit a79c8f1799189754eae80aede6fbe5428570f36b
author earlhaminst
date Fri, 20 Jan 2017 06:13:23 -0500
parents
children f9e418125021
comparison
equal deleted inserted replaced
0:dbc49bd1a3e9 1:17aa68582a05
1 """
2 A simple parser to convert the hcluster_sg 3-column output into lists of IDs, one list for each cluster.
3
4 When a minimum and/or maximum number of cluster elements is specified, the IDs contained in the filtered-out clusters are collected in the "discarded IDS" output dataset.
5
6 Usage:
7
8 python hcluster_sg_parser.py [-m <N>] [-M <N>] <file> <discarded_out>
9 """
10 import optparse
11 import sys
12
13
def main():
    """Split the hcluster_sg 3-column output into one ID list per cluster.

    Command line: ``[-m <N>] [-M <N>] <file> <discarded_out>``.

    Every non-blank line of ``<file>`` must contain exactly three
    tab-separated columns: the cluster ID, the number of elements and the
    comma-separated list of IDs.  A cluster whose number of elements lies
    within ``[min, max]`` (both bounds inclusive) is written, one ID per
    line, to ``<cluster_id>_output.txt`` in the current working directory.
    The IDs of every other cluster are written, one per line, to
    ``<discarded_out>``.

    Exits via ``parser.error`` (status 2) when fewer than two positional
    arguments are given.  Raises ``ValueError`` when a non-blank line does
    not have exactly three columns or its second column is not an integer.
    """
    parser = optparse.OptionParser(usage='%prog [-m <N>] [-M <N>] <file> <discarded_out>')
    parser.add_option('-m', '--min', type='int', default=0, help='Minimum number of cluster elements')
    parser.add_option('-M', '--max', type='int', default=sys.maxsize, help='Maximum number of cluster elements')
    options, args = parser.parse_args()
    if len(args) < 2:
        # Report a usage error instead of failing later with an IndexError.
        parser.error('missing arguments: <file> <discarded_out>')

    # The discarded dataset is opened first so that it is always created,
    # even when no cluster ends up being filtered out.
    with open(args[1], 'w') as discarded_out, open(args[0]) as fh:
        for line in fh:
            line = line.rstrip()
            if not line:
                # Tolerate blank lines (e.g. a stray empty line at the end).
                continue
            cluster_id, n_ids, id_list = line.split('\t')
            n_ids = int(n_ids)
            id_text = id_list.replace(',', '\n')
            if not id_text.endswith('\n'):
                # Without a trailing comma in the input the last ID would be
                # left unterminated and, in the shared discarded file, would
                # be fused with the first ID of the next discarded cluster.
                id_text += '\n'
            if options.min <= n_ids <= options.max:
                with open(cluster_id + '_output.txt', 'w') as cluster_out:
                    cluster_out.write(id_text)
            else:
                discarded_out.write(id_text)
33
34
# Run the parser only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == '__main__':
    main()