comparison nested_collection.py @ 1:86770eea5b09 draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/percolator commit 0a5f9eb82877545be1c924357e585b17e01cfd1c
author galaxyp
date Sat, 04 Mar 2017 20:36:03 -0500
parents
children 7a0951d0e13e
comparison
equal deleted inserted replaced
0:3a49065a05d6 1:86770eea5b09
import argparse
import os
import re
from collections import OrderedDict
5
6
def get_filename_index_with_identifier(realnames, pool_id):
    """Return the positions of the names in realnames that match pool_id.

    pool_id is treated as a regular expression and applied with a search,
    so it may match anywhere inside a name, not only at the start.

    Args:
        realnames: iterable of file name strings.
        pool_id: regular expression pattern (string) identifying a pool.

    Returns:
        List of integer indices into realnames, in input order, of the names
        that match. Empty when nothing matches.

    Raises:
        re.error: if pool_id is not a valid regular expression.
    """
    # Compile once up front rather than passing the raw pattern on every name.
    pattern = re.compile(pool_id)
    return [index for index, fn in enumerate(realnames)
            if pattern.search(fn) is not None]
13
14
def get_batches_of_galaxyfiles(realnames, batchsize, pool_ids):
    """Yield batches of file indices, grouped by pool.

    Files are first grouped per pool: with pool_ids, each pool gets the
    indices of the names matching that identifier; without pool_ids, all
    files form one pool named 'pool0'. Each pool is then cut into batches
    of at most batchsize indices. A batch never spans two pools.

    Args:
        realnames: list of input file names.
        batchsize: maximum number of files per batch, as an int or a string
            convertible with int(). A falsy value (or zero) disables
            splitting, giving one batch per pool.
        pool_ids: list of pool identifiers (regular expressions), or a falsy
            value when no pooling is wanted. A repeated identifier is
            processed once.

    Yields:
        (pool_id, batch) tuples, where batch is a non-empty list of indices
        into realnames. Pools without any matching file yield nothing.

    Raises:
        ValueError: if batchsize is truthy but not convertible to int.
    """
    if pool_ids:
        filegroups = OrderedDict(
            (p_id, get_filename_index_with_identifier(realnames, p_id))
            for p_id in pool_ids)
    else:
        # No pooling requested: every file belongs to one default pool.
        filegroups = {'pool0': range(len(realnames))}
    # Convert once; 0 means "do not split".
    max_size = int(batchsize) if batchsize else 0
    for pool_id, grouped_indices in filegroups.items():
        batch = []
        for index in grouped_indices:
            batch.append(index)
            if max_size and len(batch) == max_size:
                yield pool_id, batch
                batch = []
        # Flush the remainder so that a batch never mixes pools.
        if batch:
            yield pool_id, batch
35
36
def main():
    """Create one symlink per input file, named after its pool and batch.

    Command-line arguments:
        --batchsize: maximum number of files per batch; when omitted each
            pool becomes a single batch.
        --real-names: names of the input files, used for pool matching.
        --galaxy-files: paths of the input files, positionally aligned with
            --real-names.
        --pool-ids: optional pool identifiers (regular expressions).

    Every file of every batch is symlinked as
    '<pool>___batch<B>_inputfn<F>.mzid', where B counts batches across all
    pools and F counts files inside the batch. The link names are relative,
    so the links are created in the current working directory.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--batchsize', dest='batchsize', default=False)
    # Both lists are indexed unconditionally below, so they are mandatory.
    parser.add_argument('--real-names', dest='realnames', nargs='+',
                        required=True)
    parser.add_argument('--galaxy-files', dest='galaxyfiles', nargs='+',
                        required=True)
    parser.add_argument('--pool-ids', dest='poolids', nargs='+', default=False)
    args = parser.parse_args()
    # Batches hold indices into realnames that are used to pick galaxyfiles;
    # a length mismatch would raise IndexError late or silently skip files.
    if len(args.realnames) != len(args.galaxyfiles):
        parser.error('--real-names and --galaxy-files must have the same '
                     'number of entries')
    batches = get_batches_of_galaxyfiles(
        args.realnames, args.batchsize, args.poolids)
    for batchcount, (pool_id, batch) in enumerate(batches):
        for fncount, index in enumerate(batch):
            batchfile = args.galaxyfiles[index]
            dsetname = '{}___batch{}_inputfn{}.mzid'.format(
                pool_id, batchcount, fncount)
            print('producing', dsetname)
            os.symlink(batchfile, dsetname)
50
# Run the command-line entry point only when executed as a script,
# not when the module is imported.
if __name__ == '__main__':
    main()