annotate nested_collection.py @ 2:7a0951d0e13e draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/percolator commit 4ca7fcd1587c906db3314048a223d23b63b3f038
author galaxyp
date Fri, 10 Mar 2017 03:20:52 -0500
parents 86770eea5b09
children abed51712ed0
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
86770eea5b09 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/percolator commit 0a5f9eb82877545be1c924357e585b17e01cfd1c
galaxyp
parents:
diff changeset
1 import argparse
86770eea5b09 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/percolator commit 0a5f9eb82877545be1c924357e585b17e01cfd1c
galaxyp
parents:
diff changeset
2 import os
86770eea5b09 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/percolator commit 0a5f9eb82877545be1c924357e585b17e01cfd1c
galaxyp
parents:
diff changeset
3 import re
86770eea5b09 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/percolator commit 0a5f9eb82877545be1c924357e585b17e01cfd1c
galaxyp
parents:
diff changeset
4 from collections import OrderedDict
86770eea5b09 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/percolator commit 0a5f9eb82877545be1c924357e585b17e01cfd1c
galaxyp
parents:
diff changeset
5
86770eea5b09 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/percolator commit 0a5f9eb82877545be1c924357e585b17e01cfd1c
galaxyp
parents:
diff changeset
6
86770eea5b09 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/percolator commit 0a5f9eb82877545be1c924357e585b17e01cfd1c
galaxyp
parents:
diff changeset
def get_filename_index_with_identifier(realnames, pool_id):
    """Return the positions of the file names that match a pool identifier.

    Args:
        realnames: Iterable of file names (strings) to inspect.
        pool_id: Pool identifier. It is used as a regular-expression pattern
            and may match anywhere in a name (``re.search`` semantics), so
            regex metacharacters in it are NOT taken literally.

    Returns:
        List of zero-based indices into ``realnames`` whose name matches,
        in their original order. Empty when nothing matches.

    Raises:
        re.error: If ``pool_id`` is not a valid regular expression.
    """
    # Compile once instead of handing the raw pattern to re.search per name.
    pattern = re.compile(pool_id)
    return [index for index, fn in enumerate(realnames)
            if pattern.search(fn) is not None]
86770eea5b09 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/percolator commit 0a5f9eb82877545be1c924357e585b17e01cfd1c
galaxyp
parents:
diff changeset
13
86770eea5b09 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/percolator commit 0a5f9eb82877545be1c924357e585b17e01cfd1c
galaxyp
parents:
diff changeset
14
86770eea5b09 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/percolator commit 0a5f9eb82877545be1c924357e585b17e01cfd1c
galaxyp
parents:
diff changeset
def get_batches_of_galaxyfiles(realnames, batchsize, pool_ids):
    """Yield batches of file indices, one pool at a time.

    Args:
        realnames: Sequence of file names. The yielded indices are positions
            in this sequence.
        batchsize: Number of files per batch, as an int or a numeric string.
            A falsy value (``False``, ``None``, ``0``, ``''``) disables
            batching, so each pool is yielded as a single batch.
        pool_ids: Iterable of pool identifiers (regex patterns matched
            against the names), or a falsy value to treat all files as one
            pool named ``'pool0'``.

    Yields:
        ``(pool_id, indices)`` tuples, where ``indices`` is a list of
        positions into ``realnames``. A batch never spans two pools, and the
        last batch of a pool may hold fewer than ``batchsize`` files. With
        ``pool_ids`` given, names matching no identifier are not yielded and
        names matching several identifiers are yielded once per pool.

    Raises:
        ValueError: If ``batchsize`` is truthy but cannot be converted to int.
    """
    # Convert once up front rather than for every file inside the loop.
    limit = int(batchsize) if batchsize else None
    if pool_ids:
        filegroups = OrderedDict(
            (p_id, get_filename_index_with_identifier(realnames, p_id))
            for p_id in pool_ids)
    else:
        filegroups = {'pool0': range(len(realnames))}
    for pool_id, grouped_indices in filegroups.items():
        # Start every pool with a fresh batch so batches never mix pools.
        batch = []
        for index in grouped_indices:
            batch.append(index)
            if len(batch) == limit:
                yield pool_id, batch
                batch = []
        if batch:
            # Remainder of the pool that did not fill a whole batch.
            yield pool_id, batch
86770eea5b09 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/percolator commit 0a5f9eb82877545be1c924357e585b17e01cfd1c
galaxyp
parents:
diff changeset
35
86770eea5b09 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/percolator commit 0a5f9eb82877545be1c924357e585b17e01cfd1c
galaxyp
parents:
diff changeset
36
86770eea5b09 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/percolator commit 0a5f9eb82877545be1c924357e585b17e01cfd1c
galaxyp
parents:
diff changeset
def main(argv=None):
    """Symlink Galaxy input files under pool/batch-derived dataset names.

    Parses the command line, splits the inputs into batches with
    ``get_batches_of_galaxyfiles`` and, for every file of every batch,
    creates a symlink in the current working directory named
    ``<pool>_batch<N>___inputfn<M>.mzid`` that points at the Galaxy file.
    ``N`` counts batches across all pools; ``M`` is the file's position
    inside its batch.

    Args:
        argv: Argument list to parse. ``None`` (the default) makes argparse
            read ``sys.argv[1:]``.

    Raises:
        SystemExit: From argparse, when a required option is missing or the
            two file lists differ in length.
        OSError: From ``os.symlink``, e.g. when the link name already exists.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--batchsize', dest='batchsize', default=False)
    parser.add_argument('--real-names', dest='realnames', nargs='+',
                        required=True)
    parser.add_argument('--galaxy-files', dest='galaxyfiles', nargs='+',
                        required=True)
    parser.add_argument('--pool-ids', dest='poolids', nargs='+', default=False)
    args = parser.parse_args(argv)
    # The two lists are paired by position; catch a mismatch before any
    # symlink is created instead of failing half-way with an IndexError.
    if len(args.realnames) != len(args.galaxyfiles):
        parser.error('--real-names and --galaxy-files must list the same '
                     'number of files ({} vs {})'.format(
                         len(args.realnames), len(args.galaxyfiles)))
    batches = get_batches_of_galaxyfiles(
        args.realnames, args.batchsize, args.poolids)
    for batchcount, (pool_id, batch) in enumerate(batches):
        for fncount, index in enumerate(batch):
            dsetname = '{}_batch{}___inputfn{}.mzid'.format(
                pool_id, batchcount, fncount)
            print('producing', dsetname)
            os.symlink(args.galaxyfiles[index], dsetname)
86770eea5b09 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/percolator commit 0a5f9eb82877545be1c924357e585b17e01cfd1c
galaxyp
parents:
diff changeset
50
86770eea5b09 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/percolator commit 0a5f9eb82877545be1c924357e585b17e01cfd1c
galaxyp
parents:
diff changeset
# Run the command-line entry point only when executed as a script,
# not when the module is imported.
if __name__ == '__main__':
    main()