annotate metafiles2pin.py @ 0:3a49065a05d6 draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/percolator commit b4871f9659a924a68430aed3a93f4f9bad733fd6
author galaxyp
date Wed, 07 Dec 2016 16:43:51 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
3a49065a05d6 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/percolator commit b4871f9659a924a68430aed3a93f4f9bad733fd6
galaxyp
parents:
diff changeset
1 import argparse
3a49065a05d6 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/percolator commit b4871f9659a924a68430aed3a93f4f9bad733fd6
galaxyp
parents:
diff changeset
2 import os
3a49065a05d6 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/percolator commit b4871f9659a924a68430aed3a93f4f9bad733fd6
galaxyp
parents:
diff changeset
3 import re
3a49065a05d6 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/percolator commit b4871f9659a924a68430aed3a93f4f9bad733fd6
galaxyp
parents:
diff changeset
4 from collections import OrderedDict
3a49065a05d6 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/percolator commit b4871f9659a924a68430aed3a93f4f9bad733fd6
galaxyp
parents:
diff changeset
5
3a49065a05d6 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/percolator commit b4871f9659a924a68430aed3a93f4f9bad733fd6
galaxyp
parents:
diff changeset
6
3a49065a05d6 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/percolator commit b4871f9659a924a68430aed3a93f4f9bad733fd6
galaxyp
parents:
diff changeset
def get_filename_index_with_identifier(spectrafiles, pool_id):
    """Return the positions of the spectra file names matching a pool.

    ``pool_id`` is passed to ``re.search`` as the pattern, so it is
    interpreted as a regular expression and may match anywhere inside a
    file name. The matching positions are returned as a list in
    ascending order; the list is empty when nothing matches.
    """
    return [position
            for position, filename in enumerate(spectrafiles)
            if re.search(pool_id, filename) is not None]
3a49065a05d6 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/percolator commit b4871f9659a924a68430aed3a93f4f9bad733fd6
galaxyp
parents:
diff changeset
13
3a49065a05d6 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/percolator commit b4871f9659a924a68430aed3a93f4f9bad733fd6
galaxyp
parents:
diff changeset
14
3a49065a05d6 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/percolator commit b4871f9659a924a68430aed3a93f4f9bad733fd6
galaxyp
parents:
diff changeset
def get_perco_batches_from_spectrafiles(spectrafiles, batchsize, ppool_ids):
    """Yield lists of file indices that can be percolated together.

    When ``ppool_ids`` is truthy, the files are first grouped per pool
    identifier (using get_filename_index_with_identifier) and batches
    never span two pools. Otherwise all files form a single group.
    Inside a group, a batch is emitted every time it holds
    ``int(batchsize)`` indices; whatever is left at the end of the group
    is emitted as a final, shorter batch.
    """
    if ppool_ids:
        per_pool = OrderedDict()
        for pool_id in ppool_ids:
            per_pool[pool_id] = get_filename_index_with_identifier(
                spectrafiles, pool_id)
        index_groups = per_pool.values()
    else:
        index_groups = [range(len(spectrafiles))]

    pending = []
    for indices in index_groups:
        for file_index in indices:
            pending.append(file_index)
            if len(pending) == int(batchsize):
                yield pending
                pending = []
        # Flush the remainder so a batch never mixes files of two groups.
        if pending:
            yield pending
            pending = []
3a49065a05d6 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/percolator commit b4871f9659a924a68430aed3a93f4f9bad733fd6
galaxyp
parents:
diff changeset
33
3a49065a05d6 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/percolator commit b4871f9659a924a68430aed3a93f4f9bad733fd6
galaxyp
parents:
diff changeset
34
3a49065a05d6 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/percolator commit b4871f9659a924a68430aed3a93f4f9bad733fd6
galaxyp
parents:
diff changeset
def main():
    """Write one meta2pin file per batch of search result files.

    Command line options:
      --batchsize            number of files per batch (integer)
      --spectrafiles         spectra file names, used to build the batches
      --searchfiles          search result files, position-matched to
                             --spectrafiles
      --percolator-pool-ids  optional pool identifiers; when given, files
                             are batched per pool

    A ``metafiles`` directory is created in the current working directory
    and each batch is written to ``percolatorpool<N>.meta2pin`` inside it,
    one search file per line. Missing options, a non-integer batch size
    or a differing number of spectra and search files end the program
    with an argparse usage error.
    """
    parser = argparse.ArgumentParser()
    # The batch size is compared as an integer downstream; converting it
    # here turns a malformed value into a usage error instead of a traceback.
    parser.add_argument('--batchsize', dest='batchsize', type=int,
                        required=True)
    parser.add_argument('--spectrafiles', dest='spectrafiles', nargs='+',
                        required=True)
    parser.add_argument('--searchfiles', dest='searchfiles', nargs='+',
                        required=True)
    parser.add_argument('--percolator-pool-ids', dest='percopoolids',
                        nargs='+', default=False)
    args = parser.parse_args()

    # Batches hold positions in --spectrafiles that are then looked up in
    # --searchfiles, so both lists must line up. Checking before anything
    # is written avoids a half-written output directory.
    if len(args.spectrafiles) != len(args.searchfiles):
        parser.error(
            'got {} spectra files but {} search files; '
            'both options need the same number of files'.format(
                len(args.spectrafiles), len(args.searchfiles)))

    outpath = os.path.join(os.getcwd(), 'metafiles')
    # Raises if the directory already exists, as before.
    os.makedirs(outpath)
    batches = get_perco_batches_from_spectrafiles(
        args.spectrafiles, args.batchsize, args.percopoolids)
    for count, batch in enumerate(batches):
        batchfiles = [args.searchfiles[index] for index in batch]
        out_file = os.path.join(
            outpath, 'percolatorpool{}.meta2pin'.format(count))
        with open(out_file, 'w') as fp:
            fp.write('\n'.join(batchfiles))
3a49065a05d6 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/percolator commit b4871f9659a924a68430aed3a93f4f9bad733fd6
galaxyp
parents:
diff changeset
51
3a49065a05d6 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/percolator commit b4871f9659a924a68430aed3a93f4f9bad733fd6
galaxyp
parents:
diff changeset
52
3a49065a05d6 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/percolator commit b4871f9659a924a68430aed3a93f4f9bad733fd6
galaxyp
parents:
diff changeset
# Run the command line entry point only when executed as a script,
# not when the module is imported.
if __name__ == '__main__':
    main()