diff metafiles2pin.py @ 0:3a49065a05d6 draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/percolator commit b4871f9659a924a68430aed3a93f4f9bad733fd6
author galaxyp
date Wed, 07 Dec 2016 16:43:51 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/metafiles2pin.py	Wed Dec 07 16:43:51 2016 -0500
@@ -0,0 +1,54 @@
+import argparse
+import os
+import re
+from collections import OrderedDict
+
+
def get_filename_index_with_identifier(spectrafiles, pool_id):
    """Return the positions of the spectra files that belong to a pool.

    Args:
        spectrafiles: Sequence of spectra file names (or paths).
        pool_id: Pool identifier. It is used as a regular expression and
            searched for anywhere in each file name, so regex
            metacharacters in it keep their special meaning.

    Returns:
        List of indices into ``spectrafiles``, in input order, of the names
        in which ``pool_id`` matches. Empty when nothing matches.

    Raises:
        re.error: If ``pool_id`` is not a valid regular expression.
    """
    # Compile once instead of handing the raw pattern to re.search per file.
    pattern = re.compile(pool_id)
    return [index for index, fn in enumerate(spectrafiles)
            if pattern.search(fn) is not None]
+
+
def get_perco_batches_from_spectrafiles(spectrafiles, batchsize, ppool_ids):
    """Yield batches of spectra file indices that can be percolated together.

    Files are first grouped: one group per pool identifier (in the order the
    identifiers are given) when ``ppool_ids`` is non-empty, otherwise a
    single group holding every file. Each group is then cut into batches of
    at most ``batchsize`` indices. A batch never spans two groups, so the
    last batch of a group may be smaller than ``batchsize``.

    With pooling, a file whose name matches no identifier ends up in no
    batch, and a file matching several identifiers ends up in one batch per
    matching pool.

    Args:
        spectrafiles: Sequence of spectra file names.
        batchsize: Maximum number of files per batch; anything accepted by
            ``int()`` (e.g. the raw command-line string). With a value below
            1 the limit is never reached and each group becomes one batch.
        ppool_ids: Iterable of pool identifiers (regular expressions searched
            for in the file names), or a falsy value for no pooling.

    Yields:
        Lists of indices into ``spectrafiles``.

    Raises:
        TypeError, ValueError: If ``batchsize`` cannot be converted by
            ``int()``. As this is a generator, the error surfaces when the
            first batch is requested.
    """
    # Convert once up front rather than on every appended index.
    max_files = int(batchsize)
    if ppool_ids:
        # OrderedDict keeps the pool order and collapses repeated identifiers.
        filegroups = OrderedDict(
            (p_id, get_filename_index_with_identifier(spectrafiles, p_id))
            for p_id in ppool_ids)
        groups = filegroups.values()
    else:
        groups = [range(len(spectrafiles))]
    for grouped_indices in groups:
        batch = []
        for index in grouped_indices:
            batch.append(index)
            if len(batch) == max_files:
                yield batch
                batch = []
        # Flush the remainder so a batch never mixes files from two groups.
        if batch:
            yield batch
+
+
def main(argv=None):
    """Write one ``.meta2pin`` file per percolator batch into ``./metafiles``.

    The spectra files are split into batches by
    ``get_perco_batches_from_spectrafiles``. For every batch a file
    ``metafiles/percolatorpool<N>.meta2pin`` (N counting from 0) is written
    in the current working directory, listing one search file per line. The
    search file is picked by position: the i-th search file belongs to the
    i-th spectra file.

    Args:
        argv: Command-line arguments without the program name. ``None``
            (the default) makes argparse read ``sys.argv``.

    Raises:
        SystemExit: On invalid or missing command-line arguments.
        OSError: If the ``metafiles`` directory already exists or cannot be
            created.
    """
    parser = argparse.ArgumentParser()
    # Required and typed here so bad input yields a usage error instead of a
    # TypeError/ValueError traceback further down.
    parser.add_argument('--batchsize', dest='batchsize', type=int,
                        required=True)
    parser.add_argument('--spectrafiles', dest='spectrafiles', nargs='+',
                        required=True)
    parser.add_argument('--searchfiles', dest='searchfiles', nargs='+',
                        required=True)
    parser.add_argument('--percolator-pool-ids', dest='percopoolids',
                        nargs='+', default=False)
    args = parser.parse_args(argv)
    # Batches hold spectra file indices that are looked up in searchfiles,
    # so too few search files would fail with IndexError after some of the
    # batch files have already been written.
    if len(args.searchfiles) < len(args.spectrafiles):
        parser.error('got {} search files for {} spectra files'.format(
            len(args.searchfiles), len(args.spectrafiles)))
    outpath = os.path.join(os.getcwd(), 'metafiles')
    # os.makedirs raises when the directory already exists, so batch files
    # left over from an earlier run are never mixed with the new ones.
    os.makedirs(outpath)
    batches = get_perco_batches_from_spectrafiles(
        args.spectrafiles, args.batchsize, args.percopoolids)
    for count, batch in enumerate(batches):
        batchfiles = [args.searchfiles[index] for index in batch]
        out_file = os.path.join(
            outpath, 'percolatorpool{}.meta2pin'.format(count))
        with open(out_file, 'w') as fp:
            fp.write('\n'.join(batchfiles))
+
+
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()