# HG changeset patch
# User galaxyp
# Date 1493396736 14400
# Node ID 154147805a33bcef62c23c2930237d29c7aab9a2
# Parent abed51712ed0a889f72a2589f76ae70a1fdaa49c
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/percolator commit 9db2c1bb610ff3a6940f0a037c0fccf337692c36
diff -r abed51712ed0 -r 154147805a33 nested_collection.py
--- a/nested_collection.py Sat Apr 08 08:23:12 2017 -0400
+++ b/nested_collection.py Fri Apr 28 12:25:36 2017 -0400
@@ -20,18 +20,19 @@
realnames, p_id)) for p_id in pool_ids])
else:
filegroups = {1: range(len(realnames))}
- batch = []
+ batch, in_pool_indices = [], []
for pool_id, grouped_indices in filegroups.items():
if pool_id == 1:
pool_id = 'pool0'
- for index in grouped_indices:
- batch.append(index)
+ for in_pool_index, total_index in enumerate(grouped_indices):
+ batch.append(total_index)
+ in_pool_indices.append(in_pool_index)
if batchsize and len(batch) == int(batchsize):
- yield pool_id, batch
- batch = []
+ yield pool_id, batch, in_pool_indices
+ batch, in_pool_indices = [], []
if len(batch) > 0:
- yield pool_id, batch
- batch = []
+ yield pool_id, batch, in_pool_indices
+ batch, in_pool_indices = [], []
def main():
@@ -41,12 +42,20 @@
parser.add_argument('--galaxy-files', dest='galaxyfiles', nargs='+')
parser.add_argument('--pool-ids', dest='poolids', nargs='+', default=False)
args = parser.parse_args()
- for batchcount, (pool_id, batch) in enumerate(get_batches_of_galaxyfiles(
- args.realnames, args.batchsize, args.poolids)):
- for fncount, batchfile in enumerate([args.galaxyfiles[index] for index in batch]):
- dsetname = '{}_batch{}___inputfn{}.data'.format(pool_id, batchcount, fncount)
+ batches = [x for x in get_batches_of_galaxyfiles(args.realnames, args.batchsize, args.poolids)]
+ batchdigits = len(str(len(batches)))
+ if args.poolids:
+ pooldigits = {pid: [] for pid in args.poolids}
+ for batchdata in batches:
+ pooldigits[batchdata[0]].append(len(batchdata[1]))
+ pooldigits = {pid: len(str(sum(batchlengths))) for pid, batchlengths in pooldigits.items()}
+ else:
+ pooldigits = {'pool0': len(str(len(args.galaxyfiles)))}
+ for batchcount, (pool_id, batch, in_pool_indices) in enumerate(batches):
+ for fnindex, in_pool_index in zip(batch, in_pool_indices):
+ dsetname = '{pid}_batch{bi:0{bd}d}___inputfn{fi:0{pd}d}_{real}.data'.format(pid=pool_id, bi=batchcount, bd=batchdigits, fi=in_pool_index, pd=pooldigits[pool_id], real=args.realnames[fnindex])
print('producing', dsetname)
- os.symlink(batchfile, dsetname)
+ os.symlink(args.galaxyfiles[fnindex], dsetname)
if __name__ == '__main__':
main()
diff -r abed51712ed0 -r 154147805a33 nested_collection.xml
--- a/nested_collection.xml Sat Apr 08 08:23:12 2017 -0400
+++ b/nested_collection.xml Fri Apr 28 12:25:36 2017 -0400
@@ -38,37 +38,51 @@
filetype == "mzid"
-
+
filetype == "percout"
-
+
filetype == "tabular"
-
+
-
+
-
+
+
+
+
+
+
+
+
-
-
+
+
+
+
+
+
-
-
+
+
+
+
+
@@ -85,21 +99,37 @@
-
-
-
+
+
+
+
+
+
+
+
+
+
+
-
-
-
+
+
+
+
+
+
+
+
+
+
+
-
-
-
+
+
+
@@ -124,18 +154,18 @@
-
-
+
+
-
+
-
-
+
+
-
+
diff -r abed51712ed0 -r 154147805a33 test-data/empty_file10.mzid
diff -r abed51712ed0 -r 154147805a33 test-data/empty_file11.mzid
diff -r abed51712ed0 -r 154147805a33 test-data/empty_file12.mzid
diff -r abed51712ed0 -r 154147805a33 test-data/empty_file13.mzid
diff -r abed51712ed0 -r 154147805a33 test-data/empty_file14.mzid
diff -r abed51712ed0 -r 154147805a33 test-data/empty_file7.mzid
diff -r abed51712ed0 -r 154147805a33 test-data/empty_file8.mzid
diff -r abed51712ed0 -r 154147805a33 test-data/empty_file9.mzid