Mercurial > repos > galaxyp > percolator
changeset 4:154147805a33 draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/percolator commit 9db2c1bb610ff3a6940f0a037c0fccf337692c36
author | galaxyp |
---|---|
date | Fri, 28 Apr 2017 12:25:36 -0400 |
parents | abed51712ed0 |
children | dce55ca21b98 |
files | nested_collection.py nested_collection.xml test-data/empty_file10.mzid test-data/empty_file11.mzid test-data/empty_file12.mzid test-data/empty_file13.mzid test-data/empty_file14.mzid test-data/empty_file7.mzid test-data/empty_file8.mzid test-data/empty_file9.mzid |
diffstat | 2 files changed, 75 insertions(+), 36 deletions(-) [+] |
line wrap: on
line diff
--- a/nested_collection.py Sat Apr 08 08:23:12 2017 -0400 +++ b/nested_collection.py Fri Apr 28 12:25:36 2017 -0400 @@ -20,18 +20,19 @@ realnames, p_id)) for p_id in pool_ids]) else: filegroups = {1: range(len(realnames))} - batch = [] + batch, in_pool_indices = [], [] for pool_id, grouped_indices in filegroups.items(): if pool_id == 1: pool_id = 'pool0' - for index in grouped_indices: - batch.append(index) + for in_pool_index, total_index in enumerate(grouped_indices): + batch.append(total_index) + in_pool_indices.append(in_pool_index) if batchsize and len(batch) == int(batchsize): - yield pool_id, batch - batch = [] + yield pool_id, batch, in_pool_indices + batch, in_pool_indices = [], [] if len(batch) > 0: - yield pool_id, batch - batch = [] + yield pool_id, batch, in_pool_indices + batch, in_pool_indices = [], [] def main(): @@ -41,12 +42,20 @@ parser.add_argument('--galaxy-files', dest='galaxyfiles', nargs='+') parser.add_argument('--pool-ids', dest='poolids', nargs='+', default=False) args = parser.parse_args() - for batchcount, (pool_id, batch) in enumerate(get_batches_of_galaxyfiles( - args.realnames, args.batchsize, args.poolids)): - for fncount, batchfile in enumerate([args.galaxyfiles[index] for index in batch]): - dsetname = '{}_batch{}___inputfn{}.data'.format(pool_id, batchcount, fncount) + batches = [x for x in get_batches_of_galaxyfiles(args.realnames, args.batchsize, args.poolids)] + batchdigits = len(str(len(batches))) + if args.poolids: + pooldigits = {pid: [] for pid in args.poolids} + for batchdata in batches: + pooldigits[batchdata[0]].append(len(batchdata[1])) + pooldigits = {pid: len(str(sum(batchlengths))) for pid, batchlengths in pooldigits.items()} + else: + pooldigits = {'pool0': len(str(len(args.galaxyfiles)))} + for batchcount, (pool_id, batch, in_pool_indices) in enumerate(batches): + for fnindex, in_pool_index in zip(batch, in_pool_indices): + dsetname = '{pid}_batch{bi:0{bd}d}___inputfn{fi:0{pd}d}_{real}.data'.format(pid=pool_id, bi=batchcount, bd=batchdigits, fi=in_pool_index, pd=pooldigits[pool_id], real=args.realnames[fnindex]) print('producing', dsetname) - os.symlink(batchfile, dsetname) + os.symlink(args.galaxyfiles[fnindex], dsetname) if __name__ == '__main__': main()
--- a/nested_collection.xml Sat Apr 08 08:23:12 2017 -0400 +++ b/nested_collection.xml Fri Apr 28 12:25:36 2017 -0400 @@ -38,37 +38,51 @@ <outputs> <collection name="batched_fractions_mzid" type="list:list" label="Pooled batched mzIdentML data"> <filter>filetype == "mzid"</filter> - <discover_datasets pattern="(?P<identifier_0>\w+[^_][^_][^_])___(?P<identifier_1>[^_]+)\.data" ext="mzid" visible="false" /> + <discover_datasets pattern="(?P<identifier_0>\w+[^_][^_][^_])___(?P<identifier_1>[\w.]+)\.data" ext="mzid" visible="false" /> </collection> <collection name="batched_fractions_perco" type="list:list" label="Pooled batched percolator data"> <filter>filetype == "percout"</filter> - <discover_datasets pattern="(?P<identifier_0>\w+[^_][^_][^_])___(?P<identifier_1>[^_]+)\.data" ext="percout" visible="false" /> + <discover_datasets pattern="(?P<identifier_0>\w+[^_][^_][^_])___(?P<identifier_1>[\w.]+)\.data" ext="percout" visible="false" /> </collection> <collection name="batched_fractions_tab" type="list:list" label="Pooled batched tabular data"> <filter>filetype == "tabular"</filter> - <discover_datasets pattern="(?P<identifier_0>\w+[^_][^_][^_])___(?P<identifier_1>[^_]+)\.data" ext="tabular" visible="false" /> + <discover_datasets pattern="(?P<identifier_0>\w+[^_][^_][^_])___(?P<identifier_1>[\w.]+)\.data" ext="tabular" visible="false" /> </collection> </outputs> <tests> <test> - <param name="batchsize" value="2"/> + <param name="batchsize" value="6"/> <param name="filetype" value="mzid" /> <param name="listtobatch"> <collection type="list"> - <element name="fraction_one_spectra" value="empty_file1.mzid"/> + <element name="fraction_one_spectra.mzML" value="empty_file1.mzid"/> <element name="fraction_two_spectra" value="empty_file2.mzid"/> <element name="fraction_three_spectra" value="empty_file3.mzid"/> <element name="fraction_four_spectra" value="empty_file4.mzid"/> + <element name="fraction_five_spectra" value="empty_file5.mzid"/> + <element name="fraction_six_spectra" value="empty_file6.mzid"/> + <element name="fraction_seven_spectra" value="empty_file7.mzid"/> + <element name="fraction_eight_spectra" value="empty_file8.mzid"/> + <element name="fraction_nine_spectra" value="empty_file9.mzid"/> + <element name="fraction_ten_spectra" value="empty_file10.mzid"/> + <element name="fraction_eleven_spectra" value="empty_file11.mzid"/> </collection> </param> <output_collection name="batched_fractions_mzid" type="list:list"> <element name="pool0_batch0"> - <element name="inputfn0" ftype="mzid" file="empty_file1.mzid"/> - <element name="inputfn1" ftype="mzid" file="empty_file2.mzid"/> + <element name="inputfn00_fraction_one_spectra.mzML" ftype="mzid" file="empty_file1.mzid"/> + <element name="inputfn01_fraction_two_spectra" ftype="mzid" file="empty_file2.mzid"/> + <element name="inputfn02_fraction_three_spectra" ftype="mzid" file="empty_file3.mzid"/> + <element name="inputfn03_fraction_four_spectra" ftype="mzid" file="empty_file4.mzid"/> + <element name="inputfn04_fraction_five_spectra" ftype="mzid" file="empty_file5.mzid"/> + <element name="inputfn05_fraction_six_spectra" ftype="mzid" file="empty_file6.mzid"/> </element> <element name="pool0_batch1"> - <element name="inputfn0" ftype="mzid" file="empty_file3.mzid"/> - <element name="inputfn1" ftype="mzid" file="empty_file4.mzid"/> + <element name="inputfn06_fraction_seven_spectra" ftype="mzid" file="empty_file7.mzid"/> + <element name="inputfn07_fraction_eight_spectra" ftype="mzid" file="empty_file8.mzid"/> + <element name="inputfn08_fraction_nine_spectra" ftype="mzid" file="empty_file9.mzid"/> + <element name="inputfn09_fraction_ten_spectra" ftype="mzid" file="empty_file10.mzid"/> + <element name="inputfn10_fraction_eleven_spectra" ftype="mzid" file="empty_file10.mzid"/> </element> </output_collection> </test> @@ -85,21 +99,37 @@ <element name="fr_one_set1_spectra" value="empty_file1.mzid"/> <element name="fr_two_set1_spectra" value="empty_file2.mzid"/> <element name="fr_three_set1_spectra" value="empty_file3.mzid"/> - <element name="fr_one_set2_spectra" value="empty_file4.mzid"/> - <element name="fr_two_set2_spectra" value="empty_file5.mzid"/> - <element name="fr_three_set2_spectra" value="empty_file6.mzid"/> + <element name="fr_four_set1_spectra" value="empty_file4.mzid"/> + <element name="fr_five_set1_spectra" value="empty_file5.mzid"/> + <element name="fr_six_set1_spectra" value="empty_file6.mzid"/> + <element name="fr_seven_set1_spectra" value="empty_file7.mzid"/> + <element name="fr_eight_set1_spectra" value="empty_file8.mzid"/> + <element name="fr_nine_set1_spectra" value="empty_file9.mzid"/> + <element name="fr_ten_set1_spectra" value="empty_file10.mzid"/> + <element name="fr_eleven_set1_spectra" value="empty_file11.mzid"/> + <element name="fr_one_set2_spectra" value="empty_file12.mzid"/> + <element name="fr_two_set2_spectra" value="empty_file13.mzid"/> + <element name="fr_three_set2_spectra" value="empty_file14.mzid"/> </collection> </param> <output_collection name="batched_fractions_tab" type="list:list"> <element name="set1_batch0"> - <element name="inputfn0" ftype="tabular" file="empty_file1.mzid"/> - <element name="inputfn1" ftype="tabular" file="empty_file2.mzid"/> - <element name="inputfn2" ftype="tabular" file="empty_file3.mzid"/> + <element name="inputfn00_fr_one_set1_spectra" ftype="tabular" file="empty_file1.mzid"/> + <element name="inputfn01_fr_two_set1_spectra" ftype="tabular" file="empty_file2.mzid"/> + <element name="inputfn02_fr_three_set1_spectra" ftype="tabular" file="empty_file3.mzid"/> + <element name="inputfn03_fr_four_set1_spectra" ftype="tabular" file="empty_file4.mzid"/> + <element name="inputfn04_fr_five_set1_spectra" ftype="tabular" file="empty_file5.mzid"/> + <element name="inputfn05_fr_six_set1_spectra" ftype="tabular" file="empty_file6.mzid"/> + <element name="inputfn06_fr_seven_set1_spectra" ftype="tabular" file="empty_file7.mzid"/> + <element name="inputfn07_fr_eight_set1_spectra" ftype="tabular" file="empty_file8.mzid"/> + <element name="inputfn08_fr_nine_set1_spectra" ftype="tabular" file="empty_file9.mzid"/> + <element name="inputfn09_fr_ten_set1_spectra" ftype="tabular" file="empty_file10.mzid"/> + <element name="inputfn10_fr_eleven_set1_spectra" ftype="tabular" file="empty_file11.mzid"/> </element> <element name="set2_batch1"> - <element name="inputfn0" ftype="tabular" file="empty_file4.mzid"/> - <element name="inputfn1" ftype="tabular" file="empty_file5.mzid"/> - <element name="inputfn2" ftype="tabular" file="empty_file6.mzid"/> + <element name="inputfn0_fr_one_set2_spectra" ftype="tabular" file="empty_file12.mzid"/> + <element name="inputfn1_fr_two_set2_spectra" ftype="tabular" file="empty_file13.mzid"/> + <element name="inputfn2_fr_three_set2_spectra" ftype="tabular" file="empty_file14.mzid"/> </element> </output_collection> </test> @@ -124,18 +154,18 @@ </param> <output_collection name="batched_fractions_perco" type="list:list"> <element name="set1_batch0"> - <element name="inputfn0" ftype="percout" file="empty_file1.mzid"/> - <element name="inputfn1" ftype="percout" file="empty_file2.mzid"/> + <element name="inputfn0_fr_one_set1_spectra" ftype="percout" file="empty_file1.mzid"/> + <element name="inputfn1_fr_two_set1_spectra" ftype="percout" file="empty_file2.mzid"/> </element> <element name="set1_batch1"> - <element name="inputfn0" ftype="percout" file="empty_file3.mzid"/> + <element name="inputfn2_fr_three_set1_spectra" ftype="percout" file="empty_file3.mzid"/> </element> <element name="set2_batch2"> - <element name="inputfn0" ftype="percout" file="empty_file4.mzid"/> - <element name="inputfn1" ftype="percout" file="empty_file5.mzid"/> + <element name="inputfn0_fr_one_set2_spectra" ftype="percout" file="empty_file4.mzid"/> + <element name="inputfn1_fr_two_set2_spectra" ftype="percout" file="empty_file5.mzid"/> </element> <element name="set2_batch3"> - <element name="inputfn0" ftype="percout" file="empty_file6.mzid"/> + <element name="inputfn2_fr_three_set2_spectra" ftype="percout" file="empty_file6.mzid"/> </element> </output_collection> </test>