Mercurial > repos > bgruening > split_file_to_collection
annotate split_file_to_collection.py @ 10:2dae863c8f42 draft default tip
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 5d21f3d6a3a84b4737a2091ae0d772471eb389dd
author | bgruening |
---|---|
date | Thu, 23 May 2024 15:03:47 +0000 |
parents | baabc30154cd |
children |
rev | line source |
---|---|
0
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
1 #!/usr/bin/env python |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
2 |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
3 import argparse |
5
e77b954f0da5
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 6fa2a0294d615c9f267b766337dca0b2d3637219"
bgruening
parents:
4
diff
changeset
|
4 import math |
0
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
5 import os |
8
6cbe2f30c2d7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit a1f1f00463f734ba43f4e5c13c63fe5297ee128e-dirty"
bgruening
parents:
7
diff
changeset
|
6 import random |
0
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
7 import re |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
8 |
5
e77b954f0da5
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 6fa2a0294d615c9f267b766337dca0b2d3637219"
bgruening
parents:
4
diff
changeset
|
# Splitting rules per supported file type.  Every value is a 3-tuple of
#   1. the regular expression that matches the record separator
#      ('' when records are delimited by a line count instead of a regex),
#   2. the number of lines that make up one record
#      (0 when records are delimited by the regex instead),
#   3. a boolean telling whether the separator sits at the end of the record.
#
# To support a further file type: add an entry here, update the argument
# parser, and add the matching type option to the Galaxy wrapper.
FILETYPES = dict(
    fasta=(r"^>", 0, False),
    fastq=(r"", 4, False),
    tabular=(r"", 1, False),
    txt=(r"", 1, False),
    mgf=(r"^BEGIN IONS", 0, False),
    sdf=(r"\$\$\$\$", 0, True),
)
0
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
24 |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
25 |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
def main():
    """Parse the command line, validate the arguments and run the split.

    Tabular input combined with ``--by col`` is dispatched to
    ``split_by_column``; every other combination is dispatched to
    ``split_by_record``.

    Raises:
        FileNotFoundError: if the input path is not an existing file or the
            output path is not an existing directory.
        ValueError: if the number of header lines is negative, or if the
            file type is ``generic`` and no ``generic_re`` was supplied.
    """
    ps = parser_cli()
    args = vars(ps.parse_args())

    # get args and validate
    in_file = args["in"]
    if not os.path.isfile(in_file):
        raise FileNotFoundError("Input file does not exist")

    out_dir = args["out_dir"]
    if not os.path.isdir(out_dir):
        raise FileNotFoundError("out_dir is not a directory")

    top = args["top"]
    if top < 0:
        raise ValueError("Number of header lines cannot be negative")

    ftype = args["ftype"]

    # Raise explicitly rather than assert: assert statements are removed when
    # Python runs with -O, which would silently disable this validation.
    if ftype == "generic" and args["generic_re"] is None:
        raise ValueError("--generic_re needs to be given for generic input")

    if ftype == "tabular" and args["by"] == "col":
        args["match"] = replace_mapped_chars(args["match"])
        args["sub"] = replace_mapped_chars(args["sub"])
        split_by_column(args, in_file, out_dir, top)
    else:
        args["generic_re"] = replace_mapped_chars(args["generic_re"])
        split_by_record(args, in_file, out_dir, top, ftype)
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
56 |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
57 |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
58 def parser_cli(): |
9
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
59 parser = argparse.ArgumentParser( |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
60 description="split a file into multiple files. " |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
61 + "Can split on the column of a tabular file, " |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
62 + "with custom and useful names based on column value." |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
63 ) |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
64 parser.add_argument("--in", "-i", required=True, help="The input file") |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
65 parser.add_argument( |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
66 "--out_dir", |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
67 "-o", |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
68 default=os.getcwd(), |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
69 help="The output directory", |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
70 required=True, |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
71 ) |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
72 parser.add_argument( |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
73 "--file_names", |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
74 "-a", |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
75 help="If not splitting by column, the base name of the new files", |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
76 ) |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
77 parser.add_argument( |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
78 "--file_ext", |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
79 "-e", |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
80 help="If not splitting by column," |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
81 + " the extension of the new files (without a period)", |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
82 ) |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
83 parser.add_argument( |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
84 "--ftype", |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
85 "-f", |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
86 help="The type of the file to split", |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
87 required=True, |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
88 choices=["mgf", "fastq", "fasta", "sdf", "tabular", "txt", "generic"], |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
89 ) |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
90 parser.add_argument( |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
91 "--by", |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
92 "-b", |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
93 help="Split by line or by column (tabular only)", |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
94 default="row", |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
95 choices=["col", "row"], |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
96 ) |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
97 parser.add_argument( |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
98 "--top", |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
99 "-t", |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
100 type=int, |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
101 default=0, |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
102 help="Number of header lines to carry over to new files.", |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
103 ) |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
104 parser.add_argument( |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
105 "--rand", |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
106 "-r", |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
107 help="Divide records randomly into new files", |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
108 action="store_true", |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
109 ) |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
110 parser.add_argument( |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
111 "--seed", |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
112 "-x", |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
113 help="Provide a seed for the random number generator. " |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
114 + 'If not provided and args["rand"]==True, then date is used', |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
115 type=int, |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
116 ) |
5
e77b954f0da5
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 6fa2a0294d615c9f267b766337dca0b2d3637219"
bgruening
parents:
4
diff
changeset
|
117 group = parser.add_mutually_exclusive_group() |
9
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
118 group.add_argument( |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
119 "--numnew", |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
120 "-n", |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
121 type=int, |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
122 default=1, |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
123 help="Number of output files desired. Not valid for splitting on a column. Not compatible with chunksize and will be ignored if both are set.", |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
124 ) |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
125 group.add_argument( |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
126 "--chunksize", |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
127 "-k", |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
128 type=int, |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
129 default=0, |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
130 help="Number of records by file. Not valid for splitting on a column", |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
131 ) |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
132 parser.add_argument( |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
133 "--batch", |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
134 action="store_true", |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
135 help="Distribute files to collection while maintaining order. Ignored if splitting on column.", |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
136 ) |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
137 generic = parser.add_argument_group("Arguments controling generic splitting") |
5
e77b954f0da5
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 6fa2a0294d615c9f267b766337dca0b2d3637219"
bgruening
parents:
4
diff
changeset
|
138 group = generic.add_mutually_exclusive_group() |
9
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
139 group.add_argument( |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
140 "--generic_re", |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
141 "-g", |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
142 default="", |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
143 help="Regular expression indicating the start of a new record (only for generic)", |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
144 required=False, |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
145 ) |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
146 group.add_argument( |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
147 "--generic_num", |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
148 type=int, |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
149 default=0, |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
150 help="Length of records in number of lines (only for generic)", |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
151 required=False, |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
152 ) |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
153 generic.add_argument( |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
154 "--split_after", |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
155 "-p", |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
156 action="store_true", |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
157 help="Split between records after separator (default is before). " |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
158 + "Only for generic splitting by regex - specific ftypes are always split in the default way", |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
159 ) |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
160 bycol = parser.add_argument_group("If splitting on a column") |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
161 bycol.add_argument( |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
162 "--match", |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
163 "-m", |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
164 default="(.*)", |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
165 help="The regular expression to match id column entries", |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
166 ) |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
167 bycol.add_argument( |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
168 "--sub", |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
169 "-s", |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
170 default=r"\1", |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
171 help="The regular expression to substitute in for the matched pattern.", |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
172 ) |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
173 bycol.add_argument( |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
174 "--id_column", |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
175 "-c", |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
176 default="1", |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
177 help="Column that is used to name output files. Indexed starting from 1.", |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
178 type=int, |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
179 ) |
0
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
180 return parser |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
181 |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
182 |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
183 def replace_mapped_chars(pattern): |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
184 """ |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
185 Return pattern with the placeholders used for special characters when coming from Galaxy restored: every "__sq__" becomes a single quote and every "__backslash__" becomes a backslash. |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
186 """ |
9
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
187 mapped_chars = {"'": "__sq__", "\\": "__backslash__"} |
0
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
188 for key, value in mapped_chars.items(): |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
189 pattern = pattern.replace(value, key) |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
190 return pattern |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
191 |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
192 |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
193 def split_by_record(args, in_file, out_dir, top, ftype): |
5
e77b954f0da5
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 6fa2a0294d615c9f267b766337dca0b2d3637219"
bgruening
parents:
4
diff
changeset
|
194 # get configuration (record separator, start at end) for given filetype |
9
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
195 sep, num, sep_at_end = FILETYPES.get( |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
196 ftype, (args["generic_re"], args["generic_num"], args["split_after"]) |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
197 ) |
5
e77b954f0da5
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 6fa2a0294d615c9f267b766337dca0b2d3637219"
bgruening
parents:
4
diff
changeset
|
198 sep = re.compile(sep) |
0
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
199 |
4
0850f2dfba13
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 872590086696cfbc248519507ecb9063926297ad"
bgruening
parents:
3
diff
changeset
|
200 chunksize = args["chunksize"] |
0
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
201 numnew = args["numnew"] |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
202 |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
203 # random division |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
204 rand = args["rand"] |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
205 seed = args["seed"] |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
206 if seed: |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
207 random.seed(seed) |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
208 else: |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
209 random.seed() |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
210 |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
211 # batched division (maintains order) |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
212 batch = args["batch"] |
8
6cbe2f30c2d7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit a1f1f00463f734ba43f4e5c13c63fe5297ee128e-dirty"
bgruening
parents:
7
diff
changeset
|
213 |
5
e77b954f0da5
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 6fa2a0294d615c9f267b766337dca0b2d3637219"
bgruening
parents:
4
diff
changeset
|
214 # determine |
8
6cbe2f30c2d7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit a1f1f00463f734ba43f4e5c13c63fe5297ee128e-dirty"
bgruening
parents:
7
diff
changeset
|
215 # - the number of records that should be stored per file |
5
e77b954f0da5
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 6fa2a0294d615c9f267b766337dca0b2d3637219"
bgruening
parents:
4
diff
changeset
|
216 # (done always, even if used only for batch mode) |
e77b954f0da5
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 6fa2a0294d615c9f267b766337dca0b2d3637219"
bgruening
parents:
4
diff
changeset
|
217 # - if the separator is at the start / end of the record |
e77b954f0da5
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 6fa2a0294d615c9f267b766337dca0b2d3637219"
bgruening
parents:
4
diff
changeset
|
218 n_per_file = math.inf |
9
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
219 if ( |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
220 chunksize != 0 or batch |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
221 ): # needs to be calculated if either batch or chunksize are selected |
0
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
222 with open(in_file) as f: |
5
e77b954f0da5
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 6fa2a0294d615c9f267b766337dca0b2d3637219"
bgruening
parents:
4
diff
changeset
|
223 # read header lines |
e77b954f0da5
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 6fa2a0294d615c9f267b766337dca0b2d3637219"
bgruening
parents:
4
diff
changeset
|
224 for i in range(top): |
e77b954f0da5
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 6fa2a0294d615c9f267b766337dca0b2d3637219"
bgruening
parents:
4
diff
changeset
|
225 f.readline() |
e77b954f0da5
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 6fa2a0294d615c9f267b766337dca0b2d3637219"
bgruening
parents:
4
diff
changeset
|
226 n_records = 0 |
9
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
227 last_line_matched = False |
0
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
228 for line in f: |
9
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
229 if (num == 0 and re.match(sep, line) is not None) or ( |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
230 num > 0 and n_records % num == 0 |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
231 ): |
5
e77b954f0da5
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 6fa2a0294d615c9f267b766337dca0b2d3637219"
bgruening
parents:
4
diff
changeset
|
232 n_records += 1 |
e77b954f0da5
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 6fa2a0294d615c9f267b766337dca0b2d3637219"
bgruening
parents:
4
diff
changeset
|
233 last_line_matched = True |
e77b954f0da5
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 6fa2a0294d615c9f267b766337dca0b2d3637219"
bgruening
parents:
4
diff
changeset
|
234 else: |
e77b954f0da5
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 6fa2a0294d615c9f267b766337dca0b2d3637219"
bgruening
parents:
4
diff
changeset
|
235 last_line_matched = False |
e77b954f0da5
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 6fa2a0294d615c9f267b766337dca0b2d3637219"
bgruening
parents:
4
diff
changeset
|
236 if sep_at_end and not last_line_matched: |
e77b954f0da5
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 6fa2a0294d615c9f267b766337dca0b2d3637219"
bgruening
parents:
4
diff
changeset
|
237 n_records += 1 |
e77b954f0da5
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 6fa2a0294d615c9f267b766337dca0b2d3637219"
bgruening
parents:
4
diff
changeset
|
238 |
e77b954f0da5
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 6fa2a0294d615c9f267b766337dca0b2d3637219"
bgruening
parents:
4
diff
changeset
|
239 # if there are fewer records than desired files |
e77b954f0da5
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 6fa2a0294d615c9f267b766337dca0b2d3637219"
bgruening
parents:
4
diff
changeset
|
240 numnew = min(numnew, n_records) |
e77b954f0da5
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 6fa2a0294d615c9f267b766337dca0b2d3637219"
bgruening
parents:
4
diff
changeset
|
241 # approx. number of records per file |
8
6cbe2f30c2d7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit a1f1f00463f734ba43f4e5c13c63fe5297ee128e-dirty"
bgruening
parents:
7
diff
changeset
|
242 if chunksize == 0: # i.e. no chunking |
4
0850f2dfba13
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 872590086696cfbc248519507ecb9063926297ad"
bgruening
parents:
3
diff
changeset
|
243 n_per_file = n_records // numnew |
0850f2dfba13
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 872590086696cfbc248519507ecb9063926297ad"
bgruening
parents:
3
diff
changeset
|
244 else: |
9
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
245 numnew = max(n_records // chunksize, 1) # should not be less than 1 |
4
0850f2dfba13
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 872590086696cfbc248519507ecb9063926297ad"
bgruening
parents:
3
diff
changeset
|
246 n_per_file = chunksize |
0
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
247 |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
248 # make new files |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
249 # strip extension of old file and add number |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
250 custom_new_file_name = args["file_names"] |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
251 custom_new_file_ext = "." + args["file_ext"] |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
252 if custom_new_file_name is None: |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
253 new_file_base = os.path.splitext(os.path.basename(in_file)) |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
254 else: |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
255 new_file_base = [custom_new_file_name, custom_new_file_ext] |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
256 |
9
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
257 newfile_names = [ |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
258 os.path.join(out_dir, "%s_%06d%s" % (new_file_base[0], count, new_file_base[1])) |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
259 for count in range(0, numnew) |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
260 ] |
0
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
261 # bunch o' counters |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
262 # index to list of new files |
5
e77b954f0da5
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 6fa2a0294d615c9f267b766337dca0b2d3637219"
bgruening
parents:
4
diff
changeset
|
263 if rand: |
e77b954f0da5
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 6fa2a0294d615c9f267b766337dca0b2d3637219"
bgruening
parents:
4
diff
changeset
|
264 new_file_counter = int(math.floor(random.random() * numnew)) |
e77b954f0da5
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 6fa2a0294d615c9f267b766337dca0b2d3637219"
bgruening
parents:
4
diff
changeset
|
265 else: |
e77b954f0da5
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 6fa2a0294d615c9f267b766337dca0b2d3637219"
bgruening
parents:
4
diff
changeset
|
266 new_file_counter = 0 |
8
6cbe2f30c2d7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit a1f1f00463f734ba43f4e5c13c63fe5297ee128e-dirty"
bgruening
parents:
7
diff
changeset
|
267 new_file = open(newfile_names[new_file_counter], "a") |
0
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
268 # to contain header specified by top |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
269 header = "" |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
270 # keep track of the files that have been opened so far |
5
e77b954f0da5
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 6fa2a0294d615c9f267b766337dca0b2d3637219"
bgruening
parents:
4
diff
changeset
|
271 fresh_files = set(range(numnew)) |
0
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
272 |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
273 # keep track in loop of number of records in each file |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
274 # only used in batch |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
275 records_in_file = 0 |
3
2ddc36385d7a
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 8d069684e155d2f5b6fae06d14d98ce41321da53"
bgruening
parents:
2
diff
changeset
|
276 |
0
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
277 # open file |
5
e77b954f0da5
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 6fa2a0294d615c9f267b766337dca0b2d3637219"
bgruening
parents:
4
diff
changeset
|
278 with open(in_file, "r") as f: |
e77b954f0da5
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 6fa2a0294d615c9f267b766337dca0b2d3637219"
bgruening
parents:
4
diff
changeset
|
279 # read header |
e77b954f0da5
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 6fa2a0294d615c9f267b766337dca0b2d3637219"
bgruening
parents:
4
diff
changeset
|
280 for i in range(top): |
e77b954f0da5
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 6fa2a0294d615c9f267b766337dca0b2d3637219"
bgruening
parents:
4
diff
changeset
|
281 header += f.readline() |
e77b954f0da5
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 6fa2a0294d615c9f267b766337dca0b2d3637219"
bgruening
parents:
4
diff
changeset
|
282 |
0
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
283 record = "" |
5
e77b954f0da5
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 6fa2a0294d615c9f267b766337dca0b2d3637219"
bgruening
parents:
4
diff
changeset
|
284 for line_no, line in enumerate(f): |
0
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
285 # check if beginning of line is record sep |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
286 # if beginning of line is record sep, either start record or finish one |
9
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
287 if (num == 0 and re.match(sep, line) is not None) or ( |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
288 num > 0 and line_no % num == 0 |
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
289 ): |
0
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
290 # this only happens first time through |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
291 if record == "": |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
292 record += line |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
293 else: |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
294 # if is in fresh_files, write header and drop from freshFiles |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
295 if new_file_counter in fresh_files: |
8
6cbe2f30c2d7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit a1f1f00463f734ba43f4e5c13c63fe5297ee128e-dirty"
bgruening
parents:
7
diff
changeset
|
296 new_file.write(header) |
0
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
297 fresh_files.remove(new_file_counter) |
8
6cbe2f30c2d7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit a1f1f00463f734ba43f4e5c13c63fe5297ee128e-dirty"
bgruening
parents:
7
diff
changeset
|
298 |
5
e77b954f0da5
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 6fa2a0294d615c9f267b766337dca0b2d3637219"
bgruening
parents:
4
diff
changeset
|
299 if sep_at_end: |
e77b954f0da5
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 6fa2a0294d615c9f267b766337dca0b2d3637219"
bgruening
parents:
4
diff
changeset
|
300 record += line |
e77b954f0da5
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 6fa2a0294d615c9f267b766337dca0b2d3637219"
bgruening
parents:
4
diff
changeset
|
301 # write record to file |
8
6cbe2f30c2d7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit a1f1f00463f734ba43f4e5c13c63fe5297ee128e-dirty"
bgruening
parents:
7
diff
changeset
|
302 new_file.write(record) |
5
e77b954f0da5
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 6fa2a0294d615c9f267b766337dca0b2d3637219"
bgruening
parents:
4
diff
changeset
|
303 if not sep_at_end: |
4
0850f2dfba13
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 872590086696cfbc248519507ecb9063926297ad"
bgruening
parents:
3
diff
changeset
|
304 record = line |
5
e77b954f0da5
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 6fa2a0294d615c9f267b766337dca0b2d3637219"
bgruening
parents:
4
diff
changeset
|
305 else: |
4
0850f2dfba13
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 872590086696cfbc248519507ecb9063926297ad"
bgruening
parents:
3
diff
changeset
|
306 record = "" |
5
e77b954f0da5
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 6fa2a0294d615c9f267b766337dca0b2d3637219"
bgruening
parents:
4
diff
changeset
|
307 |
0
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
308 # change destination file |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
309 if rand: |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
310 new_file_counter = int(math.floor(random.random() * numnew)) |
8
6cbe2f30c2d7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit a1f1f00463f734ba43f4e5c13c63fe5297ee128e-dirty"
bgruening
parents:
7
diff
changeset
|
311 new_file.close() |
6cbe2f30c2d7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit a1f1f00463f734ba43f4e5c13c63fe5297ee128e-dirty"
bgruening
parents:
7
diff
changeset
|
312 new_file = open(newfile_names[new_file_counter], "a") |
0
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
313 elif batch: |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
314 # number of records read per file |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
315 records_in_file += 1 |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
316 # have we reached the max for each file? |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
317 # if so, switch file |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
318 if records_in_file >= n_per_file: |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
319 new_file_counter = (new_file_counter + 1) % numnew |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
320 records_in_file = 0 # reset to 0 |
8
6cbe2f30c2d7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit a1f1f00463f734ba43f4e5c13c63fe5297ee128e-dirty"
bgruening
parents:
7
diff
changeset
|
321 new_file.close() |
6cbe2f30c2d7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit a1f1f00463f734ba43f4e5c13c63fe5297ee128e-dirty"
bgruening
parents:
7
diff
changeset
|
322 new_file = open(newfile_names[new_file_counter], "a") |
0
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
323 else: |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
324 new_file_counter = (new_file_counter + 1) % numnew |
8
6cbe2f30c2d7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit a1f1f00463f734ba43f4e5c13c63fe5297ee128e-dirty"
bgruening
parents:
7
diff
changeset
|
325 new_file.close() |
6cbe2f30c2d7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit a1f1f00463f734ba43f4e5c13c63fe5297ee128e-dirty"
bgruening
parents:
7
diff
changeset
|
326 new_file = open(newfile_names[new_file_counter], "a") |
0
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
327 # if beginning of line is not record sep, we must be inside a record |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
328 # so just append |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
329 else: |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
330 record += line |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
331 # after loop, write final record to file |
10
2dae863c8f42
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 5d21f3d6a3a84b4737a2091ae0d772471eb389dd
bgruening
parents:
9
diff
changeset
|
332 if new_file_counter in fresh_files: |
2dae863c8f42
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 5d21f3d6a3a84b4737a2091ae0d772471eb389dd
bgruening
parents:
9
diff
changeset
|
333 new_file.write(header) |
8
6cbe2f30c2d7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit a1f1f00463f734ba43f4e5c13c63fe5297ee128e-dirty"
bgruening
parents:
7
diff
changeset
|
334 new_file.write(record) |
6cbe2f30c2d7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit a1f1f00463f734ba43f4e5c13c63fe5297ee128e-dirty"
bgruening
parents:
7
diff
changeset
|
335 new_file.close() |
0
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
336 |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
337 |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
338 def split_by_column(args, in_file, out_dir, top): |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
339 |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
340 # shift to 0-based indexing |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
341 id_col = int(args["id_column"]) - 1 |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
342 |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
343 try: |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
344 match = re.compile(args["match"]) |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
345 except re.error: |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
346 print("ERROR: Match (-m) supplied is not valid regex.") |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
347 raise |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
348 |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
349 sub = args["sub"] |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
350 |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
351 # set of file names |
8
6cbe2f30c2d7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit a1f1f00463f734ba43f4e5c13c63fe5297ee128e-dirty"
bgruening
parents:
7
diff
changeset
|
352 files = set() |
0
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
353 |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
354 # keep track of how many lines have been read |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
355 n_read = 0 |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
356 header = "" |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
357 with open(in_file) as file: |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
358 for line in file: |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
359 # if still in top, save to header |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
360 n_read += 1 |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
361 if n_read <= top: |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
362 header += line |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
363 continue |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
364 # split into columns, on tab |
9
baabc30154cd
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents:
8
diff
changeset
|
365 fields = re.split(r"\t", line.strip("\n")) |
0
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
366 |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
367 # get id column value |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
368 id_col_val = fields[id_col] |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
369 |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
370 # use regex to get new file name |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
371 out_file_name = re.sub(match, sub, id_col_val) |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
372 out_file_path = os.path.join(out_dir, out_file_name) |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
373 |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
374 # write |
8
6cbe2f30c2d7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit a1f1f00463f734ba43f4e5c13c63fe5297ee128e-dirty"
bgruening
parents:
7
diff
changeset
|
375 with open(out_file_path, "a") as current_new_file: |
6cbe2f30c2d7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit a1f1f00463f734ba43f4e5c13c63fe5297ee128e-dirty"
bgruening
parents:
7
diff
changeset
|
376 if out_file_name not in files: |
6cbe2f30c2d7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit a1f1f00463f734ba43f4e5c13c63fe5297ee128e-dirty"
bgruening
parents:
7
diff
changeset
|
377 current_new_file.write(header) |
6cbe2f30c2d7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit a1f1f00463f734ba43f4e5c13c63fe5297ee128e-dirty"
bgruening
parents:
7
diff
changeset
|
378 files.add(out_file_name) |
7
0046692724f9
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 6f78214d2c9d7786bfc9d8cbddac7d2613cd314e"
bgruening
parents:
6
diff
changeset
|
379 current_new_file.write(line) |
0
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
380 |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
381 |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
382 if __name__ == "__main__": |
de3c2c88e710
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
383 main() |