annotate split_file_to_collection.py @ 10:2dae863c8f42 draft default tip

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 5d21f3d6a3a84b4737a2091ae0d772471eb389dd
author bgruening
date Thu, 23 May 2024 15:03:47 +0000
parents baabc30154cd
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
de3c2c88e710 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff changeset
1 #!/usr/bin/env python
de3c2c88e710 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff changeset
2
de3c2c88e710 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff changeset
3 import argparse
5
e77b954f0da5 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 6fa2a0294d615c9f267b766337dca0b2d3637219"
bgruening
parents: 4
diff changeset
4 import math
0
de3c2c88e710 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff changeset
5 import os
8
6cbe2f30c2d7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit a1f1f00463f734ba43f4e5c13c63fe5297ee128e-dirty"
bgruening
parents: 7
diff changeset
6 import random
0
de3c2c88e710 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff changeset
7 import re
de3c2c88e710 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff changeset
8
5
e77b954f0da5 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 6fa2a0294d615c9f267b766337dca0b2d3637219"
bgruening
parents: 4
diff changeset
# Splitting rules for the file types that are handled natively.
# Every value is a 3-tuple:
#   1. regular expression matching the record separator
#      ('' when records are delimited by a line count instead of a regex)
#   2. number of lines making up one record
#      (0 when records are delimited by the regex instead of a line count)
#   3. boolean, True when the record separator sits at the END of a record
#
# To support a new file type: add an entry here, update the parser,
# and add a matching type option in the Galaxy wrapper.
FILETYPES = dict(
    fasta=(r"^>", 0, False),
    fastq=(r"", 4, False),
    tabular=(r"", 1, False),
    txt=(r"", 1, False),
    mgf=(r"^BEGIN IONS", 0, False),
    sdf=(r"\$\$\$\$", 0, True),
)
0
de3c2c88e710 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff changeset
24
de3c2c88e710 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff changeset
25
de3c2c88e710 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff changeset
def main():
    """Parse the command line, validate the arguments and dispatch the split.

    Tabular input with ``--by col`` is handed to ``split_by_column``; every
    other combination is handed to ``split_by_record``.

    Raises:
        FileNotFoundError: if the input file does not exist or ``out_dir``
            is not an existing directory.
        ValueError: if the number of header lines is negative, or if the
            file type is ``generic`` and no ``--generic_re`` was supplied.
    """
    ps = parser_cli()
    args = vars(ps.parse_args())

    # get args and validate
    in_file = args["in"]
    if not os.path.isfile(in_file):
        raise FileNotFoundError("Input file does not exist")

    out_dir = args["out_dir"]
    if not os.path.isdir(out_dir):
        raise FileNotFoundError("out_dir is not a directory")

    top = args["top"]
    if top < 0:
        raise ValueError("Number of header lines cannot be negative")

    ftype = args["ftype"]

    # Raise explicitly instead of using ``assert``: assert statements are
    # removed when Python runs with -O, which would silently drop this check.
    if ftype == "generic" and args["generic_re"] is None:
        raise ValueError("--generic_re needs to be given for generic input")

    if ftype == "tabular" and args["by"] == "col":
        args["match"] = replace_mapped_chars(args["match"])
        args["sub"] = replace_mapped_chars(args["sub"])
        split_by_column(args, in_file, out_dir, top)
    else:
        # NOTE(review): generic_re is passed to replace_mapped_chars for every
        # file type, not only "generic" -- confirm the parser supplies a
        # non-None default for it.
        args["generic_re"] = replace_mapped_chars(args["generic_re"])
        split_by_record(args, in_file, out_dir, top, ftype)
de3c2c88e710 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff changeset
56
de3c2c88e710 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff changeset
57
de3c2c88e710 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff changeset
58 def parser_cli():
9
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
59 parser = argparse.ArgumentParser(
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
60 description="split a file into multiple files. "
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
61 + "Can split on the column of a tabular file, "
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
62 + "with custom and useful names based on column value."
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
63 )
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
64 parser.add_argument("--in", "-i", required=True, help="The input file")
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
65 parser.add_argument(
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
66 "--out_dir",
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
67 "-o",
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
68 default=os.getcwd(),
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
69 help="The output directory",
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
70 required=True,
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
71 )
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
72 parser.add_argument(
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
73 "--file_names",
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
74 "-a",
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
75 help="If not splitting by column, the base name of the new files",
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
76 )
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
77 parser.add_argument(
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
78 "--file_ext",
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
79 "-e",
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
80 help="If not splitting by column,"
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
81 + " the extension of the new files (without a period)",
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
82 )
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
83 parser.add_argument(
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
84 "--ftype",
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
85 "-f",
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
86 help="The type of the file to split",
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
87 required=True,
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
88 choices=["mgf", "fastq", "fasta", "sdf", "tabular", "txt", "generic"],
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
89 )
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
90 parser.add_argument(
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
91 "--by",
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
92 "-b",
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
93 help="Split by line or by column (tabular only)",
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
94 default="row",
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
95 choices=["col", "row"],
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
96 )
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
97 parser.add_argument(
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
98 "--top",
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
99 "-t",
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
100 type=int,
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
101 default=0,
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
102 help="Number of header lines to carry over to new files.",
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
103 )
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
104 parser.add_argument(
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
105 "--rand",
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
106 "-r",
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
107 help="Divide records randomly into new files",
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
108 action="store_true",
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
109 )
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
110 parser.add_argument(
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
111 "--seed",
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
112 "-x",
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
113 help="Provide a seed for the random number generator. "
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
114 + 'If not provided and args["rand"]==True, then date is used',
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
115 type=int,
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
116 )
5
e77b954f0da5 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 6fa2a0294d615c9f267b766337dca0b2d3637219"
bgruening
parents: 4
diff changeset
117 group = parser.add_mutually_exclusive_group()
9
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
118 group.add_argument(
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
119 "--numnew",
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
120 "-n",
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
121 type=int,
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
122 default=1,
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
123 help="Number of output files desired. Not valid for splitting on a column. Not compatible with chunksize and will be ignored if both are set.",
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
124 )
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
125 group.add_argument(
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
126 "--chunksize",
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
127 "-k",
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
128 type=int,
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
129 default=0,
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
130 help="Number of records by file. Not valid for splitting on a column",
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
131 )
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
132 parser.add_argument(
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
133 "--batch",
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
134 action="store_true",
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
135 help="Distribute files to collection while maintaining order. Ignored if splitting on column.",
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
136 )
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
137 generic = parser.add_argument_group("Arguments controling generic splitting")
5
e77b954f0da5 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 6fa2a0294d615c9f267b766337dca0b2d3637219"
bgruening
parents: 4
diff changeset
138 group = generic.add_mutually_exclusive_group()
9
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
139 group.add_argument(
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
140 "--generic_re",
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
141 "-g",
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
142 default="",
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
143 help="Regular expression indicating the start of a new record (only for generic)",
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
144 required=False,
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
145 )
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
146 group.add_argument(
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
147 "--generic_num",
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
148 type=int,
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
149 default=0,
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
150 help="Length of records in number of lines (only for generic)",
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
151 required=False,
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
152 )
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
153 generic.add_argument(
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
154 "--split_after",
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
155 "-p",
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
156 action="store_true",
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
157 help="Split between records after separator (default is before). "
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
158 + "Only for generic splitting by regex - specific ftypes are always split in the default way",
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
159 )
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
160 bycol = parser.add_argument_group("If splitting on a column")
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
161 bycol.add_argument(
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
162 "--match",
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
163 "-m",
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
164 default="(.*)",
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
165 help="The regular expression to match id column entries",
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
166 )
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
167 bycol.add_argument(
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
168 "--sub",
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
169 "-s",
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
170 default=r"\1",
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
171 help="The regular expression to substitute in for the matched pattern.",
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
172 )
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
173 bycol.add_argument(
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
174 "--id_column",
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
175 "-c",
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
176 default="1",
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
177 help="Column that is used to name output files. Indexed starting from 1.",
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
178 type=int,
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
179 )
0
de3c2c88e710 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff changeset
180 return parser
de3c2c88e710 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff changeset
181
de3c2c88e710 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff changeset
182
de3c2c88e710 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff changeset
def replace_mapped_chars(pattern):
    """
    Restore special characters that arrive escaped from galaxy.

    Each placeholder token found in ``pattern`` is swapped back for the
    literal character it stands for (``__sq__`` -> single quote,
    ``__backslash__`` -> backslash) and the resulting string is returned.
    """
    # (placeholder, literal) pairs; applied in exactly this order
    substitutions = (("__sq__", "'"), ("__backslash__", "\\"))
    restored = pattern
    for placeholder, literal in substitutions:
        restored = restored.replace(placeholder, literal)
    return restored
de3c2c88e710 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff changeset
191
de3c2c88e710 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff changeset
192
de3c2c88e710 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff changeset
193 def split_by_record(args, in_file, out_dir, top, ftype):
5
e77b954f0da5 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 6fa2a0294d615c9f267b766337dca0b2d3637219"
bgruening
parents: 4
diff changeset
194 # get configuration (record separator, start at end) for given filetype
9
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
195 sep, num, sep_at_end = FILETYPES.get(
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
196 ftype, (args["generic_re"], args["generic_num"], args["split_after"])
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
197 )
5
e77b954f0da5 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 6fa2a0294d615c9f267b766337dca0b2d3637219"
bgruening
parents: 4
diff changeset
198 sep = re.compile(sep)
0
de3c2c88e710 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff changeset
199
4
0850f2dfba13 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 872590086696cfbc248519507ecb9063926297ad"
bgruening
parents: 3
diff changeset
200 chunksize = args["chunksize"]
0
de3c2c88e710 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff changeset
201 numnew = args["numnew"]
de3c2c88e710 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff changeset
202
de3c2c88e710 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff changeset
203 # random division
de3c2c88e710 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff changeset
204 rand = args["rand"]
de3c2c88e710 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff changeset
205 seed = args["seed"]
de3c2c88e710 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff changeset
206 if seed:
de3c2c88e710 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff changeset
207 random.seed(seed)
de3c2c88e710 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff changeset
208 else:
de3c2c88e710 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff changeset
209 random.seed()
de3c2c88e710 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff changeset
210
de3c2c88e710 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff changeset
211 # batched division (maintains order)
de3c2c88e710 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff changeset
212 batch = args["batch"]
8
6cbe2f30c2d7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit a1f1f00463f734ba43f4e5c13c63fe5297ee128e-dirty"
bgruening
parents: 7
diff changeset
213
5
e77b954f0da5 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 6fa2a0294d615c9f267b766337dca0b2d3637219"
bgruening
parents: 4
diff changeset
214 # determine
8
6cbe2f30c2d7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit a1f1f00463f734ba43f4e5c13c63fe5297ee128e-dirty"
bgruening
parents: 7
diff changeset
215 # - the number of records that should be stored per file
5
e77b954f0da5 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 6fa2a0294d615c9f267b766337dca0b2d3637219"
bgruening
parents: 4
diff changeset
216 # (done always, even if used only for batch mode)
e77b954f0da5 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 6fa2a0294d615c9f267b766337dca0b2d3637219"
bgruening
parents: 4
diff changeset
217 # - if the separator is at the start / end of the record
e77b954f0da5 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 6fa2a0294d615c9f267b766337dca0b2d3637219"
bgruening
parents: 4
diff changeset
218 n_per_file = math.inf
9
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
219 if (
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
220 chunksize != 0 or batch
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
221 ): # needs to be calculated if either batch or chunksize are selected
0
de3c2c88e710 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff changeset
222 with open(in_file) as f:
5
e77b954f0da5 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 6fa2a0294d615c9f267b766337dca0b2d3637219"
bgruening
parents: 4
diff changeset
223 # read header lines
e77b954f0da5 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 6fa2a0294d615c9f267b766337dca0b2d3637219"
bgruening
parents: 4
diff changeset
224 for i in range(top):
e77b954f0da5 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 6fa2a0294d615c9f267b766337dca0b2d3637219"
bgruening
parents: 4
diff changeset
225 f.readline()
e77b954f0da5 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 6fa2a0294d615c9f267b766337dca0b2d3637219"
bgruening
parents: 4
diff changeset
226 n_records = 0
9
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
227 last_line_matched = False
0
de3c2c88e710 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff changeset
228 for line in f:
9
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
229 if (num == 0 and re.match(sep, line) is not None) or (
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
230 num > 0 and n_records % num == 0
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
231 ):
5
e77b954f0da5 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 6fa2a0294d615c9f267b766337dca0b2d3637219"
bgruening
parents: 4
diff changeset
232 n_records += 1
e77b954f0da5 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 6fa2a0294d615c9f267b766337dca0b2d3637219"
bgruening
parents: 4
diff changeset
233 last_line_matched = True
e77b954f0da5 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 6fa2a0294d615c9f267b766337dca0b2d3637219"
bgruening
parents: 4
diff changeset
234 else:
e77b954f0da5 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 6fa2a0294d615c9f267b766337dca0b2d3637219"
bgruening
parents: 4
diff changeset
235 last_line_matched = False
e77b954f0da5 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 6fa2a0294d615c9f267b766337dca0b2d3637219"
bgruening
parents: 4
diff changeset
236 if sep_at_end and not last_line_matched:
e77b954f0da5 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 6fa2a0294d615c9f267b766337dca0b2d3637219"
bgruening
parents: 4
diff changeset
237 n_records += 1
e77b954f0da5 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 6fa2a0294d615c9f267b766337dca0b2d3637219"
bgruening
parents: 4
diff changeset
238
e77b954f0da5 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 6fa2a0294d615c9f267b766337dca0b2d3637219"
bgruening
parents: 4
diff changeset
239 # if there are fewer records than desired files
e77b954f0da5 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 6fa2a0294d615c9f267b766337dca0b2d3637219"
bgruening
parents: 4
diff changeset
240 numnew = min(numnew, n_records)
e77b954f0da5 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 6fa2a0294d615c9f267b766337dca0b2d3637219"
bgruening
parents: 4
diff changeset
241 # approx. number of records per file
8
6cbe2f30c2d7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit a1f1f00463f734ba43f4e5c13c63fe5297ee128e-dirty"
bgruening
parents: 7
diff changeset
242 if chunksize == 0: # i.e. no chunking
4
0850f2dfba13 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 872590086696cfbc248519507ecb9063926297ad"
bgruening
parents: 3
diff changeset
243 n_per_file = n_records // numnew
0850f2dfba13 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 872590086696cfbc248519507ecb9063926297ad"
bgruening
parents: 3
diff changeset
244 else:
9
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
245 numnew = max(n_records // chunksize, 1) # should not be less than 1
4
0850f2dfba13 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 872590086696cfbc248519507ecb9063926297ad"
bgruening
parents: 3
diff changeset
246 n_per_file = chunksize
0
de3c2c88e710 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff changeset
247
de3c2c88e710 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff changeset
248 # make new files
de3c2c88e710 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff changeset
249 # strip extension of old file and add number
de3c2c88e710 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff changeset
250 custom_new_file_name = args["file_names"]
de3c2c88e710 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff changeset
251 custom_new_file_ext = "." + args["file_ext"]
de3c2c88e710 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff changeset
252 if custom_new_file_name is None:
de3c2c88e710 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff changeset
253 new_file_base = os.path.splitext(os.path.basename(in_file))
de3c2c88e710 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff changeset
254 else:
de3c2c88e710 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff changeset
255 new_file_base = [custom_new_file_name, custom_new_file_ext]
de3c2c88e710 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff changeset
256
9
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
257 newfile_names = [
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
258 os.path.join(out_dir, "%s_%06d%s" % (new_file_base[0], count, new_file_base[1]))
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
259 for count in range(0, numnew)
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
260 ]
0
de3c2c88e710 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff changeset
261 # bunch o' counters
de3c2c88e710 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff changeset
262 # index to list of new files
5
e77b954f0da5 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 6fa2a0294d615c9f267b766337dca0b2d3637219"
bgruening
parents: 4
diff changeset
263 if rand:
e77b954f0da5 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 6fa2a0294d615c9f267b766337dca0b2d3637219"
bgruening
parents: 4
diff changeset
264 new_file_counter = int(math.floor(random.random() * numnew))
e77b954f0da5 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 6fa2a0294d615c9f267b766337dca0b2d3637219"
bgruening
parents: 4
diff changeset
265 else:
e77b954f0da5 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 6fa2a0294d615c9f267b766337dca0b2d3637219"
bgruening
parents: 4
diff changeset
266 new_file_counter = 0
8
6cbe2f30c2d7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit a1f1f00463f734ba43f4e5c13c63fe5297ee128e-dirty"
bgruening
parents: 7
diff changeset
267 new_file = open(newfile_names[new_file_counter], "a")
0
de3c2c88e710 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff changeset
268 # to contain header specified by top
de3c2c88e710 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff changeset
269 header = ""
de3c2c88e710 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff changeset
270 # keep track of the files that have been opened so far
5
e77b954f0da5 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 6fa2a0294d615c9f267b766337dca0b2d3637219"
bgruening
parents: 4
diff changeset
271 fresh_files = set(range(numnew))
0
de3c2c88e710 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff changeset
272
de3c2c88e710 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff changeset
273 # keep track in loop of number of records in each file
de3c2c88e710 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff changeset
274 # only used in batch
de3c2c88e710 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff changeset
275 records_in_file = 0
3
2ddc36385d7a "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 8d069684e155d2f5b6fae06d14d98ce41321da53"
bgruening
parents: 2
diff changeset
276
0
de3c2c88e710 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff changeset
277 # open file
5
e77b954f0da5 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 6fa2a0294d615c9f267b766337dca0b2d3637219"
bgruening
parents: 4
diff changeset
278 with open(in_file, "r") as f:
e77b954f0da5 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 6fa2a0294d615c9f267b766337dca0b2d3637219"
bgruening
parents: 4
diff changeset
279 # read header
e77b954f0da5 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 6fa2a0294d615c9f267b766337dca0b2d3637219"
bgruening
parents: 4
diff changeset
280 for i in range(top):
e77b954f0da5 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 6fa2a0294d615c9f267b766337dca0b2d3637219"
bgruening
parents: 4
diff changeset
281 header += f.readline()
e77b954f0da5 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 6fa2a0294d615c9f267b766337dca0b2d3637219"
bgruening
parents: 4
diff changeset
282
0
de3c2c88e710 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff changeset
283 record = ""
5
e77b954f0da5 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 6fa2a0294d615c9f267b766337dca0b2d3637219"
bgruening
parents: 4
diff changeset
284 for line_no, line in enumerate(f):
0
de3c2c88e710 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff changeset
285 # check if beginning of line is record sep
de3c2c88e710 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff changeset
286 # if beginning of line is record sep, either start record or finish one
9
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
287 if (num == 0 and re.match(sep, line) is not None) or (
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
288 num > 0 and line_no % num == 0
baabc30154cd planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 37305ce5e6ef1b91f6058eff5b128064f76fbe45
bgruening
parents: 8
diff changeset
289 ):
0
de3c2c88e710 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff changeset
290 # this only happens first time through
de3c2c88e710 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff changeset
291 if record == "":
de3c2c88e710 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff changeset
292 record += line
de3c2c88e710 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff changeset
293 else:
de3c2c88e710 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff changeset
294 # if is in fresh_files, write header and drop from freshFiles
de3c2c88e710 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff changeset
295 if new_file_counter in fresh_files:
8
6cbe2f30c2d7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit a1f1f00463f734ba43f4e5c13c63fe5297ee128e-dirty"
bgruening
parents: 7
diff changeset
296 new_file.write(header)
0
de3c2c88e710 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff changeset
297 fresh_files.remove(new_file_counter)
8
6cbe2f30c2d7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit a1f1f00463f734ba43f4e5c13c63fe5297ee128e-dirty"
bgruening
parents: 7
diff changeset
298
5
e77b954f0da5 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 6fa2a0294d615c9f267b766337dca0b2d3637219"
bgruening
parents: 4
diff changeset
299 if sep_at_end:
e77b954f0da5 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 6fa2a0294d615c9f267b766337dca0b2d3637219"
bgruening
parents: 4
diff changeset
300 record += line
e77b954f0da5 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 6fa2a0294d615c9f267b766337dca0b2d3637219"
bgruening
parents: 4
diff changeset
301 # write record to file
8
6cbe2f30c2d7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit a1f1f00463f734ba43f4e5c13c63fe5297ee128e-dirty"
bgruening
parents: 7
diff changeset
302 new_file.write(record)
5
e77b954f0da5 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 6fa2a0294d615c9f267b766337dca0b2d3637219"
bgruening
parents: 4
diff changeset
303 if not sep_at_end:
4
0850f2dfba13 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 872590086696cfbc248519507ecb9063926297ad"
bgruening
parents: 3
diff changeset
304 record = line
5
e77b954f0da5 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 6fa2a0294d615c9f267b766337dca0b2d3637219"
bgruening
parents: 4
diff changeset
305 else:
4
0850f2dfba13 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 872590086696cfbc248519507ecb9063926297ad"
bgruening
parents: 3
diff changeset
306 record = ""
5
e77b954f0da5 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 6fa2a0294d615c9f267b766337dca0b2d3637219"
bgruening
parents: 4
diff changeset
307
0
de3c2c88e710 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff changeset
308 # change destination file
de3c2c88e710 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff changeset
309 if rand:
de3c2c88e710 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff changeset
310 new_file_counter = int(math.floor(random.random() * numnew))
8
6cbe2f30c2d7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit a1f1f00463f734ba43f4e5c13c63fe5297ee128e-dirty"
bgruening
parents: 7
diff changeset
311 new_file.close()
6cbe2f30c2d7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit a1f1f00463f734ba43f4e5c13c63fe5297ee128e-dirty"
bgruening
parents: 7
diff changeset
312 new_file = open(newfile_names[new_file_counter], "a")
0
de3c2c88e710 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff changeset
313 elif batch:
de3c2c88e710 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff changeset
314 # number of records read per file
de3c2c88e710 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff changeset
315 records_in_file += 1
de3c2c88e710 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff changeset
316 # have we reached the max for each file?
de3c2c88e710 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff changeset
317 # if so, switch file
de3c2c88e710 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff changeset
318 if records_in_file >= n_per_file:
de3c2c88e710 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff changeset
319 new_file_counter = (new_file_counter + 1) % numnew
de3c2c88e710 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff changeset
320 records_in_file = 0 # reset to 0
8
6cbe2f30c2d7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit a1f1f00463f734ba43f4e5c13c63fe5297ee128e-dirty"
bgruening
parents: 7
diff changeset
321 new_file.close()
6cbe2f30c2d7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit a1f1f00463f734ba43f4e5c13c63fe5297ee128e-dirty"
bgruening
parents: 7
diff changeset
322 new_file = open(newfile_names[new_file_counter], "a")
0
de3c2c88e710 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff changeset
323 else:
de3c2c88e710 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff changeset
324 new_file_counter = (new_file_counter + 1) % numnew
8
6cbe2f30c2d7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit a1f1f00463f734ba43f4e5c13c63fe5297ee128e-dirty"
bgruening
parents: 7
diff changeset
325 new_file.close()
6cbe2f30c2d7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit a1f1f00463f734ba43f4e5c13c63fe5297ee128e-dirty"
bgruening
parents: 7
diff changeset
326 new_file = open(newfile_names[new_file_counter], "a")
0
de3c2c88e710 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff changeset
327 # if beginning of line is not record sep, we must be inside a record
de3c2c88e710 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff changeset
328 # so just append
de3c2c88e710 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff changeset
329 else:
de3c2c88e710 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff changeset
330 record += line
de3c2c88e710 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff changeset
331 # after loop, write final record to file
10
2dae863c8f42 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 5d21f3d6a3a84b4737a2091ae0d772471eb389dd
bgruening
parents: 9
diff changeset
332 if new_file_counter in fresh_files:
2dae863c8f42 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 5d21f3d6a3a84b4737a2091ae0d772471eb389dd
bgruening
parents: 9
diff changeset
333 new_file.write(header)
8
6cbe2f30c2d7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit a1f1f00463f734ba43f4e5c13c63fe5297ee128e-dirty"
bgruening
parents: 7
diff changeset
334 new_file.write(record)
6cbe2f30c2d7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit a1f1f00463f734ba43f4e5c13c63fe5297ee128e-dirty"
bgruening
parents: 7
diff changeset
335 new_file.close()
0
de3c2c88e710 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff changeset
336
de3c2c88e710 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff changeset
337
de3c2c88e710 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff changeset
def split_by_column(args, in_file, out_dir, top):
    """Split a tab-separated file into one output file per ID-column value.

    The first ``top`` lines of ``in_file`` are collected as a header. Every
    following line is split on tabs, the value in the ID column is rewritten
    with a regex substitution, and the result is used as the name of the file
    inside ``out_dir`` that receives the line. The header is written to an
    output file the first time its name is encountered during this call.

    Parameters:
        args: mapping providing
            ``"id_column"`` - 1-based index of the ID column (converted with
            ``int``),
            ``"match"`` - regular expression applied to the ID value,
            ``"sub"`` - replacement used together with ``"match"``.
        in_file: path of the file to read.
        out_dir: directory the output paths are joined onto.
        top: number of leading lines treated as header.

    Raises:
        re.error: if ``args["match"]`` is not a valid regular expression; a
            message is printed before the exception is re-raised.
        IndexError: if a data line has no field at the requested ID column.
    """
    # the caller supplies a 1-based column number; list indexing is 0-based
    key_index = int(args["id_column"]) - 1

    try:
        pattern = re.compile(args["match"])
    except re.error:
        print("ERROR: Match (-m) supplied is not valid regex.")
        raise

    replacement = args["sub"]

    # names of the output files that already received the header
    seen_names = set()
    header = ""

    with open(in_file) as source:
        for line_number, row in enumerate(source, start=1):
            # the leading `top` lines belong to the header, not to the data
            if line_number <= top:
                header += row
                continue

            # tab-separated fields of the row, without the trailing newline
            columns = row.strip("\n").split("\t")

            # derive the output file name from the ID column value
            target_name = pattern.sub(replacement, columns[key_index])
            target_path = os.path.join(out_dir, target_name)

            # the file is re-opened in append mode for every row, so only one
            # output handle is open at any time
            with open(target_path, "a") as out_handle:
                if target_name in seen_names:
                    out_handle.write(row)
                else:
                    out_handle.write(header)
                    seen_names.add(target_name)
                    out_handle.write(row)
0
de3c2c88e710 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff changeset
380
de3c2c88e710 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff changeset
381
de3c2c88e710 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff changeset
382 if __name__ == "__main__":
de3c2c88e710 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff changeset
383 main()