Mercurial > repos > bgruening > split_file_to_collection
diff split_file_to_collection.py @ 2:d150ac3d853d draft
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 06ffe450bafa280eee8a4331c9cfc9e1ece7c522"
author | bgruening |
---|---|
date | Wed, 28 Aug 2019 10:55:25 -0400 |
parents | de3c2c88e710 |
children | 2ddc36385d7a |
line wrap: on
line diff
```diff
--- a/split_file_to_collection.py Mon Feb 18 15:20:56 2019 -0500
+++ b/split_file_to_collection.py Wed Aug 28 10:55:25 2019 -0400
@@ -15,6 +15,7 @@
 FILETYPES = {'fasta': '^>',
              'fastq': '^@',
              'tabular': '^.*',
+             'txt': '^.*',
              'mgf': '^BEGIN IONS'}


@@ -37,6 +38,8 @@

     ftype = args["ftype"]

+    assert ftype != "generic" or args["generic_re"] != None, "--generic_re needs to be given for generic input"
+
     if args["ftype"] == "tabular" and args["by"] == "col":
         args["match"] = replace_mapped_chars(args["match"])
         args["sub"] = replace_mapped_chars(args["sub"])
@@ -56,7 +59,8 @@
     parser.add_argument('--file_ext', '-e', help="If not splitting by column," +
                                                  " the extension of the new files (without a period)")
     parser.add_argument('--ftype', '-f', help="The type of the file to split", required = True,
-        choices=["mgf", "fastq", "fasta", "tabular"])
+        choices=["mgf", "fastq", "fasta", "tabular", "txt", "generic"])
+    parser.add_argument('--generic_re', '-g', help="Regular expression indicating the start of a new record (only for generic)", required = False)
     parser.add_argument('--by', '-b', help="Split by line or by column (tabular only)",
         default = "row", choices = ["col", "row"])
     parser.add_argument('--top', '-t', type=int, default=0, help="Number of header lines to carry over to new files. " +
@@ -96,7 +100,7 @@

 def split_by_record(args, in_file, out_dir, top, ftype):
     # get record separator for given filetype
-    sep = re.compile(FILETYPES[ftype])
+    sep = re.compile(FILETYPES.get(ftype, args["generic_re"]))

     numnew = args["numnew"]
```