Mercurial > repos > bgruening > split_file_to_collection

diff split_file_to_collection.py @ 2:d150ac3d853d draft
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 06ffe450bafa280eee8a4331c9cfc9e1ece7c522"
author: bgruening
date: Wed, 28 Aug 2019 10:55:25 -0400
parents: de3c2c88e710
children: 2ddc36385d7a
--- a/split_file_to_collection.py	Mon Feb 18 15:20:56 2019 -0500
+++ b/split_file_to_collection.py	Wed Aug 28 10:55:25 2019 -0400
@@ -15,6 +15,7 @@
 FILETYPES = {'fasta': '^>',
              'fastq': '^@',
              'tabular': '^.*',
+             'txt': '^.*',
              'mgf': '^BEGIN IONS'}
 
 
@@ -37,6 +38,8 @@
 
     ftype = args["ftype"]
 
+    assert ftype != "generic" or args["generic_re"] != None, "--generic_re needs to be given for generic input"
+
     if args["ftype"] == "tabular" and args["by"] == "col":
         args["match"] = replace_mapped_chars(args["match"])
         args["sub"] = replace_mapped_chars(args["sub"])
@@ -56,7 +59,8 @@
     parser.add_argument('--file_ext', '-e', help="If not splitting by column," +
                                                  " the extension of the new files (without a period)")
     parser.add_argument('--ftype', '-f', help="The type of the file to split", required = True,
-        choices=["mgf", "fastq", "fasta", "tabular"])
+        choices=["mgf", "fastq", "fasta", "tabular", "txt", "generic"])
+    parser.add_argument('--generic_re', '-g', help="Regular expression indicating the start of a new record (only for generic)", required = False)
     parser.add_argument('--by', '-b', help="Split by line or by column (tabular only)",
         default = "row", choices = ["col", "row"])
     parser.add_argument('--top', '-t', type=int, default=0, help="Number of header lines to carry over to new files. " +
@@ -96,7 +100,7 @@
 
 def split_by_record(args, in_file, out_dir, top, ftype):
     # get record separator for given filetype
-    sep = re.compile(FILETYPES[ftype])
+    sep = re.compile(FILETYPES.get(ftype, args["generic_re"]))
 
     numnew = args["numnew"]
author	bgruening
date	Wed, 28 Aug 2019 10:55:25 -0400
parents	de3c2c88e710
children	2ddc36385d7a