# HG changeset patch
# User brenninc
# Date 1466498536 14400
# Node ID 2470f3968557ab01fc6cc14bd260701ba79b2cde
# Parent 97a9636974bc06786ba5596761b3817b8196f99a
Uploaded
diff -r 97a9636974bc -r 2470f3968557 data_reader.xml
--- a/data_reader.xml Mon May 09 02:52:47 2016 -0400
+++ b/data_reader.xml Tue Jun 21 04:42:16 2016 -0400
@@ -9,10 +9,13 @@
--new_ending .${directory.fields.galaxy_extension}
--decompress ${directory.fields.decompress}
#if $results.start
- --start $results.start
+ --start "$results.start"
#end if
#if $results.last
- --last $results.last
+ --last "$results.last"
+ #end if
+ #if $results.regex
+ --regex "$results.regex"
#end if
#end if
--path ${directory.fields.path}
@@ -33,6 +36,11 @@
+
+
+
+
+
@@ -76,7 +84,20 @@
+
+
+
+
+
+
+
+
@@ -107,13 +128,23 @@
Files can also be filter for the last part before the file extsentions.
+Files can also be filtered by a regex pattern.
+Only files that contain the regex string will be included.
+This uses the Python search function, so a file is included as long as the regex pattern is found somewhere in the file name (excluding the extension).
+
+The three filters (start, last and regex), if supplied, work independently, so only files that pass all supplied tests will be included.
+
+Note: Before applying the last and regex tests, the ending (including the . just before the ending) is removed.
+
Assuming the directory has:
-C01_R1_001.fasta C01_R2_001.fatsa C02_R1_001.fasta C02_R2_001.fatsa
+C01_R1_001.fasta C01_R2_001.fatsa C02_R1_001.fasta C02_R2_002.fatsa
-Setting start C01 will return just the C01 files: C01_R1_001.fasta C01_R2_001.fatsa
+Setting start C01 will return just the C01 files: C01_R1_001.fasta C01_R2_001.fatsa
Setting last R1_001 will return the read1 files: C01_R1_001.fasta C02_R1_001.fasta
+Setting regex R2_00.$ will return the R2 files: C01_R2_001.fatsa C02_R2_002.fatsa
+
As Galaxy detects the file type based on the extension this tool will change the exstension as setup by the admin.
This tool will unzip gz files if requested to by the admin,
diff -r 97a9636974bc -r 2470f3968557 directory_copier.py
--- a/directory_copier.py Mon May 09 02:52:47 2016 -0400
+++ b/directory_copier.py Tue Jun 21 04:42:16 2016 -0400
@@ -1,6 +1,7 @@
import gzip
import optparse # using optparse as hydra still python 2.6
import os.path
+import re
import shutil
import sys
@@ -14,22 +15,21 @@
if options.start:
if not(a_file.startswith(options.start)):
return None
+ name = a_file[:-len(ending)]
+ if name.endswith("."):
+ name = name[:-1]
if options.last:
- if ending[0] == ".":
- last = options.last + ending
- else:
- if options.last[-1] == ".":
- last = options.last + ending
- else:
- last = options.last + "." + ending
- if not(a_file.endswith(last)):
+ if not(name.endswith(last)):
+ return None
+ if options.regex:
+ pattern = re.compile(options.regex)
+ if pattern.search(name) is None:
return None
if options.new_ending:
- name = a_file[:-len(ending)]
if options.new_ending[0] ==".":
- if name[-1] == ".":
- name = name[:-1]
- return name + options.new_ending
+ return name + options.new_ending
+ else:
+ return name + "." + options.new_ending
if options.decompress:
if a_file.endswith(".gz"):
return a_file[:-3]
@@ -46,7 +46,6 @@
def link(a_file, new_name, path):
file_path = os.path.join(os.path.realpath(path), a_file)
sym_path = os.path.join(os.path.realpath("output"), new_name)
- #if not(os.path.exists(sym_path)):
os.link(file_path, sym_path)
@@ -58,7 +57,8 @@
def copy_and_link(path, options):
- os.mkdir("output")
+ if options.decompress or options.link:
+ os.mkdir("output")
with open(options.list, 'w') as list_file:
files = os.listdir(path)
files.sort()
@@ -86,10 +86,10 @@
help="String that must be at the start of the file name ")
parser.add_option("--last", action="store", type="string",
help="String that must be the last bit of the file name before the endings")
+ parser.add_option("--regex", action="store", type="string",
+ help="Regex for file names not including the endings")
parser.add_option("--new_ending", action="store", type="string",
help="New ending to replace any previous ending in list and if required links or decompressions. Note: If not set decompression will auto remove the compressioned part of the ending")
- #parser.add_option("--regex", action="store", type="string",
- # help="Regex pattern the file name (less . ending) must match before the endings")
parser.add_option("--list", action="store", type="string",
help="Path to where all files should be listed. ")
parser.add_option("--link", action="store_true", default=False,