diff test/test_dedup_hash.py @ 0:f33e9e6a6c88 draft default tip

planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
author mvdbeek
date Wed, 23 Nov 2016 07:49:05 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/test_dedup_hash.py	Wed Nov 23 07:49:05 2016 -0500
@@ -0,0 +1,63 @@
+import hashlib
+import inspect
+import os
+import subprocess
+import sys
+import tempfile
+
+
+currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))  # directory holding this test file
+parent_dir = os.path.dirname(currentdir)  # one level above this file's directory
+sys.path.insert(0, os.path.join(parent_dir, 'dedup_hash/'))  # <parent_dir>/dedup_hash/ goes first on the import path
+import dedup_hash  # placed after the sys.path.insert above so that directory is searched first
+
+
+TEST_DATA_DIR = os.path.join(parent_dir, 'test-data/')  # <parent_dir>/test-data/: input files and the reference output
+UNCOMPRESSED_IN = ['r1.fastq', 'r2.fastq']  # passed to run() in the __main__ block
+COMPRESSED_IN = ['r1.fastq.gz', 'r2.fastq.gz']  # passed to run() in the __main__ block
+UNCOMPRESSED_OUT = ['r1_dedup.fastq', 'r2_dedup.fastq']  # NOTE(review): not referenced by the code shown here
+SINGLE_IN = ['r1.fastq']  # passed to run() in the __main__ block
+SINGLE_OUT = ['r1_dedup.fastq']  # NOTE(review): not referenced by the code shown here
+
+
+
+def run(input):  # dedup the `input` test files into temp outputs, then check the result
+    test_kwargs = prepare_args(input)
+    run_dedup(test_kwargs)
+    compare_output(test_kwargs)
+
+
+def compare_output(args):
+    """Check the first output file against the r1_dedup.fastq reference, diffing on mismatch."""
+    ref_out1 = os.path.join(TEST_DATA_DIR, 'r1_dedup.fastq')
+    if md5(args['outfiles'][0]) != md5(ref_out1):
+        # `if` (not `assert`, which -O strips); argv list (not str.split) keeps paths with spaces intact.
+        # diff exits non-zero when the files differ, so check_call raises CalledProcessError here.
+        subprocess.check_call(['diff', '-Nru', args['outfiles'][0], ref_out1])
+    print('all good')
+
+
+def prepare_args(test_files):
+    """Build run_dedup kwargs: inputs under TEST_DATA_DIR plus one temp output path per input."""
+    infiles = [os.path.join(TEST_DATA_DIR, test_file) for test_file in test_files]
+    handles = [tempfile.NamedTemporaryFile(delete=False) for _ in test_files]
+    for handle in handles:
+        handle.close()  # only the path is needed; delete=False keeps the (empty) file on disk
+    return {'infiles': infiles, 'outfiles': [handle.name for handle in handles], 'write_gzip': False}
+
+
+def run_dedup(kwargs):
+    """Call the object returned by dedup_hash.get_unique_fastq_instance() with **kwargs."""
+    dedup_hash.get_unique_fastq_instance()(**kwargs)
+
+def md5(fname):  # hex MD5 digest of the file at `fname`, hashed in 4096-byte reads
+    with open(fname, "rb") as stream:
+        digest = hashlib.md5()
+        for block in iter(lambda: stream.read(4096), b""):
+            digest.update(block)
+    return digest.hexdigest()
+
+if __name__ == '__main__':
+    # Run the uncompressed, compressed, and single-file input sets in turn.
+    for test_inputs in (UNCOMPRESSED_IN, COMPRESSED_IN, SINGLE_IN):
+        run(test_inputs)