comparison test/test_dedup_hash.py @ 0:f33e9e6a6c88 draft default tip

planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
author mvdbeek
date Wed, 23 Nov 2016 07:49:05 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:f33e9e6a6c88
1 import hashlib
2 import inspect
3 import os
4 import subprocess
5 import sys
6 import tempfile
7
8
# Locate the directory holding this test file, then put the sibling
# ``dedup_hash/`` directory at the front of sys.path so the module under
# test can be imported without being installed.
currentdir = os.path.dirname(
    os.path.abspath(inspect.getfile(inspect.currentframe()))
)
parent_dir = os.path.dirname(currentdir)
sys.path.insert(0, os.path.join(parent_dir, 'dedup_hash/'))
import dedup_hash  # noqa: E402 -- must come after the sys.path change above
13
14
# Directory holding the input and reference files used by this script.
TEST_DATA_DIR = os.path.join(parent_dir, 'test-data/')

# Input file sets (names are relative to TEST_DATA_DIR).
UNCOMPRESSED_IN = [
    'r1.fastq',
    'r2.fastq',
]
COMPRESSED_IN = [
    'r1.fastq.gz',
    'r2.fastq.gz',
]
SINGLE_IN = [
    'r1.fastq',
]

# Reference output file names (relative to TEST_DATA_DIR).
UNCOMPRESSED_OUT = [
    'r1_dedup.fastq',
    'r2_dedup.fastq',
]
SINGLE_OUT = [
    'r1_dedup.fastq',
]
21
22
23
def run(input):
    """Run one deduplication round-trip and verify its output.

    :param input: list of input file names, relative to TEST_DATA_DIR.

    Builds the keyword arguments for the given inputs, runs the
    deduplication with them, then checks the produced output against the
    reference file.
    """
    dedup_kwargs = prepare_args(input)
    run_dedup(dedup_kwargs)
    compare_output(dedup_kwargs)
28
29
def compare_output(args):
    """Check the first produced output file against the reference output.

    :param args: dict as built by ``prepare_args``; only
        ``args['outfiles'][0]`` is read here.
    :raises subprocess.CalledProcessError: if the MD5 digests differ and
        the external ``diff`` command then exits with a non-zero status
        (which it does when the files differ), after printing the diff.

    Only the first output file is compared, always against
    ``r1_dedup.fastq`` in TEST_DATA_DIR.
    """
    produced = args['outfiles'][0]
    ref_out1 = os.path.join(TEST_DATA_DIR, 'r1_dedup.fastq')
    # Compare explicitly instead of using ``assert``: assert statements are
    # removed when Python runs with -O, which would make this check a no-op.
    if md5(produced) != md5(ref_out1):
        # Pass argv as a list so paths containing spaces stay intact;
        # diff's output shows what differs and its exit status fails the run.
        subprocess.check_call(['diff', '-Nru', produced, ref_out1])
    print('all good')
38
39
def prepare_args(test_files):
    """Build the keyword arguments for one deduplication run.

    :param test_files: list of input file names, relative to TEST_DATA_DIR.
    :returns: dict with keys ``'infiles'`` (full input paths),
        ``'outfiles'`` (paths of freshly created temporary files, one per
        input file) and ``'write_gzip'`` (always ``False``).

    The temporary output files are created with ``delete=False`` and are
    not removed by this function.
    """
    infiles = [os.path.join(TEST_DATA_DIR, test_file) for test_file in test_files]
    outfiles = []
    for _ in test_files:  # Same number of output files as input files
        # Close the handle right away; only the reserved path is needed and
        # leaving it open would rely on garbage collection to release it.
        with tempfile.NamedTemporaryFile(delete=False) as tmp:
            outfiles.append(tmp.name)
    return {
        'infiles': infiles,
        'outfiles': outfiles,
        'write_gzip': False,
    }
47
48
def run_dedup(kwargs):
    """Run the deduplication with the given keyword arguments.

    :param kwargs: dict of keyword arguments, as built by ``prepare_args``.

    ``dedup_hash.get_unique_fastq_instance()`` supplies a callable, which
    is invoked immediately with ``kwargs`` unpacked; its result is ignored.
    """
    dedup_hash.get_unique_fastq_instance()(**kwargs)
52
def md5(fname):
    """Return the hexadecimal MD5 digest of the file at ``fname``.

    The file is read in binary mode in 4096-byte blocks, so it is never
    loaded into memory as a whole.
    """
    digest = hashlib.md5()
    with open(fname, "rb") as handle:
        while True:
            block = handle.read(4096)
            if not block:  # empty bytes object signals end of file
                break
            digest.update(block)
    return digest.hexdigest()
59
if __name__ == '__main__':
    # Run the check for each input set in turn: the two plain .fastq files,
    # the two .fastq.gz files, and the single .fastq file.
    for test_inputs in (UNCOMPRESSED_IN, COMPRESSED_IN, SINGLE_IN):
        run(test_inputs)