annotate test/test_dedup_hash.py @ 0:f33e9e6a6c88 draft default tip

planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
author mvdbeek
date Wed, 23 Nov 2016 07:49:05 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
1 import hashlib
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
2 import inspect
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
3 import os
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
4 import subprocess
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
5 import sys
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
6 import tempfile
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
7
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
8
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
9 currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
10 parent_dir = os.path.dirname(currentdir)
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
11 sys.path.insert(0, os.path.join(parent_dir, 'dedup_hash/'))
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
12 import dedup_hash
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
13
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
14
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
15 TEST_DATA_DIR = os.path.join(parent_dir, 'test-data/')
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
16 UNCOMPRESSED_IN = ['r1.fastq', 'r2.fastq']
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
17 COMPRESSED_IN = ['r1.fastq.gz', 'r2.fastq.gz']
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
18 UNCOMPRESSED_OUT = ['r1_dedup.fastq', 'r2_dedup.fastq']
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
19 SINGLE_IN = ['r1.fastq']
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
20 SINGLE_OUT = ['r1_dedup.fastq']
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
21
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
22
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
23
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
24 def run(input):
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
25 args = prepare_args(input)
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
26 run_dedup(args)
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
27 compare_output(args)
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
28
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
29
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
30 def compare_output(args):
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
31 ref_out1 = os.path.join(TEST_DATA_DIR, 'r1_dedup.fastq')
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
32 try:
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
33 assert md5(args['outfiles'][0]) == md5(ref_out1)
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
34 except AssertionError:
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
35 cmd = "diff -Nru %s %s" % (args['outfiles'][0], ref_out1)
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
36 subprocess.check_call(cmd.split(' '))
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
37 print('all good')
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
38
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
39
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
40 def prepare_args(test_files):
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
41 infiles = [os.path.join(TEST_DATA_DIR, test_file) for test_file in test_files]
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
42 outfiles = [tempfile.NamedTemporaryFile(delete=False).name for test_file in test_files] # Same number of output files as input files
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
43 kwargs = {'infiles': infiles,
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
44 'outfiles': outfiles,
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
45 'write_gzip': False}
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
46 return kwargs
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
47
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
48
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
49 def run_dedup(kwargs):
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
50 fastq_pairs_instance = dedup_hash.get_unique_fastq_instance()
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
51 fastq_pairs_instance(**kwargs)
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
52
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
53 def md5(fname):
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
54 hash_md5 = hashlib.md5()
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
55 with open(fname, "rb") as f:
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
56 for chunk in iter(lambda: f.read(4096), b""):
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
57 hash_md5.update(chunk)
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
58 return hash_md5.hexdigest()
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
59
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
60 if __name__ == '__main__':
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
61 run(UNCOMPRESSED_IN)
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
62 run(COMPRESSED_IN)
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
63 run(SINGLE_IN)