annotate join_files_on_column_fuzzy.py @ 0:64469e7ecf9f draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/join_files_on_column_fuzzy commit fd2bc86108994c9eda731b305ca6a8c71554cfaa
author bgruening
date Sun, 26 Nov 2017 16:13:51 -0500
parents
children f2068690addc
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
64469e7ecf9f planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/join_files_on_column_fuzzy commit fd2bc86108994c9eda731b305ca6a8c71554cfaa
bgruening
parents:
diff changeset
1 #!/usr/bin/env python
64469e7ecf9f planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/join_files_on_column_fuzzy commit fd2bc86108994c9eda731b305ca6a8c71554cfaa
bgruening
parents:
diff changeset
2
64469e7ecf9f planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/join_files_on_column_fuzzy commit fd2bc86108994c9eda731b305ca6a8c71554cfaa
bgruening
parents:
diff changeset
3 import os
64469e7ecf9f planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/join_files_on_column_fuzzy commit fd2bc86108994c9eda731b305ca6a8c71554cfaa
bgruening
parents:
diff changeset
4 import argparse
64469e7ecf9f planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/join_files_on_column_fuzzy commit fd2bc86108994c9eda731b305ca6a8c71554cfaa
bgruening
parents:
diff changeset
5 import sys
64469e7ecf9f planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/join_files_on_column_fuzzy commit fd2bc86108994c9eda731b305ca6a8c71554cfaa
bgruening
parents:
diff changeset
6
64469e7ecf9f planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/join_files_on_column_fuzzy commit fd2bc86108994c9eda731b305ca6a8c71554cfaa
bgruening
parents:
diff changeset
def main(args):
    """Fuzzy-join two delimited files on a numeric column and write the result.

    Each data line of ``args.f1`` is compared with lines of ``args.f2``; when
    the two join values are close enough, the columns of both lines are
    concatenated (tab-separated) and written to ``args.outfile``.

    Attributes read from ``args``:
        f1, f2: paths of the two input files.
        outfile: path of the output file (created or truncated).
        c1, c2: 1-based index of the join column in f1 / f2.
        sep: column separator used to split input lines.
        header: if true, the first line of each file is a header; the two
            headers are joined and written first.
        distance: maximal allowed distance between the two join values.
        unit: 'absolute' (plain difference) or 'ppm' (difference relative
            to the f1 value, in parts per million).
        add_distance: if true, append the computed distance as an extra
            column (and the unit name to the header line).
        closest: if true, write only the nearest match per f1 line.

    Raises:
        ValueError: a join value cannot be converted to float.
        IndexError: a line has fewer columns than c1 / c2 requires.
        ZeroDivisionError: unit is 'ppm' and an f1 join value is 0.

    NOTE(review): f2 is consumed in a single forward pass, so this appears to
    assume both files are sorted ascending on their join column -- confirm
    against the tool documentation.
    """
    expect_header = bool(args.header)
    # Look-ahead window over file 2: entries are (columns, join value).
    cache = []

    def _readline(header=False):
        """Yield the header line (if requested), then (columns, value) pairs."""
        with open(args.f2) as handle2:
            for line in handle2:
                line = line.strip()
                if header:
                    header = False
                    yield line
                    continue
                if not line:
                    continue
                columns = line.split(args.sep)
                yield columns, float(columns[args.c2 - 1])

    it = _readline(header=expect_header)

    def fill_cache():
        """Move the next file-2 entry into the window; no-op at end of file."""
        entry = next(it, None)
        if entry is not None:
            cache.append(entry)

    try:
        # Context managers guarantee both files are closed even on errors.
        with open(args.outfile, 'w+') as out, open(args.f1) as handle1:
            for line in handle1:
                line = line.strip()
                if expect_header:
                    expect_header = False
                    # An empty file 2 has no header; use '' instead of
                    # letting StopIteration escape.
                    second_header = next(it, '')
                    if args.add_distance:
                        out.write('%s\t%s\t%s\n' % (line, second_header, args.unit))
                    else:
                        out.write('%s\t%s\n' % (line, second_header))
                    continue
                if not line:
                    continue

                columns = line.split(args.sep)
                value1 = float(columns[args.c1 - 1])
                lower_bound = value1 - args.distance
                upper_bound = value1 + args.distance
                carry_over = []  # entries that stay in the window for the next line
                matches = []     # (distance, output record) for this line

                fill_cache()
                while cache:
                    _c, value2 = cache.pop(0)
                    if args.unit == 'absolute':
                        dist = abs(value1 - value2)
                        matched = lower_bound <= value2 <= upper_bound
                        too_large = value2 > upper_bound
                    elif args.unit == 'ppm':
                        dist = abs((value1 - value2) / value1 * 1000000)
                        matched = dist <= args.distance
                        too_large = value2 > value1
                    else:
                        # Unknown unit: nothing can match, drop the entry.
                        continue

                    if matched:
                        joined = '\t'.join(columns + _c)
                        if args.add_distance:
                            record = '%s\t%s\n' % (joined, dist)
                        else:
                            record = joined + '\n'
                        matches.append((dist, record))
                        # A matching entry may also match the next f1 line.
                        carry_over.append([_c, value2])
                        fill_cache()
                    elif too_large:
                        # The file-2 value is beyond the current window: keep
                        # it for the next round and stop reading ahead.
                        carry_over.append([_c, value2])
                    else:
                        # The file-2 value is below the current window: drop
                        # it and look at the next file-2 line.  (The ppm branch
                        # used to keep such entries without reading on, which
                        # stalled the scan and lost later matches.)
                        fill_cache()

                if args.closest and matches:
                    # min() returns the first minimal entry, like a stable sort.
                    out.write(min(matches, key=lambda match: match[0])[1])
                else:
                    for _dist, record in matches:
                        out.write(record)
                cache[:] = carry_over
    finally:
        # Closing the generator releases the file-2 handle deterministically.
        it.close()
64469e7ecf9f planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/join_files_on_column_fuzzy commit fd2bc86108994c9eda731b305ca6a8c71554cfaa
bgruening
parents:
diff changeset
99
64469e7ecf9f planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/join_files_on_column_fuzzy commit fd2bc86108994c9eda731b305ca6a8c71554cfaa
bgruening
parents:
diff changeset
100
64469e7ecf9f planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/join_files_on_column_fuzzy commit fd2bc86108994c9eda731b305ca6a8c71554cfaa
bgruening
parents:
diff changeset
if __name__ == '__main__':
    # Script entry point: declare the command-line options, parse them and
    # hand the resulting namespace to main().
    cli = argparse.ArgumentParser(
        description='Merge two files on a common column the fuzzy way.'
    )

    # The two input files.
    for file_flag in ('--f1', '--f2'):
        cli.add_argument(file_flag, required=True)

    # Join column of each input file.
    cli.add_argument(
        '--c1', type=int, required=True,
        help="Column in file 1 to be merged on.",
    )
    cli.add_argument(
        '--c2', type=int, required=True,
        help="Column in file 2 to be merged on.",
    )

    cli.add_argument('--outfile', required=True)

    # Boolean switches, all off unless given on the command line.
    switches = (
        ('--header', "The files have a header line at the beginning."),
        ('--closest', "Only report the closest match."),
        ('--add_distance', "Add addional column with the distance between the two values."),
    )
    for switch, description in switches:
        cli.add_argument(switch, action='store_true', help=description)

    # Parsing and matching parameters.
    cli.add_argument(
        '--sep', type=str, default="\t",
        help="Files are separated by this separator.",
    )
    cli.add_argument(
        '--distance', type=float, default="0.2",
        help="Maximal allowed distance.",
    )
    cli.add_argument('--unit', choices=['ppm', 'absolute'], default='absolute')

    main(cli.parse_args())
64469e7ecf9f planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/join_files_on_column_fuzzy commit fd2bc86108994c9eda731b305ca6a8c71554cfaa
bgruening
parents:
diff changeset
118
64469e7ecf9f planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/join_files_on_column_fuzzy commit fd2bc86108994c9eda731b305ca6a8c71554cfaa
bgruening
parents:
diff changeset
119