Mercurial > repos > bgruening > join_files_on_column_fuzzy
annotate join_files_on_column_fuzzy.py @ 1:8750c3125ec5 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/join_files_on_column_fuzzy commit b1763d10a1c39bc6651be891a993989c5a5617ff
author | bgruening |
---|---|
date | Fri, 01 Dec 2017 16:26:59 -0500 |
parents | 64469e7ecf9f |
children | f2068690addc |
rev | line source |
---|---|
0
64469e7ecf9f
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/join_files_on_column_fuzzy commit fd2bc86108994c9eda731b305ca6a8c71554cfaa
bgruening
parents:
diff
changeset
|
1 #!/usr/bin/env python |
64469e7ecf9f
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/join_files_on_column_fuzzy commit fd2bc86108994c9eda731b305ca6a8c71554cfaa
bgruening
parents:
diff
changeset
|
2 |
64469e7ecf9f
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/join_files_on_column_fuzzy commit fd2bc86108994c9eda731b305ca6a8c71554cfaa
bgruening
parents:
diff
changeset
|
3 import os |
64469e7ecf9f
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/join_files_on_column_fuzzy commit fd2bc86108994c9eda731b305ca6a8c71554cfaa
bgruening
parents:
diff
changeset
|
4 import argparse |
64469e7ecf9f
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/join_files_on_column_fuzzy commit fd2bc86108994c9eda731b305ca6a8c71554cfaa
bgruening
parents:
diff
changeset
|
5 import sys |
64469e7ecf9f
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/join_files_on_column_fuzzy commit fd2bc86108994c9eda731b305ca6a8c71554cfaa
bgruening
parents:
diff
changeset
|
6 |
64469e7ecf9f
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/join_files_on_column_fuzzy commit fd2bc86108994c9eda731b305ca6a8c71554cfaa
bgruening
parents:
diff
changeset
|
def main(args):
    """Fuzzy-join two delimited files on a numeric column.

    Every data row of ``args.f1`` is paired with the rows of ``args.f2``
    whose join value lies within ``args.distance`` of its own join value.
    The joined rows are written tab-separated to ``args.outfile``.

    The files are read in a single forward pass: a row of file 2 that is
    too small for the current row of file 1 is discarded for good, and a
    row that is too large is carried over to the next row of file 1.
    This only yields complete results when both files are sorted in
    ascending order on their join columns.

    Attributes read from ``args``:
        f1, f2: paths of the two input files.
        c1, c2: 1-based index of the join column in each file.
        outfile: path of the output file (truncated on open).
        header: if true, the first line of each file is a header; the two
            headers are joined and written first.
        closest: if true, write only the match with the smallest distance
            for each row of file 1 (the first one on ties).
        add_distance: if true, append the distance as an extra column
            (and ``args.unit`` as its header).
        sep: column separator of the input files.
        distance: maximal allowed distance.
        unit: ``'absolute'`` (plain difference) or ``'ppm'`` (difference
            relative to the file-1 value, in parts per million).

    Raises:
        ValueError: if ``args.unit`` is neither ``'absolute'`` nor
            ``'ppm'``, or a join value cannot be parsed as a float.
        IndexError: if a row has fewer columns than the join column index.
    """
    from collections import deque

    if args.unit not in ('absolute', 'ppm'):
        raise ValueError('Unknown unit: %r' % (args.unit,))

    def _read_second(handle, header=False):
        """Yield the raw header line (if requested), then ``(columns, value)`` per row."""
        for raw in handle:
            raw = raw.strip()
            if header:
                header = False
                yield raw
                continue
            if not raw:
                continue
            columns = raw.split(args.sep)
            yield columns, float(columns[args.c2 - 1])

    def _ppm_distance(value1, value2):
        """Return the distance of value2 from value1 in parts per million of value1."""
        if value1 == 0:
            # A relative distance to zero is undefined; only an exact hit matches.
            return 0.0 if value2 == 0 else float('inf')
        return abs((value1 - value2) / value1 * 1000000)

    def _format(columns1, columns2, dist):
        """Build one output line, optionally followed by the distance column."""
        joined = '\t'.join(columns1 + columns2)
        if args.add_distance:
            return '%s\t%s\n' % (joined, dist)
        return '%s\n' % joined

    with open(args.outfile, 'w') as out, \
            open(args.f1) as handle1, \
            open(args.f2) as handle2:
        rows2 = _read_second(handle2, header=bool(args.header))

        def _pull(queue):
            """Append the next row of file 2 to *queue*, if there is one."""
            following = next(rows2, None)
            if following is not None:
                queue.append(following)

        # Rows of file 2 that may still match the current or a later row of file 1.
        pending = deque()
        expect_header = bool(args.header)

        for line in handle1:
            line = line.strip()
            if expect_header:
                expect_header = False
                # An empty file 2 has no header; join with an empty string instead of failing.
                second_header = next(rows2, '')
                if args.add_distance:
                    out.write('%s\t%s\t%s\n' % (line, second_header, args.unit))
                else:
                    out.write('%s\t%s\n' % (line, second_header))
                continue
            if not line:
                continue

            columns = line.split(args.sep)
            value1 = float(columns[args.c1 - 1])
            lower_bound = value1 - args.distance
            upper_bound = value1 + args.distance

            carried = deque()  # rows of file 2 kept for the next row of file 1
            matches = []       # (distance, output line) for this row of file 1

            _pull(pending)
            while pending:
                columns2, value2 = pending.popleft()
                if args.unit == 'absolute':
                    dist = abs(value1 - value2)
                    in_range = lower_bound <= value2 <= upper_bound
                    too_large = value2 > upper_bound
                else:
                    dist = _ppm_distance(value1, value2)
                    in_range = dist <= args.distance
                    # The ppm distance is unsigned, so the direction of a miss
                    # has to be taken from the raw values.
                    too_large = value2 > value1

                if in_range:
                    matches.append((dist, _format(columns, columns2, dist)))
                    # A matching row may also match the next row of file 1.
                    carried.append((columns2, value2))
                    _pull(pending)
                elif too_large:
                    # Beyond the current window: keep it for the next row of file 1.
                    carried.append((columns2, value2))
                else:
                    # Below the current window: drop it and look at the next row of file 2.
                    _pull(pending)

            if args.closest and matches:
                out.write(min(matches, key=lambda match: match[0])[1])
            else:
                out.writelines(text for _dist, text in matches)

            pending = carried
64469e7ecf9f
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/join_files_on_column_fuzzy commit fd2bc86108994c9eda731b305ca6a8c71554cfaa
bgruening
parents:
diff
changeset
|
99 |
64469e7ecf9f
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/join_files_on_column_fuzzy commit fd2bc86108994c9eda731b305ca6a8c71554cfaa
bgruening
parents:
diff
changeset
|
100 |
64469e7ecf9f
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/join_files_on_column_fuzzy commit fd2bc86108994c9eda731b305ca6a8c71554cfaa
bgruening
parents:
diff
changeset
|
if __name__ == '__main__':
    # Command-line entry point: parse the options, reject values that would
    # silently give wrong results, then run the join.
    parser = argparse.ArgumentParser(description='Merge two files on a common column the fuzzy way.')
    parser.add_argument('--f1', required=True, help="First input file.")
    parser.add_argument('--f2', required=True, help="Second input file.")
    parser.add_argument('--c1', type=int, required=True, help="Column in file 1 to be merged on.")
    parser.add_argument('--c2', type=int, required=True, help="Column in file 2 to be merged on.")
    parser.add_argument('--outfile', required=True, help="Output file.")
    parser.add_argument('--header', action='store_true', help="The files have a header line at the beginning.")
    parser.add_argument('--closest', action='store_true', help="Only report the closest match.")
    parser.add_argument('--add_distance', action='store_true', help="Add additional column with the distance between the two values.")
    parser.add_argument('--sep', type=str, default="\t", help="Files are separated by this separator.")
    parser.add_argument('--distance', type=float, default=0.2, help="Maximal allowed distance.")
    parser.add_argument('--unit', choices=['ppm', 'absolute'], default='absolute')
    args = parser.parse_args()

    # Columns are 1-based and used as `c - 1` list indices; 0 or a negative
    # number would silently select a column counted from the end of the row.
    if args.c1 < 1 or args.c2 < 1:
        parser.error('--c1 and --c2 are 1-based column numbers and must be >= 1.')
    if args.distance < 0:
        parser.error('--distance must not be negative.')

    main(args)
64469e7ecf9f
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/join_files_on_column_fuzzy commit fd2bc86108994c9eda731b305ca6a8c71554cfaa
bgruening
parents:
diff
changeset
|
118 |
64469e7ecf9f
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/join_files_on_column_fuzzy commit fd2bc86108994c9eda731b305ca6a8c71554cfaa
bgruening
parents:
diff
changeset
|
119 |