Mercurial > repos > richard-burhans > segalign
annotate diagonal_partition.py @ 21:25fa179d9d0a draft
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit e4e05d23d9da18ea87bc352122ca9e6cfa73d1c7
author | richard-burhans |
---|---|
date | Fri, 09 Aug 2024 20:23:12 +0000 |
parents | 08e987868f0f |
children |
rev | line source |
---|---|
9
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
1 #!/usr/bin/env python |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
2 |
21
25fa179d9d0a
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit e4e05d23d9da18ea87bc352122ca9e6cfa73d1c7
richard-burhans
parents:
9
diff
changeset
|
3 |
9
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
4 """ |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
5 Diagonal partitioning for segment files output by SegAlign. |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
6 |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
7 Usage: |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
8 diagonal_partition.py <max-segments> <lastz-command> |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
9 |
21
25fa179d9d0a
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit e4e05d23d9da18ea87bc352122ca9e6cfa73d1c7
richard-burhans
parents:
9
diff
changeset
|
10 set <max-segments> to 0 to skip partitioning, or to a negative value (e.g. -1) to estimate the chunk size |
9
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
11 """ |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
12 |
21
25fa179d9d0a
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit e4e05d23d9da18ea87bc352122ca9e6cfa73d1c7
richard-burhans
parents:
9
diff
changeset
|
13 import collections |
9
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
14 import os |
21
25fa179d9d0a
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit e4e05d23d9da18ea87bc352122ca9e6cfa73d1c7
richard-burhans
parents:
9
diff
changeset
|
15 import statistics |
9
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
16 import sys |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
17 import typing |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
18 |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
19 |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
20 def chunks(lst: tuple[str, ...], n: int) -> typing.Iterator[tuple[str, ...]]: |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
21 """Yield successive n-sized chunks from lst; the final chunk may be shorter.""" |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
22 for i in range(0, len(lst), n): |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
23 yield lst[i:i + n] |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
24 |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
25 |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
26 if __name__ == "__main__": |
21
25fa179d9d0a
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit e4e05d23d9da18ea87bc352122ca9e6cfa73d1c7
richard-burhans
parents:
9
diff
changeset
|
27 # TODO: make these optional user-defined parameters |
9
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
28 |
21
25fa179d9d0a
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit e4e05d23d9da18ea87bc352122ca9e6cfa73d1c7
richard-burhans
parents:
9
diff
changeset
|
29 # deletes original segment file after splitting |
9
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
30 DELETE_AFTER_CHUNKING = True |
21
25fa179d9d0a
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit e4e05d23d9da18ea87bc352122ca9e6cfa73d1c7
richard-burhans
parents:
9
diff
changeset
|
31 |
25fa179d9d0a
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit e4e05d23d9da18ea87bc352122ca9e6cfa73d1c7
richard-burhans
parents:
9
diff
changeset
|
32 # don't partition segment files with line count below this value |
25fa179d9d0a
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit e4e05d23d9da18ea87bc352122ca9e6cfa73d1c7
richard-burhans
parents:
9
diff
changeset
|
33 MIN_CHUNK_SIZE = 5000 |
9
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
34 |
21
25fa179d9d0a
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit e4e05d23d9da18ea87bc352122ca9e6cfa73d1c7
richard-burhans
parents:
9
diff
changeset
|
35 # only used when segment size is being estimated |
25fa179d9d0a
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit e4e05d23d9da18ea87bc352122ca9e6cfa73d1c7
richard-burhans
parents:
9
diff
changeset
|
36 MAX_CHUNK_SIZE = 50000 |
25fa179d9d0a
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit e4e05d23d9da18ea87bc352122ca9e6cfa73d1c7
richard-burhans
parents:
9
diff
changeset
|
37 |
25fa179d9d0a
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit e4e05d23d9da18ea87bc352122ca9e6cfa73d1c7
richard-burhans
parents:
9
diff
changeset
|
38 # include chosen split size in file name |
25fa179d9d0a
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit e4e05d23d9da18ea87bc352122ca9e6cfa73d1c7
richard-burhans
parents:
9
diff
changeset
|
39 DEBUG = False |
25fa179d9d0a
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit e4e05d23d9da18ea87bc352122ca9e6cfa73d1c7
richard-burhans
parents:
9
diff
changeset
|
40 |
25fa179d9d0a
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit e4e05d23d9da18ea87bc352122ca9e6cfa73d1c7
richard-burhans
parents:
9
diff
changeset
|
41 # first parameter contains chunk size |
25fa179d9d0a
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit e4e05d23d9da18ea87bc352122ca9e6cfa73d1c7
richard-burhans
parents:
9
diff
changeset
|
42 chunk_size = int(sys.argv[1]) |
9
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
43 params = sys.argv[2:] |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
44 |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
45 # chunk size 0: skip partitioning, print the command unchanged and exit |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
46 if chunk_size == 0: |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
47 print(" ".join(params), flush=True) |
21
25fa179d9d0a
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit e4e05d23d9da18ea87bc352122ca9e6cfa73d1c7
richard-burhans
parents:
9
diff
changeset
|
48 sys.exit(0) |
9
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
49 |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
50 # Parsing command output from SegAlign |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
51 segment_key = "--segments=" |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
52 segment_index = None |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
53 input_file = None |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
54 |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
55 for index, value in enumerate(params): |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
56 if value[:len(segment_key)] == segment_key: |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
57 segment_index = index |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
58 input_file = value[len(segment_key):] |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
59 break |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
60 |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
61 if segment_index is None: |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
62 sys.exit(f"Error: could not get segment key {segment_key} from parameters {params}") |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
63 |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
64 if input_file is None: |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
65 sys.exit(f"Error: could not get segment file from parameters {params}") |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
66 |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
67 if not os.path.isfile(input_file): |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
68 sys.exit(f"Error: File {input_file} does not exist") |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
69 |
21
25fa179d9d0a
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit e4e05d23d9da18ea87bc352122ca9e6cfa73d1c7
richard-burhans
parents:
9
diff
changeset
|
70 # estimate the line count from the file size: assumes each char is 1 byte and every line is as long as the first |
25fa179d9d0a
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit e4e05d23d9da18ea87bc352122ca9e6cfa73d1c7
richard-burhans
parents:
9
diff
changeset
|
71 line_size = None |
9
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
72 file_size = os.path.getsize(input_file) |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
73 with open(input_file, "r") as f: |
21
25fa179d9d0a
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit e4e05d23d9da18ea87bc352122ca9e6cfa73d1c7
richard-burhans
parents:
9
diff
changeset
|
74 # readline() keeps the trailing newline, so it is already included in the length |
25fa179d9d0a
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit e4e05d23d9da18ea87bc352122ca9e6cfa73d1c7
richard-burhans
parents:
9
diff
changeset
|
75 line_size = len(f.readline()) |
9
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
76 |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
77 estimated_lines = file_size // line_size |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
78 |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
79 # check if chunk size should be estimated |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
80 if chunk_size < 0: |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
81 # optimization, do not need to get each file size in this case |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
82 if estimated_lines < MIN_CHUNK_SIZE: |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
83 print(" ".join(params), flush=True) |
21
25fa179d9d0a
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit e4e05d23d9da18ea87bc352122ca9e6cfa73d1c7
richard-burhans
parents:
9
diff
changeset
|
84 sys.exit(0) |
9
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
85 |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
86 # get size of each segment assuming DELETE_AFTER_CHUNKING == True |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
87 # takes into account already split segments |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
88 files = [i for i in os.listdir(".") if i.endswith(".segments")] |
21
25fa179d9d0a
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit e4e05d23d9da18ea87bc352122ca9e6cfa73d1c7
richard-burhans
parents:
9
diff
changeset
|
89 |
25fa179d9d0a
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit e4e05d23d9da18ea87bc352122ca9e6cfa73d1c7
richard-burhans
parents:
9
diff
changeset
|
90 if len(files) < 2: |
25fa179d9d0a
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit e4e05d23d9da18ea87bc352122ca9e6cfa73d1c7
richard-burhans
parents:
9
diff
changeset
|
91 # if not enough segment files for estimation, use MAX_CHUNK_SIZE |
25fa179d9d0a
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit e4e05d23d9da18ea87bc352122ca9e6cfa73d1c7
richard-burhans
parents:
9
diff
changeset
|
92 chunk_size = MAX_CHUNK_SIZE |
25fa179d9d0a
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit e4e05d23d9da18ea87bc352122ca9e6cfa73d1c7
richard-burhans
parents:
9
diff
changeset
|
93 else: |
25fa179d9d0a
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit e4e05d23d9da18ea87bc352122ca9e6cfa73d1c7
richard-burhans
parents:
9
diff
changeset
|
94 fdict: typing.DefaultDict[str, int] = collections.defaultdict(int) |
25fa179d9d0a
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit e4e05d23d9da18ea87bc352122ca9e6cfa73d1c7
richard-burhans
parents:
9
diff
changeset
|
95 for filename in files: |
25fa179d9d0a
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit e4e05d23d9da18ea87bc352122ca9e6cfa73d1c7
richard-burhans
parents:
9
diff
changeset
|
96 size = os.path.getsize(filename) |
25fa179d9d0a
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit e4e05d23d9da18ea87bc352122ca9e6cfa73d1c7
richard-burhans
parents:
9
diff
changeset
|
97 f_ = filename.split(".split", 1)[0] |
25fa179d9d0a
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit e4e05d23d9da18ea87bc352122ca9e6cfa73d1c7
richard-burhans
parents:
9
diff
changeset
|
98 fdict[f_] += size |
9
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
99 |
21
25fa179d9d0a
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit e4e05d23d9da18ea87bc352122ca9e6cfa73d1c7
richard-burhans
parents:
9
diff
changeset
|
100 if len(fdict) < 7: |
25fa179d9d0a
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit e4e05d23d9da18ea87bc352122ca9e6cfa73d1c7
richard-burhans
parents:
9
diff
changeset
|
101 # outliers can heavily skew prediction if <7 data points |
25fa179d9d0a
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit e4e05d23d9da18ea87bc352122ca9e6cfa73d1c7
richard-burhans
parents:
9
diff
changeset
|
102 # to be safe, use 50% quantile |
25fa179d9d0a
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit e4e05d23d9da18ea87bc352122ca9e6cfa73d1c7
richard-burhans
parents:
9
diff
changeset
|
103 chunk_size = int(statistics.quantiles(fdict.values())[1] // line_size) |
25fa179d9d0a
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit e4e05d23d9da18ea87bc352122ca9e6cfa73d1c7
richard-burhans
parents:
9
diff
changeset
|
104 else: |
25fa179d9d0a
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit e4e05d23d9da18ea87bc352122ca9e6cfa73d1c7
richard-burhans
parents:
9
diff
changeset
|
105 # otherwise use 75% quantile |
25fa179d9d0a
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit e4e05d23d9da18ea87bc352122ca9e6cfa73d1c7
richard-burhans
parents:
9
diff
changeset
|
106 chunk_size = int(statistics.quantiles(fdict.values())[-1] // line_size) |
25fa179d9d0a
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit e4e05d23d9da18ea87bc352122ca9e6cfa73d1c7
richard-burhans
parents:
9
diff
changeset
|
107 # with few data points the quantile estimate can come out far too large |
25fa179d9d0a
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit e4e05d23d9da18ea87bc352122ca9e6cfa73d1c7
richard-burhans
parents:
9
diff
changeset
|
108 # bound the worst case by capping the estimate at MAX_CHUNK_SIZE |
9
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
109 |
21
25fa179d9d0a
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit e4e05d23d9da18ea87bc352122ca9e6cfa73d1c7
richard-burhans
parents:
9
diff
changeset
|
110 chunk_size = min(chunk_size, MAX_CHUNK_SIZE) |
25fa179d9d0a
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit e4e05d23d9da18ea87bc352122ca9e6cfa73d1c7
richard-burhans
parents:
9
diff
changeset
|
111 |
25fa179d9d0a
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit e4e05d23d9da18ea87bc352122ca9e6cfa73d1c7
richard-burhans
parents:
9
diff
changeset
|
112 # no need to sort if number of lines <= chunk_size |
25fa179d9d0a
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit e4e05d23d9da18ea87bc352122ca9e6cfa73d1c7
richard-burhans
parents:
9
diff
changeset
|
113 if (estimated_lines <= chunk_size): |
9
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
114 print(" ".join(params), flush=True) |
21
25fa179d9d0a
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit e4e05d23d9da18ea87bc352122ca9e6cfa73d1c7
richard-burhans
parents:
9
diff
changeset
|
115 sys.exit(0) |
9
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
116 |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
117 # Find rest of relevant parameters |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
118 output_key = "--output=" |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
119 output_index = None |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
120 output_alignment_file = None |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
121 output_alignment_file_base = None |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
122 output_format = None |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
123 |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
124 strand_key = "--strand=" |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
125 strand_index = None |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
126 for index, value in enumerate(params): |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
127 if value[:len(output_key)] == output_key: |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
128 output_index = index |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
129 output_alignment_file = value[len(output_key):] |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
130 output_alignment_file_base, output_format = output_alignment_file.rsplit(".", 1) |
21
25fa179d9d0a
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit e4e05d23d9da18ea87bc352122ca9e6cfa73d1c7
richard-burhans
parents:
9
diff
changeset
|
131 |
9
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
132 if value[:len(strand_key)] == strand_key: |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
133 strand_index = index |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
134 |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
135 if output_alignment_file_base is None: |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
136 sys.exit(f"Error: could not get output alignment file base from parameters {params}") |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
137 |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
138 if output_format is None: |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
139 sys.exit(f"Error: could not get output format from parameters {params}") |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
140 |
21
25fa179d9d0a
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit e4e05d23d9da18ea87bc352122ca9e6cfa73d1c7
richard-burhans
parents:
9
diff
changeset
|
141 if output_index is None: |
25fa179d9d0a
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit e4e05d23d9da18ea87bc352122ca9e6cfa73d1c7
richard-burhans
parents:
9
diff
changeset
|
142 sys.exit(f"Error: could not get output key {output_key} from parameters {params}") |
25fa179d9d0a
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit e4e05d23d9da18ea87bc352122ca9e6cfa73d1c7
richard-burhans
parents:
9
diff
changeset
|
143 |
9
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
144 if strand_index is None: |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
145 sys.exit(f"Error: could not get strand key {strand_key} from parameters {params}") |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
146 |
21
25fa179d9d0a
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit e4e05d23d9da18ea87bc352122ca9e6cfa73d1c7
richard-burhans
parents:
9
diff
changeset
|
147 # error file is at very end |
25fa179d9d0a
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit e4e05d23d9da18ea87bc352122ca9e6cfa73d1c7
richard-burhans
parents:
9
diff
changeset
|
148 err_index = -1 |
9
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
149 err_name_base = params[-1].split(".err", 1)[0] |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
150 |
21
25fa179d9d0a
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit e4e05d23d9da18ea87bc352122ca9e6cfa73d1c7
richard-burhans
parents:
9
diff
changeset
|
151 # dict of list of tuple (x, y, str) |
25fa179d9d0a
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit e4e05d23d9da18ea87bc352122ca9e6cfa73d1c7
richard-burhans
parents:
9
diff
changeset
|
152 data: dict[tuple[str, str], list[tuple[int, int, str]]] = {} |
9
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
153 |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
154 direction = None |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
155 if "plus" in params[strand_index]: |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
156 direction = "f" |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
157 elif "minus" in params[strand_index]: |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
158 direction = "r" |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
159 else: |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
160 sys.exit(f"Error: could not figure out direction from strand value {params[strand_index]}") |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
161 |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
162 for line in open(input_file, "r"): |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
163 if line == "": |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
164 continue |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
165 seq1_name, seq1_start, seq1_end, seq2_name, seq2_start, seq2_end, _dir, score = line.split() |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
166 # data.append((int(seq1_start), int(seq2_start), line)) |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
167 half_dist = int((int(seq1_end) - int(seq1_start)) // 2) |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
168 assert int(seq1_end) > int(seq1_start) |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
169 assert int(seq2_end) > int(seq2_start) |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
170 seq1_mid = int(seq1_start) + half_dist |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
171 seq2_mid = int(seq2_start) + half_dist |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
172 data.setdefault((seq1_name, seq2_name), []).append((seq1_mid, seq2_mid, line)) |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
173 |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
174 # If there are chromosome pairs with segment count <= chunk_size |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
175 # then no need to sort and split these pairs into separate files. |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
176 # It is better to keep these pairs in a single segment file. |
21
25fa179d9d0a
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit e4e05d23d9da18ea87bc352122ca9e6cfa73d1c7
richard-burhans
parents:
9
diff
changeset
|
177 |
25fa179d9d0a
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit e4e05d23d9da18ea87bc352122ca9e6cfa73d1c7
richard-burhans
parents:
9
diff
changeset
|
178 # pairs that have count <= chunk_size. these will not be sorted |
25fa179d9d0a
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit e4e05d23d9da18ea87bc352122ca9e6cfa73d1c7
richard-burhans
parents:
9
diff
changeset
|
179 skip_pairs = [] |
9
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
180 |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
181 # save query key order |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
182 # for lastz segment files: 'Query sequence names must appear in the same |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
183 # order as they do in the query file' |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
184 |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
185 # NOTE: assuming data.keys() preserves order of keys. Requires Python 3.7+ |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
186 |
21
25fa179d9d0a
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit e4e05d23d9da18ea87bc352122ca9e6cfa73d1c7
richard-burhans
parents:
9
diff
changeset
|
187 query_key_order = list(dict.fromkeys([i[1] for i in data.keys()])) |
25fa179d9d0a
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit e4e05d23d9da18ea87bc352122ca9e6cfa73d1c7
richard-burhans
parents:
9
diff
changeset
|
188 |
9
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
189 if len(data.keys()) > 1: |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
190 for pair in data.keys(): |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
191 if len(data[pair]) <= chunk_size: |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
192 skip_pairs.append(pair) |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
193 |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
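194 # sorting for forward segments -- NOTE(review): the branch below tests direction == "r" (set when the strand value contains "minus"); confirm whether this comment or the condition is the intended one |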
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
195 if direction == "r": |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
196 for pair in data.keys(): |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
197 if pair not in skip_pairs: |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
198 data[pair] = sorted(data[pair], key=lambda coord: (coord[1] - coord[0], coord[0])) |
21
25fa179d9d0a
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit e4e05d23d9da18ea87bc352122ca9e6cfa73d1c7
richard-burhans
parents:
9
diff
changeset
|
199 |
9
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
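200 # sorting for reverse segments -- NOTE(review): the branch below tests direction == "f" (set when the strand value contains "plus"); confirm whether this comment or the condition is the intended one |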
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
201 elif direction == "f": |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
202 for pair in data.keys(): |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
203 if pair not in skip_pairs: |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
204 data[pair] = sorted(data[pair], key=lambda coord: (coord[1] + coord[0], coord[0])) |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
205 else: |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
206 sys.exit(f"INVALID DIRECTION VALUE: {direction}") |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
207 |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
208 # Writing file in chunks |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
209 ctr = 0 |
21
25fa179d9d0a
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit e4e05d23d9da18ea87bc352122ca9e6cfa73d1c7
richard-burhans
parents:
9
diff
changeset
|
210 # [i for i in data_keys if i not in set(skip_pairs)]: |
25fa179d9d0a
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit e4e05d23d9da18ea87bc352122ca9e6cfa73d1c7
richard-burhans
parents:
9
diff
changeset
|
211 for pair in (data.keys() - skip_pairs): |
9
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
212 for chunk in chunks(list(zip(*data[pair]))[2], chunk_size): |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
213 ctr += 1 |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
214 name_addition = f".split{ctr}" |
21
25fa179d9d0a
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit e4e05d23d9da18ea87bc352122ca9e6cfa73d1c7
richard-burhans
parents:
9
diff
changeset
|
215 |
25fa179d9d0a
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit e4e05d23d9da18ea87bc352122ca9e6cfa73d1c7
richard-burhans
parents:
9
diff
changeset
|
216 if DEBUG: |
25fa179d9d0a
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit e4e05d23d9da18ea87bc352122ca9e6cfa73d1c7
richard-burhans
parents:
9
diff
changeset
|
217 name_addition = f".{chunk_size}{name_addition}" |
25fa179d9d0a
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit e4e05d23d9da18ea87bc352122ca9e6cfa73d1c7
richard-burhans
parents:
9
diff
changeset
|
218 |
9
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
219 fname = input_file.split(".segments", 1)[0] + name_addition + ".segments" |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
220 |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
221 assert len(chunk) != 0 |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
222 with open(fname, "w") as f: |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
223 f.writelines(chunk) |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
224 # update segment file in command |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
225 params[segment_index] = segment_key + fname |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
226 # update output file in command |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
227 params[output_index] = output_key + output_alignment_file_base + name_addition + "." + output_format |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
228 # update error file in command |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
229 params[-1] = err_name_base + name_addition + ".err" |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
230 print(" ".join(params), flush=True) |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
231 |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
232 # writing unsorted skipped pairs |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
233 if len(skip_pairs) > 0: |
21
25fa179d9d0a
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit e4e05d23d9da18ea87bc352122ca9e6cfa73d1c7
richard-burhans
parents:
9
diff
changeset
|
234 # list of tuples of (pair length, pair) |
25fa179d9d0a
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit e4e05d23d9da18ea87bc352122ca9e6cfa73d1c7
richard-burhans
parents:
9
diff
changeset
|
235 skip_pairs_with_len = sorted([(len(data[p]), p) for p in skip_pairs]) |
25fa179d9d0a
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit e4e05d23d9da18ea87bc352122ca9e6cfa73d1c7
richard-burhans
parents:
9
diff
changeset
|
236 # NOTE: This sorting can violate lastz query key order requirement, this is fixed later |
9
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
237 |
21
25fa179d9d0a
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit e4e05d23d9da18ea87bc352122ca9e6cfa73d1c7
richard-burhans
parents:
9
diff
changeset
|
238 # used for sorting |
25fa179d9d0a
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit e4e05d23d9da18ea87bc352122ca9e6cfa73d1c7
richard-burhans
parents:
9
diff
changeset
|
239 query_key_order_table = {item: idx for idx, item in enumerate(query_key_order)} |
9
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
240 |
21
25fa179d9d0a
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit e4e05d23d9da18ea87bc352122ca9e6cfa73d1c7
richard-burhans
parents:
9
diff
changeset
|
241 # list of list of pair names |
25fa179d9d0a
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit e4e05d23d9da18ea87bc352122ca9e6cfa73d1c7
richard-burhans
parents:
9
diff
changeset
|
242 aggregated_skip_pairs: list[list[tuple[str, str]]] = [] |
9
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
243 current_count = 0 |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
244 aggregated_skip_pairs.append([]) |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
245 for count, pair in skip_pairs_with_len: |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
246 if current_count + count <= chunk_size: |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
247 current_count += count |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
248 aggregated_skip_pairs[-1].append(pair) |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
249 else: |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
250 aggregated_skip_pairs.append([]) |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
251 current_count = count |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
252 aggregated_skip_pairs[-1].append(pair) |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
253 |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
254 for aggregate in aggregated_skip_pairs: |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
255 ctr += 1 |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
256 name_addition = f".split{ctr}" |
21
25fa179d9d0a
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit e4e05d23d9da18ea87bc352122ca9e6cfa73d1c7
richard-burhans
parents:
9
diff
changeset
|
257 |
25fa179d9d0a
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit e4e05d23d9da18ea87bc352122ca9e6cfa73d1c7
richard-burhans
parents:
9
diff
changeset
|
258 if DEBUG: |
25fa179d9d0a
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit e4e05d23d9da18ea87bc352122ca9e6cfa73d1c7
richard-burhans
parents:
9
diff
changeset
|
259 name_addition = f".{chunk_size}{name_addition}" |
25fa179d9d0a
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit e4e05d23d9da18ea87bc352122ca9e6cfa73d1c7
richard-burhans
parents:
9
diff
changeset
|
260 |
9
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
261 fname = input_file.split(".segments", 1)[0] + name_addition + ".segments" |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
262 |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
263 with open(fname, "w") as f: |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
264 # fix possible lastz query key order violations |
21
25fa179d9d0a
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit e4e05d23d9da18ea87bc352122ca9e6cfa73d1c7
richard-burhans
parents:
9
diff
changeset
|
265 # p[1] is query key |
25fa179d9d0a
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit e4e05d23d9da18ea87bc352122ca9e6cfa73d1c7
richard-burhans
parents:
9
diff
changeset
|
266 for pair in sorted(aggregate, key=lambda p: query_key_order_table[p[1]]): |
9
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
267 chunk = list(zip(*data[pair]))[2] |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
268 f.writelines(chunk) |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
269 # update segment file in command |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
270 params[segment_index] = segment_key + fname |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
271 # update output file in command |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
272 params[output_index] = output_key + output_alignment_file_base + name_addition + "." + output_format |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
273 # update error file in command |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
274 params[-1] = err_name_base + name_addition + ".err" |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
275 print(" ".join(params), flush=True) |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
276 |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
277 if DELETE_AFTER_CHUNKING: |
08e987868f0f
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 062a761a340e095ea7ef7ed7cd1d3d55b1fdc5c4
richard-burhans
parents:
diff
changeset
|
278 os.remove(input_file) |