Mercurial > repos > richard-burhans > segalign
annotate diagonal_partition.py @ 4:36cafb694dd2 draft
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit b8aa943b38b865defab8a27e4404bb8a2131f919
author | richard-burhans |
---|---|
date | Tue, 23 Apr 2024 22:39:23 +0000 |
parents | 9e34b25a8670 |
children |
rev | line source |
---|---|
2
9e34b25a8670
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 288c3e1887d4c13dcc922c612957460471977ff0
richard-burhans
parents:
0
diff
changeset
|
1 #!/usr/bin/env python |
0
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
2 |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
3 """ |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
4 Diagonal partitioning for segment files output by SegAlign. |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
5 |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
6 Usage: |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
7 diagonal_partition <max-segments> <lastz-command> |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
8 """ |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
9 |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
10 import os |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
11 import sys |
4
36cafb694dd2
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit b8aa943b38b865defab8a27e4404bb8a2131f919
richard-burhans
parents:
2
diff
changeset
|
12 import typing |
36cafb694dd2
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit b8aa943b38b865defab8a27e4404bb8a2131f919
richard-burhans
parents:
2
diff
changeset
|
13 |
36cafb694dd2
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit b8aa943b38b865defab8a27e4404bb8a2131f919
richard-burhans
parents:
2
diff
changeset
|
# Type variable for any container accepted by ``chunks``; the bound is a
# forward reference because ``_Sliceable`` is defined just below.
T = typing.TypeVar("T", bound="_Sliceable")


class _Sliceable(typing.Protocol):
    """Structural type for objects that have a length and can be sliced.

    These are the only two operations ``chunks`` performs on its input, so
    any object providing them (list, tuple, str, ...) satisfies the protocol.
    """

    def __len__(self) -> int:
        """Return the number of items in the container."""
        ...

    def __getitem__(self: T, i: slice) -> T:
        """Return the part of the container selected by slice ``i``."""
        ...


def chunks(lst: T, n: int) -> typing.Iterator[T]:
    """Yield successive n-sized chunks from list.

    Each chunk is produced by slicing ``lst``; the last chunk holds the
    remaining items and may be shorter than ``n``. An empty ``lst`` yields
    nothing.

    Args:
        lst: Sliceable container to split.
        n: Maximum number of items per chunk; must be positive.

    Raises:
        ValueError: If ``n`` is zero or negative. The error surfaces when
            the generator is first advanced.
    """
    # A negative step would make range() empty and silently drop every
    # item; zero would fail inside range() with an unrelated message.
    if n <= 0:
        raise ValueError(f"chunk size must be a positive integer, got {n}")
    for i in range(0, len(lst), n):
        yield lst[i: i + n]
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
29 |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
30 |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
31 if __name__ == "__main__": |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
32 |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
33 DELETE_AFTER_CHUNKING = True |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
34 |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
35 # input_params = "10000 sad sadsa sad --segments=tmp10.block5.r1239937044.plus.segments dsa sa --strand=plus --output=out.maf sadads 2> logging.err" |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
36 # sys.argv = [sys.argv[0]] + input_params.split(' ') |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
37 chunk_size = int(sys.argv[1]) # first parameter contains chunk size |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
38 params = sys.argv[2:] |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
39 |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
40 # Parsing command output from SegAlign |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
41 segment_key = "--segments=" |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
42 segment_index = None |
4
36cafb694dd2
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit b8aa943b38b865defab8a27e4404bb8a2131f919
richard-burhans
parents:
2
diff
changeset
|
43 input_file: typing.Optional[str] = None |
0
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
44 |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
45 for index, value in enumerate(params): |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
46 if value[: len(segment_key)] == segment_key: |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
47 segment_index = index |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
48 input_file = value[len(segment_key):] |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
49 break |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
50 if segment_index is None: |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
51 print(f"Error: could not segment key {segment_key} in parameters {params}") |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
52 exit(0) |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
53 |
4
36cafb694dd2
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit b8aa943b38b865defab8a27e4404bb8a2131f919
richard-burhans
parents:
2
diff
changeset
|
54 if input_file is None or not os.path.isfile(input_file): |
0
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
55 print(f"Error: File {input_file} does not exist") |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
56 exit(0) |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
57 |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
58 if ( |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
59 chunk_size == 0 or sum(1 for _ in open(input_file)) <= chunk_size |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
60 ): # no need to sort if number of lines <= chunk_size |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
61 print(" ".join(params), flush=True) |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
62 exit(0) |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
63 |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
64 # Find rest of relevant parameters |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
65 output_key = "--output=" |
4
36cafb694dd2
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit b8aa943b38b865defab8a27e4404bb8a2131f919
richard-burhans
parents:
2
diff
changeset
|
66 output_index: typing.Optional[int] = None |
0
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
67 output_alignment_file = None |
4
36cafb694dd2
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit b8aa943b38b865defab8a27e4404bb8a2131f919
richard-burhans
parents:
2
diff
changeset
|
68 output_alignment_file_base: typing.Optional[str] = None |
0
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
69 output_format = None |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
70 |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
71 strand_key = "--strand=" |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
72 strand_index = None |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
73 for index, value in enumerate(params): |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
74 if value[: len(output_key)] == output_key: |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
75 output_index = index |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
76 output_alignment_file = value[len(output_key):] |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
77 output_alignment_file_base, output_format = output_alignment_file.rsplit( |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
78 ".", 1 |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
79 ) |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
80 if value[: len(strand_key)] == strand_key: |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
81 strand_index = index |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
82 if segment_index is None: |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
83 print(f"Error: could not output key {output_key} in parameters {params}") |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
84 exit(0) |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
85 if strand_index is None: |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
86 print(f"Error: could not output key {strand_key} in parameters {params}") |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
87 exit(0) |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
88 |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
89 err_index = -1 # error file is at very end |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
90 err_name_base = params[-1].split(".err", 1)[0] |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
91 |
4
36cafb694dd2
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit b8aa943b38b865defab8a27e4404bb8a2131f919
richard-burhans
parents:
2
diff
changeset
|
92 data: typing.Dict[ |
36cafb694dd2
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit b8aa943b38b865defab8a27e4404bb8a2131f919
richard-burhans
parents:
2
diff
changeset
|
93 typing.Tuple[str, str], typing.List[typing.Tuple[int, int, str]] |
36cafb694dd2
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit b8aa943b38b865defab8a27e4404bb8a2131f919
richard-burhans
parents:
2
diff
changeset
|
94 ] = {} # dict of list of tuple (x, y, str) |
0
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
95 |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
96 direction = None |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
97 if "plus" in params[strand_index]: |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
98 direction = "f" |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
99 elif "minus" in params[strand_index]: |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
100 direction = "r" |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
101 else: |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
102 print( |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
103 f"Error: could not figure out direction from strand value {params[strand_index]}" |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
104 ) |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
105 exit(0) |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
106 |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
107 for line in open(input_file, "r"): |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
108 if line == "": |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
109 continue |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
110 ( |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
111 seq1_name, |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
112 seq1_start, |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
113 seq1_end, |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
114 seq2_name, |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
115 seq2_start, |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
116 seq2_end, |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
117 _dir, |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
118 score, |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
119 ) = line.split() |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
120 # data.append((int(seq1_start), int(seq2_start), line)) |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
121 half_dist = int((int(seq1_end) - int(seq1_start)) // 2) |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
122 assert int(seq1_end) > int(seq1_start) |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
123 assert int(seq2_end) > int(seq2_start) |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
124 seq1_mid = int(seq1_start) + half_dist |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
125 seq2_mid = int(seq2_start) + half_dist |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
126 data.setdefault((seq1_name, seq2_name), []).append((seq1_mid, seq2_mid, line)) |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
127 |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
128 # If there are chromosome pairs with segment count <= chunk_size |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
129 # then no need to sort and split these pairs into separate files. |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
130 # It is better to keep these pairs in a single segment file. |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
131 skip_pairs = [] # pairs that have count <= chunk_size. |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
132 # these will not be sorted |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
133 if len(data.keys()) > 1: |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
134 for pair in data.keys(): |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
135 if len(data[pair]) <= chunk_size: |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
136 skip_pairs.append(pair) |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
137 |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
138 # sorting for forward segments: order by diagonal (seq2_mid - seq1_mid), then by seq1_mid |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
139 if direction == "r": |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
140 for pair in data.keys(): |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
141 if pair not in skip_pairs: |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
142 data[pair] = sorted( |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
143 data[pair], key=lambda coord: (coord[1] - coord[0], coord[0]) |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
144 ) |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
145 # sorting for reverse segments: order by anti-diagonal (seq1_mid + seq2_mid), then by seq1_mid |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
146 elif direction == "f": |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
147 for pair in data.keys(): |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
148 if pair not in skip_pairs: |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
149 data[pair] = sorted( |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
150 data[pair], key=lambda coord: (coord[1] + coord[0], coord[0]) |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
151 ) |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
152 else: |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
153 print(f"INVALID DIRECTION VALUE: {direction}") |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
154 exit(0) |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
155 |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
156 # Writing file in chunks |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
157 ctr = 0 |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
158 for pair in data.keys() - skip_pairs: |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
159 for chunk in chunks(list(zip(*data[pair]))[2], chunk_size): |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
160 ctr += 1 |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
161 name_addition = f".split{ctr}" |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
162 fname = input_file.split(".segments", 1)[0] + name_addition + ".segments" |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
163 |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
164 with open(fname, "w") as f: |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
165 f.writelines(chunk) |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
166 # update segment file in command |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
167 params[segment_index] = segment_key + fname |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
168 # update output file in command |
4
36cafb694dd2
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit b8aa943b38b865defab8a27e4404bb8a2131f919
richard-burhans
parents:
2
diff
changeset
|
169 if output_index is not None: |
36cafb694dd2
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit b8aa943b38b865defab8a27e4404bb8a2131f919
richard-burhans
parents:
2
diff
changeset
|
170 params[output_index] = ( |
36cafb694dd2
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit b8aa943b38b865defab8a27e4404bb8a2131f919
richard-burhans
parents:
2
diff
changeset
|
171 f"{output_key}{output_alignment_file_base}{name_addition}.{output_format}" |
36cafb694dd2
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit b8aa943b38b865defab8a27e4404bb8a2131f919
richard-burhans
parents:
2
diff
changeset
|
172 ) |
0
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
173 # update error file in command |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
174 params[-1] = err_name_base + name_addition + ".err" |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
175 print(" ".join(params), flush=True) |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
176 |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
177 # writing skipped pairs (left unsorted), aggregated so that each output file holds at most chunk_size segments |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
178 if len(skip_pairs) > 0: |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
179 skip_pairs_with_len = sorted( |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
180 [(len(data[p]), p) for p in skip_pairs] |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
181 ) # list of tuples of (pair length, pair) |
4
36cafb694dd2
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit b8aa943b38b865defab8a27e4404bb8a2131f919
richard-burhans
parents:
2
diff
changeset
|
182 aggregated_skip_pairs: typing.List[typing.List[typing.Any]] = ( |
36cafb694dd2
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit b8aa943b38b865defab8a27e4404bb8a2131f919
richard-burhans
parents:
2
diff
changeset
|
183 [] |
36cafb694dd2
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit b8aa943b38b865defab8a27e4404bb8a2131f919
richard-burhans
parents:
2
diff
changeset
|
184 ) # list of list of pair names |
0
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
185 current_count = 0 |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
186 aggregated_skip_pairs.append([]) |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
187 for count, pair in skip_pairs_with_len: |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
188 if current_count + count <= chunk_size: |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
189 current_count += count |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
190 aggregated_skip_pairs[-1].append(pair) |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
191 else: |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
192 aggregated_skip_pairs.append([]) |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
193 current_count = count |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
194 aggregated_skip_pairs[-1].append(pair) |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
195 |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
196 for aggregate in aggregated_skip_pairs: |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
197 ctr += 1 |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
198 name_addition = f".split{ctr}" |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
199 fname = input_file.split(".segments", 1)[0] + name_addition + ".segments" |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
200 with open(fname, "w") as f: |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
201 for pair in sorted(aggregate, key=lambda p: (p[1], p[0])): |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
202 chunk = list(zip(*data[pair]))[2] |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
203 f.writelines(chunk) |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
204 # update segment file in command |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
205 params[segment_index] = segment_key + fname |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
206 # update output file in command |
4
36cafb694dd2
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit b8aa943b38b865defab8a27e4404bb8a2131f919
richard-burhans
parents:
2
diff
changeset
|
207 if output_index is not None: |
36cafb694dd2
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit b8aa943b38b865defab8a27e4404bb8a2131f919
richard-burhans
parents:
2
diff
changeset
|
208 params[output_index] = ( |
36cafb694dd2
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit b8aa943b38b865defab8a27e4404bb8a2131f919
richard-burhans
parents:
2
diff
changeset
|
209 f"{output_key}{output_alignment_file_base}{name_addition}.{output_format}" |
36cafb694dd2
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit b8aa943b38b865defab8a27e4404bb8a2131f919
richard-burhans
parents:
2
diff
changeset
|
210 ) |
0
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
211 # update error file in command |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
212 params[-1] = err_name_base + name_addition + ".err" |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
213 print(" ".join(params), flush=True) |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
214 |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
215 if DELETE_AFTER_CHUNKING: |
5c72425b7f1b
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit 98a4dd44360447aa96d92143384d78e116d7581b
richard-burhans
parents:
diff
changeset
|
216 os.remove(input_file) |