Mercurial > repos > onnodg > cdhit_analysis
annotate cdhit_analysis.py @ 4:e64af72e1b8f draft default tip
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
| author | onnodg |
|---|---|
| date | Mon, 15 Dec 2025 16:44:40 +0000 |
| parents | c6981ea453ae |
| children |
| rev | line source |
|---|---|
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
1 """ |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
2 This script processes cluster output files from cd-hit-est for use in Galaxy. |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
3 It extracts cluster information, associates taxa from annotation files, |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
4 performs statistical calculations, and generates text and plot outputs |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
5 summarizing similarity and taxonomic distributions. |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
6 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
7 Main steps: |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
8 1. Parse cd-hit-est cluster file and (optional) annotation Excel file. |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
9 2. Process each cluster to extract similarity and taxa information. |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
10 3. Aggregate results across clusters. |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
11 4. Generate requested outputs: text summaries, plots, and Excel reports. |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
12 """ |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
13 |
|
2
706b7acdb230
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c2020ecc91cea0c8cf7439180cf796743c838b4d-dirty
onnodg
parents:
1
diff
changeset
|
14 import argparse |
|
706b7acdb230
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c2020ecc91cea0c8cf7439180cf796743c838b4d-dirty
onnodg
parents:
1
diff
changeset
|
15 from collections import Counter, defaultdict |
|
706b7acdb230
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c2020ecc91cea0c8cf7439180cf796743c838b4d-dirty
onnodg
parents:
1
diff
changeset
|
16 import os |
|
706b7acdb230
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c2020ecc91cea0c8cf7439180cf796743c838b4d-dirty
onnodg
parents:
1
diff
changeset
|
17 import re |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
18 from math import sqrt |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
19 |
|
2
706b7acdb230
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c2020ecc91cea0c8cf7439180cf796743c838b4d-dirty
onnodg
parents:
1
diff
changeset
|
20 import matplotlib.pyplot as plt |
|
706b7acdb230
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c2020ecc91cea0c8cf7439180cf796743c838b4d-dirty
onnodg
parents:
1
diff
changeset
|
21 import pandas as pd |
|
706b7acdb230
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c2020ecc91cea0c8cf7439180cf796743c838b4d-dirty
onnodg
parents:
1
diff
changeset
|
22 import openpyxl |
|
706b7acdb230
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c2020ecc91cea0c8cf7439180cf796743c838b4d-dirty
onnodg
parents:
1
diff
changeset
|
23 |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
24 def log_message(messages, text: str): |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
25 """Append a message to the log list.""" |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
26 messages.append(text) |
|
2
706b7acdb230
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c2020ecc91cea0c8cf7439180cf796743c838b4d-dirty
onnodg
parents:
1
diff
changeset
|
27 |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
28 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
29 def parse_arguments(args_list=None): |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
30 """Parse command-line arguments for the script.""" |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
31 parser = argparse.ArgumentParser(description="Create taxa analysis from cd-hit cluster files") |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
32 parser.add_argument("--input_cluster", type=str, required=True, help="Input cluster file (.clstr)") |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
33 parser.add_argument("--input_annotation", type=str, required=False, help="Input annotation Excel file (header annotations)") |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
34 |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
35 parser.add_argument("--output_similarity_txt", type=str, help="Similarity text output file") |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
36 parser.add_argument("--output_similarity_plot",type=str, help="Similarity plot output file (PNG)") |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
37 parser.add_argument("--output_count", type=str, help="Count summary output file") |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
38 parser.add_argument("--output_excel", type=str, help="Count summary output file") |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
39 parser.add_argument("--output_taxa_clusters", action="store_true", default=False, help="Excel output: include raw taxa per cluster sheet") |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
40 parser.add_argument("--output_taxa_processed", action="store_true", default=False, help="Excel output: include processed/LCA taxa per cluster sheet") |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
41 parser.add_argument("--log_file", type=str, help="Optional log file with run statistics") |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
42 |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
43 parser.add_argument("--simi_plot_y_min", type=float, default=95.0, help="Minimum value of the y-axis in the similarity plot") |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
44 parser.add_argument("--simi_plot_y_max", type=float, default=100.0, help="Maximum value of the y-axis in the similarity plot") |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
45 |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
46 parser.add_argument("--uncertain_taxa_use_ratio", type=float, default=0.5, help="Ratio at which uncertain taxa count toward the dominant taxa") |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
47 parser.add_argument("--min_to_split", type=float, default=0.45, help=("Minimum fraction (0–0.5) for splitting a cluster taxonomically. " |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
48 "ratio = count(second) / (count(first) + count(second))")) |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
49 parser.add_argument("--min_count_to_split", type=int, default=10, help=("Minimum combined count of the two top taxa to avoid being forced " |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
50 "into an 'uncertain' split at this level.")) |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
51 |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
52 parser.add_argument("--min_cluster_support", type=int, default=1, help=("Minimum number of annotated reads required for a cluster to be " |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
53 "included in the processed/LCA taxa sheet. Unannotated reads do not count toward this threshold.")) |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
54 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
55 return parser.parse_args(args_list) |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
56 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
57 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
58 COLORMAP = [ |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
59 # List of RGBA tuples for bar coloring in plots |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
60 (0.12156862745098039, 0.4666666666666667, 0.7058823529411765, 1.0), |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
61 (1.0, 0.4980392156862745, 0.054901960784313725, 1.0), |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
62 (0.17254901960784313, 0.6274509803921569, 0.17254901960784313, 1.0), |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
63 (0.8392156862745098, 0.15294117647058825, 0.1568627450980392, 1.0), |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
64 (0.5803921568627451, 0.403921568627451, 0.7411764705882353, 1.0), |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
65 (0.5490196078431373, 0.33725490196078434, 0.29411764705882354, 1.0), |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
66 (0.8901960784313725, 0.4666666666666667, 0.7607843137254902, 1.0), |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
67 (0.4980392156862745, 0.4980392156862745, 0.4980392156862745, 1.0), |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
68 (0.7372549019607844, 0.7411764705882353, 0.13333333333333333, 1.0), |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
69 (0.09019607843137255, 0.7450980392156863, 0.8117647058823529, 1.0), |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
70 ] |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
71 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
72 |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
73 def parse_cluster_file(cluster_file, annotation_file, raise_on_error=False, log_messages=None): |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
74 """Parse CD-HIT cluster output and attach taxonomic annotations. |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
75 |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
76 Cluster entries are parsed for read headers, counts, and similarities. |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
77 If an annotation Excel file is supplied, taxa, seq_id and source fields |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
78 are mapped per read from sheet ``Individual_Reads``. |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
79 |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
80 :param cluster_file: Path to the CD-HIT cluster file. |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
81 :type cluster_file: str |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
82 :param annotation_file: Optional Excel annotation file. |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
83 :type annotation_file: str or None |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
84 :param raise_on_error: Whether to raise parsing errors directly. |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
85 :type raise_on_error: bool |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
86 :param log_messages: Optional message collector for logging. |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
87 :type log_messages: list[str] or None |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
88 :return: List of clusters, where each cluster is a dict keyed by header. |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
89 :rtype: list[dict] |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
90 """ |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
91 clusters = [] |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
92 current_cluster = {} |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
93 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
94 annotations = {} |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
95 if annotation_file and os.path.exists(annotation_file): |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
96 try: |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
97 df = pd.read_excel( |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
98 annotation_file, |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
99 sheet_name="Individual_Reads", |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
100 engine="openpyxl", |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
101 ) |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
102 required_cols = {"header", "seq_id", "source", "taxa"} |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
103 missing = required_cols - set(df.columns) |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
104 if missing: |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
105 msg = ( |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
106 f"Annotation file missing columns {missing}, " |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
107 "continuing without annotations." |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
108 ) |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
109 if log_messages is not None: |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
110 log_message(log_messages, msg) |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
111 else: |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
112 for _, row in df.iterrows(): |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
113 header = str(row["header"]) |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
114 seq_id = str(row["seq_id"]) |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
115 source = str(row["source"]) |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
116 taxa = str(row["taxa"]) |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
117 annotations[header] = { |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
118 "seq_id": seq_id, |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
119 "source": source, |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
120 "taxa": taxa, |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
121 } |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
122 if log_messages is not None: |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
123 log_message( |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
124 log_messages, |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
125 f"Loaded {len(annotations)} annotated headers", |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
126 ) |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
127 except Exception as exc: |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
128 msg = ( |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
129 "Failed to read annotation file; proceeding without annotations. " |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
130 f"Details: {exc}" |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
131 ) |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
132 if log_messages is not None: |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
133 log_message(log_messages, msg) |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
134 else: |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
135 if log_messages is not None: |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
136 if annotation_file: |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
137 log_message( |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
138 log_messages, |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
139 "Annotation file not found; proceeding without annotations.", |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
140 ) |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
141 else: |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
142 log_message( |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
143 log_messages, |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
144 "No annotation file provided; proceeding without annotations.", |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
145 ) |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
146 |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
147 with open(cluster_file, "r") as f: |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
148 for line in f: |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
149 line = line.strip() |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
150 if not line: |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
151 continue |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
152 |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
153 if line.startswith(">Cluster"): |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
154 # Start of new cluster, save previous if exists |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
155 if current_cluster: |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
156 clusters.append(current_cluster) |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
157 current_cluster = {} |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
158 else: |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
159 parts = line.split() |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
160 if len(parts) < 3: |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
161 continue |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
162 |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
163 header_part = parts[2].strip(">.") |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
164 header_parts = header_part.split("(") |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
165 header = header_parts[0] |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
166 count = 0 |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
167 if len(header_parts) > 1: |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
168 last_part = header_parts[-1].strip(")") |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
169 if last_part: |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
170 try: |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
171 count = int(last_part) |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
172 except ValueError: |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
173 count = 0 |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
174 |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
175 similarity_part = parts[-1].strip() |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
176 if "*" in similarity_part: |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
177 similarity = 100.0 |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
178 else: |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
179 matches = re.findall(r"[\d.]+", similarity_part) |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
180 if matches: |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
181 similarity = float(matches[-1]) |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
182 else: |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
183 msg = f"Could not parse similarity from: '{similarity_part}'" |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
184 if raise_on_error: |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
185 raise ValueError(msg) |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
186 if log_messages is not None: |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
187 log_message(log_messages, f"WARNING: {msg}") |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
188 similarity = 0.0 |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
189 |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
190 if header in annotations: |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
191 taxa = annotations[header]["taxa"] |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
192 seq_id = annotations[header]["seq_id"] |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
193 source = annotations[header]["source"] |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
194 else: |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
195 taxa = "Unannotated read" |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
196 seq_id = "Unannotated read" |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
197 source = "Unannotated read" |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
198 |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
199 current_cluster[header] = { |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
200 "count": count, |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
201 "similarity": similarity, |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
202 "taxa": taxa, |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
203 "seq_id": seq_id, |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
204 "source": source, |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
205 } |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
206 |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
207 if current_cluster: |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
208 clusters.append(current_cluster) |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
209 |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
210 if log_messages is not None: |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
211 log_message(log_messages, f"Parsed {len(clusters)} clusters") |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
212 |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
213 return clusters |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
214 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
215 |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
216 def process_cluster_data(cluster: dict): |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
217 """Convert a raw cluster into similarity lists and aggregated taxa counts. |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
218 |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
219 Similarity values are expanded based on read count. Taxa are grouped |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
220 by unique taxonomic labels, tracking supporting seq_ids and sources. |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
221 |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
222 :param cluster: Mapping of read header to cluster metadata. |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
223 :type cluster: dict |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
224 :return: similarity_list, taxa_map, annotated_count, unannotated_count |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
225 :rtype: tuple[list[float], dict, int, int] |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
226 """ |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
227 similarity_list: list[float] = [] |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
228 taxa_map: dict[str, dict] = {} |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
229 annotated_count = 0 |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
230 unannotated_count = 0 |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
231 |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
232 for _header, info in cluster.items(): |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
233 count = info["count"] |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
234 similarity = info["similarity"] |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
235 taxa = info["taxa"] |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
236 seq_id = info["seq_id"] |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
237 source = info["source"] |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
238 |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
239 similarity_list.extend([similarity] * count) |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
240 |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
241 key = taxa if seq_id != "Unannotated read" else "Unannotated read" |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
242 if key not in taxa_map: |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
243 taxa_map[key] = { |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
244 "count": 0, |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
245 "seq_ids": set(), |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
246 "sources": set(), |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
247 } |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
248 |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
249 taxa_map[key]["count"] += count |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
250 taxa_map[key]["seq_ids"].add(seq_id) |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
251 taxa_map[key]["sources"].add(source) |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
252 |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
253 if seq_id == "Unannotated read": |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
254 unannotated_count += count |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
255 else: |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
256 annotated_count += count |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
257 |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
258 return similarity_list, taxa_map, annotated_count, unannotated_count |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
259 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
260 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
261 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
262 def calculate_cluster_taxa(taxa_dict, args): |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
263 """Resolve cluster-level taxa using weighted recursive LCA splitting. |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
264 |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
265 Unannotated reads are treated as fully uncertain taxa but contribute |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
266 partially via ``uncertain_taxa_use_ratio`` when determining dominant levels. |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
267 |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
268 :param taxa_dict: Mapping of taxa string to total read count. |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
269 :type taxa_dict: dict[str, int] |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
270 :param args: Argument namespace containing resolution parameters. |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
271 :type args: argparse.Namespace |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
272 :return: List of taxonomic groups after recursive resolution. |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
273 :rtype: list[dict[str, int]] |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
274 """ |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
275 processed_dict = {} |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
276 for taxa, count in taxa_dict.items(): |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
277 if taxa == "Unannotated read": |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
278 uncertain_taxa = " / ".join(["Uncertain taxa"] * 7) |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
279 processed_dict[uncertain_taxa] = count |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
280 else: |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
281 processed_dict[taxa] = count |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
282 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
283 return _recursive_taxa_calculation(processed_dict, args, 0) |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
284 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
285 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
286 def _recursive_taxa_calculation(taxa_dict, args, level): |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
287 """Recursive helper performing level-by-level weighted LCA resolution. |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
288 |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
289 At each taxonomic rank, taxa are grouped and compared. If multiple |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
290 taxa exceed the configured split thresholds, the cluster is divided |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
291 and recursion continues deeper. |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
292 |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
293 :param taxa_dict: Mapping of taxa to count for this recursion stage. |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
294 :type taxa_dict: dict[str, int] |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
295 :param args: Parameter namespace controlling uncertainty and split rules. |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
296 :type args: argparse.Namespace |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
297 :param level: Current taxonomy depth (0–6). |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
298 :type level: int |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
299 :return: One or more resolved taxon groups at deeper levels. |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
300 :rtype: list[dict[str, int]] |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
301 """ |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
302 if level >= 7: |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
303 return [taxa_dict] |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
304 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
305 level_dict = defaultdict(float) |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
306 for taxa, count in taxa_dict.items(): |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
307 taxa_parts = taxa.split(" / ") |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
308 if level < len(taxa_parts): |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
309 level_taxon = taxa_parts[level] |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
310 if level_taxon == "Uncertain taxa": |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
311 level_dict[level_taxon] += count * args.uncertain_taxa_use_ratio |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
312 else: |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
313 level_dict[level_taxon] += count |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
314 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
315 if len(level_dict) <= 1: |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
316 return _recursive_taxa_calculation(taxa_dict, args, level + 1) |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
317 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
318 sorted_taxa = sorted(level_dict.items(), key=lambda x: x[1], reverse=True) |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
319 result = [] |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
320 dominant_taxon = sorted_taxa[0][0] |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
321 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
322 for i in range(1, len(sorted_taxa)): |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
323 secondary_taxon = sorted_taxa[i][0] |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
324 total_count = sorted_taxa[0][1] + sorted_taxa[i][1] |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
325 ratio = sorted_taxa[i][1] / total_count |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
326 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
327 if ratio >= args.min_to_split or total_count <= args.min_count_to_split: |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
328 split_dict = { |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
329 taxa: count |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
330 for taxa, count in taxa_dict.items() |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
331 if taxa.split(" / ")[level] == secondary_taxon |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
332 } |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
333 result.extend(_recursive_taxa_calculation(split_dict, args, level + 1)) |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
334 |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
335 dominant_dict = { |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
336 taxa: count |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
337 for taxa, count in taxa_dict.items() |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
338 if taxa.split(" / ")[level] == dominant_taxon |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
339 } |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
340 result.extend(_recursive_taxa_calculation(dominant_dict, args, level + 1)) |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
341 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
342 return result |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
343 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
344 |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
345 |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
346 def write_similarity_output(cluster_data_list, output_file, log_messages=None): |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
347 """Write similarity statistics and per-cluster distributions. |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
348 |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
349 Output includes mean, standard deviation, and counts of each |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
350 similarity value per cluster. |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
351 |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
352 :param cluster_data_list: List of processed cluster dictionaries. |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
353 :type cluster_data_list: list[dict] |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
354 :param output_file: Destination path for the TSV summary. |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
355 :type output_file: str |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
356 :param log_messages: Optional log collector. |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
357 :type log_messages: list[str] or None |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
358 :return: None |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
359 :rtype: None |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
360 """ |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
361 all_simi = [] |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
362 pair_counter = Counter() |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
363 |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
364 for cluster_index, cluster_data in enumerate(cluster_data_list): |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
365 simi_list = cluster_data["similarities"] |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
366 if not simi_list: |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
367 continue |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
368 all_simi.extend(simi_list) |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
369 for s in simi_list: |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
370 pair_counter[(cluster_index, s)] += 1 |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
371 |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
372 if not all_simi: |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
373 if log_messages is not None: |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
374 log_message( |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
375 log_messages, |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
376 "No similarity data found; similarity text file not written.", |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
377 ) |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
378 return |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
379 |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
380 avg = sum(all_simi) / len(all_simi) |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
381 variance = sum((s - avg) ** 2 for s in all_simi) / len(all_simi) |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
382 st_dev = sqrt(variance) |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
383 |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
384 with open(output_file, "w") as f: |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
385 f.write(f"# Average similarity (all reads): {avg:.2f}\n") |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
386 f.write(f"# Standard deviation (all reads): {st_dev:.2f}\n") |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
387 f.write("cluster\tsimilarity\tcount\n") |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
388 |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
389 for (cluster_index, similarity), count in sorted( |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
390 pair_counter.items(), key=lambda x: (x[0][0], -x[0][1]) |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
391 ): |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
392 f.write(f"{cluster_index}\t{similarity}\t{count}\n") |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
393 |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
394 if log_messages is not None: |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
395 log_message(log_messages, "Similarity text summary written succesfully") |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
396 log_message( |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
397 log_messages, f"Similarity mean = {avg:.2f}, std = {st_dev:.2f}" |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
398 ) |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
399 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
400 |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
401 def write_count_output(cluster_data_list, output_file, log_messages=None): |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
402 """Write counts of annotated and unannotated reads per cluster. |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
403 |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
404 A summary row containing total and percentage values is appended. |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
405 |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
406 :param cluster_data_list: List of processed clusters. |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
407 :type cluster_data_list: list[dict] |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
408 :param output_file: Path for the count summary text file. |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
409 :type output_file: str |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
410 :param log_messages: Optional logging accumulator. |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
411 :type log_messages: list[str] or None |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
412 :return: None |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
413 :rtype: None |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
414 """ |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
415 total_annotated = 0 |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
416 total_unannotated = 0 |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
417 |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
418 with open(output_file, "w") as f: |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
419 f.write( |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
420 "cluster\tunannotated\tannotated\ttotal\tperc_unannotated\tperc_annotated\n" |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
421 ) |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
422 |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
423 for cluster_index, cluster_data in enumerate(cluster_data_list): |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
424 unannotated = cluster_data["unannotated"] |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
425 annotated = cluster_data["annotated"] |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
426 total = unannotated + annotated |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
427 |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
428 total_annotated += annotated |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
429 total_unannotated += unannotated |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
430 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
431 if total > 0: |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
432 perc_annotated = (annotated / total) * 100 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
433 perc_unannotated = (unannotated / total) * 100 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
434 else: |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
435 perc_annotated = perc_unannotated = 0.0 |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
436 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
437 f.write( |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
438 f"{cluster_index}\t{unannotated}\t{annotated}\t{total}" |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
439 f"\t{perc_unannotated:.2f}\t{perc_annotated:.2f}\n" |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
440 ) |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
441 |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
442 grand_total = total_annotated + total_unannotated |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
443 if grand_total > 0: |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
444 perc_annotated = (total_annotated / grand_total) * 100 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
445 perc_unannotated = (total_unannotated / grand_total) * 100 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
446 else: |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
447 perc_annotated = perc_unannotated = 0.0 |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
448 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
449 f.write( |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
450 f"TOTAL\t{total_unannotated}\t{total_annotated}\t{grand_total}" |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
451 f"\t{perc_unannotated:.2f}\t{perc_annotated:.2f}\n" |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
452 ) |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
453 |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
454 if log_messages is not None: |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
455 log_message(log_messages, "Count summary written succesfully") |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
456 log_message( |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
457 log_messages, |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
458 f"TOTAL annotated={total_annotated}, unannotated={total_unannotated}, total={grand_total}", |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
459 ) |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
460 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
461 |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
462 def write_taxa_excel(cluster_data_list, args, output_file, write_raw: bool, write_processed: bool, log_messages=None): |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
463 """Write raw and processed taxa data into a combined Excel report. |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
464 |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
465 Generates up to three sheets: |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
466 |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
467 - ``Raw_Taxa_Clusters``: Full taxa per read cluster (if enabled). |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
468 - ``Processed_Taxa_Clusters``: LCA-resolved taxa (if enabled). |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
469 - ``Settings``: Parameters used for taxonomic resolution. |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
470 |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
471 seq_id and source tracking are kept only in the raw sheet. |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
472 |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
473 :param cluster_data_list: List containing similarity/taxa data per cluster. |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
474 :type cluster_data_list: list[dict] |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
475 :param args: Parsed arguments containing LCA configuration. |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
476 :type args: argparse.Namespace |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
477 :param output_file: Path to the combined Excel file. |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
478 :type output_file: str |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
479 :param write_raw: Whether to include the raw taxa sheet. |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
480 :type write_raw: bool |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
481 :param write_processed: Whether to include the processed/LCA sheet. |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
482 :type write_processed: bool |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
483 :param log_messages: Optional log collector. |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
484 :type log_messages: list[str] or None |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
485 :return: None |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
486 :rtype: None |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
487 """ |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
488 raw_rows = [] |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
489 processed_rows = [] |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
490 |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
491 if write_raw: |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
492 for cluster_index, cluster_data in enumerate(cluster_data_list): |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
493 taxa_map = cluster_data["taxa_map"] |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
494 for taxa, info in taxa_map.items(): |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
495 count = info["count"] |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
496 if count <= 0: |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
497 continue |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
498 |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
499 taxa_levels = taxa.split(" / ") if taxa else [] |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
500 while len(taxa_levels) < 7: |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
501 taxa_levels.append("Unannotated") |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
502 |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
503 seq_ids_str = ",".join(sorted(info["seq_ids"])) |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
504 sources_str = ",".join(sorted(info["sources"])) |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
505 |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
506 raw_rows.append( |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
507 { |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
508 "cluster": cluster_index, |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
509 "count": count, |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
510 "seq_id": seq_ids_str, |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
511 "source": sources_str, |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
512 "taxa_full": taxa, |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
513 "kingdom": taxa_levels[0], |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
514 "phylum": taxa_levels[1], |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
515 "class": taxa_levels[2], |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
516 "order": taxa_levels[3], |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
517 "family": taxa_levels[4], |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
518 "genus": taxa_levels[5], |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
519 "species": taxa_levels[6], |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
520 } |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
521 ) |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
522 |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
523 if write_processed: |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
524 for cluster_index, cluster_data in enumerate(cluster_data_list): |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
525 taxa_map = cluster_data["taxa_map"] |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
526 |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
527 annotated_support = sum( |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
528 info["count"] |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
529 for taxa, info in taxa_map.items() |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
530 if taxa != "Unannotated read" |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
531 ) |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
532 |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
533 if annotated_support < args.min_cluster_support: |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
534 continue |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
535 |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
536 taxa_counts = {taxa: info["count"] for taxa, info in taxa_map.items()} |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
537 processed_groups = calculate_cluster_taxa(taxa_counts, args) |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
538 |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
539 for group in processed_groups: |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
540 for taxa, count in group.items(): |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
541 if count <= 0: |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
542 continue |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
543 |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
544 if "Uncertain taxa / Uncertain taxa / Uncertain taxa" in taxa: |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
545 continue |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
546 else: |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
547 info = taxa_map.get(taxa) |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
548 seq_ids_set = info["seq_ids"] |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
549 sources_set = info["sources"] |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
550 taxa_full = taxa |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
551 taxa_levels = taxa.split(" / ") if taxa else [] |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
552 while len(taxa_levels) < 7: |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
553 taxa_levels.append("Unannotated") |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
554 |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
555 seq_ids_str = ",".join(sorted(seq_ids_set)) |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
556 sources_str = ",".join(sorted(sources_set)) |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
557 processed_rows.append( |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
558 { |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
559 "cluster": cluster_index, |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
560 "count": count, |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
561 "seq_id": seq_ids_str, |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
562 "source": sources_str, |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
563 "taxa_full": taxa_full, |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
564 "kingdom": taxa_levels[0], |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
565 "phylum": taxa_levels[1], |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
566 "class": taxa_levels[2], |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
567 "order": taxa_levels[3], |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
568 "family": taxa_levels[4], |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
569 "genus": taxa_levels[5], |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
570 "species": taxa_levels[6], |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
571 } |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
572 ) |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
573 |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
574 if not raw_rows and not processed_rows: |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
575 if log_messages is not None: |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
576 log_message( |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
577 log_messages, |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
578 "No taxa data to write; taxa Excel file not created.", |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
579 ) |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
580 return |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
581 |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
582 raw_df = pd.DataFrame(raw_rows) if raw_rows else None |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
583 processed_df = pd.DataFrame(processed_rows) if processed_rows else None |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
584 |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
585 settings_data = [ |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
586 ["uncertain_taxa_use_ratio", args.uncertain_taxa_use_ratio], |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
587 ["min_to_split", args.min_to_split], |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
588 ["min_count_to_split", args.min_count_to_split], |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
589 ["min_cluster_support", args.min_cluster_support] |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
590 ] |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
591 settings_df = pd.DataFrame(settings_data, columns=["Parameter", "Value"]) |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
592 |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
593 temp_path = output_file + ".tmp.xlsx" |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
594 os.makedirs(os.path.dirname(temp_path), exist_ok=True) |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
595 with pd.ExcelWriter(temp_path, engine="openpyxl") as writer: |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
596 if raw_df is not None: |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
597 raw_df.to_excel( |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
598 writer, sheet_name="Raw_Taxa_Clusters", index=False |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
599 ) |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
600 if processed_df is not None: |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
601 processed_df.to_excel( |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
602 writer, sheet_name="Processed_Taxa_Clusters", index=False |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
603 ) |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
604 settings_df.to_excel(writer, sheet_name="Settings", index=False) |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
605 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
606 for sheet_name in writer.sheets: |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
607 worksheet = writer.sheets[sheet_name] |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
608 for column in worksheet.columns: |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
609 max_length = 0 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
610 column_letter = column[0].column_letter |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
611 for cell in column: |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
612 try: |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
613 cell_len = len(str(cell.value)) |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
614 if cell_len > max_length: |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
615 max_length = cell_len |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
616 except AttributeError: |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
617 pass |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
618 adjusted_width = min(max_length + 2, 50) |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
619 worksheet.column_dimensions[column_letter].width = adjusted_width |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
620 |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
621 os.replace(temp_path, output_file) |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
622 |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
623 if log_messages is not None: |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
624 if raw_df is not None: |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
625 log_message(log_messages, "Raw taxa per cluster written succesfully") |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
626 if processed_df is not None: |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
627 log_message( |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
628 log_messages, |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
629 "Processed taxa (split clusters) written succesfully", |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
630 ) |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
631 |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
632 |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
633 def create_similarity_plot(all_simi_data, cluster_simi_lengths, args, output_file, log_messages=None): |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
634 """Create and save a similarity distribution bar plot. |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
635 |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
636 Bars are colored per cluster using a fixed repeating colormap. |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
637 |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
638 :param all_simi_data: All similarity values across clusters. |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
639 :type all_simi_data: list[float] |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
640 :param cluster_simi_lengths: Per-cluster similarity list lengths. |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
641 :type cluster_simi_lengths: list[int] |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
642 :param args: Namespace containing y-axis plot limits. |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
643 :type args: argparse.Namespace |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
644 :param output_file: Output PNG file path. |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
645 :type output_file: str |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
646 :param log_messages: Optional log message list. |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
647 :type log_messages: list[str] or None |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
648 :return: None |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
649 :rtype: None |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
650 """ |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
651 if not all_simi_data: |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
652 if log_messages is not None: |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
653 log_message(log_messages, "No similarity data; similarity plot not created.") |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
654 return |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
655 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
656 sorted_simi_list = sorted(all_simi_data, reverse=True) |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
657 bar_positions = list(range(len(sorted_simi_list))) |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
658 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
659 colormap_full = [] |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
660 for i, length in enumerate(cluster_simi_lengths): |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
661 color = COLORMAP[i % len(COLORMAP)] |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
662 colormap_full.extend([color] * length) |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
663 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
664 plt.figure(figsize=(12, 6)) |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
665 plt.bar(bar_positions, sorted_simi_list, width=1, color=colormap_full) |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
666 plt.grid(axis="y", linestyle="--", color="gray", alpha=0.7) |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
667 plt.ylabel("Similarity (%)") |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
668 plt.xlabel("Reads") |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
669 plt.title("Intra-cluster Similarity Distribution") |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
670 plt.ylim(ymin=args.simi_plot_y_min, ymax=args.simi_plot_y_max) |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
671 plt.tight_layout() |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
672 |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
673 plt.savefig(output_file, format="png", dpi=300, bbox_inches="tight") |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
674 plt.close() |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
675 |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
676 if log_messages is not None: |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
677 log_message(log_messages, "Similarity plot written succesfully") |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
678 |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
679 |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
680 def summarize_and_log(cluster_data_list, all_simi_data, log_messages, args): |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
681 """Compute global summary statistics and append them to the log. |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
682 |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
683 Summary includes: |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
684 - total clusters |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
685 - total annotated/unannotated reads |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
686 - per-cluster annotation presence |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
687 - top taxa distribution |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
688 - similarity mean and standard deviation |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
689 |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
690 :param cluster_data_list: List of processed cluster descriptors. |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
691 :type cluster_data_list: list[dict] |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
692 :param all_simi_data: All similarity values from all clusters. |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
693 :type all_simi_data: list[float] |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
694 :param log_messages: List collecting log output lines. |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
695 :type log_messages: list[str] |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
696 :param args: Argument namespace with configuration parameters. |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
697 :type args: argparse.Namespace |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
698 :return: None |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
699 :rtype: None |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
700 """ |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
701 total_clusters = len(cluster_data_list) |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
702 total_annotated = sum(c["annotated"] for c in cluster_data_list) |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
703 total_unannotated = sum(c["unannotated"] for c in cluster_data_list) |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
704 grand_total = total_annotated + total_unannotated |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
705 |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
706 clusters_with_annotations = sum( |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
707 1 for c in cluster_data_list if c["annotated"] > 0 |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
708 ) |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
709 clusters_unannotated_only = total_clusters - clusters_with_annotations |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
710 |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
711 log_message(log_messages, "=== SUMMARY ===") |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
712 log_message(log_messages, f"Clusters parsed: {total_clusters}") |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
713 log_message(log_messages, f"Total reads: {grand_total}") |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
714 log_message(log_messages, f"Annotated reads: {total_annotated}") |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
715 log_message(log_messages, f"Unannotated reads: {total_unannotated}") |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
716 log_message(log_messages, f"Clusters with annotations: {clusters_with_annotations}") |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
717 log_message(log_messages, f"Clusters fully unannotated: {clusters_unannotated_only}") |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
718 log_message(log_messages, f"Minimum cluster support for processed taxa: {args.min_cluster_support}") |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
719 |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
720 taxa_counter = Counter() |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
721 for c in cluster_data_list: |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
722 for taxa, info in c["taxa_map"].items(): |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
723 if taxa == "Unannotated read": |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
724 continue |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
725 taxa_counter[taxa] += info["count"] |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
726 |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
727 if taxa_counter: |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
728 log_message(log_messages, "=== TAXA SUMMARY (top 10) ===") |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
729 for taxa, count in taxa_counter.most_common(10): |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
730 log_message(log_messages, f"{taxa}: {count} reads") |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
731 log_message(log_messages, f"Total unique taxa: {len(taxa_counter)}") |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
732 else: |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
733 log_message(log_messages, "No annotated taxa found.") |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
734 |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
735 if all_simi_data: |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
736 avg = sum(all_simi_data) / len(all_simi_data) |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
737 variance = sum((s - avg) ** 2 for s in all_simi_data) / len(all_simi_data) |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
738 st_dev = sqrt(variance) |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
739 log_message(log_messages, "=== SIMILARITY SUMMARY ===") |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
740 log_message(log_messages, f"Mean similarity: {avg:.2f}") |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
741 log_message(log_messages, f"Std similarity: {st_dev:.2f}") |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
742 else: |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
743 log_message(log_messages, "No similarity values available for summary.") |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
744 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
745 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
746 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
747 def main(arg_list=None): |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
748 """Main entry point for CD-HIT cluster processing. |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
749 |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
750 Orchestrates parsing, cluster processing, statistic aggregation, |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
751 visualization, and generation of all requested outputs. |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
752 |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
753 :param arg_list: Optional list of arguments (used by tests). |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
754 :type arg_list: list[str] or None |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
755 :return: None |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
756 :rtype: None |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
757 """ |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
758 args = parse_arguments(arg_list) |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
759 log_messages: list[str] = [] |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
760 log_message(log_messages, "=== CD-HIT cluster processing started ===") |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
761 |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
762 log_message(log_messages, "cluster_file: provided") |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
763 log_message( |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
764 log_messages, |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
765 "annotation_file: provided" if args.input_annotation else "annotation_file: none", |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
766 ) |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
767 |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
768 log_message(log_messages, "=== PARAMETERS ===") |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
769 skip_keys = { |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
770 "output_similarity_txt", |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
771 "output_similarity_plot", |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
772 "output_count", |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
773 "output_excel", |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
774 "input_cluster", |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
775 "input_annotation", |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
776 "log_file", |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
777 } |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
778 for key, value in vars(args).items(): |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
779 if key in skip_keys: |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
780 continue |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
781 log_message(log_messages, f"{key}: {value}") |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
782 log_message(log_messages, "=== PARAMETERS END===") |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
783 |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
784 clusters = parse_cluster_file( |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
785 args.input_cluster, |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
786 args.input_annotation, |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
787 raise_on_error=False, |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
788 log_messages=log_messages, |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
789 ) |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
790 |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
791 cluster_data_list = [] |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
792 all_simi_data: list[float] = [] |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
793 cluster_simi_lengths: list[int] = [] |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
794 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
795 for cluster in clusters: |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
796 simi_list, taxa_map, annotated_count, unannotated_count = process_cluster_data( |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
797 cluster |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
798 ) |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
799 cluster_data = { |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
800 "similarities": simi_list, |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
801 "taxa_map": taxa_map, |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
802 "annotated": annotated_count, |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
803 "unannotated": unannotated_count, |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
804 } |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
805 cluster_data_list.append(cluster_data) |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
806 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
807 if simi_list: |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
808 all_simi_data.extend(simi_list) |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
809 cluster_simi_lengths.append(len(simi_list)) |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
810 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
811 if args.output_similarity_txt: |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
812 write_similarity_output( |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
813 cluster_data_list, args.output_similarity_txt, log_messages) |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
814 |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
815 if args.output_similarity_plot: |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
816 create_similarity_plot( |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
817 all_simi_data, |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
818 cluster_simi_lengths, |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
819 args, |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
820 args.output_similarity_plot, |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
821 log_messages) |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
822 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
823 if args.output_count: |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
824 write_count_output(cluster_data_list, args.output_count, log_messages) |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
825 |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
826 if args.output_excel: |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
827 write_raw = bool(args.output_taxa_clusters) |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
828 write_processed = bool(args.output_taxa_processed) |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
829 |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
830 if not write_raw and not write_processed: |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
831 if log_messages is not None: |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
832 log_message(log_messages, "output_excel provided but no taxa sheet flags set; no Excel file written.") |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
833 else: |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
834 write_taxa_excel(cluster_data_list, |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
835 args, |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
836 args.output_excel, |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
837 write_raw=write_raw, |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
838 write_processed=write_processed, |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
839 log_messages=log_messages) |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
840 else: |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
841 if args.output_taxa_clusters or args.output_taxa_processed: |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
842 if log_messages is not None: |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
843 log_message( |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
844 log_messages, |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
845 "WARNING: Raw/processed taxa output flags set but no --output_excel path provided; skipping Excel output." |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
846 ) |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
847 |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
848 summarize_and_log(cluster_data_list, all_simi_data, log_messages, args) |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
849 log_message(log_messages, "=== CD-HIT cluster processing finished ===") |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
850 |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
851 if args.log_file: |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
852 os.makedirs(os.path.dirname(args.log_file), exist_ok=True) |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
853 with open(args.log_file, "w") as f: |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
854 f.write("\n".join(log_messages)) |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
855 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
856 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
857 if __name__ == "__main__": |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
3
diff
changeset
|
858 main() |
