Mercurial > repos > onnodg > cdhit_analysis
comparison cdhit_analysis.py @ 2:706b7acdb230 draft
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c2020ecc91cea0c8cf7439180cf796743c838b4d-dirty
| author | onnodg |
|---|---|
| date | Tue, 21 Oct 2025 07:54:21 +0000 |
| parents | ff68835adb2b |
| children | c6981ea453ae |
comparison
equal
deleted
inserted
replaced
| 1:ff68835adb2b | 2:706b7acdb230 |
|---|---|
| 1 import argparse | |
| 2 import os | |
| 3 import re | |
| 4 from collections import Counter, defaultdict | |
| 5 from math import sqrt | |
| 6 import pandas as pd | |
| 7 import matplotlib | |
| 8 | |
| 9 matplotlib.use('Agg') # Non-interactive backend for Galaxy | |
| 10 import matplotlib.pyplot as plt | |
| 11 | |
| 12 """ | 1 """ |
| 13 This script processes cluster output files from cd-hit-est for use in Galaxy. | 2 This script processes cluster output files from cd-hit-est for use in Galaxy. |
| 14 It extracts cluster information, associates taxa and e-values from annotation files, | 3 It extracts cluster information, associates taxa and e-values from annotation files, |
| 15 performs statistical calculations, and generates text and plot outputs | 4 performs statistical calculations, and generates text and plot outputs |
| 16 summarizing similarity and taxonomic distributions. | 5 summarizing similarity and taxonomic distributions. |
| 23 4. Generate requested outputs: text summaries, plots, and Excel reports. | 12 4. Generate requested outputs: text summaries, plots, and Excel reports. |
| 24 | 13 |
| 25 | 14 |
| 26 Note: Uses a non-interactive matplotlib backend (Agg) for compatibility with Galaxy. | 15 Note: Uses a non-interactive matplotlib backend (Agg) for compatibility with Galaxy. |
| 27 """ | 16 """ |
| 17 | |
| 18 import argparse | |
| 19 from collections import Counter, defaultdict | |
| 20 import os | |
| 21 import re | |
| 22 import matplotlib.pyplot as plt | |
| 23 import pandas as pd | |
| 24 from math import sqrt | |
| 25 import openpyxl | |
| 26 | |
| 28 | 27 |
| 29 | 28 |
| 30 def parse_arguments(args_list=None): | 29 def parse_arguments(args_list=None): |
| 31 """Parse command-line arguments for the script.""" | 30 """Parse command-line arguments for the script.""" |
| 32 parser = argparse.ArgumentParser( | 31 parser = argparse.ArgumentParser( |
