Mercurial > repos > tduigou > get_from_db
annotate get_db_info.py @ 0:1769c133986b draft default tip
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
author | tduigou |
---|---|
date | Wed, 11 Jun 2025 09:42:52 +0000 |
parents | |
children |
rev | line source |
---|---|
0
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
1 import subprocess |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
2 import argparse |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
3 import time |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
4 import os |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
5 import socket |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
6 import re |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
7 from Bio.Seq import Seq |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
8 import pandas as pd |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
9 from Bio.SeqRecord import SeqRecord |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
10 from sqlalchemy import create_engine, inspect |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
11 from sqlalchemy.engine.url import make_url |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
12 from sqlalchemy.sql import text |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
13 from sqlalchemy.exc import OperationalError |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
14 |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
15 |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
def fix_db_uri(uri):
    """Return *uri* with every ``__at__`` placeholder turned back into ``@``.

    A URI that contains no placeholder is returned unchanged.
    """
    # NOTE(review): the placeholder presumably comes from the calling tool
    # escaping '@' in parameters -- confirm against the wrapper.
    pieces = uri.split("__at__")
    return "@".join(pieces)
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
19 |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
20 |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
def is_port_in_use(uri):
    """Check whether something accepts TCP connections on a host/port.

    Args:
        uri: Either a SQLAlchemy database URI (string or URL object), whose
            host and port are probed, or a bare integer port number, which is
            probed on the local machine.

    Returns:
        True if a TCP connection could be established within 2 seconds,
        False otherwise.
    """
    default_host = "127.0.0.1"
    # NOTE(review): 5432 is the PostgreSQL default; assumed appropriate because
    # this module targets PostgreSQL -- confirm if other backends are used.
    default_port = 5432

    if isinstance(uri, int):
        # Callers may pass a plain port number; make_url() cannot parse one.
        host, port = default_host, uri
    else:
        url = make_url(uri)
        # A URI may omit host and/or port; connect_ex() cannot take None.
        host = url.host or default_host
        port = url.port or default_port

    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        s.settimeout(2)
        # connect_ex returns 0 on success instead of raising.
        return s.connect_ex((host, port)) == 0
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
29 |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
30 |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
def extract_db_name(uri):
    """Return the ``database`` component parsed from a SQLAlchemy URI."""
    return make_url(uri).database
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
35 |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
36 |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
def _docker_container_ids(container_name, include_stopped=False):
    """Return the IDs ``docker ps`` lists for containers named *container_name*."""
    command = ["docker", "ps", "-q"]
    if include_stopped:
        command.append("-a")
    # Anchor the filter: a bare ``name=`` filter also matches containers whose
    # name merely contains the given text. The optional "/" covers Docker
    # versions that match against the slash-prefixed internal name.
    command += ["-f", f"name=^/?{re.escape(container_name)}$"]
    # Argument list + no shell: the name can never be interpreted as shell code.
    result = subprocess.run(command, capture_output=True, text=True)
    return result.stdout.split()


def _local_port_in_use(port, timeout=2):
    """Return True if a TCP connection to *port* on the local machine succeeds."""
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        s.settimeout(timeout)
        return s.connect_ex(("127.0.0.1", port)) == 0


# This function starts the Docker container if the DB runs in a container.
# BUT IT IS NOT USED IN MAIN()
def start_postgres_container(db_name):
    """Start a PostgreSQL container with the given database name as the container name.

    Three cases are handled in order:
    1. a container with that name is already running -> nothing to do;
    2. a stopped container with that name exists -> ``docker start`` it;
    3. no such container exists -> ``docker run`` a new ``postgres`` container,
       publishing container port 5432 on host port 5432, or 5433 when 5432 is
       already taken.

    Failures of ``docker start`` / ``docker run`` are reported on stdout and
    not raised.

    Args:
        db_name: Database name, also used as the Docker container name.
    """
    container_name = db_name

    # Case 1: already running.
    if _docker_container_ids(container_name):
        print(f"Container '{container_name}' is already running.")
        return

    # Case 2: exists but stopped.
    if _docker_container_ids(container_name, include_stopped=True):
        print(f"Starting existing container '{container_name}'...")
        try:
            subprocess.run(["docker", "start", container_name], check=True)
        except subprocess.CalledProcessError as e:
            # Do not claim success when docker reported a failure.
            print(f"Failed to start Docker container: {e}")
            return
        print(f"PostgreSQL Docker container '{container_name}' activated.")
        return

    # Case 3: create and start a new container.
    # The port is probed locally; is_port_in_use() expects a DB URI, not a port.
    port = 5433 if _local_port_in_use(5432) else 5432
    # SECURITY NOTE(review): a hard-coded fallback password is used when
    # POSTGRES_PASSWORD is unset -- consider requiring the variable instead.
    postgres_password = os.getenv("POSTGRES_PASSWORD", "RK17")

    start_command = [
        "docker", "run", "--name", container_name,
        "-e", f"POSTGRES_PASSWORD={postgres_password}",
        "-p", f"{port}:5432",
        "-d", "postgres",
    ]

    try:
        subprocess.run(start_command, check=True)
    except subprocess.CalledProcessError as e:
        print(f"Failed to start Docker container: {e}")
    else:
        print(f"PostgreSQL Docker container '{container_name}' started on port {port}.")
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
78 |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
79 |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
def wait_for_db(uri, timeout=60):
    """Try connecting to the DB until it works or the timeout expires.

    A connection attempt is made roughly every 2 seconds while
    ``OperationalError`` is raised.

    Args:
        uri: SQLAlchemy database URI to connect to.
        timeout: Maximum number of seconds to keep retrying.

    Raises:
        TimeoutError: If no connection could be opened before the timeout.
            (``TimeoutError`` is an ``Exception`` subclass, so existing
            ``except Exception`` handlers keep working.)
    """
    engine = create_engine(uri)
    # Monotonic clock: the deadline is unaffected by system clock adjustments.
    deadline = time.monotonic() + timeout
    last_error = None
    try:
        while time.monotonic() < deadline:
            try:
                with engine.connect():
                    print("Connected to database.")
                    return
            except OperationalError as err:
                last_error = err
                print("Database not ready, retrying...")
                # Never sleep past the deadline.
                time.sleep(min(2, max(0, deadline - time.monotonic())))
    finally:
        # This engine is only a probe; release its connection pool.
        engine.dispose()
    raise TimeoutError("Database connection failed after timeout.") from last_error
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
93 |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
94 |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
95 def fetch_annotations(csv_file, sequence_column, annotation_columns, db_uri, table_name, fragment_column_name, output): |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
96 """Fetch annotations from the database and save the result as GenBank files.""" |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
97 db_uri = fix_db_uri(db_uri) |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
98 df = pd.read_csv(csv_file, sep=',', header=None) |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
99 |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
100 engine = create_engine(db_uri) |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
101 connection = engine.connect() |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
102 |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
103 annotated_data = [] |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
104 |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
105 try: |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
106 with connection: |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
107 inspector = inspect(engine) |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
108 columns = [column['name'] for column in inspector.get_columns(table_name)] |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
109 |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
110 # Fetch all fragments from the table once |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
111 if fragment_column_name not in columns: |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
112 raise ValueError(f"Fragment column '{fragment_column_name}' not found in table '{table_name}'.") |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
113 |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
114 fragment_column_index = columns.index(fragment_column_name) |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
115 all_rows = connection.execute(text(f"SELECT * FROM {table_name}")).fetchall() |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
116 fragment_map = {row[fragment_column_index]: row for row in all_rows} |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
117 |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
118 # Compare fragments between CSV and DB |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
119 csv_fragments = set() |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
120 all_ids = set(df[0].dropna().astype(str)) |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
121 for _, row in df.iterrows(): |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
122 for col in df.columns: |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
123 if col != 0: |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
124 fragment = row[col] |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
125 if pd.notna(fragment): |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
126 fragment_str = str(fragment) |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
127 if fragment_str not in all_ids: |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
128 csv_fragments.add(fragment_str) |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
129 |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
130 db_fragments = set(fragment_map.keys()) |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
131 missing_fragments = sorted(list(csv_fragments - db_fragments)) |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
132 if missing_fragments: |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
133 raise ValueError( |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
134 f" Missing fragments in DB: {', '.join(missing_fragments)}" |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
135 ) |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
136 |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
137 # === CONTINUE WITH GB FILE CREATION === |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
138 for _, row in df.iterrows(): |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
139 annotated_row = {"Backbone": row[0], "Fragments": []} |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
140 for col in df.columns: |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
141 if col != 0: |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
142 fragment = row[col] |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
143 if fragment not in csv_fragments: |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
144 continue |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
145 db_row = fragment_map.get(fragment) |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
146 |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
147 if db_row: |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
148 fragment_data = {"id": fragment} |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
149 for i, column_name in enumerate(columns[1:]): # skip ID column |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
150 fragment_data[column_name] = db_row[i + 1] |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
151 else: |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
152 fragment_data = {"id": fragment, "metadata": "No data found"} |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
153 |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
154 annotated_row["Fragments"].append(fragment_data) |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
155 |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
156 annotated_data.append(annotated_row) |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
157 |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
158 except Exception as e: |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
159 print(f"Error occurred during annotation: {e}") |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
160 raise # Ensures the error exits the script |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
161 |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
162 # GenBank file generation per fragment |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
163 try: |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
164 for annotated_row in annotated_data: |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
165 backbone_id = annotated_row["Backbone"] |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
166 for fragment in annotated_row["Fragments"]: |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
167 fragment_id = fragment["id"] |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
168 sequence = fragment.get(sequence_column, "") |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
169 annotation = fragment.get(annotation_columns, "") |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
170 |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
171 # Create the SeqRecord |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
172 record = SeqRecord( |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
173 Seq(sequence), |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
174 id=fragment_id, |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
175 name=fragment_id, |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
176 description=f"Fragment {fragment_id} from Backbone {backbone_id}" |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
177 ) |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
178 |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
179 # Add annotations to GenBank header |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
180 record.annotations = { |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
181 k: str(fragment[k]) for k in annotation_columns if k in fragment |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
182 } |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
183 |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
184 # LOCUS line extraction from annotation (copy-paste the LOCUS from annotation) |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
185 locus_line_match = re.search(r"LOCUS\s+.+", annotation) |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
186 if locus_line_match: |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
187 locus_line = locus_line_match.group() |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
188 else: |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
189 print(f"LOCUS info missing for fragment {fragment_id}") |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
190 locus_line = f"LOCUS {fragment_id: <20} {len(sequence)} bp DNA linear UNK 01-JAN-2025" |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
191 |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
192 # Format sequence as per GenBank standards (with ORIGIN and line breaks) |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
193 if "ORIGIN" in sequence: |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
194 origin_block = sequence.strip() |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
195 else: |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
196 # Format sequence as per GenBank standards (with ORIGIN and line breaks) |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
197 formatted_sequence = "ORIGIN\n" |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
198 seq_str = str(record.seq) |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
199 for i in range(0, len(seq_str), 60): # 60 bases per line |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
200 line_seq = seq_str[i:i + 60] |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
201 formatted_sequence += f"{str(i + 1).rjust(9)} { ' '.join([line_seq[j:j+10] for j in range(0, len(line_seq), 10)]) }\n" |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
202 origin_block = formatted_sequence.strip() |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
203 |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
204 # Find and copy the FEATURES section directly from annotation |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
205 features_section = "" |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
206 features_start = annotation.find("FEATURES") |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
207 if features_start != -1: |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
208 features_section = annotation[features_start:] |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
209 |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
210 # Writing the GenBank file |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
211 if not os.path.exists(output): |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
212 os.makedirs(output) |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
213 |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
214 gb_filename = os.path.join(output, f"{fragment_id}.gb") |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
215 with open(gb_filename, "w") as f: |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
216 # Write the LOCUS line |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
217 f.write(locus_line + "\n") |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
218 # Write DEFINITION, ACCESSION, and other annotations |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
219 f.write(f"DEFINITION {record.description}\n") |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
220 f.write(f"ACCESSION {record.id}\n") |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
221 f.write(f"VERSION DB\n") |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
222 f.write(f"KEYWORDS .\n") |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
223 f.write(f"SOURCE .\n") |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
224 # Write the FEATURES section directly from annotation |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
225 f.write(features_section) |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
226 # Write the ORIGIN section |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
227 f.write(origin_block + "\n") |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
228 f.write("//\n") |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
229 |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
230 except Exception as e: |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
231 print(f"Error saving GenBank files: {e}") |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
232 return |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
233 |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
234 |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
def main():
    """Parse the command line, wait for the database, then export GenBank files.

    The ``--db_uri`` value is passed through ``fix_db_uri``. ``wait_for_db`` is
    then attempted up to ``MAX_RETRIES`` times, sleeping ``RETRY_DELAY_SECONDS``
    between failed attempts. Once the database answers, ``fetch_annotations``
    is called with the parsed arguments to write the ``.gb`` files.

    Raises:
        Exception: whatever ``wait_for_db`` raised on the last failed attempt.
    """
    parser = argparse.ArgumentParser(
        description="Fetch fragment annotations from a PostgreSQL database and save them as GenBank files."
    )
    parser.add_argument("--input", required=True, help="Input CSV file")
    parser.add_argument(
        "--sequence_column",
        required=True,
        help="DB column containing the sequence for the GenBank file",
    )
    parser.add_argument(
        "--annotation_columns",
        required=True,
        help="DB column containing the header/annotation for the GenBank file",
    )
    parser.add_argument("--db_uri", required=True, help="Database URI connection string")
    parser.add_argument("--table", required=True, help="Table name in the database")
    parser.add_argument("--fragment_column", required=True, help="Fragment column name in the database")
    parser.add_argument("--output", required=True, help="Output dir for gb files")
    args = parser.parse_args()

    db_uri = fix_db_uri(args.db_uri)

    # Wait until the database is ready, retrying a few times before giving up.
    MAX_RETRIES = 3
    RETRY_DELAY_SECONDS = 2
    for attempt in range(1, MAX_RETRIES + 1):
        try:
            wait_for_db(db_uri)
            break  # Success
        except Exception:
            if attempt == MAX_RETRIES:
                # NOTE(review): db_uri may embed credentials; confirm it is
                # acceptable to echo it in the tool output.
                print(f"Attempt {attempt} failed: Could not connect to database at {db_uri}.")
                # Bare raise re-raises the active exception with its traceback intact.
                raise
            time.sleep(RETRY_DELAY_SECONDS)

    # Fetch annotations from the database and save them as GenBank files.
    fetch_annotations(
        args.input,
        args.sequence_column,
        args.annotation_columns,
        db_uri,
        args.table,
        args.fragment_column,
        args.output,
    )
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
264 |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
265 |
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
tduigou
parents:
diff
changeset
|
# Run the command-line entry point only when this file is executed as a script.
if __name__ == "__main__":
    raise SystemExit(main())