Mercurial > repos > tduigou > get_from_db
annotate get_db_info.py @ 0:1769c133986b draft default tip
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
| author | tduigou | 
|---|---|
| date | Wed, 11 Jun 2025 09:42:52 +0000 | 
| parents | |
| children | 
| rev | line source | 
|---|---|
| 0 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 1 import subprocess | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 2 import argparse | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 3 import time | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 4 import os | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 5 import socket | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 6 import re | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 7 from Bio.Seq import Seq | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 8 import pandas as pd | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 9 from Bio.SeqRecord import SeqRecord | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 10 from sqlalchemy import create_engine, inspect | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 11 from sqlalchemy.engine.url import make_url | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 12 from sqlalchemy.sql import text | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 13 from sqlalchemy.exc import OperationalError | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 14 | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 15 | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
def fix_db_uri(uri):
    """Return *uri* with every ``__at__`` placeholder turned back into ``@``."""
    # Splitting on the placeholder and re-joining with "@" substitutes each
    # non-overlapping occurrence, left to right.
    pieces = uri.split("__at__")
    return "@".join(pieces)
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 19 | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 20 | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
def is_port_in_use(uri):
    """Check whether something accepts TCP connections on a host/port.

    Args:
        uri: Either a SQLAlchemy database URI, from which host and port are
            taken, or a bare integer port number, which is probed on
            127.0.0.1.

    Returns:
        True if a TCP connection to the target succeeds within 2 seconds,
        False if the connection attempt fails or times out.

    Raises:
        ValueError: If a URI is given that does not specify a port.
    """
    if isinstance(uri, int):
        # A bare port number: probe the local machine.
        host, port = "127.0.0.1", uri
    else:
        url = make_url(uri)
        # A URI may omit the host; fall back to the local machine rather
        # than handing None to the socket layer.
        host = url.host or "localhost"
        port = url.port
        if port is None:
            # Only the host is reported so credentials in the URI never end
            # up in an error message.
            raise ValueError(
                f"No port specified in database URI for host '{host}'."
            )

    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        s.settimeout(2)
        # connect_ex returns 0 on success and an errno value otherwise.
        return s.connect_ex((host, port)) == 0
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 29 | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 30 | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
def extract_db_name(uri):
    """Return the database component of a SQLAlchemy connection URI."""
    # make_url parses the URI into a URL object; its `database` attribute
    # holds the database name.
    return make_url(uri).database
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 35 | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 36 | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
# This function starts the Docker container when the DB runs in a container.
# NOTE: it is not used in main().
def start_postgres_container(db_name):
    """Start a PostgreSQL container whose name is the given database name.

    Three cases are handled, in order:
      1. A container matching the name is already running: nothing to do.
      2. A matching container exists but is stopped: ``docker start`` it.
      3. No matching container exists: ``docker run`` a new ``postgres``
         container, publishing container port 5432 on host port 5432, or
         on 5433 when 5432 is already in use.

    All Docker commands are run as argument lists (no shell), so the
    database name is never interpreted by a shell.

    Args:
        db_name: Database name, reused as the Docker container name.

    Returns:
        None. Progress and failures are reported on stdout.
    """
    container_name = db_name

    def _matching_container_ids(*extra_flags):
        """Return the stripped `docker ps -q` output filtered on the name."""
        # NOTE(review): Docker's `name` filter matches substrings, so a
        # container called "<name>2" also matches "<name>" — confirm whether
        # an exact match is wanted.
        listing = subprocess.run(
            ["docker", "ps", *extra_flags, "-q", "-f", f"name={container_name}"],
            capture_output=True,
            text=True,
        )
        return listing.stdout.strip()

    # Check if container is already running
    if _matching_container_ids():
        print(f"Container '{container_name}' is already running.")
        return

    # Check if container exists (stopped)
    if _matching_container_ids("-a"):
        print(f"Starting existing container '{container_name}'...")
        try:
            # check=True so a failed start is reported, not announced as
            # a success.
            subprocess.run(["docker", "start", container_name], check=True)
            print(f"PostgreSQL Docker container '{container_name}' activated.")
        except subprocess.CalledProcessError as e:
            print(f"Failed to start Docker container: {e}")
        return

    # If container does not exist, create and start a new one.
    # is_port_in_use expects a database URI, so the default port is probed
    # through a minimal local URI.
    port = 5432 if not is_port_in_use("postgresql://localhost:5432") else 5433
    # NOTE(review): the fallback password is hard-coded in source; consider
    # requiring POSTGRES_PASSWORD to be set instead.
    postgres_password = os.getenv("POSTGRES_PASSWORD", "RK17")

    start_command = [
        "docker", "run", "--name", container_name,
        "-e", f"POSTGRES_PASSWORD={postgres_password}",
        "-p", f"{port}:5432",
        "-d", "postgres",
    ]

    try:
        subprocess.run(start_command, check=True)
        print(f"PostgreSQL Docker container '{container_name}' started on port {port}.")
    except subprocess.CalledProcessError as e:
        print(f"Failed to start Docker container: {e}")
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 78 | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 79 | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
def wait_for_db(uri, timeout=60):
    """Try connecting to the DB until it works or the timeout elapses.

    A connection is attempted repeatedly, sleeping 2 seconds after each
    ``OperationalError``, until one succeeds or ``timeout`` seconds have
    passed.

    Args:
        uri: SQLAlchemy database URI to connect to.
        timeout: Maximum number of seconds to keep retrying.

    Returns:
        None, as soon as a connection has been opened successfully.

    Raises:
        TimeoutError: If no connection could be established in time.
            (``TimeoutError`` is an ``Exception`` subclass, so callers
            catching ``Exception`` are unaffected.)
    """
    engine = create_engine(uri)
    # A monotonic clock keeps the deadline immune to wall-clock adjustments.
    deadline = time.monotonic() + timeout
    try:
        while time.monotonic() < deadline:
            try:
                with engine.connect():
                    print("Connected to database.")
                    return
            except OperationalError:
                print("Database not ready, retrying...")
                time.sleep(2)
    finally:
        # This engine exists only for probing; release its connection pool
        # whether the wait succeeded or not.
        engine.dispose()
    raise TimeoutError("Database connection failed after timeout.")
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 93 | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 94 | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 95 def fetch_annotations(csv_file, sequence_column, annotation_columns, db_uri, table_name, fragment_column_name, output): | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 96 """Fetch annotations from the database and save the result as GenBank files.""" | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 97 db_uri = fix_db_uri(db_uri) | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 98 df = pd.read_csv(csv_file, sep=',', header=None) | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 99 | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 100 engine = create_engine(db_uri) | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 101 connection = engine.connect() | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 102 | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 103 annotated_data = [] | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 104 | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 105 try: | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 106 with connection: | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 107 inspector = inspect(engine) | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 108 columns = [column['name'] for column in inspector.get_columns(table_name)] | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 109 | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 110 # Fetch all fragments from the table once | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 111 if fragment_column_name not in columns: | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 112 raise ValueError(f"Fragment column '{fragment_column_name}' not found in table '{table_name}'.") | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 113 | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 114 fragment_column_index = columns.index(fragment_column_name) | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 115 all_rows = connection.execute(text(f"SELECT * FROM {table_name}")).fetchall() | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 116 fragment_map = {row[fragment_column_index]: row for row in all_rows} | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 117 | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 118 # Compare fragments between CSV and DB | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 119 csv_fragments = set() | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 120 all_ids = set(df[0].dropna().astype(str)) | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 121 for _, row in df.iterrows(): | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 122 for col in df.columns: | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 123 if col != 0: | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 124 fragment = row[col] | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 125 if pd.notna(fragment): | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 126 fragment_str = str(fragment) | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 127 if fragment_str not in all_ids: | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 128 csv_fragments.add(fragment_str) | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 129 | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 130 db_fragments = set(fragment_map.keys()) | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 131 missing_fragments = sorted(list(csv_fragments - db_fragments)) | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 132 if missing_fragments: | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 133 raise ValueError( | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 134 f" Missing fragments in DB: {', '.join(missing_fragments)}" | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 135 ) | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 136 | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 137 # === CONTINUE WITH GB FILE CREATION === | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 138 for _, row in df.iterrows(): | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 139 annotated_row = {"Backbone": row[0], "Fragments": []} | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 140 for col in df.columns: | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 141 if col != 0: | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 142 fragment = row[col] | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 143 if fragment not in csv_fragments: | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 144 continue | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 145 db_row = fragment_map.get(fragment) | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 146 | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 147 if db_row: | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 148 fragment_data = {"id": fragment} | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 149 for i, column_name in enumerate(columns[1:]): # skip ID column | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 150 fragment_data[column_name] = db_row[i + 1] | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 151 else: | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 152 fragment_data = {"id": fragment, "metadata": "No data found"} | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 153 | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 154 annotated_row["Fragments"].append(fragment_data) | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 155 | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 156 annotated_data.append(annotated_row) | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 157 | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 158 except Exception as e: | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 159 print(f"Error occurred during annotation: {e}") | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 160 raise # Ensures the error exits the script | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 161 | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 162 # GenBank file generation per fragment | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 163 try: | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 164 for annotated_row in annotated_data: | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 165 backbone_id = annotated_row["Backbone"] | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 166 for fragment in annotated_row["Fragments"]: | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 167 fragment_id = fragment["id"] | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 168 sequence = fragment.get(sequence_column, "") | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 169 annotation = fragment.get(annotation_columns, "") | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 170 | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 171 # Create the SeqRecord | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 172 record = SeqRecord( | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 173 Seq(sequence), | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 174 id=fragment_id, | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 175 name=fragment_id, | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 176 description=f"Fragment {fragment_id} from Backbone {backbone_id}" | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 177 ) | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 178 | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 179 # Add annotations to GenBank header | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 180 record.annotations = { | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 181 k: str(fragment[k]) for k in annotation_columns if k in fragment | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 182 } | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 183 | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 184 # LOCUS line extraction from annotation (copy-paste the LOCUS from annotation) | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 185 locus_line_match = re.search(r"LOCUS\s+.+", annotation) | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 186 if locus_line_match: | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 187 locus_line = locus_line_match.group() | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 188 else: | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 189 print(f"LOCUS info missing for fragment {fragment_id}") | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 190 locus_line = f"LOCUS {fragment_id: <20} {len(sequence)} bp DNA linear UNK 01-JAN-2025" | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 191 | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 192 # Format sequence as per GenBank standards (with ORIGIN and line breaks) | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 193 if "ORIGIN" in sequence: | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 194 origin_block = sequence.strip() | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 195 else: | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 196 # Format sequence as per GenBank standards (with ORIGIN and line breaks) | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 197 formatted_sequence = "ORIGIN\n" | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 198 seq_str = str(record.seq) | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 199 for i in range(0, len(seq_str), 60): # 60 bases per line | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 200 line_seq = seq_str[i:i + 60] | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 201 formatted_sequence += f"{str(i + 1).rjust(9)} { ' '.join([line_seq[j:j+10] for j in range(0, len(line_seq), 10)]) }\n" | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 202 origin_block = formatted_sequence.strip() | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 203 | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 204 # Find and copy the FEATURES section directly from annotation | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 205 features_section = "" | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 206 features_start = annotation.find("FEATURES") | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 207 if features_start != -1: | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 208 features_section = annotation[features_start:] | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 209 | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 210 # Writing the GenBank file | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 211 if not os.path.exists(output): | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 212 os.makedirs(output) | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 213 | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 214 gb_filename = os.path.join(output, f"{fragment_id}.gb") | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 215 with open(gb_filename, "w") as f: | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 216 # Write the LOCUS line | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 217 f.write(locus_line + "\n") | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 218 # Write DEFINITION, ACCESSION, and other annotations | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 219 f.write(f"DEFINITION {record.description}\n") | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 220 f.write(f"ACCESSION {record.id}\n") | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 221 f.write(f"VERSION DB\n") | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 222 f.write(f"KEYWORDS .\n") | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 223 f.write(f"SOURCE .\n") | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 224 # Write the FEATURES section directly from annotation | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 225 f.write(features_section) | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 226 # Write the ORIGIN section | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 227 f.write(origin_block + "\n") | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 228 f.write("//\n") | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 229 | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 230 except Exception as e: | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 231 print(f"Error saving GenBank files: {e}") | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 232 return | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 233 | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 234 | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
def main(argv=None):
    """Parse CLI arguments, wait for the database, then write GenBank files.

    Args:
        argv: Optional list of command-line arguments. When ``None`` (the
            default) argparse falls back to ``sys.argv[1:]``, so existing
            ``main()`` callers behave exactly as before.

    Raises:
        Exception: whatever ``wait_for_db`` raised on the last attempt,
            re-raised unchanged once all retries are exhausted.
    """
    parser = argparse.ArgumentParser(
        description="Fetch annotations from PostgreSQL database and save as GenBank files."
    )
    parser.add_argument("--input", required=True, help="Input CSV file")
    parser.add_argument(
        "--sequence_column",
        required=True,
        help="DB column containing the sequence for the GenBank file",
    )
    parser.add_argument(
        "--annotation_columns",
        required=True,
        help="DB column containing the header for the GenBank file",
    )
    parser.add_argument("--db_uri", required=True, help="Database URI connection string")
    parser.add_argument("--table", required=True, help="Table name in the database")
    parser.add_argument("--fragment_column", required=True, help="Fragment column name in the database")
    parser.add_argument("--output", required=True, help="Output dir for gb files")
    args = parser.parse_args(argv)

    # Wait until the database is ready
    db_uri = fix_db_uri(args.db_uri)
    max_retries = 3
    retry_delay_seconds = 2
    for attempt in range(1, max_retries + 1):
        try:
            wait_for_db(db_uri)
            break  # Success
        except Exception:
            if attempt == max_retries:
                # A connection URI may embed "user:password@"; mask the
                # password so it does not end up in the tool's output.
                safe_uri = re.sub(r"(://[^:/@\s]+):[^/\s]*@", r"\1:***@", str(db_uri))
                print(f"Attempt {attempt} failed: Could not connect to database at {safe_uri}.")
                # Bare raise keeps the original traceback intact.
                raise
            time.sleep(retry_delay_seconds)

    # Fetch annotations from the database and save as gb
    fetch_annotations(
        args.input,
        args.sequence_column,
        args.annotation_columns,
        db_uri,
        args.table,
        args.fragment_column,
        args.output,
    )
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 264 | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
changeset | 265 | 
| 
1769c133986b
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
 tduigou parents: diff
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
