# HG changeset patch
# User enis
# Date 1753195667 0
# Node ID 2ff4a39ea41bc6ffb28d6e3d34a52ffe073faf38
# Parent  0ea626b105572c49563264d792506f830fce5e34
planemo upload commit 1bf6938d35be8e67e317f504f43f281ce7dc06e6

diff -r 0ea626b10557 -r 2ff4a39ea41b Dockerfile
--- a/Dockerfile	Mon Jul 21 15:13:09 2025 +0000
+++ b/Dockerfile	Tue Jul 22 14:47:47 2025 +0000
@@ -1,36 +1,13 @@
 FROM google/cloud-sdk:latest
 
-RUN apt-get update && apt-get install -y python3 netcat-openbsd
-
-# Create galaxy user and home directory
-RUN useradd -m -s /bin/bash galaxy
-
-# Create a directory for our scripts that galaxy user can access
-RUN mkdir -p /opt/galaxy-tools && chown galaxy:galaxy /opt/galaxy-tools
+RUN apt-get update && apt-get install -y python3 python3-pip netcat-openbsd
 
-# Create gcloud config directory with proper permissions
-RUN mkdir -p /home/galaxy/.config/gcloud && \
-    chown -R galaxy:galaxy /home/galaxy/.config && \
-    chmod -R 777 /home/galaxy/.config
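+# google-cloud-batch is installed into the system Python; --break-system-packages
+# is needed because the Debian-based image marks its Python as externally
+# managed (PEP 668) and pip otherwise refuses system-wide installs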
+RUN pip3 install --break-system-packages google-cloud-batch
 
-# Also create a temporary gcloud config directory as backup
-RUN mkdir -p /tmp/gcloud-config && chmod 777 /tmp/gcloud-config
+RUN groupadd -g 10001 galaxy && useradd -u 10001 -g 10001 -m -s /bin/bash galaxy
 
-# Copy files to the galaxy-accessible directory
-COPY gcp_batch_netcat.py /opt/galaxy-tools/
-COPY gcp_batch_netcat.xml /opt/galaxy-tools/
+# Add bash alias for ll
+RUN echo "alias ll='ls -l'" >> /home/galaxy/.bashrc && \
+    chown galaxy:galaxy /home/galaxy/.bashrc
 
-# Ensure galaxy user can execute the script
-RUN chown galaxy:galaxy /opt/galaxy-tools/* && chmod +x /opt/galaxy-tools/gcp_batch_netcat.py
-
-# Switch to galaxy user
 USER galaxy
-
-# Set environment variables for gcloud configuration
-ENV CLOUDSDK_CONFIG=/tmp/gcloud-config
-ENV HOME=/home/galaxy
-
-# Set working directory
-WORKDIR /opt/galaxy-tools
-
-ENTRYPOINT ["python3", "/opt/galaxy-tools/gcp_batch_netcat.py"]
diff -r 0ea626b10557 -r 2ff4a39ea41b LICENSE
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/LICENSE	Tue Jul 22 14:47:47 2025 +0000
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2025 Enis Afgan
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff -r 0ea626b10557 -r 2ff4a39ea41b README.md
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/README.md	Tue Jul 22 14:47:47 2025 +0000
@@ -0,0 +1,111 @@
+# GCP Batch Netcat Galaxy Tool
+
+A Galaxy tool that submits a job to the Google Cloud Platform (GCP) Batch service to test connectivity to an NFS server using `netcat`. This tool is primarily intended for Galaxy deployments that use the Galaxy Helm chart, where it can verify network connectivity between GCP Batch workers and NFS storage systems.
+
+## Overview
+
+This tool creates and submits a GCP Batch job that runs a simple network connectivity test against an NFS server using `netcat` (nc). It is particularly useful for:
+- Testing network connectivity between GCP Batch compute nodes and NFS storage
+- Validating that firewall rules allow communication on port 2049 (NFS)
+- Troubleshooting connectivity issues in Galaxy deployments on Kubernetes
+
+The tool is available in the Main Tool Shed at:
+https://toolshed.g2.bx.psu.edu/view/enis/gcp_batch_netcat/
+
+## For use with the Galaxy Helm Chart
+
+This tool is specifically designed for Galaxy deployments using the Galaxy Helm chart on Google Kubernetes Engine (GKE). A sample deployment can be obtained using the [galaxy-k8s-boot repository](https://github.com/galaxyproject/galaxy-k8s-boot/).
+
+## Input Parameters Reference
+
+The Galaxy tool interface presents the following parameters:
+
+### Required Parameters
+
+#### **GCP Batch Region**
+- **Galaxy Label**: "GCP Batch Region"
+- **Description**: The GCP region where the Batch job will be submitted
+- **Example**: `us-central1`
+- **Note**: Choose the same region as the Galaxy deployment
+
+#### **GCP Network name**
+- **Galaxy Label**: "GCP Network name"
+- **Description**: The name of the GCP VPC network in which Galaxy runs
+- **Examples**: `default`, `galaxy-vpc`
+- **Important**: The network must allow communication between Batch workers and the Galaxy NFS server
+
+#### **GCP Subnet name**
+- **Galaxy Label**: "GCP Subnet name"
+- **Description**: The name of the subnet in which Galaxy runs
+- **Example**: `default`
+
+#### **GCP Service Account Key File**
+- **Galaxy Label**: "GCP Service Account Key File"
+- **Format**: JSON file
+- **Description**: Upload the JSON key file for a GCP service account with Batch API permissions
+- **Required Permissions**:
+  - Batch Job Editor role (or equivalent permissions)
+  - Access to the specified network and subnet
+- **How to Create** (console steps; a command-line sketch follows below):
+  1. Go to GCP Console → IAM & Admin → Service Accounts
+  2. Create a new service account or select an existing one
+  3. Assign the "Batch Job Editor" role
+  4. Create and download a JSON key
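+
+The same setup can be scripted with the `gcloud` CLI. A minimal sketch, where
+the `galaxy-batch` account name and `my-galaxy-project` project ID are
+placeholders and `roles/batch.jobsEditor` is the built-in Batch Job Editor role:
+
+```
+gcloud iam service-accounts create galaxy-batch --project=my-galaxy-project
+gcloud projects add-iam-policy-binding my-galaxy-project \
+    --member="serviceAccount:galaxy-batch@my-galaxy-project.iam.gserviceaccount.com" \
+    --role="roles/batch.jobsEditor"
+gcloud iam service-accounts keys create key.json \
+    --iam-account=galaxy-batch@my-galaxy-project.iam.gserviceaccount.com
+```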
+
+### Optional Parameters
+
+#### **NFS Server Address**
+- **Galaxy Label**: "NFS Server Address"
+- **Description**: IP address or hostname of the NFS server to test connectivity to. This is the same address Galaxy itself uses.
+- **Auto-detection**: If not supplied, the tool attempts to detect the NFS server from Galaxy's database mount. This is the preferred mode of operation.
+- **Example**: `10.0.0.100`
+- **When to specify**: Use when auto-detection fails or when testing a different NFS server
+
+#### **GCP Project ID**
+- **Galaxy Label**: "GCP Project ID"
+- **Description**: The ID of the GCP project where the Batch job should be created
+- **Auto-extraction**: If left blank, the project ID is automatically extracted from the service account key file
+- **Example**: `my-galaxy-project`
+
+## Using the Tool in Galaxy
+
+### What Happens
+
+The tool will:
+- Submit a lightweight job to GCP Batch in the specified region and network
+- Test connectivity to the NFS server on port 2049 using `netcat`
+- Write a report of the submission; the result of the connectivity test itself is recorded in the Batch job's Cloud Logging output (see the monitoring sketch below)
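+
+To follow the submitted job on the GCP side, the `gcloud` CLI can be used. A
+sketch, assuming the job was submitted to `us-central1`; job names are
+generated by the tool as `netcat-job-<uuid>`:
+
+```
+gcloud batch jobs list --location=us-central1
+gcloud batch jobs describe netcat-job-<uuid> --location=us-central1
+```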
+
+## Setup Requirements
+
+Before using this tool in Galaxy, ensure you have:
+
+### GCP Prerequisites
+- A GCP project with the Batch API enabled
+- A VPC network and subnet where both Galaxy and the NFS server can communicate
+- A service account with the "Batch Job Editor" role
+- The downloaded JSON key file for the service account
+
+### Network Configuration
+- A firewall rule allowing traffic from the Batch workers to the NFS server on port 2049 in the specified network:
+```
+gcloud compute firewall-rules create allow-nfs-from-batch \
+    --network=NETWORK_NAME \
+    --allow=tcp:2049
+```
+
+### NFS Server Setup
+- The Ganesha NFS service needs to be exposed through an internal LoadBalancer (its address can then be read back as sketched below):
+```
+apiVersion: v1
+kind: Service
+metadata:
+  name: nfs-provisioner-nfs-server-provisioner
+  namespace: nfs-provisioner
+  annotations:
+    cloud.google.com/load-balancer-type: "Internal"
+    ...
+spec:
+  type: LoadBalancer
+  ...
+```
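+
+Once the annotation is in place, the internal load balancer address that Batch
+workers must reach (and that can be supplied as the NFS Server Address) can be
+read from the service status. A sketch, using the service name and namespace
+from the sample above:
+
+```
+kubectl get svc nfs-provisioner-nfs-server-provisioner -n nfs-provisioner \
+    -o jsonpath='{.status.loadBalancer.ingress[0].ip}'
+```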
diff -r 0ea626b10557 -r 2ff4a39ea41b batch-netcat.Dockerfile
diff -r 0ea626b10557 -r 2ff4a39ea41b gcp_batch_netcat.py
--- a/gcp_batch_netcat.py	Mon Jul 21 15:13:09 2025 +0000
+++ b/gcp_batch_netcat.py	Tue Jul 22 14:47:47 2025 +0000
@@ -1,86 +1,192 @@
-
-import json
-import subprocess
-import argparse
-import uuid
-import time
-import os
+import argparse
+import json
+import logging
+import os
+import subprocess
+import sys
+import traceback
+import uuid
+
+from google.cloud import batch_v1
+
+# Configure logging to go to stdout instead of stderr to avoid Galaxy marking the job as failed
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(levelname)s - %(message)s',
+    stream=sys.stdout
+)
+logger = logging.getLogger(__name__)
+
 
 def main():
     parser = argparse.ArgumentParser()
-    parser.add_argument('--nfs_address', required=True)
+    parser.add_argument('--nfs_address', required=False, help='NFS server address (if not provided, will be auto-detected from the /galaxy/server/database/ mount)')
     parser.add_argument('--output', required=True)
-    parser.add_argument('--project', required=True)
+    parser.add_argument('--project', required=False, help='GCP project ID (if not provided, will be extracted from the service account key)')
     parser.add_argument('--region', required=True)
-    parser.add_argument('--port', default='2049')
+    parser.add_argument('--network', default='default', help='GCP network name')
+    parser.add_argument('--subnet', default='default', help='GCP subnet name')
     parser.add_argument('--service_account_key', required=True)
     args = parser.parse_args()
 
     # Set up authentication using the service account key
     os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = args.service_account_key
+    logger.info(f"Authentication configured with service account: {args.service_account_key}")
 
-    # Ensure gcloud uses a writable config directory
-    os.environ['CLOUDSDK_CONFIG'] = '/tmp/gcloud-config'
-
-    # Create the temp config directory if it doesn't exist
-    os.makedirs('/tmp/gcloud-config', exist_ok=True)
+    # Extract the GCP project ID from the service account key if not provided
+    if args.project:
+        project_id = args.project
+        logger.info(f"Using provided project ID: {project_id}")
+    else:
+        try:
+            with open(args.service_account_key, 'r') as f:
+                service_account_data = json.load(f)
+            project_id = service_account_data.get('project_id')
+            if not project_id:
+                raise ValueError("project_id not found in service account key file")
+            logger.info(f"Extracted project ID from service account key: {project_id}")
+        except Exception as e:
+            logger.error(f"Failed to extract project ID from service account key: {e}")
+            raise
+
+    # Determine the NFS server address if not provided
+    if args.nfs_address:
+        nfs_address = args.nfs_address
+        logger.info(f"Using provided NFS address: {nfs_address}")
+    else:
+        try:
+            # Try to detect the NFS server from the /galaxy/server/database/ mount
+            result = subprocess.run(['mount'], capture_output=True, text=True)
+            nfs_address = None
+            for line in result.stdout.split('\n'):
+                if '/galaxy/server/database' in line and ':' in line:
+                    # Look for the NFS mount pattern: server:/path on /galaxy/server/database
+                    parts = line.split()
+                    for part in parts:
+                        if ':' in part and part.count(':') == 1:
+                            nfs_address = part.split(':')[0]
+                            break
+                    if nfs_address:
+                        logger.info(f"Detected NFS address from mount: {nfs_address}")
+                        break
+
+            if not nfs_address:
+                # Fallback: try to parse /proc/mounts
+                try:
+                    with open('/proc/mounts', 'r') as f:
+                        for line in f:
+                            if '/galaxy/server/database' in line and ':' in line:
+                                parts = line.split()
+                                if len(parts) > 0 and ':' in parts[0]:
+                                    nfs_address = parts[0].split(':')[0]
+                                    logger.info(f"Detected NFS address from /proc/mounts: {nfs_address}")
+                                    break
+                except OSError:
+                    pass
+
+            if not nfs_address:
+                raise ValueError("Could not auto-detect the NFS server address from the /galaxy/server/database/ mount")
+        except Exception as e:
+            logger.error(f"Failed to auto-detect NFS address: {e}")
+            raise
 
     job_name = f'netcat-job-{uuid.uuid4()}'
+    logger.info(f"Generated job name: {job_name}")
 
-    job_spec = {
-        "taskGroups": [
-            {
-                "taskSpec": {
-                    "runnables": [
-                        {
-                            "script": {
-                                "text": f"/usr/bin/nc -z -v {args.nfs_address} {args.port}"
-                            }
-                        }
-                    ],
-                    "computeResource": {
-                        "cpuMilli": 1000,
-                        "memoryMib": 1024
-                    },
-                    "environment": {
-                        "imageUri": "afgane/gcp-batch-netcat:0.1.0"
-                    }
-                },
-                "taskCount": 1,
-                "parallelism": 1
-            }
-        ],
-        "logsPolicy": {
-            "destination": "CLOUD_LOGGING"
-        }
-    }
+    # Create the Batch client
+    logger.info("Creating Batch client...")
+    client = batch_v1.BatchServiceClient()
+    logger.info("Batch client created successfully")
+
+    # Define the job using the Python client library objects
+    logger.info("Building job specification...")
+    runnable = batch_v1.Runnable()
+    runnable.container = batch_v1.Runnable.Container()
+    runnable.container.image_uri = "afgane/gcp-batch-netcat:0.2.0"
+    runnable.container.entrypoint = "/usr/bin/nc"
+    runnable.container.commands = ["-z", "-v", nfs_address, "2049"]
+    logger.debug(f"Container config: image={runnable.container.image_uri}, entrypoint={runnable.container.entrypoint}, commands={runnable.container.commands}")
+
+    task = batch_v1.TaskSpec()
+    task.runnables = [runnable]
+    task.compute_resource = batch_v1.ComputeResource()
+    task.compute_resource.cpu_milli = 1000
+    task.compute_resource.memory_mib = 1024
+    logger.debug(f"Compute resources: CPU={task.compute_resource.cpu_milli}m, Memory={task.compute_resource.memory_mib}MiB")
+
+    task_group = batch_v1.TaskGroup()
+    task_group.task_count = 1
+    task_group.parallelism = 1
+    task_group.task_spec = task
+    logger.debug(f"Task group: count={task_group.task_count}, parallelism={task_group.parallelism}")
+
+    # Network configuration: the Batch job should run in the same network as the NFS server
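+    # The network and subnetwork are given as relative resource names
+    # ("global/networks/<name>" and "regions/<region>/subnetworks/<name>");
+    # both must exist in the project the job is submitted to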
+    network_interface = batch_v1.AllocationPolicy.NetworkInterface()
+    network_interface.network = f"global/networks/{args.network}"
+    network_interface.subnetwork = f"regions/{args.region}/subnetworks/{args.subnet}"
+    logger.debug(f"Network: {network_interface.network}")
+    logger.debug(f"Subnet: {network_interface.subnetwork}")
+
+    network_policy = batch_v1.AllocationPolicy.NetworkPolicy()
+    network_policy.network_interfaces = [network_interface]
+
+    allocation_policy = batch_v1.AllocationPolicy()
+    allocation_policy.network = network_policy
 
-    job_spec_file = 'job.json'
-    with open(job_spec_file, 'w') as f:
-        json.dump(job_spec, f)
+    job = batch_v1.Job()
+    job.task_groups = [task_group]
+    job.allocation_policy = allocation_policy
+    job.logs_policy = batch_v1.LogsPolicy()
+    job.logs_policy.destination = batch_v1.LogsPolicy.Destination.CLOUD_LOGGING
+    logger.info("Job specification built successfully")
 
-    command = [
-        'gcloud', 'batch', 'jobs', 'submit', job_name,
-        '--location', args.region,
-        '--project', args.project,
-        '--config', job_spec_file,
-        '--format=text',
-        '--verbosity=debug'
-    ]
+    create_request = batch_v1.CreateJobRequest()
+    create_request.parent = f"projects/{project_id}/locations/{args.region}"
+    create_request.job_id = job_name
+    create_request.job = job
+    logger.debug(f"Create request parent: {create_request.parent}")
+    logger.debug(f"Create request job_id: {create_request.job_id}")
 
-    # Wait 4 minutes before submitting the job
-    time.sleep(240)
+    logger.info(f"Submitting job with name: {job_name}")
+    logger.info(f"Target project: {project_id}")
+    logger.info(f"Target Batch region: {args.region}")
+    logger.info(f"NFS target: {nfs_address}:2049")
 
     try:
-        result = subprocess.run(command, capture_output=True, text=True, check=True)
+        logger.info("Calling client.create_job()...")
+        job_response = client.create_job(request=create_request)
+        logger.info("Job submitted successfully!")
+        logger.info(f"Job name: {job_response.name}")
+        logger.info(f"Job UID: {job_response.uid}")
+
         with open(args.output, 'w') as f:
-            f.write("Job output:\n")
-            f.write(result.stdout)
-            f.write(result.stderr)
-    except subprocess.CalledProcessError as e:
+            f.write("Job submitted successfully using the Python client.\n")
+            f.write(f"Job name: {job_name}\n")
+            f.write(f"Job response name: {job_response.name}\n")
+            f.write(f"Job UID: {job_response.uid}\n")
+            f.write(f"Project: {project_id}\n")
+            f.write(f"Region: {args.region}\n")
+            f.write(f"NFS Address: {nfs_address}:2049\n")
+    except Exception as e:
+        logger.error(f"Error submitting job: {type(e).__name__}: {e}")
+        logger.error("Traceback:", exc_info=True)
         with open(args.output, 'w') as f:
-            f.write("Error submitting job:\n")
-            f.write(e.stderr)
+            f.write(f"Error submitting job: {type(e).__name__}: {e}\n")
+            f.write(f"Job name: {job_name}\n")
+            f.write(f"Project: {project_id}\n")
+            f.write(f"Region: {args.region}\n")
+            f.write("Traceback:\n")
+            f.write(traceback.format_exc())
+
 
 if __name__ == '__main__':
     main()
diff -r 0ea626b10557 -r 2ff4a39ea41b gcp_batch_netcat.xml
--- a/gcp_batch_netcat.xml	Mon Jul 21 15:13:09 2025 +0000
+++ b/gcp_batch_netcat.xml	Tue Jul 22 14:47:47 2025 +0000
@@ -1,19 +1,20 @@
     <description>Submit a job to GCP Batch and connect to an NFS server.</description>
     <requirements>
-        <requirement type="package">google-cloud-sdk</requirement>
-        <requirement type="package">netcat</requirement>
-        <container type="docker">afgane/gcp-batch-netcat:0.1.0</container>
+
+        <container type="docker">afgane/gcp-batch-netcat:0.2.0</container>
     </requirements>
diff -r 0ea626b10557 -r 2ff4a39ea41b test-data/gcp_batch_netcat_out.txt
--- a/test-data/gcp_batch_netcat_out.txt	Mon Jul 21 15:13:09 2025 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,1 +0,0 @@
-Job submitted successfully.