changeset 4:2ff4a39ea41b draft

planemo upload commit 1bf6938d35be8e67e317f504f43f281ce7dc06e6
author enis
date Tue, 22 Jul 2025 14:47:47 +0000
parents 0ea626b10557
children b2ce158b4f22
files Dockerfile LICENSE README.md batch-netcat.Dockerfile gcp_batch_netcat.py gcp_batch_netcat.xml test-data/gcp_batch_netcat_out.txt
diffstat 6 files changed, 313 insertions(+), 98 deletions(-)
--- a/Dockerfile	Mon Jul 21 15:13:09 2025 +0000
+++ b/Dockerfile	Tue Jul 22 14:47:47 2025 +0000
@@ -1,36 +1,13 @@
 FROM google/cloud-sdk:latest
 
-RUN apt-get update && apt-get install -y python3 netcat-openbsd
-
-# Create galaxy user and home directory
-RUN useradd -m -s /bin/bash galaxy
-
-# Create a directory for our scripts that galaxy user can access
-RUN mkdir -p /opt/galaxy-tools && chown galaxy:galaxy /opt/galaxy-tools
+RUN apt-get update && apt-get install -y python3 python3-pip netcat-openbsd
 
-# Create gcloud config directory with proper permissions
-RUN mkdir -p /home/galaxy/.config/gcloud && \
-    chown -R galaxy:galaxy /home/galaxy/.config && \
-    chmod -R 777 /home/galaxy/.config
+RUN pip3 install --break-system-packages google-cloud-batch
 
-# Also create a temporary gcloud config directory as backup
-RUN mkdir -p /tmp/gcloud-config && chmod 777 /tmp/gcloud-config
+RUN groupadd -g 10001 galaxy && useradd -u 10001 -g 10001 -m -s /bin/bash galaxy
 
-# Copy files to the galaxy-accessible directory
-COPY gcp_batch_netcat.py /opt/galaxy-tools/
-COPY gcp_batch_netcat.xml /opt/galaxy-tools/
+# Add bash alias for ll
+RUN echo "alias ll='ls -l'" >> /home/galaxy/.bashrc && \
+    chown galaxy:galaxy /home/galaxy/.bashrc
 
-# Ensure galaxy user can execute the script
-RUN chown galaxy:galaxy /opt/galaxy-tools/* && chmod +x /opt/galaxy-tools/gcp_batch_netcat.py
-
-# Switch to galaxy user
 USER galaxy
-
-# Set environment variables for gcloud configuration
-ENV CLOUDSDK_CONFIG=/tmp/gcloud-config
-ENV HOME=/home/galaxy
-
-# Set working directory
-WORKDIR /opt/galaxy-tools
-
-ENTRYPOINT ["python3", "/opt/galaxy-tools/gcp_batch_netcat.py"]
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/LICENSE	Tue Jul 22 14:47:47 2025 +0000
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2025 Enis Afgan
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/README.md	Tue Jul 22 14:47:47 2025 +0000
@@ -0,0 +1,132 @@
+# GCP Batch Netcat Galaxy Tool
+
+A Galaxy tool that submits a job to Google Cloud Platform (GCP) Batch service to test connectivity to an NFS server using `netcat`. This tool is predominantly intended for use with Galaxy deployments using the Galaxy Helm chart, where it can verify network connectivity between GCP Batch workers and NFS storage systems.
+
+## Overview
+
+This tool creates and submits a GCP Batch job that runs a simple network connectivity test to an NFS server using `netcat` (nc). It's particularly useful for:
+- Testing network connectivity between GCP Batch compute nodes and NFS storage
+- Validating that firewall rules allow communication on port 2049 (NFS)
+- Troubleshooting connectivity issues in Galaxy deployments on Kubernetes
+
+The tool is available in the Main Tool Shed at:
+https://toolshed.g2.bx.psu.edu/view/enis/gcp_batch_netcat/
+
+## For use with the Galaxy Helm Chart
+
+This tool is specifically designed for Galaxy deployments using the Galaxy Helm chart on Google Kubernetes Engine (GKE). A sample deployment can be obtained using the [galaxy-k8s-boot repository](https://github.com/galaxyproject/galaxy-k8s-boot/).
+
+## Input Parameters Reference
+
+The Galaxy tool interface presents the following parameters:
+
+### Required Parameters
+
+#### **GCP Batch Region**
+- **Galaxy Label**: "GCP Batch Region"
+- **Description**: The GCP region where the Batch job will be submitted
+- **Example**: `us-central1`
+- **Note**: Choose the same region as the Galaxy deployment
+
+#### **GCP Network name**
+- **Galaxy Label**: "GCP Network name"
+- **Description**: The name of the GCP VPC network in which Galaxy runs
+- **Examples**: `default`, `galaxy-vpc`
+- **Important**: The network must allow communication between Batch workers and the Galaxy NFS server
+
+#### **GCP Subnet name**
+- **Galaxy Label**: "GCP Subnet name"
+- **Description**: The name of the subnet in which Galaxy runs
+- **Example**: `default`
+
+#### **GCP Service Account Key File**
+- **Galaxy Label**: "GCP Service Account Key File"
+- **Format**: JSON file
+- **Description**: Upload the JSON key file for a GCP service account with Batch API permissions
+- **Required Permissions**:
+  - Batch Job Editor role (or equivalent permissions)
+  - Access to the specified network and subnet
+- **How to Create** (a CLI sketch follows these steps):
+  1. Go to GCP Console → IAM & Admin → Service Accounts
+  2. Create a new service account or select existing one
+  3. Assign "Batch Job Editor" role
+  4. Create and download JSON key
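+
+For reference, a minimal `gcloud` sketch of these steps (the service account name `galaxy-batch` and the role id `roles/batch.jobsEditor` are assumptions to verify for your project):
+
+```
+gcloud iam service-accounts create galaxy-batch --project=MY_PROJECT
+gcloud projects add-iam-policy-binding MY_PROJECT \
+  --member="serviceAccount:galaxy-batch@MY_PROJECT.iam.gserviceaccount.com" \
+  --role="roles/batch.jobsEditor"
+gcloud iam service-accounts keys create key.json \
+  --iam-account=galaxy-batch@MY_PROJECT.iam.gserviceaccount.com
+```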
+
+### Optional Parameters
+
+#### **NFS Server Address**
+- **Galaxy Label**: "NFS Server Address"
+- **Description**: IP address or hostname of the NFS server to test connectivity to. This is the same address that Galaxy itself uses.
+- **Auto-detection**: If not supplied, the tool attempts to detect the NFS server from Galaxy's database mount. This is the preferred mode of operation.
+- **Example**: `10.0.0.100`
+- **When to specify**: Use when auto-detection fails or when testing a different NFS server
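+- **Detection detail**: the tool scans mount entries for a pattern like `<server>:/<export> on /galaxy/server/database ...` and uses the part before the colon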
+
+#### **GCP Project ID**
+- **Galaxy Label**: "GCP Project ID"
+- **Description**: The ID of the GCP project where the Batch job should be created
+- **Auto-extraction**: If left blank, the project ID is automatically extracted from the service account key file
+- **Example**: `my-galaxy-project`
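+- **Tip**: the project ID embedded in a key file can be checked with `jq -r .project_id key.json` (assuming `jq` is available)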
+
+## Using the Tool in Galaxy
+
+### What Happens
+
+The tool will:
+- Submit a lightweight job to GCP Batch in your specified region and network
+- Run `netcat` (`nc -z`) on a Batch worker to test connectivity to the NFS server on port 2049
+- Return a report confirming the job was submitted; the connectivity result itself is recorded in the job's Cloud Logging output
+
+## Setup Requirements
+
+Before using this tool in Galaxy, ensure you have:
+
+### GCP Prerequisites
+- A GCP project with the Batch API enabled
+- A VPC network and subnet where both Galaxy and the NFS server can communicate
+- A service account with "Batch Job Editor" role
+- Downloaded JSON key file for the service account
+
+### Network Configuration
+- Firewall rule allowing traffic from the Batch subnet to the NFS server on port 2049 in the specified network:
+```
+gcloud compute firewall-rules create allow-nfs-from-batch \
+  --network=NETWORK_NAME \
+  --allow=tcp:2049
+```
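+As written, this rule accepts traffic from any source; it can be narrowed by adding `--source-ranges` set to the Batch subnet's CIDR.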
+
+### NFS Server Setup
+- The Ganesha NFS service needs to be exposed through an internal LoadBalancer:
+```
+apiVersion: v1
+kind: Service
+metadata:
+  name: nfs-provisioner-nfs-server-provisioner
+  namespace: nfs-provisioner
+  annotations:
+    cloud.google.com/load-balancer-type: "Internal"
+  ...
+spec:
+  type: LoadBalancer
+  ...
+```
--- a/gcp_batch_netcat.py	Mon Jul 21 15:13:09 2025 +0000
+++ b/gcp_batch_netcat.py	Tue Jul 22 14:47:47 2025 +0000
@@ -1,86 +1,192 @@
-
+import argparse
 import json
-import subprocess
-import argparse
+import logging
+import os
+import sys
 import uuid
-import time
-import os
+from google.cloud import batch_v1
+
+# Configure logging to go to stdout instead of stderr to avoid Galaxy marking job as failed
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(levelname)s - %(message)s',
+    stream=sys.stdout
+)
+logger = logging.getLogger(__name__)
 
 def main():
     parser = argparse.ArgumentParser()
-    parser.add_argument('--nfs_address', required=True)
+    parser.add_argument('--nfs_address', required=False, help='NFS server address (if not provided, will be auto-detected from /galaxy/server/database/ mount)')
     parser.add_argument('--output', required=True)
-    parser.add_argument('--project', required=True)
+    parser.add_argument('--project', required=False, help='GCP Project ID (if not provided, will be extracted from service account key)')
     parser.add_argument('--region', required=True)
-    parser.add_argument('--port', default='2049')
+    parser.add_argument('--network', default='default', help='GCP Network name')
+    parser.add_argument('--subnet', default='default', help='GCP Subnet name')
     parser.add_argument('--service_account_key', required=True)
     args = parser.parse_args()
 
     # Set up authentication using the service account key
     os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = args.service_account_key
+    logger.info(f"Authentication configured with service account: {args.service_account_key}")
 
-    # Ensure gcloud uses a writable config directory
-    os.environ['CLOUDSDK_CONFIG'] = '/tmp/gcloud-config'
+    # Extract GCP project ID from service account key if not provided
+    if args.project:
+        project_id = args.project
+        logger.info(f"Using provided project ID: {project_id}")
+    else:
+        try:
+            with open(args.service_account_key, 'r') as f:
+                service_account_data = json.load(f)
+            project_id = service_account_data.get('project_id')
+            if not project_id:
+                raise ValueError("project_id not found in service account key file")
+            logger.info(f"Extracted project ID from service account key: {project_id}")
+        except Exception as e:
+            logger.error(f"Failed to extract project ID from service account key: {e}")
+            raise
+
+    # Extract NFS server address if not provided
+    if args.nfs_address:
+        nfs_address = args.nfs_address
+        logger.info(f"Using provided NFS address: {nfs_address}")
+    else:
+        try:
+            # Try to detect NFS server from /galaxy/server/database/ mount
+            import subprocess
+            result = subprocess.run(['mount'], capture_output=True, text=True)
+            nfs_address = None
 
-    # Create the temp config directory if it doesn't exist
-    os.makedirs('/tmp/gcloud-config', exist_ok=True)
+            for line in result.stdout.split('\n'):
+                if '/galaxy/server/database' in line and ':' in line:
+                    # Look for NFS mount pattern: server:/path on /galaxy/server/database
+                    parts = line.split()
+                    for part in parts:
+                        if ':' in part and part.count(':') == 1:
+                            nfs_address = part.split(':')[0]
+                            break
+                    if nfs_address:
+                        logger.info(f"Detected NFS address from mount: {nfs_address}")
+                        break
+
+            if not nfs_address:
+                # Fallback: try to parse /proc/mounts
+                try:
+                    with open('/proc/mounts', 'r') as f:
+                        for line in f:
+                            if '/galaxy/server/database' in line and ':' in line:
+                                parts = line.split()
+                                if len(parts) > 0 and ':' in parts[0]:
+                                    nfs_address = parts[0].split(':')[0]
+                                    logger.info(f"Detected NFS address from /proc/mounts: {nfs_address}")
+                                    break
+                except Exception:
+                    pass
+
+            if not nfs_address:
+                raise ValueError("Could not auto-detect NFS server address from /galaxy/server/database/ mount")
+
+            logger.info(f"Auto-detected NFS address from mount: {nfs_address}")
+        except Exception as e:
+            logger.error(f"Failed to auto-detect NFS address: {e}")
+            raise
+
     job_name = f'netcat-job-{uuid.uuid4()}'
+    logger.info(f"Generated job name: {job_name}")
 
-    job_spec = {
-        "taskGroups": [
-            {
-                "taskSpec": {
-                    "runnables": [
-                        {
-                            "script": {
-                                "text": f"/usr/bin/nc -z -v {args.nfs_address} {args.port}"
-                            }
-                        }
-                    ],
-                    "computeResource": {
-                        "cpuMilli": 1000,
-                        "memoryMib": 1024
-                    },
-                    "environment": {
-                        "imageUri": "afgane/gcp-batch-netcat:0.1.0"
-                    }
-                },
-                "taskCount": 1,
-                "parallelism": 1
-            }
-        ],
-        "logsPolicy": {
-            "destination": "CLOUD_LOGGING"
-        }
-    }
+    # Create Batch client
+    logger.info("Creating Batch client...")
+    client = batch_v1.BatchServiceClient()
+    logger.info("Batch client created successfully")
+
+    # Define the job using the Python client library objects
+    logger.info("Building job specification...")
+    runnable = batch_v1.Runnable()
+    runnable.container = batch_v1.Runnable.Container()
+    runnable.container.image_uri = "afgane/gcp-batch-netcat:0.2.0"
+    runnable.container.entrypoint = "/usr/bin/nc"
+    runnable.container.commands = ["-z", "-v", nfs_address, "2049"]
+    logger.debug(f"Container config: image={runnable.container.image_uri}, entrypoint={runnable.container.entrypoint}, commands={runnable.container.commands}")
+
+    task = batch_v1.TaskSpec()
+    task.runnables = [runnable]
+    task.compute_resource = batch_v1.ComputeResource()
+    task.compute_resource.cpu_milli = 1000
+    task.compute_resource.memory_mib = 1024
+    logger.debug(f"Compute resources: CPU={task.compute_resource.cpu_milli}m, Memory={task.compute_resource.memory_mib}MiB")
+
+    task_group = batch_v1.TaskGroup()
+    task_group.task_count = 1
+    task_group.parallelism = 1
+    task_group.task_spec = task
+    logger.debug(f"Task group: count={task_group.task_count}, parallelism={task_group.parallelism}")
+
+    # Network configuration: Batch job should run in the same network as the NFS server
+    network_interface = batch_v1.AllocationPolicy.NetworkInterface()
+    network_interface.network = f"global/networks/{args.network}"
+    network_interface.subnetwork = f"regions/{args.region}/subnetworks/{args.subnet}"
+    logger.debug(f"Network: {network_interface.network}")
+    logger.debug(f"Subnet: {network_interface.subnetwork}")
+
+    network_policy = batch_v1.AllocationPolicy.NetworkPolicy()
+    network_policy.network_interfaces = [network_interface]
+
+    allocation_policy = batch_v1.AllocationPolicy()
+    allocation_policy.network = network_policy
 
-    job_spec_file = 'job.json'
-    with open(job_spec_file, 'w') as f:
-        json.dump(job_spec, f)
+    job = batch_v1.Job()
+    job.task_groups = [task_group]
+    job.allocation_policy = allocation_policy
+    job.logs_policy = batch_v1.LogsPolicy()
+    job.logs_policy.destination = batch_v1.LogsPolicy.Destination.CLOUD_LOGGING
+    logger.info("Job specification built successfully")
 
-    command = [
-        'gcloud', 'batch', 'jobs', 'submit', job_name,
-        '--location', args.region,
-        '--project', args.project,
-        '--config', job_spec_file,
-        '--format=text',
-        '--verbosity=debug'
-    ]
+    create_request = batch_v1.CreateJobRequest()
+    create_request.parent = f"projects/{project_id}/locations/{args.region}"
+    create_request.job_id = job_name
+    create_request.job = job
+    logger.debug(f"Create request parent: {create_request.parent}")
+    logger.debug(f"Create request job_id: {create_request.job_id}")
+
+    logger.info(f"Submitting job with name: {job_name}")
+    logger.info(f"Target project: {project_id}")
+    logger.info(f"Target Batch region: {args.region}")
+    logger.info(f"NFS target: {nfs_address}:2049")
 
-    # Wait 4 minutes before submitting the job
-    time.sleep(240)
+    # Proceed with job submission
+    try:
+        logger.info("Calling client.create_job()...")
+        job_response = client.create_job(request=create_request)
+        logger.info("Job submitted successfully!")
+        logger.info(f"Job name: {job_response.name}")
+        logger.info(f"Job UID: {job_response.uid}")
 
-    try:
-        result = subprocess.run(command, capture_output=True, text=True, check=True)
         with open(args.output, 'w') as f:
-            f.write("Job output:\n")
-            f.write(result.stdout)
-            f.write(result.stderr)
-    except subprocess.CalledProcessError as e:
+            f.write("Job submitted successfully using Python client.\n")
+            f.write(f"Job name: {job_name}\n")
+            f.write(f"Job response name: {job_response.name}\n")
+            f.write(f"Job UID: {job_response.uid}\n")
+            f.write(f"Project: {project_id}\n")
+            f.write(f"Region: {args.region}\n")
+            f.write(f"NFS Address: {nfs_address}:2049\n")
+
+    except Exception as e:
+        logger.error(f"Error submitting job: {type(e).__name__}: {e}")
+        logger.error(f"Error details: {str(e)}")
+        import traceback
+        logger.error("Traceback:", exc_info=True)
+
         with open(args.output, 'w') as f:
-            f.write("Error submitting job:\n")
-            f.write(e.stderr)
+            f.write(f"Error submitting job: {type(e).__name__}: {e}\n")
+            f.write(f"Error details: {str(e)}\n")
+            f.write(f"Job name: {job_name}\n")
+            f.write(f"Project: {project_id}\n")
+            f.write(f"Region: {args.region}\n")
+            f.write(f"Traceback:\n")
+            f.write(traceback.format_exc())
 
 if __name__ == '__main__':
     main()
--- a/gcp_batch_netcat.xml	Mon Jul 21 15:13:09 2025 +0000
+++ b/gcp_batch_netcat.xml	Tue Jul 22 14:47:47 2025 +0000
@@ -1,19 +1,20 @@
 <tool id="gcp_batch_netcat" name="GCP Batch Netcat" version="0.1.1">
     <description>Submit a job to GCP Batch and connect to an NFS server.</description>
     <requirements>
-        <requirement type="package" version="438.0.0">google-cloud-sdk</requirement>
-        <requirement type="package" version="0.7.1">netcat</requirement>
-        <container type="docker">afgane/gcp-batch-netcat:0.1.0</container>
+        <!-- <requirement type="package" version="529.0.0">google-cloud-sdk</requirement>
+        <requirement type="package" version="0.7.1">netcat</requirement> -->
+        <container type="docker">afgane/gcp-batch-netcat:0.2.0</container>
     </requirements>
     <command><![CDATA[
-python3 '$__tool_directory__/gcp_batch_netcat.py' --nfs_address '$nfs_address' --output '$output' --project '$project' --region '$region' --port '$port' --service_account_key '$service_account_key'
+python3 '$__tool_directory__/gcp_batch_netcat.py' --nfs_address '$nfs_address' --output '$output' --project '$project' --region '$region' --service_account_key '$service_account_key' --network '$network' --subnet '$subnet'
     ]]></command>
     <inputs>
-        <param name="nfs_address" type="text" label="NFS Server Address" optional="false"/>
-        <param name="project" type="text" label="GCP Project ID" optional="false"/>
-        <param name="region" type="text" label="GCP Region" optional="false"/>
-        <param name="port" type="integer" value="2049" label="Port"/>
+        <param name="region" type="text" label="GCP Batch Region" optional="false"/>
+        <param name="network" type="text" label="GCP Network name" optional="false"/>
+        <param name="subnet" type="text" label="GCP Subnet name" optional="false"/>
+        <param name="nfs_address" type="text" label="NFS Server Address" help="The address of the NFS server to connect to. If not provided, will be auto-detected." />
         <param name="service_account_key" type="data" format="json" label="GCP Service Account Key File" help="JSON key file for GCP service account with Batch API permissions"/>
+        <param name="project" type="text" label="GCP Project ID" help="The ID of the GCP project to use. If not provided, will be extracted from the service account key."/>
     </inputs>
     <outputs>
         <data name="output" format="txt"/>
--- a/test-data/gcp_batch_netcat_out.txt	Mon Jul 21 15:13:09 2025 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,1 +0,0 @@
-Job submitted successfully.