# HG changeset patch
# User enis
# Date 1753394397 0
# Node ID d25792770df8bbf4c2380025ea46f7b7c15714e3
# Parent b2ce158b4f22c30c0d5a0df28ae5e87ed14bbb23
planemo upload for repository https://github.com/afgane/gcp_batch_netcat commit ece227052d14d755b0d0b07a827152b2e98fb94b-dirty
diff -r b2ce158b4f22 -r d25792770df8 README.md
--- a/README.md Thu Jul 24 21:41:18 2025 +0000
+++ b/README.md Thu Jul 24 21:59:57 2025 +0000
@@ -1,13 +1,20 @@
# GCP Batch Netcat Galaxy Tool
-A Galaxy tool that submits a job to Google Cloud Platform (GCP) Batch service to test connectivity to an NFS server using `netcat`. This tool is predominantly intended for use with Galaxy deployments using the Galaxy Helm chart, where it can verify network connectivity between GCP Batch workers and NFS storage systems.
+A Galaxy tool that submits a job to Google Cloud Platform (GCP) Batch service to test network connectivity to various services, including NFS servers. This tool is predominantly intended for use with Galaxy deployments using the Galaxy Helm chart, where it can verify network connectivity between GCP Batch workers and critical services.
## Overview
-This tool creates and submits a GCP Batch job that runs a simple network connectivity test to an NFS server using `netcat` (nc). It's particularly useful for:
+This tool creates and submits a GCP Batch job that runs comprehensive network connectivity tests using various tools. It's particularly useful for:
- Testing network connectivity between GCP Batch compute nodes and NFS storage
-- Validating that firewall rules allow communication on port 2049 (NFS)
+- Validating connectivity to Galaxy web services
+- Testing Kubernetes DNS resolution and external connectivity
- Troubleshooting connectivity issues in Galaxy deployments on Kubernetes
+- Debugging firewall rules and network configuration
+
+**New Features:**
+- **Auto-Discovery**: Automatically discovers NFS LoadBalancer external IP addresses via Kubernetes API
+- **Multiple Test Types**: NFS, Galaxy web, Kubernetes DNS, Google DNS, and custom targets
+- **Enhanced Debugging**: Comprehensive network diagnostics with detailed logging
The tool is available in the Main Tool Shed at:
https://toolshed.g2.bx.psu.edu/view/enis/gcp_batch_netcat/
@@ -16,12 +23,31 @@
This tool is specifically designed for Galaxy deployments using the Galaxy Helm chart on Google Kubernetes Engine (GKE). A sample deployment can be obtained using the [galaxy-k8s-boot repository](https://github.com/galaxyproject/galaxy-k8s-boot/).
+## Auto-Discovery of NFS LoadBalancer
+
+**Important**: The tool now intelligently discovers the correct NFS IP address for external access:
+
+1. **LoadBalancer Discovery** (Primary): Uses `kubectl` to find NFS services of type LoadBalancer that have an external IP assigned
+2. **Mount Detection** (Fallback): Extracts the NFS IP from Galaxy's local mount; this is the ClusterIP, which may not be reachable from jobs running outside the cluster
+
+For reliable operation, ensure your NFS server is exposed via a LoadBalancer service. See `NFS_LOADBALANCER_GUIDE.md` for detailed setup instructions.
+
## Input Parameters Reference
The Galaxy tool interface presents the following parameters:
### Required Parameters
+#### **Test Type**
+- **Galaxy Label**: "Test Type"
+- **Options**:
+ - NFS Server (port 2049) - Default
+ - Galaxy Web Service (port 80/443)
+ - Kubernetes DNS (kubernetes.default.svc.cluster.local:443)
+ - Google DNS (8.8.8.8:53)
+ - Custom Host/Port
+- **Description**: Type of connectivity test to perform
+
#### **GCP Batch Region**
- **Galaxy Label**: "GCP Batch Region"
- **Description**: The GCP region where the Batch job will be submitted
@@ -32,7 +58,7 @@
- **Galaxy Label**: "GCP Network name"
- **Description**: The name of the GCP VPC network in which Galaxy runs
- **Examples**: `default`, `galaxy-vpc`
-- **Important**: The network must allow communication between Batch workers and the Galaxy NFS server
+- **Important**: The network must allow communication between Batch workers and the target services
#### **GCP Subnet name**
- **Galaxy Label**: "GCP Subnet name"
diff -r b2ce158b4f22 -r d25792770df8 gcp_batch_netcat.py
--- a/gcp_batch_netcat.py Thu Jul 24 21:41:18 2025 +0000
+++ b/gcp_batch_netcat.py Thu Jul 24 21:59:57 2025 +0000
@@ -14,129 +14,68 @@
)
logger = logging.getLogger(__name__)
+def discover_nfs_loadbalancer_ip():
+    """
+    Try to discover the NFS LoadBalancer IP via the Kubernetes API (kubectl).
+
+    Lists the services in the 'nfs-provisioner' namespace and looks for one
+    whose name contains 'nfs-provisioner-nfs-server-provisioner' and whose
+    type is LoadBalancer.
+
+    Returns:
+        The first external IP reported for such a service, or None when
+        kubectl fails, times out, or no matching service has an external IP.
+        Failures are logged as warnings; no exception is propagated.
+    """
+    try:
+        import subprocess
+        logger.info("Attempting to discover NFS LoadBalancer IP via kubectl...")
+        # Bound the call so an unreachable API server cannot hang the tool;
+        # subprocess.TimeoutExpired is handled by the except clause below.
+        result = subprocess.run(['kubectl', 'get', 'svc', '-n', 'nfs-provisioner', '-o', 'json'],
+                                capture_output=True, text=True, timeout=30)
+        if result.returncode != 0:
+            logger.warning(f"kubectl command failed: {result.stderr}")
+            return None
+        services = json.loads(result.stdout)
+        for item in services.get('items', []):
+            name = item.get('metadata', {}).get('name', '')
+            # Look for NFS-related service names
+            if 'nfs-provisioner-nfs-server-provisioner' not in name.lower():
+                continue
+            if item.get('spec', {}).get('type') != 'LoadBalancer':
+                continue
+            # A LoadBalancer may report several ingress points (or none while
+            # it is still provisioning); use the first one that carries an IP.
+            ingress = item.get('status', {}).get('loadBalancer', {}).get('ingress') or []
+            for entry in ingress:
+                ip = entry.get('ip')
+                if ip:
+                    logger.info(f"Found NFS LoadBalancer service '{name}' with external IP: {ip}")
+                    return ip
+        logger.warning("No NFS LoadBalancer services found via kubectl")
+    except Exception as e:
+        logger.warning(f"Could not discover NFS LoadBalancer IP via kubectl: {e}")
+    return None
+
def determine_test_target(args):
"""Determine the target host and port based on test type"""
- if args.test_type == 'custom':
- if not args.custom_host:
- raise ValueError("custom_host is required when test_type is 'custom'")
- return args.custom_host, args.custom_port
-
- elif args.test_type == 'nfs':
+ if args.test_type == 'nfs':
# Extract NFS server address if not provided
if args.nfs_address:
nfs_address = args.nfs_address
logger.info(f"Using provided NFS address: {nfs_address}")
else:
- try:
- # Try to detect NFS server from /galaxy/server/database/ mount
- import subprocess
- result = subprocess.run(['mount'], capture_output=True, text=True)
- nfs_address = None
-
- for line in result.stdout.split('\n'):
- if '/galaxy/server/database' in line and ':' in line:
- # Look for NFS mount pattern: server:/path on /galaxy/server/database
- parts = line.split()
- for part in parts:
- if ':' in part and part.count(':') == 1:
- nfs_address = part.split(':')[0]
- break
- if nfs_address:
- logger.info(f"Detected NFS address from mount: {nfs_address}")
- break
-
- if not nfs_address:
- # Fallback: try to parse /proc/mounts
- try:
- with open('/proc/mounts', 'r') as f:
- for line in f:
- if '/galaxy/server/database' in line and ':' in line:
- parts = line.split()
- if len(parts) > 0 and ':' in parts[0]:
- nfs_address = parts[0].split(':')[0]
- logger.info(f"Detected NFS address from /proc/mounts: {nfs_address}")
- break
- except:
- pass
-
- if not nfs_address:
- raise ValueError("Could not auto-detect NFS server address from /galaxy/server/database/ mount")
-
- logger.info(f"Auto-detected NFS address from mount: {nfs_address}")
- except Exception as e:
- logger.error(f"Failed to auto-detect NFS address: {e}")
- raise
+ # Try to auto-discover NFS LoadBalancer IP via Kubernetes API
+ nfs_address = discover_nfs_loadbalancer_ip()
+ if not nfs_address:
+ raise ValueError("Could not auto-detect NFS LoadBalancer IP. Please provide --nfs_address parameter with the LoadBalancer external IP.")
return nfs_address, 2049
- elif args.test_type == 'galaxy_web':
- # Try to detect Galaxy web service
- try:
- import subprocess
- result = subprocess.run(['kubectl', 'get', 'svc', '-o', 'json'], capture_output=True, text=True)
- if result.returncode == 0:
- services = json.loads(result.stdout)
- for item in services.get('items', []):
- name = item.get('metadata', {}).get('name', '')
- if 'galaxy' in name.lower() and ('web' in name.lower() or 'nginx' in name.lower()):
- # Found a Galaxy web service
- spec = item.get('spec', {})
- if spec.get('type') == 'LoadBalancer':
- ingress = item.get('status', {}).get('loadBalancer', {}).get('ingress', [])
- if ingress:
- ip = ingress[0].get('ip')
- if ip:
- port = 80
- for port_spec in spec.get('ports', []):
- if port_spec.get('port'):
- port = port_spec['port']
- break
- logger.info(f"Found Galaxy web service LoadBalancer: {ip}:{port}")
- return ip, port
- # Fallback to ClusterIP
- cluster_ip = spec.get('clusterIP')
- if cluster_ip and cluster_ip != 'None':
- port = 80
- for port_spec in spec.get('ports', []):
- if port_spec.get('port'):
- port = port_spec['port']
- break
- logger.info(f"Found Galaxy web service ClusterIP: {cluster_ip}:{port}")
- return cluster_ip, port
- except Exception as e:
- logger.warning(f"Could not auto-detect Galaxy web service: {e}")
-
- # Fallback: try common Galaxy service names
- common_hosts = ['galaxy-web', 'galaxy-nginx', 'galaxy']
- logger.info(f"Trying common Galaxy service name: {common_hosts[0]}")
- return common_hosts[0], 80
-
- elif args.test_type == 'k8s_dns':
- # Test Kubernetes DNS resolution
- return 'kubernetes.default.svc.cluster.local', 443
-
- elif args.test_type == 'google_dns':
- # Test external connectivity
- return '8.8.8.8', 53
-
else:
raise ValueError(f"Unsupported test type: {args.test_type}")
def main():
parser = argparse.ArgumentParser()
- parser.add_argument('--nfs_address', required=False, help='NFS server address (if not provided, will be auto-detected from /galaxy/server/database/ mount)')
+ parser.add_argument('--nfs_address', required=False, help='NFS server LoadBalancer IP address (if not provided, will be auto-detected via Kubernetes API)')
parser.add_argument('--output', required=True)
parser.add_argument('--project', required=False, help='GCP Project ID (if not provided, will be extracted from service account key)')
parser.add_argument('--region', required=True)
parser.add_argument('--network', default='default', help='GCP Network name')
parser.add_argument('--subnet', default='default', help='GCP Subnet name')
parser.add_argument('--service_account_key', required=True)
- parser.add_argument('--test_type', default='nfs', choices=['nfs', 'galaxy_web', 'k8s_dns', 'google_dns', 'custom'],
- help='Type of connectivity test to perform')
- parser.add_argument('--custom_host', required=False, help='Custom host to test (required if test_type is custom)')
- parser.add_argument('--custom_port', type=int, default=80, help='Custom port to test (default: 80)')
args = parser.parse_args()
+ # Default to NFS test type since that's what this tool is for
+ args.test_type = 'nfs'
+
# Set up authentication using the service account key
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = args.service_account_key
logger.info(f"Authentication configured with service account: {args.service_account_key}")
@@ -182,8 +121,7 @@
# Create a comprehensive test script
test_script = f'''#!/bin/bash
set -e
-echo "=== GCP Batch Connectivity Test ==="
-echo "Test Type: {args.test_type}"
+echo "=== GCP Batch NFS Connectivity Test ==="
echo "Target: {target_host}:{target_port}"
echo "Timestamp: $(date)"
echo "Container hostname: $(hostname)"
@@ -214,8 +152,8 @@
echo ""
# Basic connectivity test
-echo "=== Primary Connectivity Test ==="
-echo "Testing connection to {target_host}:{target_port}..."
+echo "=== Primary NFS Connectivity Test ==="
+echo "Testing connection to NFS server {target_host}:{target_port}..."
timeout 30 nc -z -v -w 10 {target_host} {target_port}
nc_result=$?
echo "Netcat result: $nc_result"
@@ -223,32 +161,31 @@
# Additional connectivity tests
echo "=== Additional Connectivity Tests ==="
-echo "Testing Google DNS (8.8.8.8:53):"
+echo "Testing external connectivity (Google DNS 8.8.8.8:53):"
timeout 10 nc -z -v -w 5 8.8.8.8 53 && echo "✓ External DNS reachable" || echo "✗ External DNS unreachable"
-echo "Testing Kubernetes API (if accessible):"
-timeout 10 nc -z -v -w 5 kubernetes.default.svc.cluster.local 443 2>/dev/null && echo "✓ Kubernetes API reachable" || echo "✗ Kubernetes API unreachable"
-
echo ""
echo "=== Network Troubleshooting ==="
echo "Route table:"
ip route
echo ""
-echo "ARP table:"
-arp -a 2>/dev/null || echo "ARP command not available"
-echo ""
echo "=== Final Result ==="
if [ $nc_result -eq 0 ]; then
- echo "✓ SUCCESS: Connection to {target_host}:{target_port} successful"
+ echo "✓ SUCCESS: Connection to NFS server {target_host}:{target_port} successful"
exit 0
else
- echo "✗ FAILED: Connection to {target_host}:{target_port} failed"
- echo "This suggests a network connectivity issue between GCP Batch and the target service."
+ echo "✗ FAILED: Connection to NFS server {target_host}:{target_port} failed"
+ echo "This suggests a network connectivity issue between GCP Batch and the NFS server."
echo "Common causes:"
- echo "- Firewall rules blocking traffic"
- echo "- Service not accessible from external networks"
- echo "- Target service only accepting internal cluster traffic"
+ echo "- Firewall rules blocking NFS traffic (port 2049)"
+ echo "- NFS service not accessible from external networks (only ClusterIP)"
+ echo "- NFS server not properly exposed via LoadBalancer"
+ echo ""
+ echo "Solutions:"
+ echo "- Ensure NFS service has type LoadBalancer with external IP"
+ echo "- Check GCP firewall rules allow traffic from Batch subnet to NFS"
+ echo "- Verify the IP address is the LoadBalancer external IP, not ClusterIP"
exit 1
fi
'''
@@ -317,8 +254,7 @@
f.write(f"Job UID: {job_response.uid}\n")
f.write(f"Project: {project_id}\n")
f.write(f"Region: {args.region}\n")
- f.write(f"Test Type: {args.test_type}\n")
- f.write(f"Target: {target_host}:{target_port}\n")
+ f.write(f"NFS Target: {target_host}:{target_port}\n")
f.write(f"\nTo view job logs, run:\n")
f.write(f"gcloud logging read 'resource.type=gce_instance AND resource.labels.instance_id={job_name}' --project={project_id}\n")
@@ -334,8 +270,7 @@
f.write(f"Job name: {job_name}\n")
f.write(f"Project: {project_id}\n")
f.write(f"Region: {args.region}\n")
- f.write(f"Test Type: {args.test_type}\n")
- f.write(f"Target: {target_host}:{target_port}\n")
+ f.write(f"NFS Target: {target_host}:{target_port}\n")
f.write(f"Traceback:\n")
f.write(traceback.format_exc())
diff -r b2ce158b4f22 -r d25792770df8 gcp_batch_netcat.xml
--- a/gcp_batch_netcat.xml Thu Jul 24 21:41:18 2025 +0000
+++ b/gcp_batch_netcat.xml Thu Jul 24 21:59:57 2025 +0000
@@ -21,7 +21,7 @@
-
+
@@ -29,45 +29,29 @@