Mercurial > repos > enis > gcp_batch_netcat
diff gcp_batch_netcat.py @ 6:d25792770df8 draft
planemo upload for repository https://github.com/afgane/gcp_batch_netcat commit ece227052d14d755b0d0b07a827152b2e98fb94b-dirty
author | enis |
---|---|
date | Thu, 24 Jul 2025 21:59:57 +0000 |
parents | b2ce158b4f22 |
children | fcfb703748b1 |
line wrap: on
line diff
--- a/gcp_batch_netcat.py Thu Jul 24 21:41:18 2025 +0000 +++ b/gcp_batch_netcat.py Thu Jul 24 21:59:57 2025 +0000 @@ -14,129 +14,68 @@ ) logger = logging.getLogger(__name__) +def discover_nfs_loadbalancer_ip(): + """ + Try to discover NFS LoadBalancer IP via Kubernetes API + Returns the external IP if found, None otherwise + """ + try: + import subprocess + logger.info("Attempting to discover NFS LoadBalancer IP via kubectl...") + result = subprocess.run(['kubectl', 'get', 'svc', '-n', 'nfs-provisioner', '-o', 'json'], capture_output=True, text=True) + if result.returncode == 0: + services = json.loads(result.stdout) + for item in services.get('items', []): + name = item.get('metadata', {}).get('name', '') + # Look for NFS-related service names + if any(keyword in name.lower() for keyword in ['nfs-provisioner-nfs-server-provisioner']): + spec = item.get('spec', {}) + if spec.get('type') == 'LoadBalancer': + ingress = item.get('status', {}).get('loadBalancer', {}).get('ingress', []) + if ingress: + ip = ingress[0].get('ip') + if ip: + logger.info(f"Found NFS LoadBalancer service '{name}' with external IP: {ip}") + return ip + logger.warning("No NFS LoadBalancer services found via kubectl") + else: + logger.warning(f"kubectl command failed: {result.stderr}") + except Exception as e: + logger.warning(f"Could not discover NFS LoadBalancer IP via kubectl: {e}") + return None + def determine_test_target(args): """Determine the target host and port based on test type""" - if args.test_type == 'custom': - if not args.custom_host: - raise ValueError("custom_host is required when test_type is 'custom'") - return args.custom_host, args.custom_port - - elif args.test_type == 'nfs': + if args.test_type == 'nfs': # Extract NFS server address if not provided if args.nfs_address: nfs_address = args.nfs_address logger.info(f"Using provided NFS address: {nfs_address}") else: - try: - # Try to detect NFS server from /galaxy/server/database/ mount - import subprocess - result = subprocess.run(['mount'], capture_output=True, text=True) - nfs_address = None - - for line in result.stdout.split('\n'): - if '/galaxy/server/database' in line and ':' in line: - # Look for NFS mount pattern: server:/path on /galaxy/server/database - parts = line.split() - for part in parts: - if ':' in part and part.count(':') == 1: - nfs_address = part.split(':')[0] - break - if nfs_address: - logger.info(f"Detected NFS address from mount: {nfs_address}") - break - - if not nfs_address: - # Fallback: try to parse /proc/mounts - try: - with open('/proc/mounts', 'r') as f: - for line in f: - if '/galaxy/server/database' in line and ':' in line: - parts = line.split() - if len(parts) > 0 and ':' in parts[0]: - nfs_address = parts[0].split(':')[0] - logger.info(f"Detected NFS address from /proc/mounts: {nfs_address}") - break - except: - pass - - if not nfs_address: - raise ValueError("Could not auto-detect NFS server address from /galaxy/server/database/ mount") - - logger.info(f"Auto-detected NFS address from mount: {nfs_address}") - except Exception as e: - logger.error(f"Failed to auto-detect NFS address: {e}") - raise + # Try to auto-discover NFS LoadBalancer IP via Kubernetes API + nfs_address = discover_nfs_loadbalancer_ip() + if not nfs_address: + raise ValueError("Could not auto-detect NFS LoadBalancer IP. Please provide --nfs_address parameter with the LoadBalancer external IP.") return nfs_address, 2049 - elif args.test_type == 'galaxy_web': - # Try to detect Galaxy web service - try: - import subprocess - result = subprocess.run(['kubectl', 'get', 'svc', '-o', 'json'], capture_output=True, text=True) - if result.returncode == 0: - services = json.loads(result.stdout) - for item in services.get('items', []): - name = item.get('metadata', {}).get('name', '') - if 'galaxy' in name.lower() and ('web' in name.lower() or 'nginx' in name.lower()): - # Found a Galaxy web service - spec = item.get('spec', {}) - if spec.get('type') == 'LoadBalancer': - ingress = item.get('status', {}).get('loadBalancer', {}).get('ingress', []) - if ingress: - ip = ingress[0].get('ip') - if ip: - port = 80 - for port_spec in spec.get('ports', []): - if port_spec.get('port'): - port = port_spec['port'] - break - logger.info(f"Found Galaxy web service LoadBalancer: {ip}:{port}") - return ip, port - # Fallback to ClusterIP - cluster_ip = spec.get('clusterIP') - if cluster_ip and cluster_ip != 'None': - port = 80 - for port_spec in spec.get('ports', []): - if port_spec.get('port'): - port = port_spec['port'] - break - logger.info(f"Found Galaxy web service ClusterIP: {cluster_ip}:{port}") - return cluster_ip, port - except Exception as e: - logger.warning(f"Could not auto-detect Galaxy web service: {e}") - - # Fallback: try common Galaxy service names - common_hosts = ['galaxy-web', 'galaxy-nginx', 'galaxy'] - logger.info(f"Trying common Galaxy service name: {common_hosts[0]}") - return common_hosts[0], 80 - - elif args.test_type == 'k8s_dns': - # Test Kubernetes DNS resolution - return 'kubernetes.default.svc.cluster.local', 443 - - elif args.test_type == 'google_dns': - # Test external connectivity - return '8.8.8.8', 53 - else: raise ValueError(f"Unsupported test type: {args.test_type}") def main(): parser = argparse.ArgumentParser() - parser.add_argument('--nfs_address', required=False, help='NFS server address (if not provided, will be auto-detected from /galaxy/server/database/ mount)') + parser.add_argument('--nfs_address', required=False, help='NFS server LoadBalancer IP address (if not provided, will be auto-detected via Kubernetes API)') parser.add_argument('--output', required=True) parser.add_argument('--project', required=False, help='GCP Project ID (if not provided, will be extracted from service account key)') parser.add_argument('--region', required=True) parser.add_argument('--network', default='default', help='GCP Network name') parser.add_argument('--subnet', default='default', help='GCP Subnet name') parser.add_argument('--service_account_key', required=True) - parser.add_argument('--test_type', default='nfs', choices=['nfs', 'galaxy_web', 'k8s_dns', 'google_dns', 'custom'], - help='Type of connectivity test to perform') - parser.add_argument('--custom_host', required=False, help='Custom host to test (required if test_type is custom)') - parser.add_argument('--custom_port', type=int, default=80, help='Custom port to test (default: 80)') args = parser.parse_args() + # Default to NFS test type since that's what this tool is for + args.test_type = 'nfs' + # Set up authentication using the service account key os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = args.service_account_key logger.info(f"Authentication configured with service account: {args.service_account_key}") @@ -182,8 +121,7 @@ # Create a comprehensive test script test_script = f'''#!/bin/bash set -e -echo "=== GCP Batch Connectivity Test ===" -echo "Test Type: {args.test_type}" +echo "=== GCP Batch NFS Connectivity Test ===" echo "Target: {target_host}:{target_port}" echo "Timestamp: $(date)" echo "Container hostname: $(hostname)" @@ -214,8 +152,8 @@ echo "" # Basic connectivity test -echo "=== Primary Connectivity Test ===" -echo "Testing connection to {target_host}:{target_port}..." +echo "=== Primary NFS Connectivity Test ===" +echo "Testing connection to NFS server {target_host}:{target_port}..." timeout 30 nc -z -v -w 10 {target_host} {target_port} nc_result=$? echo "Netcat result: $nc_result" @@ -223,32 +161,31 @@ # Additional connectivity tests echo "=== Additional Connectivity Tests ===" -echo "Testing Google DNS (8.8.8.8:53):" +echo "Testing external connectivity (Google DNS 8.8.8.8:53):" timeout 10 nc -z -v -w 5 8.8.8.8 53 && echo "✓ External DNS reachable" || echo "✗ External DNS unreachable" -echo "Testing Kubernetes API (if accessible):" -timeout 10 nc -z -v -w 5 kubernetes.default.svc.cluster.local 443 2>/dev/null && echo "✓ Kubernetes API reachable" || echo "✗ Kubernetes API unreachable" - echo "" echo "=== Network Troubleshooting ===" echo "Route table:" ip route echo "" -echo "ARP table:" -arp -a 2>/dev/null || echo "ARP command not available" -echo "" echo "=== Final Result ===" if [ $nc_result -eq 0 ]; then - echo "✓ SUCCESS: Connection to {target_host}:{target_port} successful" + echo "✓ SUCCESS: Connection to NFS server {target_host}:{target_port} successful" exit 0 else - echo "✗ FAILED: Connection to {target_host}:{target_port} failed" - echo "This suggests a network connectivity issue between GCP Batch and the target service." + echo "✗ FAILED: Connection to NFS server {target_host}:{target_port} failed" + echo "This suggests a network connectivity issue between GCP Batch and the NFS server." echo "Common causes:" - echo "- Firewall rules blocking traffic" - echo "- Service not accessible from external networks" - echo "- Target service only accepting internal cluster traffic" + echo "- Firewall rules blocking NFS traffic (port 2049)" + echo "- NFS service not accessible from external networks (only ClusterIP)" + echo "- NFS server not properly exposed via LoadBalancer" + echo "" + echo "Solutions:" + echo "- Ensure NFS service has type LoadBalancer with external IP" + echo "- Check GCP firewall rules allow traffic from Batch subnet to NFS" + echo "- Verify the IP address is the LoadBalancer external IP, not ClusterIP" exit 1 fi ''' @@ -317,8 +254,7 @@ f.write(f"Job UID: {job_response.uid}\n") f.write(f"Project: {project_id}\n") f.write(f"Region: {args.region}\n") - f.write(f"Test Type: {args.test_type}\n") - f.write(f"Target: {target_host}:{target_port}\n") + f.write(f"NFS Target: {target_host}:{target_port}\n") f.write(f"\nTo view job logs, run:\n") f.write(f"gcloud logging read 'resource.type=gce_instance AND resource.labels.instance_id={job_name}' --project={project_id}\n") @@ -334,8 +270,7 @@ f.write(f"Job name: {job_name}\n") f.write(f"Project: {project_id}\n") f.write(f"Region: {args.region}\n") - f.write(f"Test Type: {args.test_type}\n") - f.write(f"Target: {target_host}:{target_port}\n") + f.write(f"NFS Target: {target_host}:{target_port}\n") f.write(f"Traceback:\n") f.write(traceback.format_exc())