diff gcp_batch_netcat.py @ 6:d25792770df8 draft

planemo upload for repository https://github.com/afgane/gcp_batch_netcat commit ece227052d14d755b0d0b07a827152b2e98fb94b-dirty
author enis
date Thu, 24 Jul 2025 21:59:57 +0000
parents b2ce158b4f22
children fcfb703748b1
line wrap: on
line diff
--- a/gcp_batch_netcat.py	Thu Jul 24 21:41:18 2025 +0000
+++ b/gcp_batch_netcat.py	Thu Jul 24 21:59:57 2025 +0000
@@ -14,129 +14,68 @@
 )
 logger = logging.getLogger(__name__)
 
+def discover_nfs_loadbalancer_ip():
+    """
+    Return the external IP of the NFS LoadBalancer service in the 'nfs-provisioner' namespace, queried via kubectl.
+    Best-effort: returns None if kubectl fails, exceeds the 30 s timeout, or no matching service has an ingress IP.
+    """
+    try:
+        import subprocess
+        logger.info("Attempting to discover NFS LoadBalancer IP via kubectl...")
+        result = subprocess.run(['kubectl', 'get', 'svc', '-n', 'nfs-provisioner', '-o', 'json'], capture_output=True, text=True, timeout=30)
+        if result.returncode == 0:
+            services = json.loads(result.stdout)
+            for item in services.get('items', []):
+                name = item.get('metadata', {}).get('name', '')
+                # Only services whose name contains the NFS server provisioner name are considered
+                if 'nfs-provisioner-nfs-server-provisioner' in name.lower():
+                    spec = item.get('spec', {})
+                    if spec.get('type') == 'LoadBalancer':
+                        ingress = item.get('status', {}).get('loadBalancer', {}).get('ingress', [])
+                        if ingress:
+                            ip = ingress[0].get('ip')
+                            if ip:
+                                logger.info(f"Found NFS LoadBalancer service '{name}' with external IP: {ip}")
+                                return ip
+            logger.warning("No NFS LoadBalancer services found via kubectl")
+        else:
+            logger.warning(f"kubectl command failed: {result.stderr}")
+    except Exception as e:
+        logger.warning(f"Could not discover NFS LoadBalancer IP via kubectl: {e}")
+    return None
+
 def determine_test_target(args):
     """Determine the target host and port based on test type"""
 
-    if args.test_type == 'custom':
-        if not args.custom_host:
-            raise ValueError("custom_host is required when test_type is 'custom'")
-        return args.custom_host, args.custom_port
-
-    elif args.test_type == 'nfs':
+    if args.test_type == 'nfs':
         # Extract NFS server address if not provided
         if args.nfs_address:
             nfs_address = args.nfs_address
             logger.info(f"Using provided NFS address: {nfs_address}")
         else:
-            try:
-                # Try to detect NFS server from /galaxy/server/database/ mount
-                import subprocess
-                result = subprocess.run(['mount'], capture_output=True, text=True)
-                nfs_address = None
-
-                for line in result.stdout.split('\n'):
-                    if '/galaxy/server/database' in line and ':' in line:
-                        # Look for NFS mount pattern: server:/path on /galaxy/server/database
-                        parts = line.split()
-                        for part in parts:
-                            if ':' in part and part.count(':') == 1:
-                                nfs_address = part.split(':')[0]
-                                break
-                        if nfs_address:
-                            logger.info(f"Detected NFS address from mount: {nfs_address}")
-                            break
-
-                if not nfs_address:
-                    # Fallback: try to parse /proc/mounts
-                    try:
-                        with open('/proc/mounts', 'r') as f:
-                            for line in f:
-                                if '/galaxy/server/database' in line and ':' in line:
-                                    parts = line.split()
-                                    if len(parts) > 0 and ':' in parts[0]:
-                                        nfs_address = parts[0].split(':')[0]
-                                        logger.info(f"Detected NFS address from /proc/mounts: {nfs_address}")
-                                        break
-                    except:
-                        pass
-
-                if not nfs_address:
-                    raise ValueError("Could not auto-detect NFS server address from /galaxy/server/database/ mount")
-
-                logger.info(f"Auto-detected NFS address from mount: {nfs_address}")
-            except Exception as e:
-                logger.error(f"Failed to auto-detect NFS address: {e}")
-                raise
+            # Try to auto-discover NFS LoadBalancer IP via Kubernetes API
+            nfs_address = discover_nfs_loadbalancer_ip()
+            if not nfs_address:
+                raise ValueError("Could not auto-detect NFS LoadBalancer IP. Please provide --nfs_address parameter with the LoadBalancer external IP.")
         return nfs_address, 2049
 
-    elif args.test_type == 'galaxy_web':
-        # Try to detect Galaxy web service
-        try:
-            import subprocess
-            result = subprocess.run(['kubectl', 'get', 'svc', '-o', 'json'], capture_output=True, text=True)
-            if result.returncode == 0:
-                services = json.loads(result.stdout)
-                for item in services.get('items', []):
-                    name = item.get('metadata', {}).get('name', '')
-                    if 'galaxy' in name.lower() and ('web' in name.lower() or 'nginx' in name.lower()):
-                        # Found a Galaxy web service
-                        spec = item.get('spec', {})
-                        if spec.get('type') == 'LoadBalancer':
-                            ingress = item.get('status', {}).get('loadBalancer', {}).get('ingress', [])
-                            if ingress:
-                                ip = ingress[0].get('ip')
-                                if ip:
-                                    port = 80
-                                    for port_spec in spec.get('ports', []):
-                                        if port_spec.get('port'):
-                                            port = port_spec['port']
-                                            break
-                                    logger.info(f"Found Galaxy web service LoadBalancer: {ip}:{port}")
-                                    return ip, port
-                        # Fallback to ClusterIP
-                        cluster_ip = spec.get('clusterIP')
-                        if cluster_ip and cluster_ip != 'None':
-                            port = 80
-                            for port_spec in spec.get('ports', []):
-                                if port_spec.get('port'):
-                                    port = port_spec['port']
-                                    break
-                            logger.info(f"Found Galaxy web service ClusterIP: {cluster_ip}:{port}")
-                            return cluster_ip, port
-        except Exception as e:
-            logger.warning(f"Could not auto-detect Galaxy web service: {e}")
-
-        # Fallback: try common Galaxy service names
-        common_hosts = ['galaxy-web', 'galaxy-nginx', 'galaxy']
-        logger.info(f"Trying common Galaxy service name: {common_hosts[0]}")
-        return common_hosts[0], 80
-
-    elif args.test_type == 'k8s_dns':
-        # Test Kubernetes DNS resolution
-        return 'kubernetes.default.svc.cluster.local', 443
-
-    elif args.test_type == 'google_dns':
-        # Test external connectivity
-        return '8.8.8.8', 53
-
     else:
         raise ValueError(f"Unsupported test type: {args.test_type}")
 
 def main():
     parser = argparse.ArgumentParser()
-    parser.add_argument('--nfs_address', required=False, help='NFS server address (if not provided, will be auto-detected from /galaxy/server/database/ mount)')
+    parser.add_argument('--nfs_address', required=False, help='NFS server LoadBalancer IP address (if not provided, will be auto-detected via Kubernetes API)')
     parser.add_argument('--output', required=True)
     parser.add_argument('--project', required=False, help='GCP Project ID (if not provided, will be extracted from service account key)')
     parser.add_argument('--region', required=True)
     parser.add_argument('--network', default='default', help='GCP Network name')
     parser.add_argument('--subnet', default='default', help='GCP Subnet name')
     parser.add_argument('--service_account_key', required=True)
-    parser.add_argument('--test_type', default='nfs', choices=['nfs', 'galaxy_web', 'k8s_dns', 'google_dns', 'custom'],
-                       help='Type of connectivity test to perform')
-    parser.add_argument('--custom_host', required=False, help='Custom host to test (required if test_type is custom)')
-    parser.add_argument('--custom_port', type=int, default=80, help='Custom port to test (default: 80)')
     args = parser.parse_args()
 
+    # NFS is the only supported test; set test_type explicitly now that the --test_type option is gone
+    args.test_type = 'nfs'
+
     # Set up authentication using the service account key
     os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = args.service_account_key
     logger.info(f"Authentication configured with service account: {args.service_account_key}")
@@ -182,8 +121,7 @@
     # Create a comprehensive test script
     test_script = f'''#!/bin/bash
 set -e
-echo "=== GCP Batch Connectivity Test ==="
-echo "Test Type: {args.test_type}"
+echo "=== GCP Batch NFS Connectivity Test ==="
 echo "Target: {target_host}:{target_port}"
 echo "Timestamp: $(date)"
 echo "Container hostname: $(hostname)"
@@ -214,8 +152,8 @@
 echo ""
 
 # Basic connectivity test
-echo "=== Primary Connectivity Test ==="
-echo "Testing connection to {target_host}:{target_port}..."
+echo "=== Primary NFS Connectivity Test ==="
+echo "Testing connection to NFS server {target_host}:{target_port}..."
 timeout 30 nc -z -v -w 10 {target_host} {target_port}
 nc_result=$?
 echo "Netcat result: $nc_result"
@@ -223,32 +161,31 @@
 
 # Additional connectivity tests
 echo "=== Additional Connectivity Tests ==="
-echo "Testing Google DNS (8.8.8.8:53):"
+echo "Testing external connectivity (Google DNS 8.8.8.8:53):"
 timeout 10 nc -z -v -w 5 8.8.8.8 53 && echo "✓ External DNS reachable" || echo "✗ External DNS unreachable"
 
-echo "Testing Kubernetes API (if accessible):"
-timeout 10 nc -z -v -w 5 kubernetes.default.svc.cluster.local 443 2>/dev/null && echo "✓ Kubernetes API reachable" || echo "✗ Kubernetes API unreachable"
-
 echo ""
 echo "=== Network Troubleshooting ==="
 echo "Route table:"
 ip route
 echo ""
-echo "ARP table:"
-arp -a 2>/dev/null || echo "ARP command not available"
-echo ""
 
 echo "=== Final Result ==="
 if [ $nc_result -eq 0 ]; then
-    echo "✓ SUCCESS: Connection to {target_host}:{target_port} successful"
+    echo "✓ SUCCESS: Connection to NFS server {target_host}:{target_port} successful"
     exit 0
 else
-    echo "✗ FAILED: Connection to {target_host}:{target_port} failed"
-    echo "This suggests a network connectivity issue between GCP Batch and the target service."
+    echo "✗ FAILED: Connection to NFS server {target_host}:{target_port} failed"
+    echo "This suggests a network connectivity issue between GCP Batch and the NFS server."
     echo "Common causes:"
-    echo "- Firewall rules blocking traffic"
-    echo "- Service not accessible from external networks"
-    echo "- Target service only accepting internal cluster traffic"
+    echo "- Firewall rules blocking NFS traffic (port 2049)"
+    echo "- NFS service not accessible from external networks (only ClusterIP)"
+    echo "- NFS server not properly exposed via LoadBalancer"
+    echo ""
+    echo "Solutions:"
+    echo "- Ensure NFS service has type LoadBalancer with external IP"
+    echo "- Check GCP firewall rules allow traffic from Batch subnet to NFS"
+    echo "- Verify the IP address is the LoadBalancer external IP, not ClusterIP"
     exit 1
 fi
 '''
@@ -317,8 +254,7 @@
             f.write(f"Job UID: {job_response.uid}\n")
             f.write(f"Project: {project_id}\n")
             f.write(f"Region: {args.region}\n")
-            f.write(f"Test Type: {args.test_type}\n")
-            f.write(f"Target: {target_host}:{target_port}\n")
+            f.write(f"NFS Target: {target_host}:{target_port}\n")
             f.write(f"\nTo view job logs, run:\n")
             f.write(f"gcloud logging read 'resource.type=gce_instance AND resource.labels.instance_id={job_name}' --project={project_id}\n")
 
@@ -334,8 +270,7 @@
             f.write(f"Job name: {job_name}\n")
             f.write(f"Project: {project_id}\n")
             f.write(f"Region: {args.region}\n")
-            f.write(f"Test Type: {args.test_type}\n")
-            f.write(f"Target: {target_host}:{target_port}\n")
+            f.write(f"NFS Target: {target_host}:{target_port}\n")
             f.write(f"Traceback:\n")
             f.write(traceback.format_exc())