comparison gcp_batch_netcat.py @ 9:3fd12035e6c9 draft

planemo upload for repository https://github.com/afgane/gcp_batch_netcat commit 4194f0defc4bebd6c95c76b58860c9ae6b9d1899-dirty
author enis
date Sun, 10 Aug 2025 21:32:04 +0000
parents fcfb703748b1
children cdfa2e1a7ef4
comparison
equal deleted inserted replaced
8:7c660a6be068 9:3fd12035e6c9
79 # Create Batch client 79 # Create Batch client
80 logger.info("Creating Batch client...") 80 logger.info("Creating Batch client...")
81 client = batch_v1.BatchServiceClient() 81 client = batch_v1.BatchServiceClient()
82 logger.info("Batch client created successfully") 82 logger.info("Batch client created successfully")
83 83
84 # Define the job using the Python client library objects
85 logger.info("Building job specification...")
86 runnable = batch_v1.Runnable()
87 runnable.container = batch_v1.Runnable.Container()
88 runnable.container.image_uri = "afgane/gcp-batch-netcat:0.2.0"
89
90 # Create a comprehensive test script 84 # Create a comprehensive test script
91 test_script = f'''#!/bin/bash 85 test_script = f'''#!/bin/bash
92 set -e 86 set -e
93 echo "=== GCP Batch NFS Connectivity Test ===" 87 echo "=== GCP Batch NFS Connectivity Test ==="
94 echo "Target: {target_host}:{target_port}" 88 echo "Target: {target_host}:{target_port}"
95 echo "Timestamp: $(date)" 89 echo "Timestamp: $(date)"
96 echo "Container hostname: $(hostname)" 90 echo "Container hostname: $(hostname)"
91 echo "Host VM Image: galaxy-k8s-boot-v2025-08-10"
92 echo "Container Image: afgane/gcp-batch-netcat:0.2.0"
93 echo ""
94
95 # Basic system info
96 echo "=== System Information ==="
97 echo "OS Release:"
98 cat /etc/os-release | head -5 2>/dev/null || echo "OS release info not available"
99 echo "Kernel version:"
100 uname -r
101 echo "Architecture:"
102 uname -m
97 echo "" 103 echo ""
98 104
99 # Basic network info 105 # Basic network info
100 echo "=== Network Information ===" 106 echo "=== Network Information ==="
101 echo "Container IP addresses:" 107 echo "Container IP addresses:"
126 timeout 30 nc -z -v -w 10 {target_host} {target_port} 132 timeout 30 nc -z -v -w 10 {target_host} {target_port}
127 nc_result=$? 133 nc_result=$?
128 echo "Netcat result: $nc_result" 134 echo "Netcat result: $nc_result"
129 echo "" 135 echo ""
130 136
137 # NFS client capabilities
138 echo "=== NFS Client Information ==="
139 echo "NFS client version:"
140 /sbin/mount.nfs -V 2>/dev/null || echo "mount.nfs not available"
141 echo "RPC services:"
142 rpcinfo -p 2>/dev/null || echo "rpcinfo not available"
143 echo ""
144
131 # Additional connectivity tests 145 # Additional connectivity tests
132 echo "=== Additional Connectivity Tests ===" 146 echo "=== Additional Connectivity Tests ==="
133 echo "Testing external connectivity (Google DNS 8.8.8.8:53):" 147 echo "Testing external connectivity (Google DNS 8.8.8.8:53):"
134 timeout 10 nc -z -v -w 5 8.8.8.8 53 && echo "✓ External DNS reachable" || echo "✗ External DNS unreachable" 148 timeout 10 nc -z -v -w 5 8.8.8.8 53 && echo "✓ External DNS reachable" || echo "✗ External DNS unreachable"
135 149
137 echo "=== Network Troubleshooting ===" 151 echo "=== Network Troubleshooting ==="
138 echo "Route table:" 152 echo "Route table:"
139 ip route 153 ip route
140 echo "" 154 echo ""
141 155
156 # NFS Mount Test
157 echo "=== NFS Mount Test ==="
158 MOUNT_POINT="/tmp/nfs_test_mount"
159 echo "Creating mount point: $MOUNT_POINT"
160 mkdir -p "$MOUNT_POINT"
161
162 echo "Attempting to mount NFS share..."
163 echo "Command: mount -t nfs -o vers=3,tcp {target_host}:/ $MOUNT_POINT"
164
165 # Try mounting the NFS share
166 mount_result=1
167 if mount -t nfs -o vers=3,tcp {target_host}:/ "$MOUNT_POINT" 2>&1; then
168 mount_result=0
169 echo "✓ NFS mount successful!"
170
171 echo ""
172 echo "=== NFS Share Contents ==="
173 echo "Long listing of NFS share root:"
174 ls -la "$MOUNT_POINT" 2>/dev/null || echo "Could not list directory contents"
175
176 echo ""
177 echo "Disk usage of NFS share:"
178 df -h "$MOUNT_POINT" 2>/dev/null || echo "Could not get disk usage"
179
180 echo ""
181 echo "Mount information:"
182 mount | grep "$MOUNT_POINT" || echo "Mount info not found"
183
184 # Try to find common Galaxy directories
185 echo ""
186 echo "=== Looking for Galaxy directories ==="
187 for dir in "database" "database/files" "database/objects" "tools" "shed_tools"; do
188 if [ -d "$MOUNT_POINT/$dir" ]; then
189 echo "✓ Found: $dir"
190 ls -la "$MOUNT_POINT/$dir" | head -10
191 else
192 echo "✗ Not found: $dir"
193 fi
194 done
195
196 echo ""
197 echo "Unmounting NFS share..."
198 umount "$MOUNT_POINT" 2>/dev/null && echo "✓ Unmount successful" || echo "✗ Unmount failed"
199 else
200 echo "✗ NFS mount failed"
201 echo "Mount error details above"
202
203 # Try alternative mount options
204 echo ""
205 echo "Trying alternative NFS mount options..."
206 echo "Command: mount -t nfs -o vers=4,tcp {target_host}:/ $MOUNT_POINT"
207 if mount -t nfs -o vers=4,tcp {target_host}:/ "$MOUNT_POINT" 2>&1; then
208 mount_result=0
209 echo "✓ NFS v4 mount successful!"
210 ls -la "$MOUNT_POINT" 2>/dev/null || echo "Could not list directory contents"
211 umount "$MOUNT_POINT" 2>/dev/null && echo "✓ Unmount successful" || echo "✗ Unmount failed"
212 else
213 echo "✗ NFS v4 mount also failed"
214 fi
215 fi
216
217 # CVMFS Mount Test
218 echo ""
219 echo "=== CVMFS Access Test ==="
220 echo "Checking if CVMFS is bind-mounted from host VM..."
221 if [ -d "/cvmfs" ]; then
222 echo "✓ /cvmfs directory exists (bind-mounted from host)"
223 ls -la /cvmfs 2>/dev/null || echo "Could not list /cvmfs contents"
224
225 echo ""
226 echo "Checking for Galaxy CVMFS repository..."
227 cvmfs_result=1
228 if [ -d "/cvmfs/data.galaxyproject.org" ]; then
229 cvmfs_result=0
230 echo "✓ Galaxy CVMFS repository accessible!"
231
232 echo ""
233 echo "=== CVMFS Repository Contents ==="
234 echo "Long listing of CVMFS repository root:"
235 ls -la "/cvmfs/data.galaxyproject.org" 2>/dev/null | head -10 || echo "Could not list directory contents"
236
237 echo ""
238 echo "Checking for Galaxy reference data directories:"
239 for dir in "byhand" "location" "tool-data" "genomes"; do
240 if [ -d "/cvmfs/data.galaxyproject.org/$dir" ]; then
241 echo "✓ Found CVMFS directory: $dir"
242 ls "/cvmfs/data.galaxyproject.org/$dir" | head -5 2>/dev/null || echo "Could not list contents"
243 else
244 echo "✗ Not found: $dir"
245 fi
246 done
247
248 echo ""
249 echo "CVMFS mount information from host:"
250 mount | grep cvmfs || echo "CVMFS mount info not visible from container"
251 else
252 echo "✗ Galaxy CVMFS repository not found at /cvmfs/data.galaxyproject.org"
253 echo "This may indicate:"
254 echo "- CVMFS client not running on host VM"
255 echo "- Repository not mounted on host"
256 echo "- Bind mount not properly configured"
257 fi
258 else
259 echo "✗ /cvmfs directory not found"
260 echo "This indicates the bind mount from host VM failed"
261 echo "Expected: /cvmfs from host VM bind-mounted into container"
262 fi
263
264 echo ""
142 echo "=== Final Result ===" 265 echo "=== Final Result ==="
143 if [ $nc_result -eq 0 ]; then 266 if [ $nc_result -eq 0 ] && [ $mount_result -eq 0 ]; then
144 echo "✓ SUCCESS: Connection to NFS server {target_host}:{target_port} successful" 267 echo "✓ SUCCESS: Both network connectivity and NFS mount to {target_host}:{target_port} successful"
268 if [ $cvmfs_result -eq 0 ]; then
269 echo "✓ BONUS: CVMFS repository mount also successful"
270 else
271 echo "ℹ INFO: CVMFS mount failed (may not be available in this image)"
272 fi
145 exit 0 273 exit 0
274 elif [ $nc_result -eq 0 ]; then
275 echo "⚠ PARTIAL SUCCESS: Network connectivity successful but NFS mount failed"
276 echo "Network connection to {target_host}:{target_port} works, but NFS mounting failed."
277 echo "This suggests:"
278 echo "- NFS server is reachable but may not be properly configured"
279 echo "- NFS export permissions may be incorrect"
280 echo "- NFS version mismatch (tried NFSv3 and NFSv4)"
281 echo "- Firewall may allow port 2049 but block other NFS ports (111, 20048)"
282 if [ $cvmfs_result -eq 0 ]; then
283 echo "✓ CVMFS repository mount was successful"
284 fi
285 exit 1
146 else 286 else
147 echo "✗ FAILED: Connection to NFS server {target_host}:{target_port} failed" 287 echo "✗ FAILED: Network connectivity to NFS server {target_host}:{target_port} failed"
148 echo "This suggests a network connectivity issue between GCP Batch and the NFS server." 288 echo "This suggests a network connectivity issue between GCP Batch and the NFS server."
149 echo "Common causes:" 289 echo "Common causes:"
150 echo "- Firewall rules blocking NFS traffic (port 2049)" 290 echo "- Firewall rules blocking NFS traffic (port 2049)"
151 echo "- NFS service not accessible from external networks (only ClusterIP)" 291 echo "- NFS service not accessible from external networks (only ClusterIP)"
152 echo "- NFS server not properly exposed via LoadBalancer" 292 echo "- NFS server not properly exposed via LoadBalancer"
153 echo "" 293 echo ""
154 echo "Solutions:" 294 echo "Solutions:"
155 echo "- Ensure NFS service has type LoadBalancer with external IP" 295 echo "- Ensure NFS service has type LoadBalancer with external IP"
156 echo "- Check GCP firewall rules allow traffic from Batch subnet to NFS" 296 echo "- Check GCP firewall rules allow traffic from Batch subnet to NFS"
157 echo "- Verify the IP address is the LoadBalancer external IP, not ClusterIP" 297 echo "- Verify the IP address is the LoadBalancer external IP, not ClusterIP"
298 if [ $cvmfs_result -eq 0 ]; then
299 echo ""
300 echo "✓ CVMFS repository mount was successful (good network connectivity to external services)"
301 fi
158 exit 1 302 exit 1
159 fi 303 fi
160 ''' 304 '''
161 305
306 # Define the job using the Python client library objects
307 logger.info("Building job specification...")
308 runnable = batch_v1.Runnable()
309 runnable.container = batch_v1.Runnable.Container()
310 runnable.container.image_uri = "afgane/gcp-batch-netcat:0.2.0"
311
312 # Bind mount /cvmfs from the host VM (which has CVMFS client) into the container
313 cvmfs_volume = batch_v1.Volume()
314 cvmfs_volume.host_path = "/cvmfs"
315 cvmfs_volume.mount_path = "/cvmfs"
316 runnable.container.volumes = [cvmfs_volume]
317
162 runnable.container.entrypoint = "/bin/bash" 318 runnable.container.entrypoint = "/bin/bash"
163 runnable.container.commands = ["-c", test_script] 319 runnable.container.commands = ["-c", test_script]
164 logger.debug(f"Container config: image={runnable.container.image_uri}, entrypoint={runnable.container.entrypoint}") 320 logger.debug(f"Container config: image={runnable.container.image_uri}, with /cvmfs bind mount from custom VM")
165 321
166 task = batch_v1.TaskSpec() 322 task = batch_v1.TaskSpec()
167 task.runnables = [runnable] 323 task.runnables = [runnable]
168 task.compute_resource = batch_v1.ComputeResource() 324 task.compute_resource = batch_v1.ComputeResource()
169 task.compute_resource.cpu_milli = 1000 325 task.compute_resource.cpu_milli = 1000
184 logger.debug(f"Subnet: {network_interface.subnetwork}") 340 logger.debug(f"Subnet: {network_interface.subnetwork}")
185 341
186 network_policy = batch_v1.AllocationPolicy.NetworkPolicy() 342 network_policy = batch_v1.AllocationPolicy.NetworkPolicy()
187 network_policy.network_interfaces = [network_interface] 343 network_policy.network_interfaces = [network_interface]
188 344
345 # Instance policy with custom VM image
346 instance_policy = batch_v1.AllocationPolicy.InstancePolicy()
347 instance_policy.machine_type = "e2-medium" # Specify machine type for custom image
348 instance_policy.boot_disk = batch_v1.AllocationPolicy.Disk()
349 instance_policy.boot_disk.image = f"projects/{project_id}/global/images/galaxy-k8s-boot-v2025-08-10"
350 instance_policy.boot_disk.size_gb = 99
351 logger.debug(f"Using custom VM image: {instance_policy.boot_disk.image}")
352
189 allocation_policy = batch_v1.AllocationPolicy() 353 allocation_policy = batch_v1.AllocationPolicy()
190 allocation_policy.network = network_policy 354 allocation_policy.network = network_policy
355 allocation_policy.instances = [instance_policy]
191 356
192 job = batch_v1.Job() 357 job = batch_v1.Job()
193 job.task_groups = [task_group] 358 job.task_groups = [task_group]
194 job.allocation_policy = allocation_policy 359 job.allocation_policy = allocation_policy
195 job.logs_policy = batch_v1.LogsPolicy() 360 job.logs_policy = batch_v1.LogsPolicy()