Mercurial > repos > enis > gcp_batch_netcat
comparison gcp_batch_netcat.py @ 11:fe0bf22037a5 draft
planemo upload for repository https://github.com/afgane/gcp_batch_netcat commit f730cbb207e028a5d4fd982fe65ece7345af4879
author | enis |
---|---|
date | Thu, 14 Aug 2025 16:39:36 +0000 |
parents | cdfa2e1a7ef4 |
children |
comparison
equal
deleted
inserted
replaced
10:cdfa2e1a7ef4 | 11:fe0bf22037a5 |
---|---|
86 set -e | 86 set -e |
87 echo "=== GCP Batch NFS Connectivity Test ===" | 87 echo "=== GCP Batch NFS Connectivity Test ===" |
88 echo "Target: {target_host}:{target_port}" | 88 echo "Target: {target_host}:{target_port}" |
89 echo "Timestamp: $(date)" | 89 echo "Timestamp: $(date)" |
90 echo "Container hostname: $(hostname)" | 90 echo "Container hostname: $(hostname)" |
91 echo "Host VM Image: galaxy-k8s-boot-v2025-08-10" | 91 echo "Host VM Image: galaxy-k8s-boot-v2025-08-12" |
92 echo "Container Image: afgane/gcp-batch-netcat:0.3.0" | 92 echo "Container Image: afgane/gcp-batch-netcat:0.3.0" |
93 echo "" | 93 echo "" |
94 | 94 |
95 # Basic system info | 95 # Basic system info |
96 echo "=== System Information ===" | 96 echo "=== System Information ===" |
151 echo "=== Network Troubleshooting ===" | 151 echo "=== Network Troubleshooting ===" |
152 echo "Route table:" | 152 echo "Route table:" |
153 ip route | 153 ip route |
154 echo "" | 154 echo "" |
155 | 155 |
156 # NFS Mount Test | 156 # NFS Mount Test - Check if Batch mounted it for us |
157 echo "=== NFS Mount Test ===" | 157 echo "=== NFS Mount Test (via Batch Volume) ===" |
158 MOUNT_POINT="/tmp/nfs_test_mount" | 158 NFS_MOUNT_POINT="/mnt/nfs" |
159 echo "Creating mount point: $MOUNT_POINT" | |
160 mkdir -p "$MOUNT_POINT" | |
161 | |
162 echo "Attempting to mount NFS share..." | |
163 echo "Command: mount -t nfs -o vers=3,tcp {target_host}:/ $MOUNT_POINT" | |
164 | |
165 # Try mounting the NFS share | |
166 mount_result=1 | 159 mount_result=1 |
167 if mount -t nfs -o vers=3,tcp {target_host}:/ "$MOUNT_POINT" 2>&1; then | 160 |
168 mount_result=0 | 161 echo "Checking if NFS is mounted by Batch at $NFS_MOUNT_POINT..." |
169 echo "✓ NFS mount successful!" | 162 if [ -d "$NFS_MOUNT_POINT" ]; then |
170 | 163 echo "✓ NFS mount point exists" |
171 echo "" | 164 |
172 echo "=== NFS Share Contents ===" | 165 # Check if it's actually mounted |
173 echo "Long listing of NFS share root:" | 166 if mount | grep "$NFS_MOUNT_POINT"; then |
174 ls -la "$MOUNT_POINT" 2>/dev/null || echo "Could not list directory contents" | 167 mount_result=0 |
175 | 168 echo "✓ NFS mounted by Batch successfully!" |
176 echo "" | 169 |
177 echo "Disk usage of NFS share:" | 170 echo "" |
178 df -h "$MOUNT_POINT" 2>/dev/null || echo "Could not get disk usage" | 171 echo "=== NFS Share Contents ===" |
179 | 172 echo "Long listing of NFS share:" |
180 echo "" | 173 ls -la "$NFS_MOUNT_POINT" 2>/dev/null || echo "Could not list directory contents" |
181 echo "Mount information:" | 174 |
182 mount | grep "$MOUNT_POINT" || echo "Mount info not found" | 175 echo "" |
183 | 176 echo "Disk usage of NFS share:" |
184 # Try to find common Galaxy directories | 177 df -h "$NFS_MOUNT_POINT" 2>/dev/null || echo "Could not get disk usage" |
185 echo "" | 178 |
186 echo "=== Looking for Galaxy directories ===" | 179 # Look for export subdirectories |
187 for dir in "database" "database/files" "database/objects" "tools" "shed_tools"; do | 180 echo "" |
188 if [ -d "$MOUNT_POINT/$dir" ]; then | 181 echo "=== Looking for export directories ===" |
189 echo "✓ Found: $dir" | 182 if [ -d "$NFS_MOUNT_POINT/export" ]; then |
190 ls -la "$MOUNT_POINT/$dir" | head -10 | 183 echo "✓ Found: export directory" |
184 ls -la "$NFS_MOUNT_POINT/export" | head -10 2>/dev/null || echo "Could not list export contents" | |
185 | |
186 # Look for PVC subdirectories | |
187 echo "Looking for PVC directories in export..." | |
188 find "$NFS_MOUNT_POINT/export" -name "pvc-*" -type d | head -5 2>/dev/null || echo "No PVC directories found" | |
191 else | 189 else |
192 echo "✗ Not found: $dir" | 190 echo "✗ No export directory found" |
193 fi | 191 fi |
194 done | 192 |
195 | 193 # Try to find common Galaxy directories |
196 echo "" | 194 echo "" |
197 echo "Unmounting NFS share..." | 195 echo "=== Looking for Galaxy directories ===" |
198 umount "$MOUNT_POINT" 2>/dev/null && echo "✓ Unmount successful" || echo "✗ Unmount failed" | 196 |
197 # First check if they exist directly in the NFS root | |
198 galaxy_dirs_in_root=0 | |
199 for dir in "jobs_directory" "shed_tools" "objects" "tools" "cache" "config"; do | |
200 if [ -d "$NFS_MOUNT_POINT/$dir" ]; then | |
201 echo "✓ Found in root: $dir" | |
202 ls -la "$NFS_MOUNT_POINT/$dir" | head -5 | |
203 galaxy_dirs_in_root=$((galaxy_dirs_in_root + 1)) | |
204 fi | |
205 done | |
206 | |
207 if [ $galaxy_dirs_in_root -eq 0 ]; then | |
208 echo "✗ No Galaxy directories found in NFS root" | |
209 else | |
210 echo "✓ Found $galaxy_dirs_in_root Galaxy directories in NFS root" | |
211 fi | |
212 | |
213 # Then check inside any PVC directories under export | |
214 if [ -d "$NFS_MOUNT_POINT/export" ]; then | |
215 echo "" | |
216 echo "=== Checking PVC directories for Galaxy structure ===" | |
217 | |
218 # Find all PVC directories | |
219 pvc_count=0 | |
220 for pvc_dir in $(find "$NFS_MOUNT_POINT/export" -name "pvc-*" -type d 2>/dev/null); do | |
221 pvc_count=$((pvc_count + 1)) | |
222 echo "" | |
223 echo "Checking PVC ($pvc_count): $(basename $pvc_dir)" | |
224 echo " Full path: $pvc_dir" | |
225 | |
226 # Show directory listing of PVC | |
227 echo " Contents:" | |
228 ls -la "$pvc_dir" | head -10 | sed 's/^/ /' | |
229 | |
230 # Check for Galaxy directories inside this PVC | |
231 galaxy_dirs_found=0 | |
232 for dir in "jobs_directory" "shed_tools" "objects" "tools" "cache" "config" "deps" "tmp"; do | |
233 if [ -d "$pvc_dir/$dir" ]; then | |
234 echo " ✓ Found Galaxy directory: $dir" | |
235 # Show a sample of contents | |
236 ls -la "$pvc_dir/$dir" 2>/dev/null | head -3 | sed 's/^/ /' | |
237 galaxy_dirs_found=$((galaxy_dirs_found + 1)) | |
238 fi | |
239 done | |
240 | |
241 # Check for Galaxy-specific files | |
242 galaxy_files_found=0 | |
243 for file in "galaxy.yml" "universe_wsgi.ini" "config/galaxy.yml" "results.sqlite" "celery-beat-schedule"; do | |
244 if [ -f "$pvc_dir/$file" ]; then | |
245 echo " ✓ Found Galaxy file: $file" | |
246 galaxy_files_found=$((galaxy_files_found + 1)) | |
247 fi | |
248 done | |
249 | |
250 total_indicators=$((galaxy_dirs_found + galaxy_files_found)) | |
251 if [ $total_indicators -gt 0 ]; then | |
252 echo " 🎯 This PVC contains $galaxy_dirs_found Galaxy directories and $galaxy_files_found Galaxy files" | |
253 | |
254 # Test write access | |
255 test_file="$pvc_dir/.batch_test_file_$(date +%s)" | |
256 if echo "test" > "$test_file" 2>/dev/null; then | |
257 echo " ✓ Write access confirmed" | |
258 rm -f "$test_file" 2>/dev/null | |
259 else | |
260 echo " ✗ No write access" | |
261 fi | |
262 | |
263 # Test specific Galaxy directories access | |
264 if [ -d "$pvc_dir/jobs_directory" ]; then | |
265 echo " 📁 Jobs directory details:" | |
266 du -sh "$pvc_dir/jobs_directory" 2>/dev/null | sed 's/^/ /' || echo " Could not get size" | |
267 job_count=$(find "$pvc_dir/jobs_directory" -mindepth 1 -maxdepth 1 -type d 2>/dev/null | wc -l) | |
268 echo " Job subdirectories: $job_count" | |
269 fi | |
270 | |
271 if [ -d "$pvc_dir/shed_tools" ]; then | |
272 echo " 🔧 Shed tools directory details:" | |
273 du -sh "$pvc_dir/shed_tools" 2>/dev/null | sed 's/^/ /' || echo " Could not get size" | |
274 tool_count=$(find "$pvc_dir/shed_tools" -name "*.py" -o -name "*.xml" 2>/dev/null | wc -l) | |
275 echo " Tool files (py/xml): $tool_count" | |
276 fi | |
277 else | |
278 echo " ✗ No Galaxy directories or files found in this PVC" | |
279 fi | |
280 done | |
281 | |
282 if [ $pvc_count -eq 0 ]; then | |
283 echo "✗ No PVC directories found in export" | |
284 else | |
285 echo "" | |
286 echo "📊 Summary: Found $pvc_count PVC directories in export" | |
287 fi | |
288 else | |
289 echo "" | |
290 echo "✗ No export directory found in NFS mount" | |
291 fi | |
292 else | |
293 echo "✗ NFS mount point exists but is not mounted" | |
294 echo "This suggests Batch volume configuration may be incorrect" | |
295 fi | |
199 else | 296 else |
200 echo "✗ NFS mount failed" | 297 echo "✗ NFS mount point $NFS_MOUNT_POINT does not exist" |
201 echo "Mount error details above" | 298 echo "This suggests Batch volume was not configured" |
202 | |
203 # Try alternative mount options | |
204 echo "" | |
205 echo "Trying alternative NFS mount options..." | |
206 echo "Command: mount -t nfs -o vers=4,tcp {target_host}:/ $MOUNT_POINT" | |
207 if mount -t nfs -o vers=4,tcp {target_host}:/ "$MOUNT_POINT" 2>&1; then | |
208 mount_result=0 | |
209 echo "✓ NFS v4 mount successful!" | |
210 ls -la "$MOUNT_POINT" 2>/dev/null || echo "Could not list directory contents" | |
211 umount "$MOUNT_POINT" 2>/dev/null && echo "✓ Unmount successful" || echo "✗ Unmount failed" | |
212 else | |
213 echo "✗ NFS v4 mount also failed" | |
214 fi | |
215 fi | 299 fi |
216 | 300 |
217 # CVMFS Mount Test | 301 # CVMFS Mount Test |
218 echo "" | 302 echo "" |
219 echo "=== CVMFS Access Test ===" | 303 echo "=== CVMFS Access Test ===" |
233 echo "=== CVMFS Repository Contents ===" | 317 echo "=== CVMFS Repository Contents ===" |
234 echo "Long listing of CVMFS repository root:" | 318 echo "Long listing of CVMFS repository root:" |
235 ls -la "/cvmfs/data.galaxyproject.org" 2>/dev/null | head -10 || echo "Could not list directory contents" | 319 ls -la "/cvmfs/data.galaxyproject.org" 2>/dev/null | head -10 || echo "Could not list directory contents" |
236 | 320 |
237 echo "" | 321 echo "" |
238 echo "Checking for Galaxy reference data directories:" | 322 echo "Listing Galaxy reference data directories:" |
239 for dir in "byhand" "location" "tool-data" "genomes"; do | 323 for dir in "byhand" "managed"; do |
240 if [ -d "/cvmfs/data.galaxyproject.org/$dir" ]; then | 324 if [ -d "/cvmfs/data.galaxyproject.org/$dir" ]; then |
241 echo "✓ Found CVMFS directory: $dir" | 325 echo "✓ Found CVMFS directory: $dir" |
242 ls "/cvmfs/data.galaxyproject.org/$dir" | head -5 2>/dev/null || echo "Could not list contents" | 326 ls "/cvmfs/data.galaxyproject.org/$dir" | head -5 2>/dev/null || echo "Could not list contents" |
243 else | 327 else |
244 echo "✗ Not found: $dir" | 328 echo "✗ Not found: $dir" |
245 fi | 329 fi |
246 done | 330 done |
331 | |
332 echo "" | |
333 echo "=== CVMFS File Access Test ===" | |
334 echo "Testing access to specific Galaxy reference file..." | |
335 echo "File: /cvmfs/data.galaxyproject.org/byhand/Arabidopsis_thaliana_TAIR10/seq/Arabidopsis_thaliana_TAIR10.fa.fai" | |
336 | |
337 CVMFS_TEST_FILE="/cvmfs/data.galaxyproject.org/byhand/Arabidopsis_thaliana_TAIR10/seq/Arabidopsis_thaliana_TAIR10.fa.fai" | |
338 if [ -f "$CVMFS_TEST_FILE" ]; then | |
339 echo "✓ File exists, reading first 10 lines:" | |
340 head "$CVMFS_TEST_FILE" 2>/dev/null || echo "Could not read file contents" | |
341 else | |
342 echo "✗ File not found" | |
343 echo "Checking if parent directories exist:" | |
344 [ -d "/cvmfs/data.galaxyproject.org/byhand/Arabidopsis_thaliana_TAIR10" ] && echo " ✓ Arabidopsis_thaliana_TAIR10 directory exists" || echo " ✗ Arabidopsis_thaliana_TAIR10 directory missing" | |
345 [ -d "/cvmfs/data.galaxyproject.org/byhand/Arabidopsis_thaliana_TAIR10/seq" ] && echo " ✓ seq directory exists" || echo " ✗ seq directory missing" | |
346 fi | |
247 | 347 |
248 echo "" | 348 echo "" |
249 echo "CVMFS mount information from host:" | 349 echo "CVMFS mount information from host:" |
250 mount | grep cvmfs || echo "CVMFS mount info not visible from container" | 350 mount | grep cvmfs || echo "CVMFS mount info not visible from container" |
251 else | 351 else |
258 else | 358 else |
259 echo "✗ /cvmfs directory not found" | 359 echo "✗ /cvmfs directory not found" |
260 echo "This indicates the bind mount from host VM failed" | 360 echo "This indicates the bind mount from host VM failed" |
261 echo "Expected: /cvmfs from host VM bind-mounted into container" | 361 echo "Expected: /cvmfs from host VM bind-mounted into container" |
262 fi | 362 fi |
363 | |
263 | 364 |
264 echo "" | 365 echo "" |
265 echo "=== Final Result ===" | 366 echo "=== Final Result ===" |
266 if [ $nc_result -eq 0 ] && [ $mount_result -eq 0 ]; then | 367 if [ $nc_result -eq 0 ] && [ $mount_result -eq 0 ]; then |
267 echo "✓ SUCCESS: Both network connectivity and NFS mount to {target_host}:{target_port} successful" | 368 echo "✓ SUCCESS: Both network connectivity and NFS mount to {target_host}:{target_port} successful" |
275 echo "⚠ PARTIAL SUCCESS: Network connectivity successful but NFS mount failed" | 376 echo "⚠ PARTIAL SUCCESS: Network connectivity successful but NFS mount failed" |
276 echo "Network connection to {target_host}:{target_port} works, but NFS mounting failed." | 377 echo "Network connection to {target_host}:{target_port} works, but NFS mounting failed." |
277 echo "This suggests:" | 378 echo "This suggests:" |
278 echo "- NFS server is reachable but may not be properly configured" | 379 echo "- NFS server is reachable but may not be properly configured" |
279 echo "- NFS export permissions may be incorrect" | 380 echo "- NFS export permissions may be incorrect" |
280 echo "- NFS version mismatch (tried NFSv3 and NFSv4)" | |
281 echo "- Firewall may allow port 2049 but block other NFS ports (111, 20048)" | 381 echo "- Firewall may allow port 2049 but block other NFS ports (111, 20048)" |
282 if [ $cvmfs_result -eq 0 ]; then | 382 if [ $cvmfs_result -eq 0 ]; then |
283 echo "✓ CVMFS repository mount was successful" | 383 echo "✓ CVMFS repository mount was successful" |
284 fi | 384 fi |
285 exit 1 | 385 exit 1 |
303 fi | 403 fi |
304 ''' | 404 ''' |
305 | 405 |
306 # Define the job using the Python client library objects | 406 # Define the job using the Python client library objects |
307 logger.info("Building job specification...") | 407 logger.info("Building job specification...") |
408 | |
409 # Escape the test script for use in docker command (outside f-string to avoid backslash issues) | |
410 escaped_test_script = test_script.replace("'", "'\"'\"'") | |
411 | |
412 # Create a host script that triggers CVMFS mount and then runs the container | |
413 host_script = f'''#!/bin/bash | |
414 set -e | |
415 echo "=== Pre-Container Host Script ===" | |
416 echo "Timestamp: $(date)" | |
417 echo "Host VM Image: galaxy-k8s-boot-v2025-08-12" | |
418 echo "Running on host before container starts..." | |
419 echo "" | |
420 | |
421 echo "=== Triggering CVMFS Mount ===" | |
422 echo "Checking CVMFS autofs status:" | |
423 mount | grep cvmfs || echo "No CVMFS mounts yet" | |
424 | |
425 echo "" | |
426 echo "Triggering CVMFS mount by accessing repository:" | |
427 ls /cvmfs/data.galaxyproject.org/ || echo "Could not access CVMFS repository" | |
428 | |
429 echo "" | |
430 echo "After access - checking CVMFS mounts:" | |
431 mount | grep cvmfs || echo "Still no CVMFS mounts visible" | |
432 | |
433 echo "" | |
434 echo "Testing specific file access from host:" | |
435 if [ -f "/cvmfs/data.galaxyproject.org/byhand/Arabidopsis_thaliana_TAIR10/seq/Arabidopsis_thaliana_TAIR10.fa.fai" ]; then | |
436 echo "✓ CVMFS file accessible from host" | |
437 head -3 "/cvmfs/data.galaxyproject.org/byhand/Arabidopsis_thaliana_TAIR10/seq/Arabidopsis_thaliana_TAIR10.fa.fai" | |
438 else | |
439 echo "✗ CVMFS file not accessible from host" | |
440 fi | |
441 | |
442 echo "" | |
443 echo "=== Starting Container ===" | |
444 echo "Running container with bind-mounted CVMFS and NFS..." | |
445 | |
446 # Run the container with the test script and volume mounts | |
447 docker run --rm \\ | |
448 -v /cvmfs:/cvmfs:ro \\ | |
449 -v /mnt/nfs:/mnt/nfs:rw \\ | |
450 afgane/gcp-batch-netcat:0.3.0 \\ | |
451 /bin/bash -c '{escaped_test_script}' | |
452 ''' | |
453 | |
308 runnable = batch_v1.Runnable() | 454 runnable = batch_v1.Runnable() |
309 runnable.container = batch_v1.Runnable.Container() | 455 runnable.script = batch_v1.Runnable.Script() |
310 runnable.container.image_uri = "afgane/gcp-batch-netcat:0.3.0" | 456 runnable.script.text = host_script |
311 | 457 logger.debug(f"Host script configured to trigger CVMFS mount and run container") |
312 # Bind mount /cvmfs from the host VM (which has CVMFS client) into the container | |
313 # Use the docker-style volume syntax for bind mounting host paths | |
314 runnable.container.volumes = ["/cvmfs:/cvmfs:ro"] | |
315 | |
316 runnable.container.entrypoint = "/bin/bash" | |
317 runnable.container.commands = ["-c", test_script] | |
318 logger.debug(f"Container config: image={runnable.container.image_uri}, with /cvmfs bind mount from custom VM") | |
319 | 458 |
320 task = batch_v1.TaskSpec() | 459 task = batch_v1.TaskSpec() |
321 task.runnables = [runnable] | 460 task.runnables = [runnable] |
322 task.compute_resource = batch_v1.ComputeResource() | 461 task.compute_resource = batch_v1.ComputeResource() |
323 task.compute_resource.cpu_milli = 1000 | 462 task.compute_resource.cpu_milli = 1000 |
324 task.compute_resource.memory_mib = 1024 | 463 task.compute_resource.memory_mib = 1024 |
325 logger.debug(f"Compute resources: CPU={task.compute_resource.cpu_milli}m, Memory={task.compute_resource.memory_mib}MiB") | 464 logger.debug(f"Compute resources: CPU={task.compute_resource.cpu_milli}m, Memory={task.compute_resource.memory_mib}MiB") |
326 | 465 |
466 # Configure NFS volume in the task | |
467 volume = batch_v1.Volume() | |
468 volume.nfs = batch_v1.NFS() | |
469 volume.nfs.server = target_host | |
470 volume.nfs.remote_path = "/" # Root of the NFS export | |
471 volume.mount_path = "/mnt/nfs" | |
472 | |
473 task.volumes = [volume] | |
474 logger.debug(f"NFS volume configured: {target_host}:/ -> /mnt/nfs") | |
475 | |
327 task_group = batch_v1.TaskGroup() | 476 task_group = batch_v1.TaskGroup() |
328 task_group.task_count = 1 | 477 task_group.task_count = 1 |
329 task_group.parallelism = 1 | 478 task_group.parallelism = 1 |
330 task_group.task_spec = task | 479 task_group.task_spec = task |
331 logger.debug(f"Task group: count={task_group.task_count}, parallelism={task_group.parallelism}") | 480 logger.debug(f"Task group: count={task_group.task_count}, parallelism={task_group.parallelism}") |
342 | 491 |
343 # Instance policy with custom VM image | 492 # Instance policy with custom VM image |
344 instance_policy = batch_v1.AllocationPolicy.InstancePolicy() | 493 instance_policy = batch_v1.AllocationPolicy.InstancePolicy() |
345 instance_policy.machine_type = "e2-medium" # Specify machine type for custom image | 494 instance_policy.machine_type = "e2-medium" # Specify machine type for custom image |
346 instance_policy.boot_disk = batch_v1.AllocationPolicy.Disk() | 495 instance_policy.boot_disk = batch_v1.AllocationPolicy.Disk() |
347 instance_policy.boot_disk.image = f"projects/{project_id}/global/images/galaxy-k8s-boot-v2025-08-10" | 496 instance_policy.boot_disk.image = f"projects/{project_id}/global/images/galaxy-k8s-boot-v2025-08-12" |
348 instance_policy.boot_disk.size_gb = 99 | 497 instance_policy.boot_disk.size_gb = 99 |
349 logger.debug(f"Using custom VM image: {instance_policy.boot_disk.image}") | 498 logger.debug(f"Using custom VM image: {instance_policy.boot_disk.image}") |
350 | 499 |
351 # Wrap the instance policy in InstancePolicyOrTemplate | 500 # Wrap the instance policy in InstancePolicyOrTemplate |
352 instance_policy_or_template = batch_v1.AllocationPolicy.InstancePolicyOrTemplate() | 501 instance_policy_or_template = batch_v1.AllocationPolicy.InstancePolicyOrTemplate() |