comparison gcp_batch_netcat.py @ 11:fe0bf22037a5 draft

planemo upload for repository https://github.com/afgane/gcp_batch_netcat commit f730cbb207e028a5d4fd982fe65ece7345af4879
author enis
date Thu, 14 Aug 2025 16:39:36 +0000
parents cdfa2e1a7ef4
children
comparison
equal deleted inserted replaced
10:cdfa2e1a7ef4 11:fe0bf22037a5
86 set -e 86 set -e
87 echo "=== GCP Batch NFS Connectivity Test ===" 87 echo "=== GCP Batch NFS Connectivity Test ==="
88 echo "Target: {target_host}:{target_port}" 88 echo "Target: {target_host}:{target_port}"
89 echo "Timestamp: $(date)" 89 echo "Timestamp: $(date)"
90 echo "Container hostname: $(hostname)" 90 echo "Container hostname: $(hostname)"
91 echo "Host VM Image: galaxy-k8s-boot-v2025-08-10" 91 echo "Host VM Image: galaxy-k8s-boot-v2025-08-12"
92 echo "Container Image: afgane/gcp-batch-netcat:0.3.0" 92 echo "Container Image: afgane/gcp-batch-netcat:0.3.0"
93 echo "" 93 echo ""
94 94
95 # Basic system info 95 # Basic system info
96 echo "=== System Information ===" 96 echo "=== System Information ==="
151 echo "=== Network Troubleshooting ===" 151 echo "=== Network Troubleshooting ==="
152 echo "Route table:" 152 echo "Route table:"
153 ip route 153 ip route
154 echo "" 154 echo ""
155 155
156 # NFS Mount Test 156 # NFS Mount Test - Check if Batch mounted it for us
157 echo "=== NFS Mount Test ===" 157 echo "=== NFS Mount Test (via Batch Volume) ==="
158 MOUNT_POINT="/tmp/nfs_test_mount" 158 NFS_MOUNT_POINT="/mnt/nfs"
159 echo "Creating mount point: $MOUNT_POINT"
160 mkdir -p "$MOUNT_POINT"
161
162 echo "Attempting to mount NFS share..."
163 echo "Command: mount -t nfs -o vers=3,tcp {target_host}:/ $MOUNT_POINT"
164
165 # Try mounting the NFS share
166 mount_result=1 159 mount_result=1
167 if mount -t nfs -o vers=3,tcp {target_host}:/ "$MOUNT_POINT" 2>&1; then 160
168 mount_result=0 161 echo "Checking if NFS is mounted by Batch at $NFS_MOUNT_POINT..."
169 echo "✓ NFS mount successful!" 162 if [ -d "$NFS_MOUNT_POINT" ]; then
170 163 echo "✓ NFS mount point exists"
171 echo "" 164
172 echo "=== NFS Share Contents ===" 165 # Check if it's actually mounted
173 echo "Long listing of NFS share root:" 166 if mount | grep "$NFS_MOUNT_POINT"; then
174 ls -la "$MOUNT_POINT" 2>/dev/null || echo "Could not list directory contents" 167 mount_result=0
175 168 echo "✓ NFS mounted by Batch successfully!"
176 echo "" 169
177 echo "Disk usage of NFS share:" 170 echo ""
178 df -h "$MOUNT_POINT" 2>/dev/null || echo "Could not get disk usage" 171 echo "=== NFS Share Contents ==="
179 172 echo "Long listing of NFS share:"
180 echo "" 173 ls -la "$NFS_MOUNT_POINT" 2>/dev/null || echo "Could not list directory contents"
181 echo "Mount information:" 174
182 mount | grep "$MOUNT_POINT" || echo "Mount info not found" 175 echo ""
183 176 echo "Disk usage of NFS share:"
184 # Try to find common Galaxy directories 177 df -h "$NFS_MOUNT_POINT" 2>/dev/null || echo "Could not get disk usage"
185 echo "" 178
186 echo "=== Looking for Galaxy directories ===" 179 # Look for export subdirectories
187 for dir in "database" "database/files" "database/objects" "tools" "shed_tools"; do 180 echo ""
188 if [ -d "$MOUNT_POINT/$dir" ]; then 181 echo "=== Looking for export directories ==="
189 echo "✓ Found: $dir" 182 if [ -d "$NFS_MOUNT_POINT/export" ]; then
190 ls -la "$MOUNT_POINT/$dir" | head -10 183 echo "✓ Found: export directory"
184 ls -la "$NFS_MOUNT_POINT/export" | head -10 2>/dev/null || echo "Could not list export contents"
185
186 # Look for PVC subdirectories
187 echo "Looking for PVC directories in export..."
188 find "$NFS_MOUNT_POINT/export" -name "pvc-*" -type d | head -5 2>/dev/null || echo "No PVC directories found"
191 else 189 else
192 echo "✗ Not found: $dir" 190 echo "✗ No export directory found"
193 fi 191 fi
194 done 192
195 193 # Try to find common Galaxy directories
196 echo "" 194 echo ""
197 echo "Unmounting NFS share..." 195 echo "=== Looking for Galaxy directories ==="
198 umount "$MOUNT_POINT" 2>/dev/null && echo "✓ Unmount successful" || echo "✗ Unmount failed" 196
197 # First check if they exist directly in the NFS root
198 galaxy_dirs_in_root=0
199 for dir in "jobs_directory" "shed_tools" "objects" "tools" "cache" "config"; do
200 if [ -d "$NFS_MOUNT_POINT/$dir" ]; then
201 echo "✓ Found in root: $dir"
202 ls -la "$NFS_MOUNT_POINT/$dir" | head -5
203 galaxy_dirs_in_root=$((galaxy_dirs_in_root + 1))
204 fi
205 done
206
207 if [ $galaxy_dirs_in_root -eq 0 ]; then
208 echo "✗ No Galaxy directories found in NFS root"
209 else
210 echo "✓ Found $galaxy_dirs_in_root Galaxy directories in NFS root"
211 fi
212
213 # Then check inside any PVC directories under export
214 if [ -d "$NFS_MOUNT_POINT/export" ]; then
215 echo ""
216 echo "=== Checking PVC directories for Galaxy structure ==="
217
218 # Find all PVC directories
219 pvc_count=0
220 for pvc_dir in $(find "$NFS_MOUNT_POINT/export" -name "pvc-*" -type d 2>/dev/null); do
221 pvc_count=$((pvc_count + 1))
222 echo ""
223 echo "Checking PVC ($pvc_count): $(basename $pvc_dir)"
224 echo " Full path: $pvc_dir"
225
226 # Show directory listing of PVC
227 echo " Contents:"
228 ls -la "$pvc_dir" | head -10 | sed 's/^/ /'
229
230 # Check for Galaxy directories inside this PVC
231 galaxy_dirs_found=0
232 for dir in "jobs_directory" "shed_tools" "objects" "tools" "cache" "config" "deps" "tmp"; do
233 if [ -d "$pvc_dir/$dir" ]; then
234 echo " ✓ Found Galaxy directory: $dir"
235 # Show a sample of contents
236 ls -la "$pvc_dir/$dir" 2>/dev/null | head -3 | sed 's/^/ /'
237 galaxy_dirs_found=$((galaxy_dirs_found + 1))
238 fi
239 done
240
241 # Check for Galaxy-specific files
242 galaxy_files_found=0
243 for file in "galaxy.yml" "universe_wsgi.ini" "config/galaxy.yml" "results.sqlite" "celery-beat-schedule"; do
244 if [ -f "$pvc_dir/$file" ]; then
245 echo " ✓ Found Galaxy file: $file"
246 galaxy_files_found=$((galaxy_files_found + 1))
247 fi
248 done
249
250 total_indicators=$((galaxy_dirs_found + galaxy_files_found))
251 if [ $total_indicators -gt 0 ]; then
252 echo " 🎯 This PVC contains $galaxy_dirs_found Galaxy directories and $galaxy_files_found Galaxy files"
253
254 # Test write access
255 test_file="$pvc_dir/.batch_test_file_$(date +%s)"
256 if echo "test" > "$test_file" 2>/dev/null; then
257 echo " ✓ Write access confirmed"
258 rm -f "$test_file" 2>/dev/null
259 else
260 echo " ✗ No write access"
261 fi
262
263 # Test specific Galaxy directories access
264 if [ -d "$pvc_dir/jobs_directory" ]; then
265 echo " 📁 Jobs directory details:"
266 du -sh "$pvc_dir/jobs_directory" 2>/dev/null | sed 's/^/ /' || echo " Could not get size"
267 job_count=$(find "$pvc_dir/jobs_directory" -mindepth 1 -maxdepth 1 -type d 2>/dev/null | wc -l)
268 echo " Job subdirectories: $job_count"
269 fi
270
271 if [ -d "$pvc_dir/shed_tools" ]; then
272 echo " 🔧 Shed tools directory details:"
273 du -sh "$pvc_dir/shed_tools" 2>/dev/null | sed 's/^/ /' || echo " Could not get size"
274 tool_count=$(find "$pvc_dir/shed_tools" -name "*.py" -o -name "*.xml" 2>/dev/null | wc -l)
275 echo " Tool files (py/xml): $tool_count"
276 fi
277 else
278 echo " ✗ No Galaxy directories or files found in this PVC"
279 fi
280 done
281
282 if [ $pvc_count -eq 0 ]; then
283 echo "✗ No PVC directories found in export"
284 else
285 echo ""
286 echo "📊 Summary: Found $pvc_count PVC directories in export"
287 fi
288 else
289 echo ""
290 echo "✗ No export directory found in NFS mount"
291 fi
292 else
293 echo "✗ NFS mount point exists but is not mounted"
294 echo "This suggests Batch volume configuration may be incorrect"
295 fi
199 else 296 else
200 echo "✗ NFS mount failed" 297 echo "✗ NFS mount point $NFS_MOUNT_POINT does not exist"
201 echo "Mount error details above" 298 echo "This suggests Batch volume was not configured"
202
203 # Try alternative mount options
204 echo ""
205 echo "Trying alternative NFS mount options..."
206 echo "Command: mount -t nfs -o vers=4,tcp {target_host}:/ $MOUNT_POINT"
207 if mount -t nfs -o vers=4,tcp {target_host}:/ "$MOUNT_POINT" 2>&1; then
208 mount_result=0
209 echo "✓ NFS v4 mount successful!"
210 ls -la "$MOUNT_POINT" 2>/dev/null || echo "Could not list directory contents"
211 umount "$MOUNT_POINT" 2>/dev/null && echo "✓ Unmount successful" || echo "✗ Unmount failed"
212 else
213 echo "✗ NFS v4 mount also failed"
214 fi
215 fi 299 fi
216 300
217 # CVMFS Mount Test 301 # CVMFS Mount Test
218 echo "" 302 echo ""
219 echo "=== CVMFS Access Test ===" 303 echo "=== CVMFS Access Test ==="
233 echo "=== CVMFS Repository Contents ===" 317 echo "=== CVMFS Repository Contents ==="
234 echo "Long listing of CVMFS repository root:" 318 echo "Long listing of CVMFS repository root:"
235 ls -la "/cvmfs/data.galaxyproject.org" 2>/dev/null | head -10 || echo "Could not list directory contents" 319 ls -la "/cvmfs/data.galaxyproject.org" 2>/dev/null | head -10 || echo "Could not list directory contents"
236 320
237 echo "" 321 echo ""
238 echo "Checking for Galaxy reference data directories:" 322 echo "Listing Galaxy reference data directories:"
239 for dir in "byhand" "location" "tool-data" "genomes"; do 323 for dir in "byhand" "managed"; do
240 if [ -d "/cvmfs/data.galaxyproject.org/$dir" ]; then 324 if [ -d "/cvmfs/data.galaxyproject.org/$dir" ]; then
241 echo "✓ Found CVMFS directory: $dir" 325 echo "✓ Found CVMFS directory: $dir"
242 ls "/cvmfs/data.galaxyproject.org/$dir" | head -5 2>/dev/null || echo "Could not list contents" 326 ls "/cvmfs/data.galaxyproject.org/$dir" | head -5 2>/dev/null || echo "Could not list contents"
243 else 327 else
244 echo "✗ Not found: $dir" 328 echo "✗ Not found: $dir"
245 fi 329 fi
246 done 330 done
331
332 echo ""
333 echo "=== CVMFS File Access Test ==="
334 echo "Testing access to specific Galaxy reference file..."
335 echo "File: /cvmfs/data.galaxyproject.org/byhand/Arabidopsis_thaliana_TAIR10/seq/Arabidopsis_thaliana_TAIR10.fa.fai"
336
337 CVMFS_TEST_FILE="/cvmfs/data.galaxyproject.org/byhand/Arabidopsis_thaliana_TAIR10/seq/Arabidopsis_thaliana_TAIR10.fa.fai"
338 if [ -f "$CVMFS_TEST_FILE" ]; then
339 echo "✓ File exists, reading first 10 lines:"
340 head "$CVMFS_TEST_FILE" 2>/dev/null || echo "Could not read file contents"
341 else
342 echo "✗ File not found"
343 echo "Checking if parent directories exist:"
344 [ -d "/cvmfs/data.galaxyproject.org/byhand/Arabidopsis_thaliana_TAIR10" ] && echo " ✓ Arabidopsis_thaliana_TAIR10 directory exists" || echo " ✗ Arabidopsis_thaliana_TAIR10 directory missing"
345 [ -d "/cvmfs/data.galaxyproject.org/byhand/Arabidopsis_thaliana_TAIR10/seq" ] && echo " ✓ seq directory exists" || echo " ✗ seq directory missing"
346 fi
247 347
248 echo "" 348 echo ""
249 echo "CVMFS mount information from host:" 349 echo "CVMFS mount information from host:"
250 mount | grep cvmfs || echo "CVMFS mount info not visible from container" 350 mount | grep cvmfs || echo "CVMFS mount info not visible from container"
251 else 351 else
258 else 358 else
259 echo "✗ /cvmfs directory not found" 359 echo "✗ /cvmfs directory not found"
260 echo "This indicates the bind mount from host VM failed" 360 echo "This indicates the bind mount from host VM failed"
261 echo "Expected: /cvmfs from host VM bind-mounted into container" 361 echo "Expected: /cvmfs from host VM bind-mounted into container"
262 fi 362 fi
363
263 364
264 echo "" 365 echo ""
265 echo "=== Final Result ===" 366 echo "=== Final Result ==="
266 if [ $nc_result -eq 0 ] && [ $mount_result -eq 0 ]; then 367 if [ $nc_result -eq 0 ] && [ $mount_result -eq 0 ]; then
267 echo "✓ SUCCESS: Both network connectivity and NFS mount to {target_host}:{target_port} successful" 368 echo "✓ SUCCESS: Both network connectivity and NFS mount to {target_host}:{target_port} successful"
275 echo "⚠ PARTIAL SUCCESS: Network connectivity successful but NFS mount failed" 376 echo "⚠ PARTIAL SUCCESS: Network connectivity successful but NFS mount failed"
276 echo "Network connection to {target_host}:{target_port} works, but NFS mounting failed." 377 echo "Network connection to {target_host}:{target_port} works, but NFS mounting failed."
277 echo "This suggests:" 378 echo "This suggests:"
278 echo "- NFS server is reachable but may not be properly configured" 379 echo "- NFS server is reachable but may not be properly configured"
279 echo "- NFS export permissions may be incorrect" 380 echo "- NFS export permissions may be incorrect"
280 echo "- NFS version mismatch (tried NFSv3 and NFSv4)"
281 echo "- Firewall may allow port 2049 but block other NFS ports (111, 20048)" 381 echo "- Firewall may allow port 2049 but block other NFS ports (111, 20048)"
282 if [ $cvmfs_result -eq 0 ]; then 382 if [ $cvmfs_result -eq 0 ]; then
283 echo "✓ CVMFS repository mount was successful" 383 echo "✓ CVMFS repository mount was successful"
284 fi 384 fi
285 exit 1 385 exit 1
303 fi 403 fi
304 ''' 404 '''
305 405
306 # Define the job using the Python client library objects 406 # Define the job using the Python client library objects
307 logger.info("Building job specification...") 407 logger.info("Building job specification...")
408
409 # Escape the test script for use in docker command (outside f-string to avoid backslash issues)
410 escaped_test_script = test_script.replace("'", "'\"'\"'")
411
412 # Create a host script that triggers CVMFS mount and then runs the container
413 host_script = f'''#!/bin/bash
414 set -e
415 echo "=== Pre-Container Host Script ==="
416 echo "Timestamp: $(date)"
417 echo "Host VM Image: galaxy-k8s-boot-v2025-08-12"
418 echo "Running on host before container starts..."
419 echo ""
420
421 echo "=== Triggering CVMFS Mount ==="
422 echo "Checking CVMFS autofs status:"
423 mount | grep cvmfs || echo "No CVMFS mounts yet"
424
425 echo ""
426 echo "Triggering CVMFS mount by accessing repository:"
427 ls /cvmfs/data.galaxyproject.org/ || echo "Could not access CVMFS repository"
428
429 echo ""
430 echo "After access - checking CVMFS mounts:"
431 mount | grep cvmfs || echo "Still no CVMFS mounts visible"
432
433 echo ""
434 echo "Testing specific file access from host:"
435 if [ -f "/cvmfs/data.galaxyproject.org/byhand/Arabidopsis_thaliana_TAIR10/seq/Arabidopsis_thaliana_TAIR10.fa.fai" ]; then
436 echo "✓ CVMFS file accessible from host"
437 head -3 "/cvmfs/data.galaxyproject.org/byhand/Arabidopsis_thaliana_TAIR10/seq/Arabidopsis_thaliana_TAIR10.fa.fai"
438 else
439 echo "✗ CVMFS file not accessible from host"
440 fi
441
442 echo ""
443 echo "=== Starting Container ==="
444 echo "Running container with bind-mounted CVMFS and NFS..."
445
446 # Run the container with the test script and volume mounts
447 docker run --rm \\
448 -v /cvmfs:/cvmfs:ro \\
449 -v /mnt/nfs:/mnt/nfs:rw \\
450 afgane/gcp-batch-netcat:0.3.0 \\
451 /bin/bash -c '{escaped_test_script}'
452 '''
453
308 runnable = batch_v1.Runnable() 454 runnable = batch_v1.Runnable()
309 runnable.container = batch_v1.Runnable.Container() 455 runnable.script = batch_v1.Runnable.Script()
310 runnable.container.image_uri = "afgane/gcp-batch-netcat:0.3.0" 456 runnable.script.text = host_script
311 457 logger.debug(f"Host script configured to trigger CVMFS mount and run container")
312 # Bind mount /cvmfs from the host VM (which has CVMFS client) into the container
313 # Use the docker-style volume syntax for bind mounting host paths
314 runnable.container.volumes = ["/cvmfs:/cvmfs:ro"]
315
316 runnable.container.entrypoint = "/bin/bash"
317 runnable.container.commands = ["-c", test_script]
318 logger.debug(f"Container config: image={runnable.container.image_uri}, with /cvmfs bind mount from custom VM")
319 458
320 task = batch_v1.TaskSpec() 459 task = batch_v1.TaskSpec()
321 task.runnables = [runnable] 460 task.runnables = [runnable]
322 task.compute_resource = batch_v1.ComputeResource() 461 task.compute_resource = batch_v1.ComputeResource()
323 task.compute_resource.cpu_milli = 1000 462 task.compute_resource.cpu_milli = 1000
324 task.compute_resource.memory_mib = 1024 463 task.compute_resource.memory_mib = 1024
325 logger.debug(f"Compute resources: CPU={task.compute_resource.cpu_milli}m, Memory={task.compute_resource.memory_mib}MiB") 464 logger.debug(f"Compute resources: CPU={task.compute_resource.cpu_milli}m, Memory={task.compute_resource.memory_mib}MiB")
326 465
466 # Configure NFS volume in the task
467 volume = batch_v1.Volume()
468 volume.nfs = batch_v1.NFS()
469 volume.nfs.server = target_host
470 volume.nfs.remote_path = "/" # Root of the NFS export
471 volume.mount_path = "/mnt/nfs"
472
473 task.volumes = [volume]
474 logger.debug(f"NFS volume configured: {target_host}:/ -> /mnt/nfs")
475
327 task_group = batch_v1.TaskGroup() 476 task_group = batch_v1.TaskGroup()
328 task_group.task_count = 1 477 task_group.task_count = 1
329 task_group.parallelism = 1 478 task_group.parallelism = 1
330 task_group.task_spec = task 479 task_group.task_spec = task
331 logger.debug(f"Task group: count={task_group.task_count}, parallelism={task_group.parallelism}") 480 logger.debug(f"Task group: count={task_group.task_count}, parallelism={task_group.parallelism}")
342 491
343 # Instance policy with custom VM image 492 # Instance policy with custom VM image
344 instance_policy = batch_v1.AllocationPolicy.InstancePolicy() 493 instance_policy = batch_v1.AllocationPolicy.InstancePolicy()
345 instance_policy.machine_type = "e2-medium" # Specify machine type for custom image 494 instance_policy.machine_type = "e2-medium" # Specify machine type for custom image
346 instance_policy.boot_disk = batch_v1.AllocationPolicy.Disk() 495 instance_policy.boot_disk = batch_v1.AllocationPolicy.Disk()
347 instance_policy.boot_disk.image = f"projects/{project_id}/global/images/galaxy-k8s-boot-v2025-08-10" 496 instance_policy.boot_disk.image = f"projects/{project_id}/global/images/galaxy-k8s-boot-v2025-08-12"
348 instance_policy.boot_disk.size_gb = 99 497 instance_policy.boot_disk.size_gb = 99
349 logger.debug(f"Using custom VM image: {instance_policy.boot_disk.image}") 498 logger.debug(f"Using custom VM image: {instance_policy.boot_disk.image}")
350 499
351 # Wrap the instance policy in InstancePolicyOrTemplate 500 # Wrap the instance policy in InstancePolicyOrTemplate
352 instance_policy_or_template = batch_v1.AllocationPolicy.InstancePolicyOrTemplate() 501 instance_policy_or_template = batch_v1.AllocationPolicy.InstancePolicyOrTemplate()