Mercurial > repos > enis > gcp_batch_netcat
comparison gcp_batch_netcat.py @ 9:3fd12035e6c9 draft
planemo upload for repository https://github.com/afgane/gcp_batch_netcat commit 4194f0defc4bebd6c95c76b58860c9ae6b9d1899-dirty
author | enis |
---|---|
date | Sun, 10 Aug 2025 21:32:04 +0000 |
parents | fcfb703748b1 |
children | cdfa2e1a7ef4 |
comparison
equal
deleted
inserted
replaced
8:7c660a6be068 | 9:3fd12035e6c9 |
---|---|
79 # Create Batch client | 79 # Create Batch client |
80 logger.info("Creating Batch client...") | 80 logger.info("Creating Batch client...") |
81 client = batch_v1.BatchServiceClient() | 81 client = batch_v1.BatchServiceClient() |
82 logger.info("Batch client created successfully") | 82 logger.info("Batch client created successfully") |
83 | 83 |
84 # Define the job using the Python client library objects | |
85 logger.info("Building job specification...") | |
86 runnable = batch_v1.Runnable() | |
87 runnable.container = batch_v1.Runnable.Container() | |
88 runnable.container.image_uri = "afgane/gcp-batch-netcat:0.2.0" | |
89 | |
90 # Create a comprehensive test script | 84 # Create a comprehensive test script |
91 test_script = f'''#!/bin/bash | 85 test_script = f'''#!/bin/bash |
92 set -e | 86 set -e |
93 echo "=== GCP Batch NFS Connectivity Test ===" | 87 echo "=== GCP Batch NFS Connectivity Test ===" |
94 echo "Target: {target_host}:{target_port}" | 88 echo "Target: {target_host}:{target_port}" |
95 echo "Timestamp: $(date)" | 89 echo "Timestamp: $(date)" |
96 echo "Container hostname: $(hostname)" | 90 echo "Container hostname: $(hostname)" |
91 echo "Host VM Image: galaxy-k8s-boot-v2025-08-10" | |
92 echo "Container Image: afgane/gcp-batch-netcat:0.2.0" | |
93 echo "" | |
94 | |
95 # Basic system info | |
96 echo "=== System Information ===" | |
97 echo "OS Release:" | |
98 cat /etc/os-release | head -5 2>/dev/null || echo "OS release info not available" | |
99 echo "Kernel version:" | |
100 uname -r | |
101 echo "Architecture:" | |
102 uname -m | |
97 echo "" | 103 echo "" |
98 | 104 |
99 # Basic network info | 105 # Basic network info |
100 echo "=== Network Information ===" | 106 echo "=== Network Information ===" |
101 echo "Container IP addresses:" | 107 echo "Container IP addresses:" |
126 timeout 30 nc -z -v -w 10 {target_host} {target_port} | 132 timeout 30 nc -z -v -w 10 {target_host} {target_port} |
127 nc_result=$? | 133 nc_result=$? |
128 echo "Netcat result: $nc_result" | 134 echo "Netcat result: $nc_result" |
129 echo "" | 135 echo "" |
130 | 136 |
137 # NFS client capabilities | |
138 echo "=== NFS Client Information ===" | |
139 echo "NFS client version:" | |
140 /sbin/mount.nfs -V 2>/dev/null || echo "mount.nfs not available" | |
141 echo "RPC services:" | |
142 rpcinfo -p 2>/dev/null || echo "rpcinfo not available" | |
143 echo "" | |
144 | |
131 # Additional connectivity tests | 145 # Additional connectivity tests |
132 echo "=== Additional Connectivity Tests ===" | 146 echo "=== Additional Connectivity Tests ===" |
133 echo "Testing external connectivity (Google DNS 8.8.8.8:53):" | 147 echo "Testing external connectivity (Google DNS 8.8.8.8:53):" |
134 timeout 10 nc -z -v -w 5 8.8.8.8 53 && echo "✓ External DNS reachable" || echo "✗ External DNS unreachable" | 148 timeout 10 nc -z -v -w 5 8.8.8.8 53 && echo "✓ External DNS reachable" || echo "✗ External DNS unreachable" |
135 | 149 |
137 echo "=== Network Troubleshooting ===" | 151 echo "=== Network Troubleshooting ===" |
138 echo "Route table:" | 152 echo "Route table:" |
139 ip route | 153 ip route |
140 echo "" | 154 echo "" |
141 | 155 |
156 # NFS Mount Test | |
157 echo "=== NFS Mount Test ===" | |
158 MOUNT_POINT="/tmp/nfs_test_mount" | |
159 echo "Creating mount point: $MOUNT_POINT" | |
160 mkdir -p "$MOUNT_POINT" | |
161 | |
162 echo "Attempting to mount NFS share..." | |
163 echo "Command: mount -t nfs -o vers=3,tcp {target_host}:/ $MOUNT_POINT" | |
164 | |
165 # Try mounting the NFS share | |
166 mount_result=1 | |
167 if mount -t nfs -o vers=3,tcp {target_host}:/ "$MOUNT_POINT" 2>&1; then | |
168 mount_result=0 | |
169 echo "✓ NFS mount successful!" | |
170 | |
171 echo "" | |
172 echo "=== NFS Share Contents ===" | |
173 echo "Long listing of NFS share root:" | |
174 ls -la "$MOUNT_POINT" 2>/dev/null || echo "Could not list directory contents" | |
175 | |
176 echo "" | |
177 echo "Disk usage of NFS share:" | |
178 df -h "$MOUNT_POINT" 2>/dev/null || echo "Could not get disk usage" | |
179 | |
180 echo "" | |
181 echo "Mount information:" | |
182 mount | grep "$MOUNT_POINT" || echo "Mount info not found" | |
183 | |
184 # Try to find common Galaxy directories | |
185 echo "" | |
186 echo "=== Looking for Galaxy directories ===" | |
187 for dir in "database" "database/files" "database/objects" "tools" "shed_tools"; do | |
188 if [ -d "$MOUNT_POINT/$dir" ]; then | |
189 echo "✓ Found: $dir" | |
190 ls -la "$MOUNT_POINT/$dir" | head -10 | |
191 else | |
192 echo "✗ Not found: $dir" | |
193 fi | |
194 done | |
195 | |
196 echo "" | |
197 echo "Unmounting NFS share..." | |
198 umount "$MOUNT_POINT" 2>/dev/null && echo "✓ Unmount successful" || echo "✗ Unmount failed" | |
199 else | |
200 echo "✗ NFS mount failed" | |
201 echo "Mount error details above" | |
202 | |
203 # Try alternative mount options | |
204 echo "" | |
205 echo "Trying alternative NFS mount options..." | |
206 echo "Command: mount -t nfs -o vers=4,tcp {target_host}:/ $MOUNT_POINT" | |
207 if mount -t nfs -o vers=4,tcp {target_host}:/ "$MOUNT_POINT" 2>&1; then | |
208 mount_result=0 | |
209 echo "✓ NFS v4 mount successful!" | |
210 ls -la "$MOUNT_POINT" 2>/dev/null || echo "Could not list directory contents" | |
211 umount "$MOUNT_POINT" 2>/dev/null && echo "✓ Unmount successful" || echo "✗ Unmount failed" | |
212 else | |
213 echo "✗ NFS v4 mount also failed" | |
214 fi | |
215 fi | |
216 | |
217 # CVMFS Mount Test | |
218 echo "" | |
219 echo "=== CVMFS Access Test ===" | |
220 echo "Checking if CVMFS is bind-mounted from host VM..." | |
221 if [ -d "/cvmfs" ]; then | |
222 echo "✓ /cvmfs directory exists (bind-mounted from host)" | |
223 ls -la /cvmfs 2>/dev/null || echo "Could not list /cvmfs contents" | |
224 | |
225 echo "" | |
226 echo "Checking for Galaxy CVMFS repository..." | |
227 cvmfs_result=1 | |
228 if [ -d "/cvmfs/data.galaxyproject.org" ]; then | |
229 cvmfs_result=0 | |
230 echo "✓ Galaxy CVMFS repository accessible!" | |
231 | |
232 echo "" | |
233 echo "=== CVMFS Repository Contents ===" | |
234 echo "Long listing of CVMFS repository root:" | |
235 ls -la "/cvmfs/data.galaxyproject.org" 2>/dev/null | head -10 || echo "Could not list directory contents" | |
236 | |
237 echo "" | |
238 echo "Checking for Galaxy reference data directories:" | |
239 for dir in "byhand" "location" "tool-data" "genomes"; do | |
240 if [ -d "/cvmfs/data.galaxyproject.org/$dir" ]; then | |
241 echo "✓ Found CVMFS directory: $dir" | |
242 ls "/cvmfs/data.galaxyproject.org/$dir" | head -5 2>/dev/null || echo "Could not list contents" | |
243 else | |
244 echo "✗ Not found: $dir" | |
245 fi | |
246 done | |
247 | |
248 echo "" | |
249 echo "CVMFS mount information from host:" | |
250 mount | grep cvmfs || echo "CVMFS mount info not visible from container" | |
251 else | |
252 echo "✗ Galaxy CVMFS repository not found at /cvmfs/data.galaxyproject.org" | |
253 echo "This may indicate:" | |
254 echo "- CVMFS client not running on host VM" | |
255 echo "- Repository not mounted on host" | |
256 echo "- Bind mount not properly configured" | |
257 fi | |
258 else | |
259 echo "✗ /cvmfs directory not found" | |
260 echo "This indicates the bind mount from host VM failed" | |
261 echo "Expected: /cvmfs from host VM bind-mounted into container" | |
262 fi | |
263 | |
264 echo "" | |
142 echo "=== Final Result ===" | 265 echo "=== Final Result ===" |
143 if [ $nc_result -eq 0 ]; then | 266 if [ $nc_result -eq 0 ] && [ $mount_result -eq 0 ]; then |
144 echo "✓ SUCCESS: Connection to NFS server {target_host}:{target_port} successful" | 267 echo "✓ SUCCESS: Both network connectivity and NFS mount to {target_host}:{target_port} successful" |
268 if [ $cvmfs_result -eq 0 ]; then | |
269 echo "✓ BONUS: CVMFS repository mount also successful" | |
270 else | |
271 echo "ℹ INFO: CVMFS mount failed (may not be available in this image)" | |
272 fi | |
145 exit 0 | 273 exit 0 |
274 elif [ $nc_result -eq 0 ]; then | |
275 echo "⚠ PARTIAL SUCCESS: Network connectivity successful but NFS mount failed" | |
276 echo "Network connection to {target_host}:{target_port} works, but NFS mounting failed." | |
277 echo "This suggests:" | |
278 echo "- NFS server is reachable but may not be properly configured" | |
279 echo "- NFS export permissions may be incorrect" | |
280 echo "- NFS version mismatch (tried NFSv3 and NFSv4)" | |
281 echo "- Firewall may allow port 2049 but block other NFS ports (111, 20048)" | |
282 if [ $cvmfs_result -eq 0 ]; then | |
283 echo "✓ CVMFS repository mount was successful" | |
284 fi | |
285 exit 1 | |
146 else | 286 else |
147 echo "✗ FAILED: Connection to NFS server {target_host}:{target_port} failed" | 287 echo "✗ FAILED: Network connectivity to NFS server {target_host}:{target_port} failed" |
148 echo "This suggests a network connectivity issue between GCP Batch and the NFS server." | 288 echo "This suggests a network connectivity issue between GCP Batch and the NFS server." |
149 echo "Common causes:" | 289 echo "Common causes:" |
150 echo "- Firewall rules blocking NFS traffic (port 2049)" | 290 echo "- Firewall rules blocking NFS traffic (port 2049)" |
151 echo "- NFS service not accessible from external networks (only ClusterIP)" | 291 echo "- NFS service not accessible from external networks (only ClusterIP)" |
152 echo "- NFS server not properly exposed via LoadBalancer" | 292 echo "- NFS server not properly exposed via LoadBalancer" |
153 echo "" | 293 echo "" |
154 echo "Solutions:" | 294 echo "Solutions:" |
155 echo "- Ensure NFS service has type LoadBalancer with external IP" | 295 echo "- Ensure NFS service has type LoadBalancer with external IP" |
156 echo "- Check GCP firewall rules allow traffic from Batch subnet to NFS" | 296 echo "- Check GCP firewall rules allow traffic from Batch subnet to NFS" |
157 echo "- Verify the IP address is the LoadBalancer external IP, not ClusterIP" | 297 echo "- Verify the IP address is the LoadBalancer external IP, not ClusterIP" |
298 if [ $cvmfs_result -eq 0 ]; then | |
299 echo "" | |
300 echo "✓ CVMFS repository mount was successful (good network connectivity to external services)" | |
301 fi | |
158 exit 1 | 302 exit 1 |
159 fi | 303 fi |
160 ''' | 304 ''' |
161 | 305 |
306 # Define the job using the Python client library objects | |
307 logger.info("Building job specification...") | |
308 runnable = batch_v1.Runnable() | |
309 runnable.container = batch_v1.Runnable.Container() | |
310 runnable.container.image_uri = "afgane/gcp-batch-netcat:0.2.0" | |
311 | |
312 # Bind mount /cvmfs from the host VM (which has CVMFS client) into the container | |
313 cvmfs_volume = batch_v1.Volume() | |
314 cvmfs_volume.host_path = "/cvmfs" | |
315 cvmfs_volume.mount_path = "/cvmfs" | |
316 runnable.container.volumes = [cvmfs_volume] | |
317 | |
162 runnable.container.entrypoint = "/bin/bash" | 318 runnable.container.entrypoint = "/bin/bash" |
163 runnable.container.commands = ["-c", test_script] | 319 runnable.container.commands = ["-c", test_script] |
164 logger.debug(f"Container config: image={runnable.container.image_uri}, entrypoint={runnable.container.entrypoint}") | 320 logger.debug(f"Container config: image={runnable.container.image_uri}, with /cvmfs bind mount from custom VM") |
165 | 321 |
166 task = batch_v1.TaskSpec() | 322 task = batch_v1.TaskSpec() |
167 task.runnables = [runnable] | 323 task.runnables = [runnable] |
168 task.compute_resource = batch_v1.ComputeResource() | 324 task.compute_resource = batch_v1.ComputeResource() |
169 task.compute_resource.cpu_milli = 1000 | 325 task.compute_resource.cpu_milli = 1000 |
184 logger.debug(f"Subnet: {network_interface.subnetwork}") | 340 logger.debug(f"Subnet: {network_interface.subnetwork}") |
185 | 341 |
186 network_policy = batch_v1.AllocationPolicy.NetworkPolicy() | 342 network_policy = batch_v1.AllocationPolicy.NetworkPolicy() |
187 network_policy.network_interfaces = [network_interface] | 343 network_policy.network_interfaces = [network_interface] |
188 | 344 |
345 # Instance policy with custom VM image | |
346 instance_policy = batch_v1.AllocationPolicy.InstancePolicy() | |
347 instance_policy.machine_type = "e2-medium" # Specify machine type for custom image | |
348 instance_policy.boot_disk = batch_v1.AllocationPolicy.Disk() | |
349 instance_policy.boot_disk.image = f"projects/{project_id}/global/images/galaxy-k8s-boot-v2025-08-10" | |
350 instance_policy.boot_disk.size_gb = 99 | |
351 logger.debug(f"Using custom VM image: {instance_policy.boot_disk.image}") | |
352 | |
189 allocation_policy = batch_v1.AllocationPolicy() | 353 allocation_policy = batch_v1.AllocationPolicy() |
190 allocation_policy.network = network_policy | 354 allocation_policy.network = network_policy |
355 allocation_policy.instances = [instance_policy] | |
191 | 356 |
192 job = batch_v1.Job() | 357 job = batch_v1.Job() |
193 job.task_groups = [task_group] | 358 job.task_groups = [task_group] |
194 job.allocation_policy = allocation_policy | 359 job.allocation_policy = allocation_policy |
195 job.logs_policy = batch_v1.LogsPolicy() | 360 job.logs_policy = batch_v1.LogsPolicy() |