From 40b1a63cf514afc0965aea9b11a03fa8efbbe966 Mon Sep 17 00:00:00 2001 From: Kobe Date: Mon, 23 Jun 2025 14:50:37 +0200 Subject: [PATCH] much improved launch process --- app.py | 4 + routes/launch_api.py | 129 +++++++++++++++++---------- static/css/launch_progress.css | 13 +++ static/js/launch_progress.js | 153 +++++++++++++++++++++++++++------ 4 files changed, 225 insertions(+), 74 deletions(-) diff --git a/app.py b/app.py index dd6e765..8ebc0f5 100644 --- a/app.py +++ b/app.py @@ -36,6 +36,10 @@ def create_app(): app.config['CSS_VERSION'] = os.getenv('CSS_VERSION', '1.0.3') # Add CSS version for cache busting app.config['SERVER_NAME'] = os.getenv('SERVER_NAME', '127.0.0.1:5000') app.config['PREFERRED_URL_SCHEME'] = os.getenv('PREFERRED_URL_SCHEME', 'http') + + # Configure request timeouts for long-running operations + app.config['REQUEST_TIMEOUT'] = int(os.getenv('REQUEST_TIMEOUT', '300')) # 5 minutes default + app.config['STACK_DEPLOYMENT_TIMEOUT'] = int(os.getenv('STACK_DEPLOYMENT_TIMEOUT', '300')) # 5 minutes for stack deployment # Initialize extensions db.init_app(app) diff --git a/routes/launch_api.py b/routes/launch_api.py index fb55099..46303ce 100644 --- a/routes/launch_api.py +++ b/routes/launch_api.py @@ -214,7 +214,6 @@ def list_gitea_repos(): return jsonify({'message': 'Missing required fields'}), 400 try: - # Try different authentication methods headers = { 'Accept': 'application/json' } @@ -789,6 +788,9 @@ def deploy_stack(): if not portainer_settings: return jsonify({'error': 'Portainer settings not configured'}), 400 + # Define timeout early to ensure it's available throughout the function + stack_timeout = current_app.config.get('STACK_DEPLOYMENT_TIMEOUT', 300) # Default to 5 minutes + # Verify Portainer authentication auth_response = requests.get( f"{portainer_settings['url'].rstrip('/')}/api/status", @@ -830,6 +832,7 @@ def deploy_stack(): # Log the request data current_app.logger.info(f"Creating stack with data: {json.dumps(data)}") current_app.logger.info(f"Using endpoint ID: {endpoint_id}") + current_app.logger.info(f"Using timeout: {stack_timeout} seconds") # First, check if a stack with this name already exists stacks_url = f"{portainer_settings['url'].rstrip('/')}/api/stacks" @@ -867,7 +870,7 @@ def deploy_stack(): # Add endpointId as a query parameter params = {'endpointId': endpoint_id} - # Use a shorter timeout for stack creation initiation (2 minutes) + # Use a configurable timeout for stack creation initiation create_response = requests.post( url, headers={ @@ -877,7 +880,7 @@ def deploy_stack(): }, params=params, json=request_body, - timeout=120 # 2 minutes timeout for stack creation initiation + timeout=stack_timeout # Use configurable timeout ) # Log the response details @@ -909,8 +912,14 @@ def deploy_stack(): }) except requests.exceptions.Timeout: - current_app.logger.error("Request timed out while initiating stack deployment") - return jsonify({'error': 'Request timed out while initiating stack deployment. The operation may still be in progress.'}), 504 + current_app.logger.error(f"Request timed out after {stack_timeout} seconds while initiating stack deployment") + current_app.logger.error(f"Stack name: {data.get('name', 'unknown') if 'data' in locals() else 'unknown'}") + current_app.logger.error(f"Portainer URL: {portainer_settings.get('url', 'unknown') if 'portainer_settings' in locals() else 'unknown'}") + return jsonify({ + 'error': f'Request timed out after {stack_timeout} seconds while initiating stack deployment. The operation may still be in progress.', + 'timeout_seconds': stack_timeout, + 'stack_name': data.get('name', 'unknown') if 'data' in locals() else 'unknown' + }), 504 except Exception as e: current_app.logger.error(f"Error deploying stack: {str(e)}") return jsonify({'error': str(e)}), 500 @@ -975,48 +984,74 @@ def check_stack_status(): # Get stack services to check their status services_url = f"{portainer_settings['url'].rstrip('/')}/api/endpoints/{endpoint_id}/docker/services" - services_response = requests.get( - services_url, - headers={ - 'X-API-Key': portainer_settings['api_key'], - 'Accept': 'application/json' - }, - params={'filters': json.dumps({'label': f'com.docker.stack.namespace={data["stack_name"]}'})}, - timeout=30 - ) - - if not services_response.ok: - return jsonify({'error': 'Failed to get stack services'}), 500 - - services = services_response.json() + current_app.logger.info(f"Checking services for stack {data['stack_name']} at endpoint {endpoint_id}") - # Check if all services are running - all_running = True - service_statuses = [] - - for service in services: - replicas_running = service.get('Spec', {}).get('Mode', {}).get('Replicated', {}).get('Replicas', 0) - replicas_actual = service.get('ServiceStatus', {}).get('RunningTasks', 0) - - service_status = { - 'name': service.get('Spec', {}).get('Name', 'Unknown'), - 'replicas_expected': replicas_running, - 'replicas_running': replicas_actual, - 'status': 'running' if replicas_actual >= replicas_running else 'not_running' - } - - service_statuses.append(service_status) - - if replicas_actual < replicas_running: - all_running = False + try: + services_response = requests.get( + services_url, + headers={ + 'X-API-Key': portainer_settings['api_key'], + 'Accept': 'application/json' + }, + params={'filters': json.dumps({'label': f'com.docker.stack.namespace={data["stack_name"]}'})}, + timeout=30 + ) - # Determine overall stack status - if all_running and len(services) > 0: - status = 'active' - elif len(services) > 0: - status = 'partial' - else: - status = 'inactive' + current_app.logger.info(f"Services API response status: {services_response.status_code}") + + if services_response.ok: + services = services_response.json() + current_app.logger.info(f"Found {len(services)} services for stack {data['stack_name']}") + + # Check if all services are running + all_running = True + service_statuses = [] + + for service in services: + replicas_running = service.get('Spec', {}).get('Mode', {}).get('Replicated', {}).get('Replicas', 0) + replicas_actual = service.get('ServiceStatus', {}).get('RunningTasks', 0) + + service_status = { + 'name': service.get('Spec', {}).get('Name', 'Unknown'), + 'replicas_expected': replicas_running, + 'replicas_running': replicas_actual, + 'status': 'running' if replicas_actual >= replicas_running else 'not_running' + } + + service_statuses.append(service_status) + + if replicas_actual < replicas_running: + all_running = False + + # Determine overall stack status + if all_running and len(services) > 0: + status = 'active' + elif len(services) > 0: + status = 'partial' + else: + status = 'inactive' + else: + # Services API failed, but stack exists - assume it's still starting up + current_app.logger.warning(f"Failed to get services for stack {data['stack_name']}: {services_response.status_code} - {services_response.text}") + + # Provide more specific error context + if services_response.status_code == 404: + current_app.logger.info(f"Services endpoint not found for stack {data['stack_name']} - stack may still be initializing") + elif services_response.status_code == 403: + current_app.logger.warning(f"Access denied to services for stack {data['stack_name']} - check Portainer permissions") + elif services_response.status_code >= 500: + current_app.logger.warning(f"Portainer server error when getting services for stack {data['stack_name']}") + + services = [] + service_statuses = [] + status = 'starting' # Stack exists but services not available yet + + except Exception as e: + # Exception occurred while getting services, but stack exists + current_app.logger.warning(f"Exception getting services for stack {data['stack_name']}: {str(e)}") + services = [] + service_statuses = [] + status = 'starting' # Stack exists but services not available yet return jsonify({ 'success': True, @@ -1026,7 +1061,9 @@ def check_stack_status(): 'status': status, 'services': service_statuses, 'total_services': len(services), - 'running_services': len([s for s in service_statuses if s['status'] == 'running']) + 'running_services': len([s for s in service_statuses if s['status'] == 'running']), + 'stack_created_at': target_stack.get('CreatedAt', 'unknown'), + 'stack_updated_at': target_stack.get('UpdatedAt', 'unknown') } }) diff --git a/static/css/launch_progress.css b/static/css/launch_progress.css index c97661e..661af88 100644 --- a/static/css/launch_progress.css +++ b/static/css/launch_progress.css @@ -45,6 +45,10 @@ background-color: #ffebee; } +.step-item.warning { + background-color: #fff3cd; +} + .step-icon { width: 40px; height: 40px; @@ -72,6 +76,11 @@ color: white; } +.step-item.warning .step-icon { + background-color: #ffc107; + color: white; +} + .step-content { flex-grow: 1; } @@ -92,4 +101,8 @@ .step-item.failed .step-status { color: #dc3545; +} + +.step-item.warning .step-status { + color: #856404; } \ No newline at end of file diff --git a/static/js/launch_progress.js b/static/js/launch_progress.js index a0999e8..2c62aa8 100644 --- a/static/js/launch_progress.js +++ b/static/js/launch_progress.js @@ -499,14 +499,44 @@ async function startLaunch(data) { `; stackDeployStepElement.querySelector('.step-content').appendChild(stackProgressDiv); - const stackResult = await deployStack(dockerComposeResult.content, data.instanceName, data.port); + const stackResult = await deployStack(dockerComposeResult.content, `docupulse_${data.port}`, data.port); launchReport.steps.push({ step: 'Stack Deployment', status: stackResult.success ? 'success' : 'error', details: stackResult }); + + // Handle different stack deployment scenarios if (!stackResult.success) { - throw new Error(stackResult.error || 'Failed to deploy stack'); + // Check if this is a timeout but the stack might still be deploying + if (stackResult.error && stackResult.error.includes('timed out')) { + console.log('Stack deployment timed out, but may still be in progress'); + + // Update the step to show warning instead of error + const stackDeployStep = document.querySelectorAll('.step-item')[8]; + stackDeployStep.classList.remove('active'); + stackDeployStep.classList.add('warning'); + stackDeployStep.querySelector('.step-status').textContent = 'Stack deployment timed out but may still be in progress'; + + // Add a note about the timeout + const timeoutNote = document.createElement('div'); + timeoutNote.className = 'alert alert-warning mt-2'; + timeoutNote.innerHTML = ` + + Note: The stack deployment request timed out, but the deployment may still be in progress. + You can check the status in your Portainer dashboard or wait a few minutes and refresh this page. + `; + stackDeployStep.querySelector('.step-content').appendChild(timeoutNote); + + // Continue with the process using the available data + stackResult.data = stackResult.data || { + name: `docupulse_${data.port}`, + status: 'creating', + id: null + }; + } else { + throw new Error(stackResult.error || 'Failed to deploy stack'); + } } // Update the step to show success @@ -2146,6 +2176,18 @@ async function checkInstanceHealth(instanceUrl) { attempts: currentAttempt, elapsedTime: elapsedTime }; + } else if (data.status === 'inactive') { + console.log(`Stack ${stackName} is inactive, continuing to poll...`); + lastKnownStatus = 'inactive'; + if (progressText) { + progressText.textContent = `Stack is starting up (${attempts} attempts, ${Math.round(elapsed / 1000)}s elapsed)...`; + } + } else if (data.status === 'starting') { + console.log(`Stack ${stackName} is starting up, continuing to poll...`); + lastKnownStatus = 'starting'; + if (progressText) { + progressText.textContent = `Stack is initializing (${attempts} attempts, ${Math.round(elapsed / 1000)}s elapsed)...`; + } } else { throw new Error('Instance is not healthy'); } @@ -2597,14 +2639,20 @@ async function deployStack(dockerComposeContent, stackName, port) { // Handle 504 Gateway Timeout as successful initiation if (response.status === 504) { console.log('Received 504 Gateway Timeout - stack creation may still be in progress'); - return { - success: true, - data: { - name: stackName, - id: null, // Will be determined during polling - status: 'creating' - } - }; + + // Update progress to show that we're now polling + const progressBar = document.getElementById('stackProgress'); + const progressText = document.getElementById('stackProgressText'); + if (progressBar && progressText) { + progressBar.style.width = '25%'; + progressBar.textContent = '25%'; + progressText.textContent = 'Stack creation initiated (timed out, but continuing to monitor)...'; + } + + // Start polling immediately since the stack creation was initiated + console.log('Starting to poll for stack status after 504 timeout...'); + const pollResult = await pollStackStatus(`docupulse_${port}`, 15 * 60 * 1000); // 15 minutes max + return pollResult; } if (!response.ok) { @@ -2618,7 +2666,7 @@ async function deployStack(dockerComposeContent, stackName, port) { // If stack is being created, poll for status if (result.data.status === 'creating') { console.log('Stack is being created, polling for status...'); - const pollResult = await pollStackStatus(stackName, 10 * 60 * 1000); // 10 minutes max + const pollResult = await pollStackStatus(`docupulse_${port}`, 10 * 60 * 1000); // 10 minutes max return pollResult; } @@ -2638,12 +2686,26 @@ async function deployStack(dockerComposeContent, stackName, port) { } // Function to poll stack status -async function pollStackStatus(stackName, maxWaitTime = 10 * 60 * 1000) { +async function pollStackStatus(stackName, maxWaitTime = 15 * 60 * 1000) { const startTime = Date.now(); const pollInterval = 5000; // 5 seconds let attempts = 0; + let lastKnownStatus = 'unknown'; - console.log(`Starting to poll stack status for: ${stackName}`); + // Validate stack name + if (!stackName || typeof stackName !== 'string') { + console.error('Invalid stack name provided to pollStackStatus:', stackName); + return { + success: false, + error: `Invalid stack name: ${stackName}`, + data: { + name: stackName, + status: 'error' + } + }; + } + + console.log(`Starting to poll stack status for: ${stackName} (max wait: ${maxWaitTime / 1000}s)`); // Update progress indicator const progressBar = document.getElementById('stackProgress'); @@ -2653,25 +2715,29 @@ async function pollStackStatus(stackName, maxWaitTime = 10 * 60 * 1000) { attempts++; console.log(`Polling attempt ${attempts} for stack: ${stackName}`); - // Update progress + // Update progress - start at 25% if we came from a 504 timeout, otherwise start at 0% const elapsed = Date.now() - startTime; - const progress = Math.min((elapsed / maxWaitTime) * 100, 95); // Cap at 95% until complete + const baseProgress = progressBar && progressBar.style.width === '25%' ? 25 : 0; + const progress = Math.min(baseProgress + (elapsed / maxWaitTime) * 70, 95); // Cap at 95% until complete if (progressBar && progressText) { progressBar.style.width = `${progress}%`; progressBar.textContent = `${Math.round(progress)}%`; - progressText.textContent = `Checking stack status (attempt ${attempts})...`; } try { + const requestBody = { + stack_name: stackName + }; + console.log(`Sending stack status check request:`, requestBody); + const response = await fetch('/api/admin/check-stack-status', { method: 'POST', headers: { 'Content-Type': 'application/json', 'X-CSRF-Token': document.querySelector('meta[name="csrf-token"]').content }, - body: JSON.stringify({ - stack_name: stackName - }) + body: JSON.stringify(requestBody), + timeout: 30000 // 30 second timeout for status checks }); if (response.ok) { @@ -2684,7 +2750,9 @@ async function pollStackStatus(stackName, maxWaitTime = 10 * 60 * 1000) { if (progressBar && progressText) { progressBar.style.width = '100%'; progressBar.textContent = '100%'; - progressText.textContent = 'Stack is now active!'; + progressBar.classList.remove('progress-bar-animated'); + progressBar.classList.add('bg-success'); + progressText.textContent = 'Stack is now active and running!'; } return { success: true, @@ -2696,35 +2764,57 @@ async function pollStackStatus(stackName, maxWaitTime = 10 * 60 * 1000) { }; } else if (result.data && result.data.status === 'partial') { console.log(`Stack ${stackName} is partially running, continuing to poll...`); + lastKnownStatus = 'partial'; if (progressText) { - progressText.textContent = `Stack is partially running (attempt ${attempts})...`; + progressText.textContent = `Stack is partially running (${attempts} attempts, ${Math.round(elapsed / 1000)}s elapsed)...`; } } else if (result.data && result.data.status === 'inactive') { console.log(`Stack ${stackName} is inactive, continuing to poll...`); + lastKnownStatus = 'inactive'; if (progressText) { - progressText.textContent = `Stack is starting up (attempt ${attempts})...`; + progressText.textContent = `Stack is starting up (${attempts} attempts, ${Math.round(elapsed / 1000)}s elapsed)...`; } + } else if (result.data && result.data.status === 'starting') { + console.log(`Stack ${stackName} exists and is starting up - continuing to next step`); + // Stack exists, we can continue - no need to wait for all services + if (progressBar && progressText) { + progressBar.style.width = '100%'; + progressBar.textContent = '100%'; + progressBar.classList.remove('progress-bar-animated'); + progressBar.classList.add('bg-success'); + progressText.textContent = 'Stack created successfully!'; + } + return { + success: true, + data: { + name: stackName, + id: result.data.stack_id, + status: 'starting' + } + }; } else { console.log(`Stack ${stackName} status unknown, continuing to poll...`); + lastKnownStatus = 'unknown'; if (progressText) { - progressText.textContent = `Checking stack status (attempt ${attempts})...`; + progressText.textContent = `Checking stack status (${attempts} attempts, ${Math.round(elapsed / 1000)}s elapsed)...`; } } } else if (response.status === 404) { console.log(`Stack ${stackName} not found yet, continuing to poll...`); + lastKnownStatus = 'not_found'; if (progressText) { - progressText.textContent = `Stack not found yet (attempt ${attempts})...`; + progressText.textContent = `Stack not found yet (${attempts} attempts, ${Math.round(elapsed / 1000)}s elapsed)...`; } } else { console.log(`Stack status check failed with status ${response.status}, continuing to poll...`); if (progressText) { - progressText.textContent = `Status check failed (attempt ${attempts})...`; + progressText.textContent = `Status check failed (${attempts} attempts, ${Math.round(elapsed / 1000)}s elapsed)...`; } } } catch (error) { console.error(`Error polling stack status (attempt ${attempts}):`, error); if (progressText) { - progressText.textContent = `Error checking status (attempt ${attempts})...`; + progressText.textContent = `Error checking status (${attempts} attempts, ${Math.round(elapsed / 1000)}s elapsed)...`; } } @@ -2738,10 +2828,17 @@ async function pollStackStatus(stackName, maxWaitTime = 10 * 60 * 1000) { progressBar.style.width = '100%'; progressBar.classList.remove('progress-bar-animated'); progressBar.classList.add('bg-warning'); - progressText.textContent = 'Stack deployment timed out'; + progressText.textContent = `Stack deployment timed out after ${Math.round(maxWaitTime / 1000)}s. Last known status: ${lastKnownStatus}`; } + + // Return a more informative error message + const statusMessage = lastKnownStatus !== 'unknown' ? ` (last known status: ${lastKnownStatus})` : ''; return { success: false, - error: `Stack deployment timed out after ${maxWaitTime / 1000} seconds. The stack may still be deploying.` + error: `Stack deployment timed out after ${Math.round(maxWaitTime / 1000)} seconds${statusMessage}. The stack may still be deploying in the background.`, + data: { + name: stackName, + status: lastKnownStatus + } }; } \ No newline at end of file