package org.apache.dolphinscheduler.server.master.service;

import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;
import javax.annotation.Nullable;
import lombok.Generated;
import lombok.NonNull;
import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.time.StopWatch;
import org.apache.dolphinscheduler.common.enums.Flag;
import org.apache.dolphinscheduler.common.enums.StateEventType;
import org.apache.dolphinscheduler.common.model.Server;
import org.apache.dolphinscheduler.dao.entity.ProcessInstance;
import org.apache.dolphinscheduler.dao.entity.TaskInstance;
import org.apache.dolphinscheduler.dao.repository.TaskInstanceDao;
import org.apache.dolphinscheduler.plugin.task.api.TaskExecutionContext;
import org.apache.dolphinscheduler.plugin.task.api.enums.TaskExecutionStatus;
import org.apache.dolphinscheduler.plugin.task.api.utils.LogUtils;
import org.apache.dolphinscheduler.registry.api.RegistryClient;
import org.apache.dolphinscheduler.registry.api.enums.RegistryNodeType;
import org.apache.dolphinscheduler.server.master.builder.TaskExecutionContextBuilder;
import org.apache.dolphinscheduler.server.master.cache.ProcessInstanceExecCacheManager;
import org.apache.dolphinscheduler.server.master.config.MasterConfig;
import org.apache.dolphinscheduler.server.master.event.TaskStateEvent;
import org.apache.dolphinscheduler.server.master.metrics.TaskMetrics;
import org.apache.dolphinscheduler.server.master.runner.WorkflowExecuteRunnable;
import org.apache.dolphinscheduler.server.master.runner.WorkflowExecuteThreadPool;
import org.apache.dolphinscheduler.server.master.utils.TaskUtils;
import org.apache.dolphinscheduler.service.log.LogClient;
import org.apache.dolphinscheduler.service.process.ProcessService;
import org.apache.dolphinscheduler.service.utils.ProcessUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Service;

@Service
/* loaded from: input_file:org/apache/dolphinscheduler/server/master/service/WorkerFailoverService.class */
public class WorkerFailoverService {

    @Generated
    private static final Logger log = LoggerFactory.getLogger(WorkerFailoverService.class);
    private final RegistryClient registryClient;
    private final MasterConfig masterConfig;
    private final ProcessService processService;
    private final WorkflowExecuteThreadPool workflowExecuteThreadPool;
    private final ProcessInstanceExecCacheManager cacheManager;
    private final LogClient logClient;
    private final String localAddress;
    private final TaskInstanceDao taskInstanceDao;

    public WorkerFailoverService(@NonNull RegistryClient registryClient, @NonNull MasterConfig masterConfig, @NonNull ProcessService processService, @NonNull WorkflowExecuteThreadPool workflowExecuteThreadPool, @NonNull ProcessInstanceExecCacheManager processInstanceExecCacheManager, @NonNull LogClient logClient, @NonNull TaskInstanceDao taskInstanceDao) {
        if (registryClient == null) {
            throw new NullPointerException("registryClient is marked non-null but is null");
        }
        if (masterConfig == null) {
            throw new NullPointerException("masterConfig is marked non-null but is null");
        }
        if (processService == null) {
            throw new NullPointerException("processService is marked non-null but is null");
        }
        if (workflowExecuteThreadPool == null) {
            throw new NullPointerException("workflowExecuteThreadPool is marked non-null but is null");
        }
        if (processInstanceExecCacheManager == null) {
            throw new NullPointerException("cacheManager is marked non-null but is null");
        }
        if (logClient == null) {
            throw new NullPointerException("logClient is marked non-null but is null");
        }
        if (taskInstanceDao == null) {
            throw new NullPointerException("taskInstanceDao is marked non-null but is null");
        }
        this.registryClient = registryClient;
        this.masterConfig = masterConfig;
        this.processService = processService;
        this.workflowExecuteThreadPool = workflowExecuteThreadPool;
        this.cacheManager = processInstanceExecCacheManager;
        this.logClient = logClient;
        this.localAddress = masterConfig.getMasterAddress();
        this.taskInstanceDao = taskInstanceDao;
    }

    public void failoverWorker(@NonNull String str) {
        ProcessInstance processInstance;
        if (str == null) {
            throw new NullPointerException("workerHost is marked non-null but is null");
        }
        log.info("Worker[{}] failover starting", str);
        StopWatch createStarted = StopWatch.createStarted();
        Optional<Date> serverStartupTime = getServerStartupTime(this.registryClient.getServerList(RegistryNodeType.WORKER), str);
        List<TaskInstance> needFailoverTaskInstance = getNeedFailoverTaskInstance(str);
        if (CollectionUtils.isEmpty(needFailoverTaskInstance)) {
            log.info("Worker[{}] failover finished there are no taskInstance need to failover", str);
            return;
        }
        log.info("Worker[{}] failover there are {} taskInstance may need to failover, will do a deep check, taskInstanceIds: {}", new Object[]{str, Integer.valueOf(needFailoverTaskInstance.size()), needFailoverTaskInstance.stream().map((v0) -> {
            return v0.getId();
        }).collect(Collectors.toList())});
        HashMap hashMap = new HashMap();
        for (TaskInstance taskInstance : needFailoverTaskInstance) {
            LogUtils.MDCAutoClosableContext workflowAndTaskInstanceIDMDC = LogUtils.setWorkflowAndTaskInstanceIDMDC(Integer.valueOf(taskInstance.getProcessInstanceId()), taskInstance.getId());
            Throwable th = null;
            try {
                try {
                    try {
                        processInstance = (ProcessInstance) hashMap.computeIfAbsent(Integer.valueOf(taskInstance.getProcessInstanceId()), num -> {
                            WorkflowExecuteRunnable byProcessInstanceId = this.cacheManager.getByProcessInstanceId(taskInstance.getProcessInstanceId());
                            if (byProcessInstanceId == null) {
                                return null;
                            }
                            return byProcessInstanceId.getWorkflowExecuteContext().getWorkflowInstance();
                        });
                    } finally {
                    }
                } catch (Exception e) {
                    log.info("Worker[{}] failover taskInstance occur exception", str, e);
                }
                if (checkTaskInstanceNeedFailover(serverStartupTime, processInstance, taskInstance)) {
                    log.info("Worker[{}] failover: begin to failover taskInstance, will set the status to NEED_FAULT_TOLERANCE", str);
                    failoverTaskInstance(processInstance, taskInstance);
                    log.info("Worker[{}] failover: Finish failover taskInstance", str);
                    if (workflowAndTaskInstanceIDMDC != null) {
                        if (0 != 0) {
                            try {
                                workflowAndTaskInstanceIDMDC.close();
                            } catch (Throwable th2) {
                                th.addSuppressed(th2);
                            }
                        } else {
                            workflowAndTaskInstanceIDMDC.close();
                        }
                    }
                } else {
                    log.info("Worker[{}] the current taskInstance doesn't need to failover", str);
                    if (workflowAndTaskInstanceIDMDC != null) {
                        if (0 != 0) {
                            try {
                                workflowAndTaskInstanceIDMDC.close();
                            } catch (Throwable th3) {
                                th.addSuppressed(th3);
                            }
                        } else {
                            workflowAndTaskInstanceIDMDC.close();
                        }
                    }
                }
            } catch (Throwable th4) {
                if (workflowAndTaskInstanceIDMDC != null) {
                    if (th != null) {
                        try {
                            workflowAndTaskInstanceIDMDC.close();
                        } catch (Throwable th5) {
                            th.addSuppressed(th5);
                        }
                    } else {
                        workflowAndTaskInstanceIDMDC.close();
                    }
                }
                throw th4;
            }
        }
        createStarted.stop();
        log.info("Worker[{}] failover finished, useTime:{}ms", str, Long.valueOf(createStarted.getTime(TimeUnit.MILLISECONDS)));
    }

    private void failoverTaskInstance(@NonNull ProcessInstance processInstance, @NonNull TaskInstance taskInstance) {
        if (processInstance == null) {
            throw new NullPointerException("processInstance is marked non-null but is null");
        }
        if (taskInstance == null) {
            throw new NullPointerException("taskInstance is marked non-null but is null");
        }
        TaskMetrics.incTaskInstanceByState("failover");
        taskInstance.setProcessInstance(processInstance);
        if (TaskUtils.isMasterTask(taskInstance.getTaskType())) {
            log.info("The failover taskInstance is a master task, no need to failover in worker failover");
        } else {
            log.info("The failover taskInstance is not master task");
            TaskExecutionContext create = TaskExecutionContextBuilder.get().buildWorkflowInstanceHost(this.masterConfig.getMasterAddress()).buildTaskInstanceRelatedInfo(taskInstance).buildProcessInstanceRelatedInfo(processInstance).buildProcessDefinitionRelatedInfo(processInstance.getProcessDefinition()).create();
            if (this.masterConfig.isKillApplicationWhenTaskFailover()) {
                log.info("TaskInstance failover begin kill the task related yarn or k8s job");
                ProcessUtils.killApplication(this.logClient, create);
            }
        }
        taskInstance.setState(TaskExecutionStatus.NEED_FAULT_TOLERANCE);
        taskInstance.setFlag(Flag.NO);
        this.taskInstanceDao.upsertTaskInstance(taskInstance);
        this.workflowExecuteThreadPool.submitStateEvent(TaskStateEvent.builder().processInstanceId(processInstance.getId().intValue()).taskInstanceId(taskInstance.getId()).status(TaskExecutionStatus.NEED_FAULT_TOLERANCE).type(StateEventType.TASK_STATE_CHANGE).build());
    }

    private boolean checkTaskInstanceNeedFailover(Optional<Date> optional, @Nullable ProcessInstance processInstance, TaskInstance taskInstance) {
        if (processInstance == null) {
            log.error("Failover task instance error, cannot find the related processInstance form memory, this case shouldn't happened");
            return false;
        }
        if (taskInstance == null) {
            log.error("Master failover task instance error, taskInstance is null, this case shouldn't happened");
            return false;
        }
        if (!StringUtils.equalsIgnoreCase(processInstance.getHost(), this.localAddress)) {
            log.error("Master failover task instance error, the taskInstance's processInstance's host: {} is not the current master: {}", processInstance.getHost(), this.localAddress);
            return false;
        }
        if (taskInstance.getState() != null && taskInstance.getState().isFinished()) {
            log.info("The task is already finished, doesn't need to failover");
            return false;
        }
        if (!optional.isPresent() || taskInstance.getSubmitTime() == null || !taskInstance.getSubmitTime().after(optional.get())) {
            return true;
        }
        log.info("The taskInstance's submitTime: {} is after the need failover worker's start time: {}, the taskInstance is newly submit, it doesn't need to failover", taskInstance.getSubmitTime(), optional.get());
        return false;
    }

    private List<TaskInstance> getNeedFailoverTaskInstance(@NonNull String str) {
        if (str == null) {
            throw new NullPointerException("failoverWorkerHost is marked non-null but is null");
        }
        return (List) this.cacheManager.getAll().stream().flatMap(workflowExecuteRunnable -> {
            return workflowExecuteRunnable.getAllTaskInstances().stream();
        }).filter(taskInstance -> {
            return str.equals(taskInstance.getHost()) && taskInstance.getState().shouldFailover();
        }).collect(Collectors.toList());
    }

    private Optional<Date> getServerStartupTime(List<Server> list, String str) {
        if (CollectionUtils.isEmpty(list)) {
            return Optional.empty();
        }
        Date date = null;
        Iterator<Server> it = list.iterator();
        while (true) {
            if (!it.hasNext()) {
                break;
            }
            Server next = it.next();
            if (str.equals(next.getHost() + ":" + next.getPort())) {
                date = next.getCreateTime();
                break;
            }
        }
        return Optional.ofNullable(date);
    }
}
