package org.apache.kylin.rest.job;

import com.google.common.base.Predicate;
import com.google.common.collect.Iterables;
import com.google.common.collect.Maps;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
import javax.annotation.Nullable;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.kylin.common.KylinConfig;
import org.apache.kylin.common.util.AbstractApplication;
import org.apache.kylin.common.util.CliCommandExecutor;
import org.apache.kylin.common.util.HadoopUtil;
import org.apache.kylin.common.util.HiveCmdBuilder;
import org.apache.kylin.common.util.OptionsHelper;
import org.apache.kylin.common.util.Pair;
import org.apache.kylin.cube.CubeInstance;
import org.apache.kylin.cube.CubeManager;
import org.apache.kylin.cube.CubeSegment;
import org.apache.kylin.engine.mr.JobBuilderSupport;
import org.apache.kylin.job.engine.JobEngineConfig;
import org.apache.kylin.job.execution.ExecutableManager;
import org.apache.kylin.job.execution.ExecutableState;
import org.apache.kylin.source.SourceManager;
import org.apache.kylin.storage.hbase.HBaseConnection;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/apache/kylin/rest/job/StorageCleanupJob.class */
/**
 * Command-line application that finds (and optionally deletes) storage that is no longer
 * referenced by any job or cube segment: intermediate Hive tables, HBase tables and
 * job working directories on HDFS.
 *
 * <p>By default the job performs a dry run and only logs what it would remove; actual deletion
 * happens only when {@link #setDelete(boolean)} (or the {@code delete} option) is {@code true}.
 * After {@link #cleanup()} the garbage that was found is available through the getters.
 */
public class StorageCleanupJob extends AbstractApplication {
    /** Name prefix of the Hive tables this job is allowed to consider as garbage. */
    private static final String INTERMEDIATE_TABLE_PREFIX = "kylin_intermediate_";

    /** Length of the textual UUID that is expected at the end of an intermediate table name. */
    private static final int UUID_LENGTH = 36;

    /** Lower-case, dash-separated UUID; compiled once because it is matched for every table. */
    private static final Pattern UUID_PATTERN =
            Pattern.compile("[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}");

    protected static final Option OPTION_DELETE = buildOption("delete", "Delete the unused storage");
    protected static final Option OPTION_FORCE =
            buildOption("force", "Warning: will delete all kylin intermediate hive tables");
    protected static final Logger logger = LoggerFactory.getLogger(StorageCleanupJob.class);

    protected final KylinConfig config;
    protected final FileSystem hbaseFs;
    protected final FileSystem defaultFs;
    protected final ExecutableManager executableManager;
    protected boolean delete = false;
    protected boolean force = false;
    private List<String> hiveGarbageTables = Collections.emptyList();
    private List<String> hbaseGarbageTables = Collections.emptyList();
    private List<String> hdfsGarbageFiles = Collections.emptyList();
    private long hdfsGarbageFileBytes = 0L;

    /**
     * Accumulates (file system, path) pairs that were identified as unused.
     * Backed by a {@link LinkedHashSet}, so duplicates are dropped and insertion order is kept.
     */
    public class UnusedHdfsFileCollector {
        LinkedHashSet<Pair<FileSystem, String>> list = new LinkedHashSet<>();

        protected UnusedHdfsFileCollector() {
        }

        /**
         * Registers a path as unused.
         *
         * @param fileSystem the file system the path lives on
         * @param str the path, as a string
         */
        public void add(FileSystem fileSystem, String str) {
            this.list.add(Pair.newPair(fileSystem, str));
        }
    }

    /**
     * Creates a job from the environment's Kylin configuration, the working file system and the
     * file system of the HBase cluster.
     *
     * @throws IOException if one of the file systems cannot be obtained
     */
    public StorageCleanupJob() throws IOException {
        this(KylinConfig.getInstanceFromEnv(), HadoopUtil.getWorkingFileSystem(),
                HBaseConnection.getFileSystemInHBaseCluster(KylinConfig.getInstanceFromEnv().getHdfsWorkingDirectory()));
    }

    /**
     * @param kylinConfig configuration used for all metadata and path lookups
     * @param fileSystem the default file system
     * @param fileSystem2 the file system of the HBase cluster
     */
    protected StorageCleanupJob(KylinConfig kylinConfig, FileSystem fileSystem, FileSystem fileSystem2) {
        this.config = kylinConfig;
        this.defaultFs = fileSystem;
        this.hbaseFs = fileSystem2;
        this.executableManager = ExecutableManager.getInstance(kylinConfig);
    }

    /**
     * Builds an optional command-line option that takes one argument.
     * {@link OptionBuilder} is a static, stateful builder, so its methods are called as statements.
     */
    private static Option buildOption(String name, String description) {
        OptionBuilder.withArgName(name);
        OptionBuilder.hasArg();
        OptionBuilder.isRequired(false);
        OptionBuilder.withDescription(description);
        return OptionBuilder.create(name);
    }

    /** @param z {@code true} to really delete garbage, {@code false} for a dry run */
    public void setDelete(boolean z) {
        this.delete = z;
    }

    /** @param z {@code true} to treat every intermediate Hive table as garbage without further checks */
    public void setForce(boolean z) {
        this.force = z;
    }

    /** @return the Hive tables identified as garbage by the last run (empty before any run) */
    public List<String> getHiveGarbageTables() {
        return this.hiveGarbageTables;
    }

    /** @return the HBase tables reported as garbage by the last run (empty before any run) */
    public List<String> getHbaseGarbageTables() {
        return this.hbaseGarbageTables;
    }

    /** @return the HDFS paths identified as garbage by the last run (empty before any run) */
    public List<String> getHdfsGarbageFiles() {
        return this.hdfsGarbageFiles;
    }

    /** @return the summed content length, in bytes, of the HDFS garbage found by the last run */
    public long getHdfsFileGarbageBytes() {
        return this.hdfsGarbageFileBytes;
    }

    protected Options getOptions() {
        Options options = new Options();
        options.addOption(OPTION_DELETE);
        options.addOption(OPTION_FORCE);
        return options;
    }

    /**
     * Reads the {@code delete} and {@code force} options (parsed with
     * {@link Boolean#parseBoolean(String)}, so anything but "true" means {@code false})
     * and runs {@link #cleanup()}.
     */
    protected void execute(OptionsHelper optionsHelper) throws Exception {
        String deleteValue = optionsHelper.getOptionValue(OPTION_DELETE);
        String forceValue = optionsHelper.getOptionValue(OPTION_FORCE);
        logger.info("options: '" + optionsHelper.getOptionsAsString() + "'");
        logger.info("delete option value: '" + deleteValue + "'");
        logger.info("force option value: '" + forceValue + "'");
        this.delete = Boolean.parseBoolean(deleteValue);
        this.force = Boolean.parseBoolean(forceValue);
        cleanup();
    }

    /**
     * Runs the three clean-up phases in order: intermediate Hive tables, HBase tables, HDFS files.
     *
     * @throws Exception if a phase fails with an error it does not handle itself
     */
    public void cleanup() throws Exception {
        cleanUnusedIntermediateHiveTable();
        cleanUnusedHBaseTables();
        cleanUnusedHdfsFiles();
    }

    /**
     * Delegates HBase clean-up to {@code StorageCleanJobHbaseUtil}, looked up reflectively by name.
     * Does nothing unless the storage URL scheme is {@code "hbase"} and the metadata URL scheme is
     * not the empty string. Any failure is logged and swallowed so the remaining phases still run.
     */
    protected void cleanUnusedHBaseTables() throws IOException {
        boolean hbaseStorage = "hbase".equals(this.config.getStorageUrl().getScheme());
        boolean emptyMetadataScheme = "".equals(this.config.getMetadataUrl().getScheme());
        if (!hbaseStorage || emptyMetadataScheme) {
            return;
        }
        try {
            Class<?> cls = Class.forName("org.apache.kylin.rest.job.StorageCleanJobHbaseUtil");
            // NOTE(review): the int argument (10) is presumably a timeout - confirm against the util class.
            @SuppressWarnings("unchecked")
            List<String> garbage = (List<String>) cls
                    .getDeclaredMethod("cleanUnusedHBaseTables", Boolean.TYPE, Integer.TYPE)
                    .invoke(cls, Boolean.valueOf(this.delete), 10);
            this.hbaseGarbageTables = garbage;
        } catch (Throwable th) {
            logger.error("Error during HBase clean up", th);
        }
    }

    /**
     * Collects the unused HDFS paths, sums up their size and deletes them (or only logs them in a
     * dry run). An {@link IOException} on a single path is logged and does not stop the loop.
     */
    private void cleanUnusedHdfsFiles() throws IOException {
        UnusedHdfsFileCollector collector = new UnusedHdfsFileCollector();
        collectUnusedHdfsFiles(collector);
        if (collector.list.isEmpty()) {
            logger.info("No HDFS files to clean up");
            return;
        }
        long totalBytes = 0;
        List<String> garbagePaths = new ArrayList<>();
        for (Pair<FileSystem, String> entry : collector.list) {
            FileSystem fs = entry.getKey();
            String pathStr = entry.getValue();
            try {
                // Recorded before touching the file system, so a path is reported even if sizing fails.
                garbagePaths.add(pathStr);
                ContentSummary summary = fs.getContentSummary(new Path(pathStr));
                if (summary != null) {
                    totalBytes += summary.getLength();
                }
                if (this.delete) {
                    logger.info("Deleting HDFS path " + pathStr);
                    fs.delete(new Path(pathStr), true);
                } else {
                    logger.info("Dry run, pending delete HDFS path " + pathStr);
                }
            } catch (IOException e) {
                logger.error("Error dealing unused HDFS path " + pathStr, e);
            }
        }
        this.hdfsGarbageFileBytes = totalBytes;
        this.hdfsGarbageFiles = garbagePaths;
    }

    /**
     * Fills the collector with unused job directories: from the HBase cluster file system when
     * {@code getHBaseClusterFs()} is configured (non-empty), and always from the default file system.
     */
    protected void collectUnusedHdfsFiles(UnusedHdfsFileCollector unusedHdfsFileCollector) throws IOException {
        if (StringUtils.isNotEmpty(this.config.getHBaseClusterFs())) {
            cleanUnusedHdfsFiles(this.hbaseFs, unusedHdfsFileCollector, true);
        }
        cleanUnusedHdfsFiles(this.defaultFs, unusedHdfsFileCollector, false);
    }

    /**
     * Lists the {@code kylin-*} entries directly under the HDFS working directory and adds to the
     * collector those that belong neither to a job in a non-final state nor to the last build job
     * of any cube segment.
     *
     * @param fileSystem file system to scan
     * @param unusedHdfsFileCollector receives the unused paths
     * @param z {@code true} when paths must be qualified against the HBase cluster
     */
    private void cleanUnusedHdfsFiles(FileSystem fileSystem, UnusedHdfsFileCollector unusedHdfsFileCollector, boolean z) throws IOException {
        JobEngineConfig engineConfig = new JobEngineConfig(this.config);
        CubeManager cubeManager = CubeManager.getInstance(this.config);

        // Start with every "kylin-" entry as a deletion candidate, then strike out the ones in use.
        List<String> candidates = new ArrayList<>();
        try {
            Path workingDir = Path.getPathWithoutSchemeAndAuthority(new Path(this.config.getHdfsWorkingDirectory()));
            FileStatus[] children = fileSystem.listStatus(workingDir);
            if (children != null) {
                for (FileStatus child : children) {
                    if (child.getPath().getName().startsWith("kylin-")) {
                        candidates.add(child.getPath().toString());
                    }
                }
            }
        } catch (FileNotFoundException e) {
            logger.error("Working Directory does not exist on HDFS.", e);
        }

        // Directories of jobs that have not reached a final state must be kept.
        for (String jobId : this.executableManager.getAllJobIds()) {
            ExecutableState state = this.executableManager.getOutput(jobId).getState();
            if (state.isFinalState()) {
                continue;
            }
            String qualified = qualifyJobWorkingDir(
                    JobBuilderSupport.getJobWorkingDir(engineConfig.getHdfsWorkingDirectory(), jobId), z);
            candidates.remove(qualified);
            logger.info("Skip " + qualified + " from deletion list, as the path belongs to job " + jobId + " with status " + state);
        }

        // Directories of the last build job of each existing segment must be kept as well.
        for (CubeInstance cube : cubeManager.reloadAndListAllCubes()) {
            for (CubeSegment segment : cube.getSegments()) {
                String lastBuildJobId = segment.getLastBuildJobID();
                if (lastBuildJobId == null || lastBuildJobId.equals("")) {
                    continue;
                }
                String qualified = qualifyJobWorkingDir(
                        JobBuilderSupport.getJobWorkingDir(engineConfig.getHdfsWorkingDirectory(), lastBuildJobId), z);
                candidates.remove(qualified);
                logger.info("Skip " + qualified + " from deletion list, as the path belongs to segment " + segment + " of cube " + cube.getName());
            }
        }

        for (String unused : candidates) {
            unusedHdfsFileCollector.add(fileSystem, unused);
        }
    }

    /** Qualifies a job working directory either against the HBase cluster or its own file system. */
    private String qualifyJobWorkingDir(String jobWorkingDir, boolean inHBaseCluster) throws IOException {
        if (inHBaseCluster) {
            return HBaseConnection.makeQualifiedPathInHBaseCluster(jobWorkingDir);
        }
        Path bare = Path.getPathWithoutSchemeAndAuthority(new Path(jobWorkingDir));
        return HadoopUtil.getFileSystem(jobWorkingDir).makeQualified(bare).toString();
    }

    /**
     * Runs the Hive table clean-up, tolerating a missing Hive installation: a
     * {@link NoClassDefFoundError} whose message mentions {@code HiveConf} is logged and ignored,
     * any other one is rethrown.
     */
    private void cleanUnusedIntermediateHiveTable() throws Exception {
        try {
            cleanUnusedIntermediateHiveTableInternal();
        } catch (NoClassDefFoundError e) {
            // getMessage() may be null; in that case this is not the "no Hive" situation, so rethrow
            // the original error instead of failing with a NullPointerException.
            String message = e.getMessage();
            if (message == null || !message.contains("HiveConf")) {
                throw e;
            }
            logger.info("Skip cleanup of tntermediate Hive table, seems no Hive on classpath");
        }
    }

    /**
     * Determines which intermediate Hive tables are garbage, stores them in
     * {@code hiveGarbageTables}, and drops them when deletion is enabled (otherwise only logs them).
     */
    private void cleanUnusedIntermediateHiveTableInternal() throws Exception {
        Iterable<String> intermediateTables = Iterables.filter(getHiveTables(), new Predicate<String>() {
            @Override
            public boolean apply(@Nullable String str) {
                return str != null && str.startsWith(INTERMEDIATE_TABLE_PREFIX);
            }
        });

        List<String> allJobIds = this.executableManager.getAllJobIds();
        List<String> workingJobIds = new ArrayList<>();
        Map<String, String> segmentId2JobId = Maps.newHashMap();
        for (String jobId : allJobIds) {
            if (!this.executableManager.getOutput(jobId).getState().isFinalState()) {
                workingJobIds.add(jobId);
            }
            try {
                String segmentId = getSegmentIdFromJobId(jobId);
                if (segmentId != null) {
                    segmentId2JobId.put(segmentId, jobId);
                }
            } catch (Exception e) {
                logger.warn("Failed to find segment ID from job ID " + jobId + ", ignore it");
            }
        }
        logger.debug("Working jobIDs: " + workingJobIds);

        List<String> garbageTables = new ArrayList<>();
        for (String tableName : intermediateTables) {
            logger.debug("Checking if table is garbage -- " + tableName);
            if (isGarbageTable(tableName, allJobIds, workingJobIds)) {
                garbageTables.add(tableName);
            }
        }

        this.hiveGarbageTables = garbageTables;
        if (garbageTables.isEmpty()) {
            logger.info("No Hive tables to clean up");
            return;
        }
        if (!this.delete) {
            for (String tableName : garbageTables) {
                logger.info("Dry run, pending delete Hive table " + tableName);
            }
            return;
        }
        try {
            deleteHiveTables(garbageTables, segmentId2JobId);
        } catch (IOException e) {
            logger.error("Error during deleting Hive tables", e);
        }
    }

    /**
     * Decides whether one intermediate table may be removed.
     * In force mode every table qualifies. Otherwise the name must end in a UUID; if that UUID is a
     * known job ID the table is garbage only when the job is not working, and if it is not a job ID
     * the table is garbage unless a working job references it as its segment ID.
     */
    private boolean isGarbageTable(String tableName, List<String> allJobIds, List<String> workingJobIds) {
        if (this.force) {
            logger.debug("Force include table " + tableName);
            return true;
        }
        if (tableName.length() < INTERMEDIATE_TABLE_PREFIX.length() + UUID_LENGTH) {
            logger.debug("Skip table because length is not qualified, " + tableName);
            return false;
        }
        String uuid = uuidSuffixOf(tableName);
        if (!UUID_PATTERN.matcher(uuid).matches()) {
            logger.debug("Skip table because pattern doesn't match, " + tableName);
            return false;
        }
        if (allJobIds.contains(uuid)) {
            return !workingJobIds.contains(uuid);
        }
        if (isTableInUse(uuid, workingJobIds)) {
            logger.debug("Skip table because the table is in use, " + tableName);
            return false;
        }
        return true;
    }

    /**
     * Returns the last {@value #UUID_LENGTH} characters of the table name with {@code '_'} replaced
     * by {@code '-'}, or {@code null} when the name is too short to carry such a suffix.
     */
    private static String uuidSuffixOf(String tableName) {
        if (tableName.length() < UUID_LENGTH) {
            return null;
        }
        return tableName.substring(tableName.length() - UUID_LENGTH).replace("_", "-");
    }

    /** @return the tables of the configured intermediate-table Hive database */
    protected List<String> getHiveTables() throws Exception {
        return SourceManager.getDefaultSource().getSourceMetadataExplorer().listTables(this.config.getHiveDatabaseForIntermediateTable());
    }

    /** @return the command executor used to run the generated Hive command */
    protected CliCommandExecutor getCliCommandExecutor() throws IOException {
        return this.config.getCliCommandExecutor();
    }

    /**
     * Drops the given Hive tables in one Hive command and then removes each table's external
     * directory under the working directory of the job mapped to the table's segment ID.
     *
     * @param list tables to drop
     * @param map segment ID to job ID
     * @throws IOException if the Hive command or a file system operation fails
     */
    private void deleteHiveTables(List<String> list, Map<String, String> map) throws IOException {
        JobEngineConfig engineConfig = new JobEngineConfig(this.config);
        HiveCmdBuilder hiveCmdBuilder = new HiveCmdBuilder();
        hiveCmdBuilder.addStatement("USE " + this.config.getHiveDatabaseForIntermediateTable() + ";");
        for (String tableName : list) {
            hiveCmdBuilder.addStatement("drop table if exists " + tableName + "; ");
            logger.info("Deleting Hive table " + tableName);
        }
        getCliCommandExecutor().execute(hiveCmdBuilder.build());

        for (String tableName : list) {
            // In force mode the list may contain names shorter than a UUID; those have no segment ID
            // to look up and must not be fed to substring().
            String segmentId = uuidSuffixOf(tableName);
            if (segmentId == null || !map.containsKey(segmentId)) {
                logger.warn("Hive table {}'s job ID not found, segmentId2JobId: {}", tableName, map.toString());
                continue;
            }
            String externalPath = JobBuilderSupport.getJobWorkingDir(engineConfig.getHdfsWorkingDirectory(), map.get(segmentId)) + "/" + tableName;
            Path path = new Path(externalPath);
            if (this.defaultFs.exists(path)) {
                this.defaultFs.delete(path, true);
                logger.info("Hive table {}'s external path {} deleted", tableName, externalPath);
            } else {
                logger.info("Hive table {}'s external path {} not exist. It's normal if kylin.source.hive.keep-flat-table set false (By default)", tableName, externalPath);
            }
        }
    }

    /** @return the {@code segmentId} parameter of the given job */
    private String getSegmentIdFromJobId(String str) {
        return this.executableManager.getJob(str).getParam("segmentId");
    }

    /**
     * @param str segment ID to look for
     * @param list job IDs to inspect
     * @return {@code true} if any of the jobs has the given segment ID
     */
    private boolean isTableInUse(String str, List<String> list) {
        for (String jobId : list) {
            if (str.equals(getSegmentIdFromJobId(jobId))) {
                return true;
            }
        }
        return false;
    }
}
