/*
 * Decompiled with CFR 0.152.
 */
package org.apache.kylin.engine.spark;

import java.util.HashMap;
import java.util.Objects;
import org.apache.kylin.common.KylinConfig;
import org.apache.kylin.common.StorageURL;
import org.apache.kylin.common.util.StringUtil;
import org.apache.kylin.cube.CubeSegment;
import org.apache.kylin.engine.EngineFactory;
import org.apache.kylin.engine.mr.CubingJob;
import org.apache.kylin.engine.mr.JobBuilderSupport;
import org.apache.kylin.engine.mr.LookupMaterializeContext;
import org.apache.kylin.engine.spark.ISparkInput;
import org.apache.kylin.engine.spark.ISparkOutput;
import org.apache.kylin.engine.spark.SparkBuildDictionary;
import org.apache.kylin.engine.spark.SparkCubingByLayer;
import org.apache.kylin.engine.spark.SparkExecutable;
import org.apache.kylin.engine.spark.SparkExecutableFactory;
import org.apache.kylin.engine.spark.SparkFactDistinct;
import org.apache.kylin.engine.spark.SparkUHCDictionary;
import org.apache.kylin.engine.spark.SparkUtil;
import org.apache.kylin.job.JoinedFlatTable;
import org.apache.kylin.metadata.model.IJoinedFlatTableDesc;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Assembles the {@link CubingJob} that builds a cube segment, using Spark executables for the
 * layered cubing step and, depending on configuration, for the fact-distinct, UHC dictionary
 * and dimension dictionary steps.
 *
 * <p>Input-side and output-side steps are delegated to the {@link ISparkInput} /
 * {@link ISparkOutput} batch cubing sides resolved from the segment via {@link SparkUtil}.
 */
public class SparkBatchCubingJobBuilder2
extends JobBuilderSupport {
    private static final Logger logger = LoggerFactory.getLogger(SparkBatchCubingJobBuilder2.class);
    private final ISparkInput.ISparkBatchCubingInputSide inputSide;
    private final ISparkOutput.ISparkBatchCubingOutputSide outputSide;

    /**
     * Creates a builder with a priority offset of {@code 0}.
     *
     * @param newSegment the segment to build
     * @param submitter  the submitter passed on to the created job
     */
    public SparkBatchCubingJobBuilder2(CubeSegment newSegment, String submitter) {
        this(newSegment, submitter, 0);
    }

    /**
     * Creates a builder and resolves the batch cubing input/output sides for the segment.
     *
     * @param newSegment     the segment to build
     * @param submitter      the submitter passed on to the created job
     * @param priorityOffset the offset later applied to the job and to each of its tasks
     */
    public SparkBatchCubingJobBuilder2(CubeSegment newSegment, String submitter, Integer priorityOffset) {
        super(newSegment, submitter, priorityOffset);
        this.inputSide = SparkUtil.getBatchCubingInputSide(this.seg);
        this.outputSide = SparkUtil.getBatchCubingOutputSide(this.seg);
    }

    /**
     * Creates the build job for the segment and appends all of its steps.
     *
     * <p>The order in which tasks are added below is the order of the steps in the job,
     * so statements here must not be rearranged.
     *
     * @return the fully assembled cubing job, with priorities applied to the job and its tasks
     */
    public CubingJob build() {
        logger.info("Spark new job to BUILD segment {}", this.seg);
        CubingJob result = CubingJob.createBuildJob(this.seg, this.submitter, this.config);
        String jobId = result.getId();
        String cuboidRootPath = this.getCuboidRootPath(jobId);

        // Phase 1: flat table, followed by the optional Hive global dictionary steps.
        this.inputSide.addStepPhase1_CreateFlatTable(result);
        this.buildHiveGlobalDictionaryByMR(result, jobId);

        // Fact distinct columns: Spark step when enabled in the segment config, default step otherwise.
        if (this.seg.getConfig().isSparkFactDistinctEnable()) {
            result.addTask(this.createFactDistinctColumnsSparkStep(jobId));
        } else {
            result.addTask(this.createFactDistinctColumnsStep(jobId));
        }

        // UHC dictionary: optional; the non-Spark step wins when both switches are on.
        if (this.isEnableUHCDictStep()) {
            result.addTask(this.createBuildUHCDictStep(jobId));
        } else if (this.isEnableUHCDictSparkStep()) {
            result.addTask(this.createBuildUHCDictSparkStep(jobId));
        }

        // Dimension dictionary: Spark step when enabled, default step otherwise.
        if (this.isEnabledSparkDimensionDictionary()) {
            result.addTask(this.createBuildDictionarySparkStep(jobId));
        } else {
            result.addTask(this.createBuildDictionaryStep(jobId));
        }

        result.addTask(this.createSaveStatisticsStep(jobId));
        LookupMaterializeContext lookupMaterializeContext = this.addMaterializeLookupTableSteps(result);
        this.outputSide.addStepPhase2_BuildDictionary(result);

        // Phase 3: the Spark cubing step itself.
        this.addLayerCubingSteps(result, jobId, cuboidRootPath);
        this.outputSide.addStepPhase3_BuildCube(result);

        // Phase 4: update cube info, then let both sides clean up.
        result.addTask(this.createUpdateCubeInfoAfterBuildStep(jobId, lookupMaterializeContext));
        this.inputSide.addStepPhase4_Cleanup(result);
        this.outputSide.addStepPhase4_Cleanup(result);

        result.setPriorityBasedOnPriorityOffset(this.priorityOffset);
        result.getTasks().forEach(task -> task.setPriorityBasedOnPriorityOffset(this.priorityOffset));
        return result;
    }

    /**
     * Creates the Spark step that runs {@link SparkFactDistinct} over the segment's flat table.
     *
     * @param jobId id of the cubing job the step belongs to
     * @return the configured Spark executable
     */
    public SparkExecutable createFactDistinctColumnsSparkStep(String jobId) {
        KylinConfig segConfig = this.seg.getConfig();
        SparkExecutable sparkExecutable = SparkExecutableFactory.instance(segConfig);
        IJoinedFlatTableDesc flatTableDesc = EngineFactory.getJoinedFlatTableDesc(this.seg);
        String tablePath = JoinedFlatTable.getTableDir(flatTableDesc, this.getJobWorkingDir(jobId));
        sparkExecutable.setClassName(SparkFactDistinct.class.getName());
        sparkExecutable.setParam(SparkFactDistinct.OPTION_CUBE_NAME.getOpt(), this.seg.getRealization().getName());
        sparkExecutable.setParam(SparkFactDistinct.OPTION_META_URL.getOpt(), this.getSegmentMetadataUrl(this.seg.getConfig(), jobId));
        sparkExecutable.setParam(SparkFactDistinct.OPTION_INPUT_TABLE.getOpt(), this.seg.getConfig().getHiveDatabaseForIntermediateTable() + "." + flatTableDesc.getTableName());
        sparkExecutable.setParam(SparkFactDistinct.OPTION_INPUT_PATH.getOpt(), tablePath);
        sparkExecutable.setParam(SparkFactDistinct.OPTION_OUTPUT_PATH.getOpt(), this.getFactDistinctColumnsPath(jobId));
        sparkExecutable.setParam(SparkFactDistinct.OPTION_SEGMENT_ID.getOpt(), this.seg.getUuid());
        sparkExecutable.setParam(SparkFactDistinct.OPTION_STATS_SAMPLING_PERCENT.getOpt(), String.valueOf(this.config.getConfig().getCubingInMemSamplingPercent()));
        sparkExecutable.setJobId(jobId);
        sparkExecutable.setName("Extract Fact Table Distinct Columns:" + this.seg.toString());
        sparkExecutable.setCounterSaveAs("sourceRecordCount,sourceSizeBytes", this.getCounterOutputPath(jobId));
        this.attachSparkAdditionalJars(sparkExecutable, this.seg.getConfig());
        return sparkExecutable;
    }

    /**
     * Creates the Spark step that runs {@link SparkUHCDictionary}, reading from the
     * fact-distinct-columns path and writing to the dictionary root path of the job.
     *
     * @param jobId id of the cubing job the step belongs to
     * @return the configured Spark executable
     */
    public SparkExecutable createBuildUHCDictSparkStep(String jobId) {
        SparkExecutable sparkExecutable = SparkExecutableFactory.instance(this.seg.getConfig());
        sparkExecutable.setClassName(SparkUHCDictionary.class.getName());
        sparkExecutable.setParam(SparkUHCDictionary.OPTION_CUBE_NAME.getOpt(), this.seg.getRealization().getName());
        sparkExecutable.setParam(SparkUHCDictionary.OPTION_META_URL.getOpt(), this.getSegmentMetadataUrl(this.seg.getConfig(), jobId));
        sparkExecutable.setParam(SparkUHCDictionary.OPTION_INPUT_PATH.getOpt(), this.getFactDistinctColumnsPath(jobId));
        sparkExecutable.setParam(SparkUHCDictionary.OPTION_OUTPUT_PATH.getOpt(), this.getDictRootPath(jobId));
        sparkExecutable.setParam(SparkUHCDictionary.OPTION_CUBING_JOB_ID.getOpt(), jobId);
        sparkExecutable.setParam(SparkUHCDictionary.OPTION_SEGMENT_ID.getOpt(), this.seg.getUuid());
        sparkExecutable.setJobId(jobId);
        sparkExecutable.setName("Build UHC Dictionary with spark");
        sparkExecutable.setCounterSaveAs("sourceRecordCount,sourceSizeBytes", this.getCounterOutputPath(jobId));
        this.attachSparkAdditionalJars(sparkExecutable, this.seg.getConfig());
        return sparkExecutable;
    }

    /**
     * Creates the Spark step that runs {@link SparkBuildDictionary}, reading from the
     * fact-distinct-columns path and using the dictionary root path of the job.
     *
     * @param jobId id of the cubing job the step belongs to
     * @return the configured Spark executable
     */
    public SparkExecutable createBuildDictionarySparkStep(String jobId) {
        SparkExecutable sparkExecutable = SparkExecutableFactory.instance(this.seg.getConfig());
        sparkExecutable.setClassName(SparkBuildDictionary.class.getName());
        sparkExecutable.setParam(SparkBuildDictionary.OPTION_META_URL.getOpt(), this.getSegmentMetadataUrl(this.seg.getConfig(), jobId));
        sparkExecutable.setParam(SparkBuildDictionary.OPTION_CUBE_NAME.getOpt(), this.seg.getRealization().getName());
        sparkExecutable.setParam(SparkBuildDictionary.OPTION_INPUT_PATH.getOpt(), this.getFactDistinctColumnsPath(jobId));
        sparkExecutable.setParam(SparkBuildDictionary.OPTION_DICT_PATH.getOpt(), this.getDictRootPath(jobId));
        sparkExecutable.setParam(SparkBuildDictionary.OPTION_SEGMENT_ID.getOpt(), this.seg.getUuid());
        sparkExecutable.setParam(SparkBuildDictionary.OPTION_CUBING_JOB_ID.getOpt(), jobId);
        sparkExecutable.setJobId(jobId);
        sparkExecutable.setName("Build Dimension Dictionary with Spark");
        sparkExecutable.setCounterSaveAs("sourceSizeBytes", this.getCounterOutputPath(jobId));
        this.attachSparkAdditionalJars(sparkExecutable, this.seg.getConfig());
        return sparkExecutable;
    }

    /**
     * Adds the {@link SparkCubingByLayer} step to the job.
     *
     * @param result         the job to append the step to
     * @param jobId          id of the cubing job
     * @param cuboidRootPath path passed to the step as its output path
     */
    protected void addLayerCubingSteps(CubingJob result, String jobId, String cuboidRootPath) {
        SparkExecutable sparkExecutable = SparkExecutableFactory.instance(this.seg.getConfig());
        sparkExecutable.setClassName(SparkCubingByLayer.class.getName());
        this.configureSparkJob(this.seg, sparkExecutable, jobId, cuboidRootPath);
        result.addTask(sparkExecutable);
    }

    /**
     * Fills in the {@link SparkCubingByLayer} parameters, job id, jars and name on the given
     * executable. The class name is not set here; the caller is expected to have set it.
     *
     * @param seg             the segment being built
     * @param sparkExecutable the executable to configure
     * @param jobId           id of the cubing job
     * @param cuboidRootPath  path passed to the step as its output path
     */
    public void configureSparkJob(CubeSegment seg, SparkExecutable sparkExecutable, String jobId, String cuboidRootPath) {
        IJoinedFlatTableDesc flatTableDesc = EngineFactory.getJoinedFlatTableDesc(seg);
        String tablePath = JoinedFlatTable.getTableDir(flatTableDesc, this.getJobWorkingDir(jobId));
        sparkExecutable.setParam(SparkCubingByLayer.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
        sparkExecutable.setParam(SparkCubingByLayer.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
        sparkExecutable.setParam(SparkCubingByLayer.OPTION_INPUT_TABLE.getOpt(), seg.getConfig().getHiveDatabaseForIntermediateTable() + "." + flatTableDesc.getTableName());
        sparkExecutable.setParam(SparkCubingByLayer.OPTION_INPUT_PATH.getOpt(), tablePath);
        sparkExecutable.setParam(SparkCubingByLayer.OPTION_META_URL.getOpt(), this.getSegmentMetadataUrl(seg.getConfig(), jobId));
        sparkExecutable.setParam(SparkCubingByLayer.OPTION_OUTPUT_PATH.getOpt(), cuboidRootPath);
        // The shrunken dictionary input is only passed when the cube descriptor enables it.
        if (seg.getCubeDesc().isShrunkenDictFromGlobalEnabled()) {
            sparkExecutable.setParam(SparkCubingByLayer.OPTION_SHRUNK_INPUT_PATH.getOpt(), this.getShrunkenDictionaryPath(jobId));
        }
        sparkExecutable.setJobId(jobId);
        this.attachSparkAdditionalJars(sparkExecutable, seg.getConfig());
        sparkExecutable.setName("Build Cube with Spark:" + seg.toString());
    }

    /**
     * Builds the metadata URL handed to the Spark steps: a {@link StorageURL} created from the
     * identifier of the configured metadata URL, the literal {@code "hdfs"}, and a single
     * {@code path} parameter set to the job's dump-metadata path.
     *
     * @param kylinConfig config whose metadata URL identifier is reused
     * @param jobId       id of the cubing job
     * @return the string form of the resulting storage URL
     */
    @Override
    public String getSegmentMetadataUrl(KylinConfig kylinConfig, String jobId) {
        HashMap<String, String> param = new HashMap<>();
        param.put("path", this.getDumpMetadataPath(jobId));
        return new StorageURL(kylinConfig.getMetadataUrl().getIdentifier(), "hdfs", param).toString();
    }

    /**
     * Adds the Hive global dictionary steps when the segment config lists columns for them.
     *
     * <p>The part-build and total-build tasks are added when the "exclude ref columns" list is
     * configured; the flat-table column replacement step is added when the dictionary column
     * list is configured. The two conditions are independent.
     *
     * @param result the job to append the steps to
     * @param jobId  id of the cubing job
     */
    protected void buildHiveGlobalDictionaryByMR(CubingJob result, String jobId) {
        KylinConfig dictConfig = this.seg.getConfig();
        String[] mrHiveDictColumnExcludeRef = dictConfig.getMrHiveDictColumnsExcludeRefColumns();
        String[] mrHiveDictColumns = dictConfig.getMrHiveDictColumns();
        if (hasConfiguredColumns(mrHiveDictColumnExcludeRef)) {
            result.addTask(this.createBuildGlobalHiveDictPartBuildJob(jobId));
            result.addTask(this.createBuildGlobalHiveDictTotalBuildJob(jobId));
        }
        if (hasConfiguredColumns(mrHiveDictColumns)) {
            this.inputSide.addStepPhase_ReplaceFlatTableGlobalColumnValue(result);
        }
    }

    /** Sets the executable's jars from the additional Spark jars configured in {@code kylinConfig}. */
    private void attachSparkAdditionalJars(SparkExecutable sparkExecutable, KylinConfig kylinConfig) {
        StringBuilder jars = new StringBuilder();
        StringUtil.appendWithSeparator(jars, kylinConfig.getSparkAdditionalJars());
        sparkExecutable.setJars(jars.toString());
    }

    /** Returns true when the array is non-null, non-empty and its first entry is not the empty string. */
    private static boolean hasConfiguredColumns(String[] columns) {
        return Objects.nonNull(columns) && columns.length > 0 && !"".equals(columns[0]);
    }
}

