/*
 * Decompiled with CFR 0.152.
 */
package org.apache.kylin.engine.flink;

import com.google.common.collect.Maps;
import java.util.HashMap;
import java.util.Map;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.flink.api.common.JobExecutionResult;
import org.apache.flink.api.common.accumulators.Accumulator;
import org.apache.flink.api.common.accumulators.LongCounter;
import org.apache.flink.api.common.functions.MapPartitionFunction;
import org.apache.flink.api.common.functions.Partitioner;
import org.apache.flink.api.common.functions.RichMapPartitionFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.operators.MapPartitionOperator;
import org.apache.flink.api.java.operators.Operator;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.util.Collector;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.ArrayPrimitiveWritable;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.kylin.common.KylinConfig;
import org.apache.kylin.common.util.AbstractApplication;
import org.apache.kylin.common.util.Bytes;
import org.apache.kylin.common.util.BytesUtil;
import org.apache.kylin.common.util.HadoopUtil;
import org.apache.kylin.common.util.OptionsHelper;
import org.apache.kylin.common.util.Pair;
import org.apache.kylin.common.util.StringUtil;
import org.apache.kylin.cube.CubeInstance;
import org.apache.kylin.cube.CubeManager;
import org.apache.kylin.engine.flink.FlinkBatchCubingJobBuilder2;
import org.apache.kylin.engine.flink.FlinkUtil;
import org.apache.kylin.engine.flink.HadoopMultipleOutputFormat;
import org.apache.kylin.engine.mr.common.AbstractHadoopJob;
import org.apache.kylin.engine.mr.common.SerializableConfiguration;
import org.apache.kylin.engine.mr.steps.FactDistinctColumnsBase;
import org.apache.kylin.engine.mr.steps.FactDistinctColumnsReducerMapping;
import org.apache.kylin.engine.mr.steps.SelfDefineSortableKey;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Command-line application that assembles and runs the Flink batch job named
 * {@code "Fact distinct columns for:<cube> segment <segmentId>"}.
 *
 * <p>As wired up in {@link #execute(OptionsHelper)}, the job reads flat-table rows through
 * {@code FlinkUtil.readHiveRecords}, passes them through a map stage and a reduce stage that
 * both delegate to {@code FactDistinctColumnsBase}, repartitions the intermediate records with
 * a custom partitioner, and writes the results to the Hadoop named outputs {@code column},
 * {@code dict}, {@code statistics} and {@code partition}. After the job finishes, the record
 * count and byte size accumulators are written to the counter path.
 */
public class FlinkFactDistinctColumns
extends AbstractApplication {
    protected static final Logger logger = LoggerFactory.getLogger(FlinkFactDistinctColumns.class);
    // The option constants below are assigned in the static initializer of this class.
    // Required: "cubename" - cube name.
    public static final Option OPTION_CUBE_NAME;
    // Required: "segmentId" - cube segment id.
    public static final Option OPTION_SEGMENT_ID;
    // Required: "metaUrl" - HDFS metadata url.
    public static final Option OPTION_META_URL;
    // Required: "hiveTable" - Hive intermediate table.
    public static final Option OPTION_INPUT_TABLE;
    // Required: "input" - path of the Hive intermediate table.
    public static final Option OPTION_INPUT_PATH;
    // Required: "output" - cube output path.
    public static final Option OPTION_OUTPUT_PATH;
    // Required: "counterOutput" - counter output path.
    public static final Option OPTION_COUNTER_PATH;
    // Required: "statisticssamplingpercent" - statistics sampling percent (parsed as an int).
    public static final Option OPTION_STATS_SAMPLING_PERCENT;
    // Optional: "enableObjectReuse" - object reuse is enabled when the value is "true" (case-insensitive).
    public static final Option OPTION_ENABLE_OBJECT_REUSE;
    // Option set returned by getOptions(); populated in the constructor.
    private Options options = new Options();

    public FlinkFactDistinctColumns() {
        this.options.addOption(OPTION_CUBE_NAME);
        this.options.addOption(OPTION_META_URL);
        this.options.addOption(OPTION_OUTPUT_PATH);
        this.options.addOption(OPTION_INPUT_TABLE);
        this.options.addOption(OPTION_INPUT_PATH);
        this.options.addOption(OPTION_SEGMENT_ID);
        this.options.addOption(OPTION_STATS_SAMPLING_PERCENT);
        this.options.addOption(OPTION_COUNTER_PATH);
        this.options.addOption(OPTION_ENABLE_OBJECT_REUSE);
    }

    /**
     * Returns the command-line options registered by the constructor.
     *
     * @return the option set of this application
     */
    @Override
    protected Options getOptions() {
        return options;
    }

    /**
     * Builds the Flink pipeline from the parsed command-line options, runs it, and persists
     * the job counters.
     *
     * <p>Pipeline stages:
     * <ol>
     *   <li>read flat-table rows via {@code FlinkUtil.readHiveRecords};</li>
     *   <li>map each partition with {@link FlatOutputMapPartitionFunction};</li>
     *   <li>repartition on tuple field 0 with {@link FactDistinctColumnPartitioner};</li>
     *   <li>map each partition with {@link MultiOutputMapPartitionFunction};</li>
     *   <li>write through {@code HadoopMultipleOutputFormat} to the named outputs
     *       {@code column}, {@code dict}, {@code statistics} and {@code partition}.</li>
     * </ol>
     *
     * @param optionsHelper accessor for the parsed command-line option values
     * @throws Exception if option parsing, job setup, job execution or counter writing fails
     */
    @Override
    protected void execute(OptionsHelper optionsHelper) throws Exception {
        String cubeName = optionsHelper.getOptionValue(OPTION_CUBE_NAME);
        String metaUrl = optionsHelper.getOptionValue(OPTION_META_URL);
        String segmentId = optionsHelper.getOptionValue(OPTION_SEGMENT_ID);
        String hiveTable = optionsHelper.getOptionValue(OPTION_INPUT_TABLE);
        String inputPath = optionsHelper.getOptionValue(OPTION_INPUT_PATH);
        String outputPath = optionsHelper.getOptionValue(OPTION_OUTPUT_PATH);
        String counterPath = optionsHelper.getOptionValue(OPTION_COUNTER_PATH);
        int samplingPercent = Integer.parseInt(optionsHelper.getOptionValue(OPTION_STATS_SAMPLING_PERCENT));
        String enableObjectReuseOptValue = optionsHelper.getOptionValue(OPTION_ENABLE_OBJECT_REUSE);

        Job job = Job.getInstance();
        FileSystem fs = HadoopUtil.getWorkingFileSystem(job.getConfiguration());
        // Delete the output path up front, before the job writes to it.
        HadoopUtil.deletePath(job.getConfiguration(), new Path(outputPath));

        SerializableConfiguration sConf = new SerializableConfiguration(job.getConfiguration());
        KylinConfig envConfig = AbstractHadoopJob.loadKylinConfigFromHdfs(sConf, metaUrl);
        CubeInstance cubeInstance = CubeManager.getInstance(envConfig).getCube(cubeName);

        // The reducer mapping determines the parallelism of the partition and reduce stages.
        FactDistinctColumnsReducerMapping reducerMapping = new FactDistinctColumnsReducerMapping(cubeInstance);
        int totalReducer = reducerMapping.getTotalReducerNum();
        logger.info("getTotalReducerNum: {}", totalReducer);
        logger.info("getCuboidRowCounterReducerNum: {}", reducerMapping.getCuboidRowCounterReducerNum());
        logger.info("counter path {}", counterPath);

        boolean isSequenceFile = "SEQUENCEFILE".equalsIgnoreCase(envConfig.getFlatTableStorageFormat());

        // Accumulator names shared by the map stage (which registers them) and the result lookup below.
        final String bytesWrittenName = "byte-writer-counter";
        final String recordCounterName = "record-counter";

        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        if (!StringUtil.isEmpty(enableObjectReuseOptValue) && enableObjectReuseOptValue.equalsIgnoreCase("true")) {
            env.getConfig().enableObjectReuse();
        }

        DataSet<String[]> recordDataSet = FlinkUtil.readHiveRecords(isSequenceFile, env, inputPath, hiveTable, job);

        // Map stage: flat-table rows -> (sortable key, text) tuples.
        DataSet<Tuple2<SelfDefineSortableKey, Text>> flatOutputDataSet = recordDataSet.mapPartition(
                new FlatOutputMapPartitionFunction(sConf, cubeName, segmentId, metaUrl, samplingPercent,
                        bytesWrittenName, recordCounterName));

        // Route every tuple to a reducer chosen from its key (tuple field 0).
        DataSet<Tuple2<SelfDefineSortableKey, Text>> partitionDataSet = flatOutputDataSet
                .partitionCustom(new FactDistinctColumnPartitioner(cubeName, metaUrl, sConf), 0)
                .setParallelism(totalReducer);

        // Reduce stage: emits (namedOutput, (key, value, outputPath)) tuples.
        DataSet<Tuple2<String, Tuple3<Writable, Writable, String>>> outputDataSet = partitionDataSet
                .mapPartition(new MultiOutputMapPartitionFunction(sConf, cubeName, segmentId, metaUrl, samplingPercent))
                .setParallelism(totalReducer);

        // Declare the named outputs together with their output format and key/value classes.
        MultipleOutputs.addNamedOutput(job, "column", SequenceFileOutputFormat.class, NullWritable.class, Text.class);
        MultipleOutputs.addNamedOutput(job, "dict", SequenceFileOutputFormat.class, NullWritable.class, ArrayPrimitiveWritable.class);
        MultipleOutputs.addNamedOutput(job, "statistics", SequenceFileOutputFormat.class, LongWritable.class, BytesWritable.class);
        MultipleOutputs.addNamedOutput(job, "partition", TextOutputFormat.class, NullWritable.class, LongWritable.class);

        FileOutputFormat.setOutputPath(job, new Path(outputPath));
        FileOutputFormat.setCompressOutput(job, false);
        LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);

        outputDataSet.output(new HadoopMultipleOutputFormat(new LazyOutputFormat(), job));

        JobExecutionResult jobExecutionResult = env.execute("Fact distinct columns for:" + cubeName + " segment " + segmentId);

        Map<String, Object> accumulatorResults = jobExecutionResult.getAllAccumulatorResults();
        Long recordCount = (Long) accumulatorResults.get(recordCounterName);
        Long bytesWritten = (Long) accumulatorResults.get(bytesWrittenName);
        logger.info("Map input records={}", recordCount);
        logger.info("HDFS Read: {} HDFS Write", bytesWritten);
        logger.info("HDFS: Number of bytes written={}", FlinkBatchCubingJobBuilder2.getFileSize(outputPath, fs));

        // Persist the counters; a missing accumulator is written as the string "null", as before.
        Map<String, String> counterMap = new HashMap<>();
        counterMap.put("source_records_count", String.valueOf(recordCount));
        counterMap.put("source_records_size", String.valueOf(bytesWritten));
        HadoopUtil.writeToSequenceFile(job.getConfiguration(), counterPath, counterMap);
    }

    // Builds every command-line option; all options take one argument and only
    // "enableObjectReuse" is optional.
    static {
        OPTION_CUBE_NAME = newOption("cubename", "Cube Name", true);
        OPTION_SEGMENT_ID = newOption("segmentId", "Cube Segment Id", true);
        OPTION_META_URL = newOption("metaUrl", "HDFS metadata url", true);
        OPTION_INPUT_TABLE = newOption("hiveTable", "Hive Intermediate Table", true);
        OPTION_INPUT_PATH = newOption("input", "Hive Intermediate Table PATH", true);
        OPTION_OUTPUT_PATH = newOption("output", "Cube output path", true);
        OPTION_COUNTER_PATH = newOption("counterOutput", "Counter output path", true);
        OPTION_STATS_SAMPLING_PERCENT = newOption("statisticssamplingpercent", "Statistics sampling percent", true);
        OPTION_ENABLE_OBJECT_REUSE = newOption("enableObjectReuse", "Enable object reuse", false);
    }

    /**
     * Creates a single-argument option whose argument name equals its option name.
     *
     * @param name option name, also used as the argument name
     * @param description help text of the option
     * @param required whether the option must be present on the command line
     * @return the option produced by {@code OptionBuilder}
     */
    private static Option newOption(String name, String description, boolean required) {
        OptionBuilder.withArgName(name);
        OptionBuilder.hasArg();
        OptionBuilder.isRequired(required);
        OptionBuilder.withDescription(description);
        return OptionBuilder.create(name);
    }

    static class MultiOutputMapPartitionFunction
    extends RichMapPartitionFunction<Tuple2<SelfDefineSortableKey, Text>, Tuple2<String, Tuple3<Writable, Writable, String>>> {
        private String cubeName;
        private String segmentId;
        private String metaUrl;
        private SerializableConfiguration conf;
        private int samplingPercentage;
        private FactDistinctColumnsBase base;

        public MultiOutputMapPartitionFunction(SerializableConfiguration conf, String cubeName, String segmentId, String metaUrl, int samplingPercentage) {
            this.cubeName = cubeName;
            this.segmentId = segmentId;
            this.metaUrl = metaUrl;
            this.conf = conf;
            this.samplingPercentage = samplingPercentage;
        }

        public void open(Configuration parameters) throws Exception {
            int taskId = this.getRuntimeContext().getIndexOfThisSubtask();
            this.base = new FactDistinctColumnsBase(this.cubeName, this.segmentId, this.metaUrl, this.conf, this.samplingPercentage);
            this.base.setupReduce(taskId);
        }

        public void mapPartition(Iterable<Tuple2<SelfDefineSortableKey, Text>> values, final Collector<Tuple2<String, Tuple3<Writable, Writable, String>>> out) throws Exception {
            FactDistinctColumnsBase.Visitor<Writable, Writable> visitor = new FactDistinctColumnsBase.Visitor<Writable, Writable>(){

                @Override
                public void collect(String namedOutput, Writable key, Writable value, String outputPath) {
                    out.collect((Object)new Tuple2((Object)namedOutput, (Object)new Tuple3((Object)key, (Object)value, (Object)outputPath)));
                }
            };
            for (Tuple2<SelfDefineSortableKey, Text> value : values) {
                this.base.reduce(new Pair<Object, Object>(value.f0, value.f1), visitor);
            }
            this.base.postReduce(visitor);
        }
    }

    static class FactDistinctColumnPartitioner
    implements Partitioner<SelfDefineSortableKey> {
        private volatile transient boolean initialized = false;
        private String cubeName;
        private String metaUrl;
        private SerializableConfiguration conf;
        private transient FactDistinctColumnsReducerMapping reducerMapping;

        public FactDistinctColumnPartitioner(String cubeName, String metaUrl, SerializableConfiguration conf) {
            this.cubeName = cubeName;
            this.metaUrl = metaUrl;
            this.conf = conf;
        }

        private void init() {
            KylinConfig kConfig = AbstractHadoopJob.loadKylinConfigFromHdfs(this.conf, this.metaUrl);
            try (KylinConfig.SetAndUnsetThreadLocalConfig autoUnset = KylinConfig.setAndUnsetThreadLocalConfig(kConfig);){
                CubeInstance cubeInstance = CubeManager.getInstance(kConfig).getCube(this.cubeName);
                this.reducerMapping = new FactDistinctColumnsReducerMapping(cubeInstance);
                this.initialized = true;
            }
        }

        /*
         * WARNING - Removed try catching itself - possible behaviour change.
         * Enabled aggressive block sorting
         * Enabled unnecessary exception pruning
         * Enabled aggressive exception aggregation
         * Converted monitor instructions to comments
         * Lifted jumps to return sites
         */
        public int partition(SelfDefineSortableKey key, int numPartitions) {
            Text keyText;
            if (!this.initialized) {
                Class<FlinkFactDistinctColumns> clazz = FlinkFactDistinctColumns.class;
                // MONITORENTER : org.apache.kylin.engine.flink.FlinkFactDistinctColumns.class
                if (!this.initialized) {
                    this.init();
                }
                // MONITOREXIT : clazz
            }
            if ((keyText = key.getText()).getBytes()[0] != -1) return BytesUtil.readUnsigned(keyText.getBytes(), 0, 1);
            Long cuboidId = Bytes.toLong(keyText.getBytes(), 1, 8);
            return this.reducerMapping.getReducerIdForCuboidRowCount(cuboidId);
        }
    }

    static class FlatOutputMapPartitionFunction
    extends RichMapPartitionFunction<String[], Tuple2<SelfDefineSortableKey, Text>> {
        private String cubeName;
        private String segmentId;
        private String metaUrl;
        private SerializableConfiguration conf;
        private int samplingPercentage;
        private String bytesWrittenName;
        private String recordCounterName;
        private LongCounter bytesWrittenCounter;
        private LongCounter recordCounter;
        private FactDistinctColumnsBase base;

        public FlatOutputMapPartitionFunction(SerializableConfiguration conf, String cubeName, String segmentId, String metaUrl, int samplingPercentage, String bytesWrittenName, String recordCounterName) {
            this.cubeName = cubeName;
            this.segmentId = segmentId;
            this.metaUrl = metaUrl;
            this.conf = conf;
            this.samplingPercentage = samplingPercentage;
            this.bytesWrittenName = bytesWrittenName;
            this.recordCounterName = recordCounterName;
            this.bytesWrittenCounter = new LongCounter();
            this.recordCounter = new LongCounter();
        }

        public void open(Configuration parameters) throws Exception {
            this.getRuntimeContext().addAccumulator(this.bytesWrittenName, (Accumulator)this.bytesWrittenCounter);
            this.getRuntimeContext().addAccumulator(this.recordCounterName, (Accumulator)this.recordCounter);
            this.base = new FactDistinctColumnsBase(this.cubeName, this.segmentId, this.metaUrl, this.conf, this.samplingPercentage);
            this.base.setupMap();
        }

        public void mapPartition(Iterable<String[]> values, final Collector<Tuple2<SelfDefineSortableKey, Text>> out) throws Exception {
            FactDistinctColumnsBase.Visitor<SelfDefineSortableKey, Text> visitor = new FactDistinctColumnsBase.Visitor<SelfDefineSortableKey, Text>(){

                @Override
                public void collect(String namedOutput, SelfDefineSortableKey key, Text value, String outputPath) {
                    out.collect((Object)new Tuple2((Object)key, (Object)value));
                }
            };
            for (String[] row : values) {
                this.bytesWrittenCounter.add(this.base.countSizeInBytes(row));
                this.recordCounter.add(1L);
                this.base.map(row, visitor);
            }
            this.base.postMap(visitor);
        }
    }
}

