/*
 * Decompiled with CFR 0.152.
 */
package org.apache.iceberg.mr.mapreduce;

import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
import java.util.concurrent.ExecutorService;
import java.util.function.Consumer;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.FileUtils;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.llap.LlapHiveUtils;
import org.apache.hadoop.hive.ql.metadata.AuthorizationException;
import org.apache.hadoop.hive.ql.metadata.HiveUtils;
import org.apache.hadoop.hive.ql.plan.MapWork;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.iceberg.CombinedScanTask;
import org.apache.iceberg.DataFile;
import org.apache.iceberg.DataTableScan;
import org.apache.iceberg.FileScanTask;
import org.apache.iceberg.IncrementalAppendScan;
import org.apache.iceberg.Partitioning;
import org.apache.iceberg.Scan;
import org.apache.iceberg.ScanTaskGroup;
import org.apache.iceberg.Schema;
import org.apache.iceberg.SnapshotRef;
import org.apache.iceberg.SystemConfigs;
import org.apache.iceberg.Table;
import org.apache.iceberg.TableScan;
import org.apache.iceberg.expressions.Expression;
import org.apache.iceberg.expressions.Expressions;
import org.apache.iceberg.io.CloseableIterable;
import org.apache.iceberg.mr.Catalogs;
import org.apache.iceberg.mr.InputFormatConfig;
import org.apache.iceberg.mr.hive.HiveIcebergStorageHandler;
import org.apache.iceberg.mr.mapreduce.IcebergMergeRecordReader;
import org.apache.iceberg.mr.mapreduce.IcebergMergeSplit;
import org.apache.iceberg.mr.mapreduce.IcebergRecordReader;
import org.apache.iceberg.mr.mapreduce.IcebergSplit;
import org.apache.iceberg.relocated.com.google.common.collect.Lists;
import org.apache.iceberg.types.Types;
import org.apache.iceberg.util.SerializationUtil;
import org.apache.iceberg.util.TableScanUtil;
import org.apache.iceberg.util.ThreadPools;

public class IcebergInputFormat<T>
extends InputFormat<Void, T> {
    /**
     * Registers {@code IcebergInputFormat} as the input format of the given job and returns a
     * builder that operates on the job's configuration.
     *
     * @param job the MapReduce job to set up
     * @return a new {@link InputFormatConfig.ConfigBuilder} created from {@code job.getConfiguration()}
     */
    public static InputFormatConfig.ConfigBuilder configure(Job job) {
        job.setInputFormatClass(IcebergInputFormat.class);
        Configuration jobConfiguration = job.getConfiguration();
        return new InputFormatConfig.ConfigBuilder(jobConfiguration);
    }

    /**
     * Creates a {@link TableScan} for {@code table}, narrowed by the snapshot-related settings
     * found in {@code conf}.
     *
     * <p>The value of {@code iceberg.mr.snapshot.id} is first read as a number. If it is not
     * numeric it is looked up in {@code table.refs()} and the snapshot id of the matching
     * reference is used instead. The scan is then refined, in this order, by
     * {@code iceberg.mr.output.table.snapshot.ref} (when non-empty), by the resolved snapshot id
     * (when not {@code -1}) and by {@code iceberg.mr.as.of.time} (when not {@code -1}).
     *
     * @param table the table to scan
     * @param conf the configuration holding the scan settings
     * @return the configured table scan
     * @throws RuntimeException if the non-numeric snapshot value matches no table reference
     */
    private static TableScan createTableScan(Table table, Configuration conf) {
        TableScan tableScan = table.newScan();

        long resolvedSnapshotId;
        try {
            resolvedSnapshotId = conf.getLong("iceberg.mr.snapshot.id", -1L);
        } catch (NumberFormatException notNumeric) {
            // The configured value is not a number, so interpret it as a reference name.
            String version = conf.get("iceberg.mr.snapshot.id");
            SnapshotRef namedRef = table.refs().get(version);
            if (namedRef == null) {
                throw new RuntimeException("Cannot find matching snapshot ID or reference name for version " + version);
            }
            resolvedSnapshotId = namedRef.snapshotId();
        }

        String outputRefName = conf.get("iceberg.mr.output.table.snapshot.ref");
        if (StringUtils.isNotEmpty(outputRefName)) {
            tableScan = tableScan.useRef(HiveUtils.getTableSnapshotRef(outputRefName));
        }

        if (resolvedSnapshotId != -1L) {
            tableScan = tableScan.useSnapshot(resolvedSnapshotId);
        }

        long asOfTimestamp = conf.getLong("iceberg.mr.as.of.time", -1L);
        if (asOfTimestamp != -1L) {
            tableScan = tableScan.asOfTime(asOfTimestamp);
        }

        return tableScan;
    }

    /**
     * Creates an incremental append scan of {@code table} that starts after the snapshot id
     * stored under {@code iceberg.mr.snapshot.id.interval.from} ({@code -1} when the key is unset).
     *
     * @param table the table to scan
     * @param conf the configuration holding the starting snapshot id
     * @return the incremental append scan with its exclusive start snapshot applied
     */
    private static IncrementalAppendScan createIncrementalAppendScan(Table table, Configuration conf) {
        long exclusiveStartSnapshotId = conf.getLong("iceberg.mr.snapshot.id.interval.from", -1L);
        IncrementalAppendScan appendScan = table.newIncrementalAppendScan();
        return appendScan.fromSnapshotExclusive(exclusiveStartSnapshotId);
    }

    /**
     * Applies the generic scan settings stored in {@code conf} to {@code scanToConfigure}.
     *
     * <p>The following settings are applied in order:
     * <ul>
     *   <li>{@code iceberg.mr.case.sensitive} (default {@code true});</li>
     *   <li>{@code iceberg.mr.split.size}, forwarded as {@code read.split.target-size} when positive;</li>
     *   <li>{@code read.split.open-file-cost}, set only when the job's {@link MapWork} reports
     *       cache affinity;</li>
     *   <li>the read schema from {@link InputFormatConfig#readSchema}, or, when that is absent,
     *       the selected columns from {@link InputFormatConfig#selectedColumns};</li>
     *   <li>the Base64-serialized filter under {@code iceberg.mr.filter.expression}, applied
     *       together with {@code ignoreResiduals()}.</li>
     * </ul>
     *
     * @param conf the configuration to read from; it is cast to {@link JobConf}, so any other
     *     {@code Configuration} subtype fails with a {@code ClassCastException}
     * @param scanToConfigure the scan to refine
     * @param <T> the concrete self-type of the scan
     * @return the refined scan
     */
    private static <T extends Scan<T, FileScanTask, CombinedScanTask>> Scan<T, FileScanTask, CombinedScanTask> applyConfig(Configuration conf, Scan<T, FileScanTask, CombinedScanTask> scanToConfigure) {
        // Fallback open-file cost used when no explicit split size is configured: 128 MiB (0x8000000).
        final long defaultOpenFileCost = 128L * 1024L * 1024L;

        // Every refinement returns T, which is itself a Scan<T, ...>, so no raw types or casts are needed.
        Scan<T, FileScanTask, CombinedScanTask> scan =
            scanToConfigure.caseSensitive(conf.getBoolean("iceberg.mr.case.sensitive", true));

        long splitSize = conf.getLong("iceberg.mr.split.size", 0L);
        if (splitSize > 0L) {
            scan = scan.option("read.split.target-size", String.valueOf(splitSize));
        }

        MapWork mapWork = LlapHiveUtils.findMapWork((JobConf) conf);
        if (mapWork != null && mapWork.getCacheAffinity()) {
            // NOTE(review): presumably a large open-file cost keeps files from being packed into
            // one split so cache affinity can work per file - confirm against Iceberg split planning.
            long openFileCost = splitSize > 0L ? splitSize : defaultOpenFileCost;
            scan = scan.option("read.split.open-file-cost", String.valueOf(openFileCost));
        }

        Schema readSchema = InputFormatConfig.readSchema(conf);
        if (readSchema != null) {
            scan = scan.project(readSchema);
        } else {
            String[] selectedColumns = InputFormatConfig.selectedColumns(conf);
            if (selectedColumns != null) {
                scan = scan.select(selectedColumns);
            }
        }

        Expression filter = SerializationUtil.deserializeFromBase64(conf.get("iceberg.mr.filter.expression"));
        if (filter != null) {
            scan = scan.filter(filter).ignoreResiduals();
        }

        return scan;
    }

    /**
     * Plans the input splits for the job described by {@code context}.
     *
     * <p>The table is obtained from {@link HiveIcebergStorageHandler#table} using the identifier
     * stored under {@code iceberg.mr.table.identifier}. When that yields {@code null} the table is
     * loaded through {@link Catalogs#loadTable}, and its name and Base64-serialized form are
     * written back into the configuration. Planning runs on a dedicated worker pool that is shut
     * down before this method returns, whether planning succeeds or throws.
     *
     * @param context the job context supplying the configuration
     * @return the planned input splits
     */
    @Override
    public List<InputSplit> getSplits(JobContext context) {
        Configuration conf = context.getConfiguration();

        Table table = HiveIcebergStorageHandler.table(conf, conf.get("iceberg.mr.table.identifier"));
        if (table == null) {
            // Not available from the storage handler: load it and store it in the configuration.
            table = Catalogs.loadTable(conf);
            conf.set("iceberg.mr.table.identifier", table.name());
            conf.set("iceberg.mr.serialized.table." + table.name(), SerializationUtil.serializeToBase64(table));
        }

        int poolSize = conf.getInt(SystemConfigs.WORKER_THREAD_POOL_SIZE.propertyKey(), ThreadPools.WORKER_THREAD_POOL_SIZE);
        ExecutorService workerPool = ThreadPools.newFixedThreadPool("iceberg-plan-worker-pool", poolSize);
        try {
            return planInputSplits(table, conf, workerPool);
        } finally {
            workerPool.shutdown();
        }
    }

    /**
     * Builds the scan described by {@code conf}, plans it on {@code workerPool} and wraps every
     * resulting task group in an {@link IcebergSplit}.
     *
     * <p>An incremental append scan is used when {@code iceberg.mr.snapshot.id.interval.from} is
     * set to something other than {@code -1}; otherwise a regular table scan is used. Each task
     * group is checked for residual filters when residual filtering is not skipped and the
     * in-memory data model is {@code HIVE}. When
     * {@code HIVE_ICEBERG_ALLOW_DATAFILES_IN_TABLE_LOCATION_ONLY} is enabled, each task group is
     * also checked for data files outside the table location.
     *
     * @param table the table being read
     * @param conf the job configuration
     * @param workerPool the executor handed to {@code Scan.planWith}
     * @return the planned splits, in the order the task groups were produced
     */
    private List<InputSplit> planInputSplits(Table table, Configuration conf, ExecutorService workerPool) {
        List<InputSplit> splits = Lists.newArrayList();

        boolean applyResidual = !conf.getBoolean("skip.residual.filtering", false);
        InputFormatConfig.InMemoryDataModel model =
            conf.getEnum("iceberg.mr.in.memory.data.model", InputFormatConfig.InMemoryDataModel.GENERIC);
        boolean mustCheckResiduals = applyResidual && model == InputFormatConfig.InMemoryDataModel.HIVE;

        long fromVersion = conf.getLong("iceberg.mr.snapshot.id.interval.from", -1L);

        // planWith is called inside each branch, where the concrete scan type is still known.
        // A single variable typed for one branch cannot hold the result of the other.
        Scan<?, FileScanTask, CombinedScanTask> scan;
        if (fromVersion != -1L) {
            scan = applyConfig(conf, createIncrementalAppendScan(table, conf)).planWith(workerPool);
        } else {
            scan = applyConfig(conf, createTableScan(table, conf)).planWith(workerPool);
        }

        boolean allowDataFilesWithinTableLocationOnly = conf.getBoolean(
            HiveConf.ConfVars.HIVE_ICEBERG_ALLOW_DATAFILES_IN_TABLE_LOCATION_ONLY.varname,
            HiveConf.ConfVars.HIVE_ICEBERG_ALLOW_DATAFILES_IN_TABLE_LOCATION_ONLY.defaultBoolVal);
        // Build the Path only when validation needs it, so a missing location does not fail a job
        // that never validates file locations.
        Path tableLocation = allowDataFilesWithinTableLocationOnly
            ? new Path(conf.get("iceberg.mr.table.location"))
            : null;

        String[] groupingPartitionColumns = conf.getStrings("iceberg.mr.grouping.partition.columns");

        generateInputSplits(scan, table, groupingPartitionColumns, taskGroup -> {
            if (mustCheckResiduals) {
                checkResiduals(taskGroup);
            }
            if (allowDataFilesWithinTableLocationOnly) {
                validateFileLocations(taskGroup, tableLocation);
            }
            splits.add(new IcebergSplit(conf, taskGroup));
        });

        if (scan instanceof DataTableScan) {
            HiveIcebergStorageHandler.checkAndSkipIoConfigSerialization(conf, table);
        }
        return splits;
    }

    /**
     * Verifies that every data file referenced by {@code split} lies within {@code tableLocation}.
     *
     * @param split the task group whose file scan tasks are inspected
     * @param tableLocation the root location every data file path must be inside
     * @throws AuthorizationException on the first data file found outside {@code tableLocation}
     */
    private static void validateFileLocations(ScanTaskGroup<FileScanTask> split, Path tableLocation) {
        for (FileScanTask task : split.tasks()) {
            DataFile dataFile = task.file();
            Path dataFilePath = new Path(dataFile.path().toString());
            boolean insideTableLocation = FileUtils.isPathWithinSubtree(dataFilePath, tableLocation);
            if (!insideTableLocation) {
                throw new AuthorizationException("The table contains paths which are outside the table location");
            }
        }
    }

    /**
     * Plans {@code scan} and passes every resulting task group to {@code consumer}.
     *
     * <p>When {@code groupingPartitionColumns} is {@code null}, the combined tasks from
     * {@code scan.planTasks()} are forwarded as they are. Otherwise the file scan tasks from
     * {@code scan.planFiles()} are collected into a list and regrouped by
     * {@link TableScanUtil#planTaskGroups}, using a grouping key type derived from the given
     * columns and all partition specs of {@code table}.
     *
     * @param scan the configured scan to plan
     * @param table the table being scanned; supplies the schema and partition specs
     * @param groupingPartitionColumns the partition columns to group by, or {@code null}
     * @param consumer the receiver of each planned task group
     * @throws UncheckedIOException if handling the planned-task iterable raises an {@link IOException}
     */
    private static void generateInputSplits(Scan<?, FileScanTask, CombinedScanTask> scan, Table table, String[] groupingPartitionColumns, Consumer<ScanTaskGroup<FileScanTask>> consumer) {
        if (groupingPartitionColumns == null) {
            try (CloseableIterable<CombinedScanTask> combinedTasks = scan.planTasks()) {
                combinedTasks.forEach(consumer);
            } catch (IOException e) {
                throw new UncheckedIOException(String.format("Failed to close table scan: %s", scan), e);
            }
            return;
        }

        Schema groupingSchema = table.schema().select(groupingPartitionColumns);
        Types.StructType groupingKeyType = Partitioning.groupingKeyType(groupingSchema, table.specs().values());
        try (CloseableIterable<FileScanTask> fileTasks = scan.planFiles()) {
            List<FileScanTask> collectedTasks = Lists.newArrayList(fileTasks);
            long targetSplitSize = scan.targetSplitSize();
            int splitLookback = scan.splitLookback();
            long splitOpenFileCost = scan.splitOpenFileCost();
            List<ScanTaskGroup<FileScanTask>> groupedTasks =
                TableScanUtil.planTaskGroups(collectedTasks, targetSplitSize, splitLookback, splitOpenFileCost, groupingKeyType);
            groupedTasks.forEach(consumer);
        } catch (IOException e) {
            throw new UncheckedIOException(String.format("Failed to close table scan: %s", scan), e);
        }
    }

    /**
     * Rejects task groups that still carry a residual filter.
     *
     * <p>A file scan task passes when its residual is {@code null} or equal to
     * {@code Expressions.alwaysTrue()}.
     *
     * @param task the task group whose file scan tasks are inspected
     * @throws UnsupportedOperationException on the first file scan task with any other residual
     */
    private static void checkResiduals(ScanTaskGroup<FileScanTask> task) {
        for (FileScanTask fileScanTask : task.tasks()) {
            Expression residual = fileScanTask.residual();
            if (residual == null || residual.equals(Expressions.alwaysTrue())) {
                continue;
            }
            throw new UnsupportedOperationException(String.format("Filter expression %s is not completely satisfied. Additional rows can be returned not satisfied by the filter expression", residual));
        }
    }

    /**
     * Creates the record reader matching the kind of {@code split}.
     *
     * @param split the split to read; an {@link IcebergMergeSplit} selects the merge reader
     * @param context the task attempt context (not used by this method)
     * @return a new {@link IcebergMergeRecordReader} for merge splits, otherwise a new
     *     {@link IcebergRecordReader}
     */
    @Override
    public RecordReader<Void, T> createRecordReader(InputSplit split, TaskAttemptContext context) {
        if (split instanceof IcebergMergeSplit) {
            return new IcebergMergeRecordReader<>();
        }
        return new IcebergRecordReader<>();
    }
}

