/*
 * Decompiled with CFR 0.152.
 */
package com.rapidminer.operator.preprocessing.discretization;

import com.rapidminer.example.Attribute;
import com.rapidminer.example.Example;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.operator.Operator;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.ProcessSetupError;
import com.rapidminer.operator.UserError;
import com.rapidminer.operator.annotation.ResourceConsumptionEstimator;
import com.rapidminer.operator.ports.Port;
import com.rapidminer.operator.ports.metadata.AttributeMetaData;
import com.rapidminer.operator.ports.metadata.ExampleSetMetaData;
import com.rapidminer.operator.ports.metadata.SimpleMetaDataError;
import com.rapidminer.operator.preprocessing.PreprocessingModel;
import com.rapidminer.operator.preprocessing.discretization.AbstractDiscretizationOperator;
import com.rapidminer.operator.preprocessing.discretization.DiscretizationModel;
import com.rapidminer.operator.preprocessing.discretization.FrequencyDiscretization;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeBoolean;
import com.rapidminer.parameter.ParameterTypeCategory;
import com.rapidminer.parameter.ParameterTypeInt;
import com.rapidminer.parameter.ParameterTypeSingle;
import com.rapidminer.parameter.conditions.BooleanParameterCondition;
import com.rapidminer.parameter.conditions.EqualTypeCondition;
import com.rapidminer.tools.Ontology;
import com.rapidminer.tools.OperatorResourceConsumptionHandler;
import com.rapidminer.tools.math.MathFunctions;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;

public class MinimalEntropyDiscretization
extends AbstractDiscretizationOperator {
    public static final String PARAMETER_USE_LONG_RANGE_NAMES = "use_long_range_names";
    public static final String PARAMETER_REMOVE_USELESS = "remove_useless";

    public MinimalEntropyDiscretization(OperatorDescription description) {
        super(description);
    }

    @Override
    protected void checkSelectedSubsetMetaData(ExampleSetMetaData subsetMetaData) {
        switch (subsetMetaData.containsSpecialAttribute("label")) {
            case YES: {
                AttributeMetaData labelMD = subsetMetaData.getAttributeByRole("label");
                if (labelMD.isNominal()) break;
                this.getExampleSetInputPort().addError(new SimpleMetaDataError(ProcessSetupError.Severity.ERROR, (Port)this.getExampleSetInputPort(), "attribute_has_wrong_type", labelMD.getName(), Ontology.VALUE_TYPE_NAMES[1]));
                break;
            }
            case NO: {
                this.getExampleSetInputPort().addError(new SimpleMetaDataError(ProcessSetupError.Severity.ERROR, (Port)this.getExampleSetInputPort(), "special_missing", "label"));
                break;
            }
            default: {
                this.getExampleSetInputPort().addError(new SimpleMetaDataError(ProcessSetupError.Severity.WARNING, (Port)this.getExampleSetInputPort(), "special_unknown", "label"));
            }
        }
    }

    @Override
    public PreprocessingModel createPreprocessingModel(ExampleSet exampleSet) throws OperatorException {
        HashMap<Attribute, double[]> rangesMap = new HashMap<Attribute, double[]>();
        double[][] ranges = this.getRanges(exampleSet);
        int attributeIndex = 0;
        for (Attribute attribute : exampleSet.getAttributes()) {
            if (!attribute.isNumerical()) continue;
            ranges[attributeIndex][ranges[attributeIndex].length - 1] = Double.POSITIVE_INFINITY;
            rangesMap.put(attribute, ranges[attributeIndex]);
            ++attributeIndex;
        }
        DiscretizationModel model = new DiscretizationModel(exampleSet, this.getParameterAsBoolean(PARAMETER_REMOVE_USELESS));
        int numberOfDigits = -1;
        if (!this.getParameterAsBoolean("automatic_number_of_digits")) {
            numberOfDigits = this.getParameterAsInt("number_of_digits");
        }
        model.setRanges(rangesMap, "range", this.getParameterAsInt("range_name_type"), numberOfDigits);
        return model;
    }

    private Double getMinEntropySplitpoint(LinkedList<double[]> truncatedExamples, Attribute label) {
        HashSet<Double> candidateSplitpoints = new HashSet<Double>();
        Iterator it = truncatedExamples.iterator();
        int[] totalLabelDistribution = new int[label.getMapping().size()];
        while (it.hasNext()) {
            int labelIndex;
            double[] attributeLabelPair = (double[])it.next();
            candidateSplitpoints.add(attributeLabelPair[0]);
            int n = labelIndex = (int)attributeLabelPair[1];
            totalLabelDistribution[n] = totalLabelDistribution[n] + 1;
        }
        double[] totalFrequencies = new double[label.getMapping().size()];
        for (int i = 0; i < label.getMapping().size(); ++i) {
            totalFrequencies[i] = (double)totalLabelDistribution[i] / (double)truncatedExamples.size();
        }
        double totalEntropy = 0.0;
        for (int i = 0; i < label.getMapping().size(); ++i) {
            totalEntropy -= totalFrequencies[i] * MathFunctions.ld(totalFrequencies[i]);
        }
        double minClassInformationEntropy = totalEntropy;
        double bestSplitpoint = Double.NaN;
        double bestSplitpointEntropy1 = Double.POSITIVE_INFINITY;
        double bestSplitpointEntropy2 = Double.POSITIVE_INFINITY;
        int k1 = 0;
        int k2 = 0;
        Iterator it1 = candidateSplitpoints.iterator();
        while (it1.hasNext()) {
            double currentSplitpoint = (Double)it1.next();
            int s1 = 0;
            int s2 = 0;
            k1 = 0;
            k2 = 0;
            int[] labelDistribution1 = new int[label.getMapping().size()];
            int[] labelDistribution2 = new int[label.getMapping().size()];
            for (double[] attributeLabelPair : truncatedExamples) {
                double valueToCompare = attributeLabelPair[0];
                int labelIndex = (int)attributeLabelPair[1];
                if (valueToCompare <= currentSplitpoint) {
                    ++s1;
                    int n = labelIndex;
                    labelDistribution1[n] = labelDistribution1[n] + 1;
                    continue;
                }
                ++s2;
                int n = labelIndex;
                labelDistribution2[n] = labelDistribution2[n] + 1;
            }
            double[] frequencies1 = new double[label.getMapping().size()];
            double[] frequencies2 = new double[label.getMapping().size()];
            for (int i = 0; i < label.getMapping().size(); ++i) {
                frequencies1[i] = (double)labelDistribution1[i] / (double)s1;
                frequencies2[i] = (double)labelDistribution2[i] / (double)s2;
                if (labelDistribution1[i] > 0) {
                    ++k1;
                }
                if (labelDistribution2[i] <= 0) continue;
                ++k2;
            }
            double entropy1 = 0.0;
            for (int i = 0; i < label.getMapping().size(); ++i) {
                entropy1 -= frequencies1[i] * MathFunctions.ld(frequencies1[i]);
            }
            double entropy2 = 0.0;
            for (int i = 0; i < label.getMapping().size(); ++i) {
                entropy2 -= frequencies2[i] * MathFunctions.ld(frequencies2[i]);
            }
            double classInformationEntropy = (double)s1 / (double)truncatedExamples.size() * entropy1 + (double)s2 / (double)truncatedExamples.size() * entropy2;
            if (!(classInformationEntropy < minClassInformationEntropy)) continue;
            minClassInformationEntropy = classInformationEntropy;
            bestSplitpoint = currentSplitpoint;
            bestSplitpointEntropy1 = entropy1;
            bestSplitpointEntropy2 = entropy2;
        }
        double gain = totalEntropy - minClassInformationEntropy;
        double delta = MathFunctions.ld(Math.pow(3.0, label.getMapping().size()) - 2.0) - ((double)label.getMapping().size() * totalEntropy - (double)k1 * bestSplitpointEntropy1 - (double)k2 * bestSplitpointEntropy2);
        if (gain >= MathFunctions.ld(truncatedExamples.size() - 1) / (double)truncatedExamples.size() + delta / (double)truncatedExamples.size()) {
            return bestSplitpoint;
        }
        return null;
    }

    private ArrayList getSplitpoints(LinkedList<double[]> startPartition, Attribute label) {
        LinkedList border = new LinkedList();
        ArrayList<Double> result = new ArrayList<Double>();
        border.addLast(startPartition);
        while (!border.isEmpty()) {
            LinkedList currentPartition = (LinkedList)border.removeFirst();
            Double splitpoint = this.getMinEntropySplitpoint(currentPartition, label);
            if (splitpoint == null) continue;
            result.add(splitpoint);
            double splitValue = splitpoint;
            LinkedList<double[]> newPartition1 = new LinkedList<double[]>();
            LinkedList<double[]> newPartition2 = new LinkedList<double[]>();
            for (double[] attributeLabelPair : currentPartition) {
                if (attributeLabelPair[0] <= splitValue) {
                    newPartition1.addLast(attributeLabelPair);
                    continue;
                }
                newPartition2.addLast(attributeLabelPair);
            }
            border.addLast(newPartition1);
            border.addLast(newPartition2);
        }
        return result;
    }

    private double[][] getRanges(ExampleSet exampleSet) throws UserError {
        double[][] ranges = new double[exampleSet.getAttributes().size()][];
        Attribute label = exampleSet.getAttributes().getLabel();
        if (label == null) {
            throw new UserError((Operator)this, 105);
        }
        int a = 0;
        for (Attribute attribute : exampleSet.getAttributes()) {
            if (attribute.isNumerical()) {
                Iterator reader = exampleSet.iterator();
                LinkedList<double[]> startPartition = new LinkedList<double[]>();
                while (reader.hasNext()) {
                    Example example = (Example)reader.next();
                    double[] attributeLabelPair = new double[]{example.getValue(attribute), example.getValue(label)};
                    startPartition.addLast(attributeLabelPair);
                }
                ArrayList splitpointsOfAttribute = this.getSplitpoints(startPartition, label);
                Iterator it = splitpointsOfAttribute.iterator();
                ranges[a] = new double[splitpointsOfAttribute.size() + 1];
                int i = 0;
                while (it.hasNext()) {
                    ranges[a][i] = (Double)it.next();
                    ++i;
                }
                ranges[a][ranges[a].length - 1] = exampleSet.getStatistics(attribute, "maximum");
                Arrays.sort(ranges[a]);
            }
            ++a;
        }
        return ranges;
    }

    @Override
    public boolean isSupportingAttributeRoles() {
        return true;
    }

    @Override
    public Class<? extends PreprocessingModel> getPreprocessingModelClass() {
        return DiscretizationModel.class;
    }

    @Override
    public List<ParameterType> getParameterTypes() {
        List<ParameterType> types = super.getParameterTypes();
        types.add(new ParameterTypeBoolean(PARAMETER_REMOVE_USELESS, "Indicates if useless attributes, i.e. those containing only one single range, should be removed.", true));
        types.add(new ParameterTypeCategory("range_name_type", "Indicates if long range names including the limits should be used.", DiscretizationModel.RANGE_NAME_TYPES, 0));
        ParameterTypeSingle type = new ParameterTypeBoolean("automatic_number_of_digits", "Indicates if the number of digits should be automatically determined for the range names.", true);
        type.registerDependencyCondition(new EqualTypeCondition(this, "range_name_type", DiscretizationModel.RANGE_NAME_TYPES, false, 2));
        types.add(type);
        type = new ParameterTypeInt("number_of_digits", "The minimum number of digits used for the interval names (-1: determine minimal number automatically).", -1, Integer.MAX_VALUE, -1);
        type.registerDependencyCondition(new BooleanParameterCondition(this, "automatic_number_of_digits", false, false));
        types.add(type);
        return types;
    }

    @Override
    public ResourceConsumptionEstimator getResourceConsumptionEstimator() {
        return OperatorResourceConsumptionHandler.getResourceConsumptionEstimator(this.getExampleSetInputPort(), MinimalEntropyDiscretization.class, this.attributeSelector);
    }

    static {
        MinimalEntropyDiscretization.registerDiscretizationOperator(FrequencyDiscretization.class);
    }
}

