/*
 * Decompiled with CFR 0.152.
 */
package com.rapidminer.operator.preprocessing.filter;

import com.rapidminer.example.Attribute;
import com.rapidminer.example.Example;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.example.set.Partition;
import com.rapidminer.example.set.SplittedExampleSet;
import com.rapidminer.operator.Operator;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.UserError;
import com.rapidminer.operator.annotation.ResourceConsumptionEstimator;
import com.rapidminer.operator.ports.metadata.ExampleSetMetaData;
import com.rapidminer.operator.ports.metadata.MetaData;
import com.rapidminer.operator.preprocessing.AbstractDataProcessing;
import com.rapidminer.operator.tools.AttributeSubsetSelector;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeBoolean;
import com.rapidminer.parameter.UndefinedParameterError;
import com.rapidminer.tools.OperatorResourceConsumptionHandler;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Set;

/**
 * Data-processing operator that drops examples whose values on a selected set of
 * attributes repeat those of an earlier example. The first occurrence of each
 * distinct value combination is kept; later occurrences are filtered out.
 *
 * <p>The attributes used for the comparison come from an {@link AttributeSubsetSelector}
 * bound to this operator's example set input port. Missing values ({@code NaN}) only
 * count as equal to each other when the {@code treat_missing_values_as_duplicates}
 * parameter is enabled.
 */
public class RemoveDuplicates
extends AbstractDataProcessing {
    /** Key of the boolean parameter deciding whether two missing values compare as equal. */
    private static final String PARAMETER_TREAT_MISSING_VALUES_AS_DUPLICATES = "treat_missing_values_as_duplicates";
    /** Chooses which attributes take part in the duplicate comparison. */
    private AttributeSubsetSelector subsetSelector = new AttributeSubsetSelector(this, this.getExampleSetInputPort());

    /**
     * Creates the operator.
     *
     * @param description operator description handed to the superclass constructor
     */
    public RemoveDuplicates(OperatorDescription description) {
        super(description);
    }

    /**
     * Marks the number of examples in the meta data as reduced by an unknown amount,
     * since the number of duplicates cannot be known in advance.
     *
     * @param metaData meta data of the incoming example set; modified in place
     * @return the same {@code metaData} instance
     */
    @Override
    protected MetaData modifyMetaData(ExampleSetMetaData metaData) throws UndefinedParameterError {
        metaData.getNumberOfExamples().reduceByUnknownAmount();
        return metaData;
    }

    /**
     * Filters duplicate examples out of {@code exampleSet}.
     *
     * <p>Examples are visited in index order and grouped into hash buckets keyed by a
     * hash over the compared attribute values. Each example is compared only against
     * the examples already kept in its bucket; if one of them matches on every compared
     * attribute the example is assigned to partition 1 (dropped), otherwise it stays in
     * partition 0 (kept) and is added to the bucket.
     *
     * @param exampleSet the example set to filter
     * @return a {@link SplittedExampleSet} view on {@code exampleSet} with only
     *         partition 0 (the non-duplicates) selected
     * @throws UserError with error code 153 (arguments 1 and 0) when the attribute
     *         selection is empty
     * @throws OperatorException if the attribute subset or a parameter cannot be resolved
     */
    @Override
    public ExampleSet apply(ExampleSet exampleSet) throws OperatorException {
        int[] partition = new int[exampleSet.size()];
        Set<Attribute> compareAttributes = this.subsetSelector.getAttributeSubset(exampleSet, false);
        if (compareAttributes.isEmpty()) {
            throw new UserError((Operator)this, 153, 1, 0);
        }
        // Loop-invariant: read the parameter once instead of once per attribute comparison.
        boolean missingValuesMatch = this.getParameterAsBoolean(PARAMETER_TREAT_MISSING_VALUES_AS_DUPLICATES);
        // hash of the compared values -> indices of the kept examples having that hash
        HashMap<Integer, List<Integer>> buckets = new HashMap<Integer, List<Integer>>();
        for (int i = 0; i < exampleSet.size(); ++i) {
            Example example = exampleSet.getExample(i);
            int hash = RemoveDuplicates.hashOfValues(example, compareAttributes);
            List<Integer> bucket = buckets.get(hash);
            if (bucket == null) {
                bucket = new ArrayList<Integer>();
                buckets.put(hash, bucket);
            }
            boolean duplicate = false;
            for (Integer keptIndex : bucket) {
                Example keptExample = exampleSet.getExample(keptIndex);
                if (RemoveDuplicates.haveSameValues(example, keptExample, compareAttributes, missingValuesMatch)) {
                    // One match is enough; no need to scan the rest of the bucket.
                    duplicate = true;
                    break;
                }
            }
            if (duplicate) {
                partition[i] = 1;
            } else {
                bucket.add(i);
            }
        }
        SplittedExampleSet result = new SplittedExampleSet(exampleSet, new Partition(partition, 2));
        result.selectSingleSubset(0);
        return result;
    }

    /**
     * Computes the bucket hash of an example over the compared attributes.
     *
     * <p>Negative zero is folded into positive zero first: the equality test uses
     * {@code ==}, under which {@code 0.0 == -0.0}, so both must land in the same bucket
     * or such duplicates would never be compared. {@code NaN} needs no special handling
     * because {@link Double#doubleToLongBits(double)} maps every NaN to one bit pattern.
     */
    private static int hashOfValues(Example example, Set<Attribute> attributes) {
        int hash = 0;
        for (Attribute attribute : attributes) {
            double value = example.getValue(attribute);
            if (value == 0.0) {
                value = 0.0;
            }
            long bits = Double.doubleToLongBits(value);
            hash = hash * 31 + (int)(bits ^ bits >>> 32);
        }
        return hash;
    }

    /**
     * Tells whether two examples agree on every compared attribute. Two values agree
     * when they are {@code ==}, or when both are {@code NaN} and
     * {@code missingValuesMatch} is {@code true}.
     */
    private static boolean haveSameValues(Example first, Example second, Set<Attribute> attributes, boolean missingValuesMatch) {
        for (Attribute attribute : attributes) {
            double a = first.getValue(attribute);
            double b = second.getValue(attribute);
            if (a == b) {
                continue;
            }
            if (missingValuesMatch && Double.isNaN(a) && Double.isNaN(b)) {
                continue;
            }
            return false;
        }
        return true;
    }

    /**
     * Returns the superclass parameters, followed by the attribute subset selector's
     * parameters and the non-expert boolean {@code treat_missing_values_as_duplicates}
     * parameter (default {@code false}).
     */
    @Override
    public List<ParameterType> getParameterTypes() {
        List<ParameterType> types = super.getParameterTypes();
        types.addAll(this.subsetSelector.getParameterTypes());
        ParameterTypeBoolean type = new ParameterTypeBoolean(PARAMETER_TREAT_MISSING_VALUES_AS_DUPLICATES, "If set to true, treats missing values as duplicates", false);
        type.setExpert(false);
        types.add(type);
        return types;
    }

    /**
     * Always {@code false}: {@link #apply(ExampleSet)} returns a filtered view and does
     * not write values into the incoming example set.
     */
    @Override
    public boolean writesIntoExistingData() {
        return false;
    }

    /**
     * Looks up the resource consumption estimator registered for this operator class
     * on its input port.
     */
    @Override
    public ResourceConsumptionEstimator getResourceConsumptionEstimator() {
        return OperatorResourceConsumptionHandler.getResourceConsumptionEstimator(this.getInputPort(), RemoveDuplicates.class, null);
    }
}

