/*
 * Decompiled with CFR 0.152.
 */
package com.rapidminer.operator.preprocessing.filter;

import com.rapidminer.example.Attribute;
import com.rapidminer.example.Example;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.operator.Operator;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.UserError;
import com.rapidminer.operator.annotation.ResourceConsumptionEstimator;
import com.rapidminer.operator.ports.metadata.AttributeMetaData;
import com.rapidminer.operator.ports.metadata.ExampleSetMetaData;
import com.rapidminer.operator.ports.metadata.MetaData;
import com.rapidminer.operator.ports.metadata.SetRelation;
import com.rapidminer.operator.preprocessing.AbstractDataProcessing;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeBoolean;
import com.rapidminer.parameter.UndefinedParameterError;
import com.rapidminer.tools.OperatorResourceConsumptionHandler;
import java.util.LinkedList;
import java.util.List;

/**
 * Operator that rewrites every regular numerical attribute of an example set
 * in place with a TF-IDF style weight.
 *
 * <p>For each numerical attribute the document frequency is the number of
 * examples whose value is greater than zero, and the inverse document
 * frequency is {@code ln(numberOfExamples / documentFrequency)}. Depending on
 * the {@link #PARAMETER_CALCULATE_TERM_FREQUENCIES} parameter the raw value is
 * first divided by the sum of all numerical values of the same example.
 */
public class TFIDFFilter
extends AbstractDataProcessing {
    /** Key of the boolean parameter that switches the per-example normalization of the raw values on or off. */
    public static final String PARAMETER_CALCULATE_TERM_FREQUENCIES = "calculate_term_frequencies";

    /**
     * Creates the operator.
     *
     * @param description the operator description, passed through to the superclass
     */
    public TFIDFFilter(OperatorDescription description) {
        super(description);
    }

    /**
     * Marks the mean and the value set of every non-special numerical attribute
     * as unknown, because {@link #apply(ExampleSet)} overwrites those values.
     *
     * @param metaData the incoming meta data; modified in place
     * @return the same {@code metaData} instance
     */
    @Override
    protected MetaData modifyMetaData(ExampleSetMetaData metaData) throws UndefinedParameterError {
        for (AttributeMetaData amd : metaData.getAllAttributes()) {
            if (amd.isSpecial() || !amd.isNumerical()) {
                continue;
            }
            amd.getMean().setUnkown();
            amd.setValueSetRelation(SetRelation.UNKNOWN);
        }
        return metaData;
    }

    /**
     * Replaces the value of every numerical attribute by its TF-IDF weight.
     *
     * <p>A value is set to {@code 0.0} when the values of its example sum to zero
     * or when its attribute has no value greater than zero in any example.
     *
     * @param exampleSet the data to transform; its values are overwritten
     * @return the same {@code exampleSet} instance
     * @throws OperatorException a {@link UserError} with code 110 if the set has no
     *         examples, or with code 106 if it has no attributes; possibly also
     *         raised by {@code checkForStop()} or the parameter lookup
     */
    @Override
    public ExampleSet apply(ExampleSet exampleSet) throws OperatorException {
        if (exampleSet.size() < 1) {
            throw new UserError((Operator)this, 110, "1");
        }
        if (exampleSet.getAttributes().size() == 0) {
            throw new UserError((Operator)this, 106, new Object[0]);
        }

        // Only numerical attributes take part in the weighting.
        List<Attribute> attributes = new LinkedList<Attribute>();
        for (Attribute attribute : exampleSet.getAttributes()) {
            if (attribute.isNumerical()) {
                attributes.add(attribute);
            }
        }

        // First pass: per-example value sums and per-attribute document frequencies.
        double[] termFrequencySum = new double[exampleSet.size()];
        int[] documentFrequencies = new int[attributes.size()];
        int exampleCounter = 0;
        for (Example example : exampleSet) {
            int i = 0;
            for (Attribute attribute : attributes) {
                double value = example.getValue(attribute);
                termFrequencySum[exampleCounter] += value;
                if (value > 0.0) {
                    documentFrequencies[i]++;
                }
                ++i;
            }
            ++exampleCounter;
            this.checkForStop();
        }

        // A document frequency of zero would give log(n / 0) = +Infinity, which
        // Double.isNaN() does not detect and which turns 0 * idf into NaN below.
        // Such attributes are marked with NaN explicitly so that the guard in the
        // second pass maps them to 0.
        double numberOfExamples = exampleSet.size();
        double[] inverseDocumentFrequencies = new double[documentFrequencies.length];
        for (int i = 0; i < documentFrequencies.length; ++i) {
            inverseDocumentFrequencies[i] = documentFrequencies[i] > 0
                    ? Math.log(numberOfExamples / (double)documentFrequencies[i])
                    : Double.NaN;
        }

        // Second pass: overwrite each value with tf * idf.
        boolean calculateTermFrequencies = this.getParameterAsBoolean(PARAMETER_CALCULATE_TERM_FREQUENCIES);
        exampleCounter = 0;
        for (Example example : exampleSet) {
            int i = 0;
            for (Attribute attribute : attributes) {
                double idf = inverseDocumentFrequencies[i];
                if (termFrequencySum[exampleCounter] == 0.0 || Double.isNaN(idf)) {
                    example.setValue(attribute, 0.0);
                } else {
                    double tf = example.getValue(attribute);
                    if (calculateTermFrequencies) {
                        tf /= termFrequencySum[exampleCounter];
                    }
                    example.setValue(attribute, tf * idf);
                }
                ++i;
            }
            ++exampleCounter;
            this.checkForStop();
        }
        return exampleSet;
    }

    /**
     * Extends the inherited parameter list with the non-expert boolean parameter
     * {@link #PARAMETER_CALCULATE_TERM_FREQUENCIES} (default {@code true}).
     *
     * @return the parameter list of the superclass with the additional parameter appended
     */
    @Override
    public List<ParameterType> getParameterTypes() {
        List<ParameterType> types = super.getParameterTypes();
        ParameterTypeBoolean type = new ParameterTypeBoolean(PARAMETER_CALCULATE_TERM_FREQUENCIES, "Indicates if term frequency values should be generated (must be done if input data is given as simple occurence counts).", true);
        type.setExpert(false);
        types.add(type);
        return types;
    }

    /**
     * Reports that this operator writes into the data it receives;
     * {@link #apply(ExampleSet)} overwrites values through {@code Example.setValue}.
     *
     * @return always {@code true}
     */
    @Override
    public boolean writesIntoExistingData() {
        return true;
    }

    /**
     * Looks up the resource consumption estimator registered for this operator class.
     *
     * @return the estimator obtained from {@link OperatorResourceConsumptionHandler}
     *         for this operator's input port
     */
    @Override
    public ResourceConsumptionEstimator getResourceConsumptionEstimator() {
        return OperatorResourceConsumptionHandler.getResourceConsumptionEstimator(this.getInputPort(), TFIDFFilter.class, null);
    }
}

