/*
 * Decompiled with CFR 0.152.
 */
package com.rapidminer.operator.io;

import com.rapidminer.example.Attribute;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.example.table.AttributeFactory;
import com.rapidminer.example.table.DataRowFactory;
import com.rapidminer.example.table.MemoryExampleTable;
import com.rapidminer.operator.Operator;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.UserError;
import com.rapidminer.operator.io.AbstractExampleSource;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeCategory;
import com.rapidminer.parameter.ParameterTypeDouble;
import com.rapidminer.parameter.ParameterTypeFile;
import com.rapidminer.parameter.ParameterTypeInt;
import com.rapidminer.parameter.ParameterTypeString;
import com.rapidminer.tools.RandomGenerator;
import java.io.IOException;
import java.io.InputStream;
import java.util.LinkedList;
import java.util.List;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;

/**
 * Operator that reads an example set from an XRFF (XML) data file.
 *
 * <p>The expected document layout, as enforced by the parsing code below, is:
 * <pre>
 * &lt;dataset&gt;
 *   &lt;header&gt;
 *     &lt;attributes&gt;
 *       &lt;attribute name="..." type="..." [class="yes"]&gt;
 *         [&lt;labels&gt; &lt;label&gt;...&lt;/label&gt; ... &lt;/labels&gt;]
 *       &lt;/attribute&gt;
 *     &lt;/attributes&gt;
 *   &lt;/header&gt;
 *   &lt;body&gt;
 *     &lt;instances&gt;
 *       &lt;instance [weight="..."]&gt; &lt;value&gt;...&lt;/value&gt; ... &lt;/instance&gt;
 *     &lt;/instances&gt;
 *   &lt;/body&gt;
 * &lt;/dataset&gt;
 * </pre>
 *
 * <p>An additional "weight" attribute is always appended while reading; it is removed again
 * from the result if no instance in the file carried a {@code weight} XML attribute.
 */
public class XrffExampleSource
extends AbstractExampleSource {
    public static final String PARAMETER_DATA_FILE = "data_file";
    public static final String PARAMETER_ID_ATTRIBUTE = "id_attribute";
    public static final String PARAMETER_DATAMANAGEMENT = "datamanagement";
    public static final String PARAMETER_DECIMAL_POINT_CHARACTER = "decimal_point_character";
    public static final String PARAMETER_SAMPLE_RATIO = "sample_ratio";
    public static final String PARAMETER_SAMPLE_SIZE = "sample_size";

    // Value type codes handed to AttributeFactory.createAttribute. The numeric literals are
    // exactly the ones found in this (decompiled) source; each name reflects the XRFF type
    // string that selects it.
    // NOTE(review): presumably these are inlined RapidMiner ontology constants - confirm
    // against the RapidMiner version in use before relying on the names.
    /** Used for every type string that is not recognized below (presumably nominal). */
    private static final int VALUE_TYPE_DEFAULT = 1;
    private static final int VALUE_TYPE_NUMERIC = 2;
    private static final int VALUE_TYPE_INTEGER = 3;
    private static final int VALUE_TYPE_REAL = 4;
    private static final int VALUE_TYPE_STRING = 5;
    private static final int VALUE_TYPE_DATE = 10;

    /** Fallback decimal point; identical to the default declared in {@link #getParameterTypes()}. */
    private static final char DEFAULT_DECIMAL_POINT = '.';

    public XrffExampleSource(OperatorDescription description) {
        super(description);
    }

    /**
     * Parses the configured XRFF file and builds an example set from it.
     *
     * <p>The attribute flagged with {@code class="yes"} becomes the label, the attribute whose
     * name equals the {@code id_attribute} parameter becomes the id. Rows are limited by
     * {@code sample_size}; if that is -1, each row is kept with probability {@code sample_ratio}.
     *
     * @return the example set read from the file
     * @throws OperatorException a {@link UserError} (code 302) if the file cannot be read or does
     *         not have the expected structure, or any error raised while resolving parameters
     */
    @Override
    public ExampleSet createExampleSet() throws OperatorException {
        String idName = this.getParameterAsString(PARAMETER_ID_ATTRIBUTE);
        Attribute label = null;
        Attribute id = null;
        Attribute weight = null;
        boolean instanceWeightsUsed = false;
        MemoryExampleTable table = null;
        try {
            Document document = this.parseDocument();
            Element datasetElement = document.getDocumentElement();
            if (!datasetElement.getTagName().equals("dataset")) {
                throw new IOException("Outer tag of XRFF file must be <dataset>.");
            }

            // ---- header: attribute meta data ----
            Element headerElement = this.retrieveSingleNode(datasetElement, "header");
            Element attributesElement = this.retrieveSingleNode(headerElement, "attributes");
            LinkedList<Attribute> attributeList = new LinkedList<Attribute>();
            NodeList attributes = attributesElement.getChildNodes();
            for (int i = 0; i < attributes.getLength(); ++i) {
                Node node = attributes.item(i);
                if (!(node instanceof Element)) {
                    continue; // skip whitespace text nodes, comments, ...
                }
                Element attribute = (Element)node;
                String tagName = attribute.getTagName();
                if (!tagName.equals("attribute")) {
                    throw new IOException("Only tags <attribute> are allowed inside <attributes>, was " + tagName);
                }
                // Element.getAttribute never returns null (it yields "" for a missing
                // attribute), so presence has to be tested with hasAttribute.
                if (!attribute.hasAttribute("name")) {
                    throw new IOException("The tag <attribute> needs a 'name' attribute.");
                }
                String name = attribute.getAttribute("name");
                boolean isClass = "yes".equals(attribute.getAttribute("class"));
                if (!attribute.hasAttribute("type")) {
                    throw new IOException("The tag <attribute> needs a 'type' attribute.");
                }
                String valueType = attribute.getAttribute("type");
                Attribute att = this.createAttribute(name, valueType);
                if (att.isNominal()) {
                    this.readNominalLabels(attribute, att);
                }
                if (isClass) {
                    label = att;
                }
                if (idName != null && name.equals(idName)) {
                    id = att;
                }
                attributeList.add(att);
            }

            // The weight column is always created as the last attribute; it is dropped again
            // after reading if no instance specified a weight.
            weight = AttributeFactory.createAttribute("weight", VALUE_TYPE_REAL);
            attributeList.add(weight);
            table = new MemoryExampleTable(attributeList);
            DataRowFactory factory = new DataRowFactory(this.getParameterAsInt(PARAMETER_DATAMANAGEMENT), this.getDecimalPointCharacter());
            Attribute[] attributeArray = new Attribute[attributeList.size()];
            attributeList.toArray(attributeArray);

            // ---- body: data rows ----
            Element bodyElement = this.retrieveSingleNode(datasetElement, "body");
            Element instancesElement = this.retrieveSingleNode(bodyElement, "instances");
            NodeList instances = instancesElement.getChildNodes();
            int maxRows = this.getParameterAsInt(PARAMETER_SAMPLE_SIZE);
            double sampleProb = this.getParameterAsDouble(PARAMETER_SAMPLE_RATIO);
            RandomGenerator random = RandomGenerator.getRandomGenerator(this);
            int counter = 0;
            for (int i = 0; i < instances.getLength(); ++i) {
                Node node = instances.item(i);
                if (!(node instanceof Element)) {
                    continue;
                }
                Element instance = (Element)node;
                String tagName = instance.getTagName();
                if (!tagName.equals("instance")) {
                    throw new IOException("Only tags <instance> are allowed inside <instances>, was " + tagName);
                }
                String[] valueArray = this.readInstanceValues(instance, attributeList.size());

                // Last slot holds the instance weight; "1.0" when the file gives none.
                String weightString = instance.getAttribute("weight");
                if (weightString != null && weightString.length() > 0) {
                    valueArray[valueArray.length - 1] = weightString;
                    instanceWeightsUsed = true;
                } else {
                    valueArray[valueArray.length - 1] = "1.0";
                }

                // A non-negative sample_size is a hard row limit.
                if (maxRows > -1 && counter >= maxRows) {
                    break;
                }
                ++counter;
                // sample_size == -1: keep each row with probability sample_ratio.
                if (maxRows == -1 && random.nextDouble() > sampleProb) {
                    continue;
                }
                table.addDataRow(factory.create(valueArray, attributeArray));
            }
        }
        catch (IOException e) {
            throw new UserError((Operator)this, 302, this.getParameterAsString(PARAMETER_DATA_FILE), e.getMessage());
        }
        ExampleSet result = table.createExampleSet(label, weight, id);
        if (!instanceWeightsUsed) {
            result.getAttributes().remove(weight);
            result.getExampleTable().removeAttribute(weight);
        }
        return result;
    }

    /**
     * Opens the configured data file, parses it into a DOM document and closes the stream.
     *
     * <p>NOTE(review): the parser is created with the factory defaults. If data files may come
     * from untrusted sources, consider disabling external entity resolution on the factory.
     *
     * @return the parsed document
     * @throws IOException if the file cannot be read or is not well-formed XML; parser
     *         exceptions are wrapped with their original cause preserved
     * @throws OperatorException if the data file parameter cannot be resolved
     */
    private Document parseDocument() throws IOException, OperatorException {
        InputStream in = this.getParameterAsInputStream(PARAMETER_DATA_FILE);
        try {
            return DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(in);
        }
        catch (SAXException e) {
            throw new IOException(e.getMessage(), e);
        }
        catch (ParserConfigurationException e) {
            throw new IOException(e.getMessage(), e);
        }
        finally {
            // The DOM API does not promise to close the stream, so do it explicitly.
            if (in != null) {
                in.close();
            }
        }
    }

    /**
     * Returns the first character of the decimal point parameter, or {@code '.'} if the
     * parameter value is missing or empty.
     */
    private char getDecimalPointCharacter() throws OperatorException {
        String decimalPoint = this.getParameterAsString(PARAMETER_DECIMAL_POINT_CHARACTER);
        if (decimalPoint == null || decimalPoint.length() == 0) {
            return DEFAULT_DECIMAL_POINT;
        }
        return decimalPoint.charAt(0);
    }

    /**
     * Registers the values listed in an optional {@code <labels>} section of the given
     * {@code <attribute>} element in the mapping of the given attribute. Does nothing if there
     * is no (or more than one) {@code <labels>} element.
     *
     * @throws IOException if {@code <labels>} contains an element other than {@code <label>}
     */
    private void readNominalLabels(Element attributeElement, Attribute att) throws IOException {
        Element labelsElement = this.retrieveSingleNode(attributeElement, "labels", false);
        if (labelsElement == null) {
            return;
        }
        NodeList labels = labelsElement.getChildNodes();
        for (int j = 0; j < labels.getLength(); ++j) {
            Node labelNode = labels.item(j);
            if (!(labelNode instanceof Element)) {
                continue;
            }
            String labelTagName = labelNode.getNodeName();
            if (!labelTagName.equals("label")) {
                throw new IOException("Only tags <label> are allowed inside <labels>, was " + labelTagName);
            }
            att.getMapping().mapString(labelNode.getTextContent());
        }
    }

    /**
     * Collects the text of all {@code <value>} children of an {@code <instance>} element.
     *
     * @param instance the instance element
     * @param attributeCount total number of table attributes including the trailing weight
     * @return an array of length {@code attributeCount}; the last slot (weight) is left unset
     * @throws IOException if the number of child elements differs from
     *         {@code attributeCount - 1} or a child element is not a {@code <value>}
     */
    private String[] readInstanceValues(Element instance, int attributeCount) throws IOException {
        NodeList values = instance.getChildNodes();
        // First pass: the element count is validated before any tag name is inspected.
        int elementCount = 0;
        for (int j = 0; j < values.getLength(); ++j) {
            if (values.item(j) instanceof Element) {
                ++elementCount;
            }
        }
        if (elementCount != attributeCount - 1) {
            throw new IOException("Number of values must be the same than the number of attributes.");
        }
        String[] valueArray = new String[attributeCount];
        int index = 0;
        for (int j = 0; j < values.getLength(); ++j) {
            Node valueNode = values.item(j);
            if (!(valueNode instanceof Element)) {
                continue;
            }
            String valueTagName = ((Element)valueNode).getTagName();
            if (!valueTagName.equals("value")) {
                throw new IOException("Only tags <value> are allowed inside <instance>, was " + valueTagName);
            }
            valueArray[index++] = valueNode.getTextContent();
        }
        return valueArray;
    }

    /** Same as {@link #retrieveSingleNode(Element, String, boolean)} with {@code exceptionOnFail = true}. */
    private Element retrieveSingleNode(Element element, String nodeName) throws IOException {
        return this.retrieveSingleNode(element, nodeName, true);
    }

    /**
     * Looks up the single element with the given tag name below {@code element}.
     *
     * <p>The lookup uses {@code getElementsByTagName}, i.e. it considers all descendants, not
     * only direct children.
     *
     * @param element the element to search in
     * @param nodeName the tag name to look for
     * @param exceptionOnFail whether zero or multiple matches raise an exception instead of
     *        yielding {@code null}
     * @return the unique matching element, or {@code null} if there is not exactly one match
     *         and {@code exceptionOnFail} is {@code false}
     * @throws IOException if there is not exactly one match and {@code exceptionOnFail} is
     *         {@code true}
     */
    private Element retrieveSingleNode(Element element, String nodeName, boolean exceptionOnFail) throws IOException {
        NodeList headerElements = element.getElementsByTagName(nodeName);
        if (headerElements.getLength() == 0) {
            if (exceptionOnFail) {
                throw new IOException("A dataset must define a <" + nodeName + "> section for attribute meta data description.");
            }
            return null;
        }
        if (headerElements.getLength() > 1) {
            if (exceptionOnFail) {
                throw new IOException("A dataset must not define more than one <" + nodeName + "> section.");
            }
            return null;
        }
        return (Element)headerElements.item(0);
    }

    /**
     * Creates an attribute with the value type selected by the XRFF type string.
     *
     * <p>The type string is matched case-insensitively and independently of the default
     * locale. Unrecognized type strings fall back to {@link #VALUE_TYPE_DEFAULT}.
     *
     * @param name the attribute name
     * @param type the content of the {@code type} XML attribute
     * @return the newly created attribute
     */
    private Attribute createAttribute(String name, String type) {
        int valueType = VALUE_TYPE_DEFAULT;
        if ("numeric".equalsIgnoreCase(type)) {
            valueType = VALUE_TYPE_NUMERIC;
        } else if ("real".equalsIgnoreCase(type)) {
            valueType = VALUE_TYPE_REAL;
        } else if ("integer".equalsIgnoreCase(type)) {
            valueType = VALUE_TYPE_INTEGER;
        } else if ("string".equalsIgnoreCase(type)) {
            valueType = VALUE_TYPE_STRING;
        } else if ("date".equalsIgnoreCase(type)) {
            valueType = VALUE_TYPE_DATE;
        }
        return AttributeFactory.createAttribute(name, valueType);
    }

    /**
     * Adds this operator's parameters (data file, id attribute, data management, decimal point
     * character, sampling options and the random generator parameters) to the inherited ones.
     */
    @Override
    public List<ParameterType> getParameterTypes() {
        List<ParameterType> types = super.getParameterTypes();
        types.add(new ParameterTypeFile(PARAMETER_DATA_FILE, "The path to the data file.", "xrff", false));
        types.add(new ParameterTypeString(PARAMETER_ID_ATTRIBUTE, "The (case sensitive) name of the id attribute"));
        types.add(new ParameterTypeCategory(PARAMETER_DATAMANAGEMENT, "Determines, how the data is represented internally.", DataRowFactory.TYPE_NAMES, 0));
        types.add(new ParameterTypeString(PARAMETER_DECIMAL_POINT_CHARACTER, "Character that is used as decimal point.", "."));
        ParameterTypeDouble type = new ParameterTypeDouble(PARAMETER_SAMPLE_RATIO, "The fraction of the data set which should be read (1 = all; only used if sample_size = -1)", 0.0, 1.0, 1.0);
        type.setExpert(false);
        types.add(type);
        types.add(new ParameterTypeInt(PARAMETER_SAMPLE_SIZE, "The exact number of samples which should be read (-1 = use sample ratio; if not -1, sample_ratio will not have any effect)", -1, Integer.MAX_VALUE, -1));
        types.addAll(RandomGenerator.getRandomGeneratorParameters(this));
        return types;
    }
}

