package com.rapidminer.krimp;

import java.util.ArrayList;
import java.util.Collection;
import java.util.Comparator;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;

import com.rapidminer.example.Attribute;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.example.Tools;
import com.rapidminer.krimp.comparators.StandardCandidateComparator;
import com.rapidminer.operator.Operator;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.learner.associations.FrequentItemSet;
import com.rapidminer.operator.learner.associations.FrequentItemSets;
import com.rapidminer.operator.ports.InputPort;
import com.rapidminer.operator.ports.OutputPort;
import com.rapidminer.operator.ports.metadata.ExampleSetPrecondition;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeBoolean;
import com.rapidminer.parameter.ParameterTypeDouble;
import com.rapidminer.tools.Ontology;

/**
 * Operator that derives a code table from an example set and a collection of
 * candidate frequent item sets.
 * <p>
 * Starting from the standard code table of the data, every candidate with more
 * than one item is tentatively added to the coding set. The extended table is
 * kept only if its compressed size is smaller than that of the current table;
 * optionally the accepted table is pruned afterwards.
 * <p>
 * The operator delivers the unchanged input example set, the resulting code
 * table, and those item sets of the code table that are actually used and
 * reach the configured minimal support.
 */
public class KRIMP extends Operator {

	/** Key of the boolean parameter that switches post-acceptance pruning on. */
	public static final String PARAMETER_PRUNE = "employ pruning";

	/** Key of the parameter holding the minimal (relative) support. */
	public static final String PARAMETER_MIN_SUPPORT = "min_support";

	private final InputPort exampleSetInput = getInputPorts().createPort(
			"example set");
	private final InputPort frequentSetsInput = getInputPorts().createPort(
			"frequent sets", FrequentItemSets.class);

	private final OutputPort exampleSetOutput = getOutputPorts().createPort(
			"example set");
	private final OutputPort codedSetsOutput = getOutputPorts().createPort(
			"coding set");
	private final OutputPort freqSetsOutput = getOutputPorts().createPort(
			"frequent sets of the code table");

	/**
	 * Creates the operator and registers its meta data rules: the example set
	 * is passed through, the other two outputs are generated.
	 *
	 * @param description
	 *            the operator description handed to the super class
	 */
	public KRIMP(OperatorDescription description) {
		super(description);

		exampleSetInput.addPrecondition(new ExampleSetPrecondition(
				exampleSetInput, Ontology.BINOMINAL));
		getTransformer().addGenerationRule(codedSetsOutput, CodeTable.class);
		getTransformer().addGenerationRule(freqSetsOutput,
				FrequentItemSets.class);
		getTransformer().addPassThroughRule(exampleSetInput, exampleSetOutput);
	}

	/**
	 * Builds the code table from the inputs and delivers all three outputs.
	 *
	 * @throws OperatorException
	 *             if reading a parameter or an input fails, or if the example
	 *             set is rejected by the nominal attribute check
	 */
	@Override
	public void doWork() throws OperatorException {

		boolean pruningEnabled = getParameterAsBoolean(PARAMETER_PRUNE);
		ExampleSet exampleSet = exampleSetInput.getData(ExampleSet.class);
		FrequentItemSets candidateSets = frequentSetsInput
				.getData(FrequentItemSets.class);

		// the relative minimal support is turned into an absolute count
		double minSup = getParameterAsDouble(PARAMETER_MIN_SUPPORT);
		int minTotalSup = (int) Math.ceil(minSup * exampleSet.size());
		// check
		Tools.onlyNominalAttributes(exampleSet, "StandardCodeTable");
		// pre-computing data properties
		ExampleSet workingSet = preprocessExampleSet(exampleSet);

		Database database = new Database(workingSet);
		// note: this sorts the delivered candidate collection in place
		candidateSets.sortSets(new StandardCandidateComparator());

		CodeTable actualCT = CodeTable.getStandardCodeTable(database);
		for (FrequentItemSet candidateSet : candidateSets) {
			// only candidates with more than one item are tried
			if (candidateSet.getNumberOfItems() <= 1) {
				continue;
			}
			CodeTable candCT = actualCT.addToCodingSet(candidateSet, database);
			// a candidate is accepted only if it improves the compression
			if (candCT.getCompressedSize() < actualCT.getCompressedSize()) {
				if (pruningEnabled) {
					actualCT = prune(candCT, actualCT, database);
				} else {
					actualCT = candCT;
				}
			}
		}

		// report only item sets that are used and sufficiently frequent
		FrequentItemSets freqSetsOutp = new FrequentItemSets(
				database.getNumberOfTransactions());
		for (FrequentItemSet freqItemSet : actualCT.getItemSets()) {
			if (actualCT.getUsage(freqItemSet) > 0
					&& freqItemSet.getFrequency() >= minTotalSup) {
				freqSetsOutp.addFrequentSet(freqItemSet);
			}
		}
		exampleSetOutput.deliver(exampleSet);
		codedSetsOutput.deliver(actualCT);
		freqSetsOutput.deliver(freqSetsOutp);
	}

	/**
	 * Prunes the code table <code>newCT</code>.
	 * <p>
	 * As long as prune candidates remain, the candidate with the lowest usage
	 * in the <em>current</em> code table is tentatively removed. The removal is
	 * kept if it yields a smaller compressed size; in that case the item sets
	 * whose usage dropped because of the removal become candidates as well.
	 * <p>
	 * Candidates are held in a plain set and the least used one is selected
	 * on every iteration. A sorted set ordered only by usage must not be used
	 * here: it would treat different item sets with the same usage as
	 * duplicates and drop them, and it would keep ordering by the usages of an
	 * outdated code table.
	 *
	 * @param newCT
	 *            a code table, that has been build by adding or removing an
	 *            itemset from <code>oldCT</code>
	 * @param oldCT
	 *            a code table
	 * @param db
	 *            the given database (not used by the current implementation)
	 * @return the pruned code table
	 */
	private CodeTable prune(CodeTable newCT, CodeTable oldCT, Database db) {
		Set<FrequentItemSet> pruneSet = getPruneSet(newCT, oldCT);
		while (!pruneSet.isEmpty()) {
			FrequentItemSet pruneCand = leastUsed(pruneSet, newCT);
			pruneSet.remove(pruneCand);
			CodeTable pruneCT = newCT.removeFromCodingSet(pruneCand);
			if (pruneCT.getCompressedSize() < newCT.getCompressedSize()) {
				pruneSet.addAll(getPruneSet(pruneCT, newCT));
				newCT = pruneCT;
			}
		}
		return newCT;
	}

	/**
	 * Returns the candidate with the lowest usage in the given code table.
	 * Among candidates with the same usage the one that comes first in the
	 * iteration order of <code>candidates</code> is chosen.
	 *
	 * @param candidates
	 *            the item sets to choose from
	 * @param codeTable
	 *            the code table whose usages are compared
	 * @return the least used candidate, or <code>null</code> if
	 *         <code>candidates</code> is empty
	 */
	private static FrequentItemSet leastUsed(
			Collection<FrequentItemSet> candidates, CodeTable codeTable) {
		FrequentItemSet least = null;
		int leastUsage = 0;
		for (FrequentItemSet candidate : candidates) {
			int usage = codeTable.getUsage(candidate);
			if (least == null || usage < leastUsage) {
				least = candidate;
				leastUsage = usage;
			}
		}
		return least;
	}

	/**
	 * Calculates the itemsets of <code>newCT</code>, whose usages have been
	 * going down in comparison to their usages in <code>oldCT</code>. Item
	 * sets consisting of a single item are never returned.
	 *
	 * @param newCT
	 *            a code table, that has been build by adding or removing an
	 *            itemset from <code>oldCT</code>
	 * @param oldCT
	 *            a code table
	 * @return the candidate pruning itemsets, in the iteration order of the
	 *         item sets of <code>newCT</code>
	 */
	private Set<FrequentItemSet> getPruneSet(CodeTable newCT, CodeTable oldCT) {
		Set<FrequentItemSet> pruneSet = new LinkedHashSet<FrequentItemSet>();
		for (FrequentItemSet itemSet : newCT.getItemSets()) {
			if (itemSet.getNumberOfItems() == 1) {
				continue;
			}
			// only item sets contained in both code tables can be compared
			if (!oldCT.getItemSets().contains(itemSet)) {
				continue;
			}
			if (newCT.getUsage(itemSet) < oldCT.getUsage(itemSet)) {
				pruneSet.add(itemSet);
			}
		}
		return pruneSet;
	}

	/**
	 * Creates the working copy of the example set: a clone from which every
	 * attribute that is not nominal with exactly two values is removed. A
	 * warning is logged if any attribute had to be removed.
	 *
	 * @param exampleSet
	 *            the example set delivered to the operator; it is not modified
	 * @return the reduced clone
	 */
	private ExampleSet preprocessExampleSet(ExampleSet exampleSet) {
		// precomputing data properties
		ExampleSet workingSet = (ExampleSet) exampleSet.clone();

		// remove unusable attributes
		int oldAttributeCount = workingSet.getAttributes().size();
		removeNonBooleanAttributes(workingSet);
		int newAttributeCount = workingSet.getAttributes().size();
		if (oldAttributeCount != newAttributeCount) {
			int removeCount = oldAttributeCount - newAttributeCount;
			String message;
			if (removeCount == 1) {
				message = "Removed 1 non-binominal attribute, creation of a standard code table is only supported for the positive values of binominal attributes.";
			} else {
				message = "Removed "
						+ removeCount
						+ " non-binominal attributes, creation of a standard code table is only supported for the positive values of binominal attributes.";
			}
			logWarning(message);
		}

		return workingSet;
	}

	/**
	 * Removes every non boolean attribute, i.e. every attribute that is not
	 * nominal or whose mapping does not have exactly two entries.
	 *
	 * @param exampleSet
	 *            exampleSet, which attributes are tested
	 */
	private void removeNonBooleanAttributes(ExampleSet exampleSet) {
		// collect first, remove afterwards: the attributes must not be
		// modified while they are being iterated
		Collection<Attribute> deleteAttributes = new ArrayList<Attribute>();

		for (Attribute attribute : exampleSet.getAttributes()) {
			if (!attribute.isNominal() || (attribute.getMapping().size() != 2)) {
				deleteAttributes.add(attribute);
			}
		}
		for (Attribute attribute : deleteAttributes) {
			exampleSet.getAttributes().remove(attribute);
		}
	}

	/**
	 * Adds the pruning switch and the minimal support to the parameters of the
	 * super class.
	 *
	 * @return the parameter types of this operator
	 */
	@Override
	public List<ParameterType> getParameterTypes() {
		List<ParameterType> types = super.getParameterTypes();
		ParameterType type = new ParameterTypeBoolean(
				PARAMETER_PRUNE,
				"Indicates if post-acceptance pruning should be deployed. This improves the compression rate" +
				" by removal of less used frequent patterns from the code table when a new candidate item " +
				"set is accepted. However, the computation time might slightly increase.",
				true);
		type.setExpert(false);
		types.add(type);
		types.add(new ParameterTypeDouble(
				PARAMETER_MIN_SUPPORT,
				"The minimal support necessary in order to be a frequent item (set). " +
				"Affects only the frequent sets output.",
				0.0d, 1.0d, 0.95d));
		return types;
	}

}
