/*
 * MiningMart Version 1.1
 * 
 * Copyright (C) 2006 Martin Scholz, Timm Euler, 
 *                    Daniel Hakenjos, Katharina Morik
 *
 * Contact: miningmart@ls8.cs.uni-dortmund.de
 *
 * A list of contributing developers (other than the copyright 
 * holders) can be found at
 * http://mmart.cs.uni-dortmund.de/downloads/download.html
 * 
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program, see the file MM_HOME/LICENSE; if not, write
 * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
 * Floor, Boston, MA 02110-1301, USA.
 */
package edu.udo.cs.miningmart.m4.core;

import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Vector;

import edu.udo.cs.miningmart.exception.M4Exception;

/**
 * Core implementation of the estimated statistics of one concept. It holds
 * a row count and, for every registered attribute, the number of missing
 * values, the minimum, the maximum and a list of known values together with
 * their numbers of occurrences. Quantities that are not known are
 * represented by the constants VALUE_INT_UNKNOWN, VALUE_DOUBLE_UNKNOWN and
 * VALUE_DISCRETE_UNKNOWN.
 * 
 * Attributes are addressed by name. Setters and other modifying methods
 * silently ignore attribute names that are not registered; getters answer
 * with the corresponding "unknown" constant (or <code>null</code> for
 * value lists).
 * 
 * @author Timm Euler
 * @see edu.udo.cs.miningmart.m4.EstimatedStatistics
 */
public class EstimatedStatistics implements edu.udo.cs.miningmart.m4.EstimatedStatistics {

	// the concept these estimations apply to
	private Concept myConcept;
	
	// the estimated number of rows, VALUE_INT_UNKNOWN if not known
	private long numberOfRows;
	
	// a Map from attribute names (String) to AttributeInfo objects
	private Map attributesToInfos;
	
	/**
	 * Creates an EstimatedStatistics object for the given Concept.
	 * One (empty) entry is registered for every BaseAttribute of the concept.
	 * If the concept is of type DB and has a current columnset, the actual
	 * statistics of that columnset are updated and copied to the estimated
	 * values. Otherwise all estimated values are set to be unknown, which
	 * means that any inference or estimation has to be done elsewhere.
	 * 
	 * @param theConcept the concept to create statistics for, must not be
	 *        <code>null</code>
	 * @throws M4Exception if <code>theConcept</code> is <code>null</code>, or
	 *         if reading the concept or its statistics fails
	 */
	public EstimatedStatistics(edu.udo.cs.miningmart.m4.Concept theConcept) throws M4Exception {
		// validate first, so that no field is touched for an invalid argument
		if (theConcept == null) {
			throw new M4Exception("Constructor of EstimatedStatistics: got NULL concept!");
		}
		this.myConcept = (Concept) theConcept;
		this.numberOfRows = VALUE_INT_UNKNOWN;

		this.initiateValueLists(theConcept); // prepares the map
		
		// maybe some values are known; for a concept of type DB the
		// statistics have to be computed:
		Columnset theCs = (Columnset) theConcept.getCurrentColumnSet();
		if (theCs != null && theConcept.getType().equals(Concept.TYPE_DB)) {
			theCs.updateStatistics(); // updates only where necessary (one hopes)
			this.takePrecomputedStatistics(theCs);
		}
	}

	/**
	 * Returns the concept this EstimatedStatistics object applies to.
	 * 
	 * @return the concept given to the constructor
	 */
	public edu.udo.cs.miningmart.m4.Concept getConcept() {
		return this.myConcept;
	}
	
	/**
	 * Returns the known values of the given attribute.
	 * 
	 * @param nameOfAttribute name of the attribute
	 * @return a new Vector with the values (Strings) in their stored, cleaned
	 *         form; <code>null</code> if the name is <code>null</code> or no
	 *         such attribute is registered
	 * @see edu.udo.cs.miningmart.m4.EstimatedStatistics#getValueList(String)
	 */
	public Vector getValueList(String nameOfAttribute) {
		if (nameOfAttribute == null) {
			return null;
		}
		AttributeInfo ai = this.getAttribInfo(nameOfAttribute);
		if (ai == null) {
			// unknown attribute: answer like for a missing name instead of
			// failing with a NullPointerException
			return null;
		}
		Vector theValues = new Vector();
		Iterator it = ai.getValues().iterator();
		while (it.hasNext()) {
			ValueInfo vi = (ValueInfo) it.next();
			theValues.add(vi.getTheValue());
		}
		return theValues;
	}
	
	/**
	 * Replaces the value list of the given attribute. Every value gets an
	 * unknown number of occurrences. Nothing happens if the list is
	 * <code>null</code> or the attribute is not registered.
	 * 
	 * @param nameOfAttribute name of the attribute
	 * @param theValues the new values, a Vector of Strings; a
	 *        <code>null</code> element is stored as VALUE_DISCRETE_UNKNOWN
	 */
	public void setValueList(String nameOfAttribute, Vector theValues) {
		AttributeInfo ai = this.getAttribInfo(nameOfAttribute);
		if (theValues == null || ai == null) {
			return;
		}
		Vector theValueInfos = new Vector();
		Iterator it = theValues.iterator();
		while (it.hasNext()) {
			ValueInfo vi = new ValueInfo();
			vi.setTheValue((String) it.next());
			theValueInfos.add(vi);
		}
		ai.setValues(theValueInfos);
	}

	// Returns the AttributeInfo registered under the given name, or null.
	private AttributeInfo getAttribInfo(String nameOfAttribute) {
		return (AttributeInfo) this.attributesToInfos.get(nameOfAttribute);
	}
	
	/**
	 * Copies the value list (values and numbers of occurrences) of an
	 * attribute of another statistics object to an attribute of this object,
	 * replacing the current list of the destination attribute. Nothing
	 * happens if <code>from</code> is <code>null</code> or if one of the two
	 * attributes is not registered.
	 * 
	 * @param nameOfDestinationAttribute attribute of this object to copy to
	 * @param from the statistics to copy from; must be an instance of this
	 *        class, otherwise a ClassCastException is thrown
	 * @param nameOfSourceAttribute attribute of <code>from</code> to copy
	 */
	public void copyValueList( String nameOfDestinationAttribute,
			                   edu.udo.cs.miningmart.m4.EstimatedStatistics from,
							   String nameOfSourceAttribute) {
		if (from == null) {
			return;
		}
		AttributeInfo toAi = this.getAttribInfo(nameOfDestinationAttribute);
		AttributeInfo fromAi = ((EstimatedStatistics) from).getAttribInfo(nameOfSourceAttribute);
		if (toAi == null || fromAi == null) {
			return;
		}
		Vector copyOfList = new Vector();
		Iterator it = fromAi.getValues().iterator();
		while (it.hasNext()) {
			ValueInfo vi = (ValueInfo) it.next();
			copyOfList.add(vi.copy());
		}
		toAi.setValues(copyOfList);
	}
	
	/**
	 * Removes the given value from the value list of the given attribute.
	 * The value is cleaned in the same way as stored values before it is
	 * compared, so "1.0" removes a value stored as "1". All entries with this
	 * value are removed. Nothing happens if the value is <code>null</code>
	 * or the attribute is not registered.
	 * 
	 * @param value the value to remove
	 * @param nameOfAttribute name of the attribute
	 * @see edu.udo.cs.miningmart.m4.EstimatedStatistics#removeValue(String, String)
	 */
	public void removeValue(String value, String nameOfAttribute) {
		AttributeInfo ai = this.getAttribInfo(nameOfAttribute);
		if (ai == null || value == null) {
			return;
		}
		String wanted = cleanValue(value);
		Iterator it = ai.getValues().iterator();
		while (it.hasNext()) {
			ValueInfo vi = (ValueInfo) it.next();
			if (sameValue(vi.getTheValue(), wanted)) {
				it.remove();
			}
		}
	}

	/**
	 * Returns the number of missing values of the given attribute.
	 * 
	 * @param nameOfAttribute name of the attribute
	 * @return the number of missing values; VALUE_INT_UNKNOWN if it is not
	 *         known or the attribute is not registered
	 * @see edu.udo.cs.miningmart.m4.EstimatedStatistics#getNumberOfMissingValues(String)
	 */
	public int getNumberOfMissingValues(String nameOfAttribute) {
		AttributeInfo ai = this.getAttribInfo(nameOfAttribute);
		return (ai == null) ? VALUE_INT_UNKNOWN : ai.getNumberOfMissingValues();
	}

	/**
	 * Sets the number of missing values of the given attribute. Nothing
	 * happens if the attribute is not registered.
	 * 
	 * @param nameOfAttribute name of the attribute
	 * @param number the new number of missing values
	 * @see edu.udo.cs.miningmart.m4.EstimatedStatistics#setNumberOfMissingValues(String, int)
	 */
	public void setNumberOfMissingValues(String nameOfAttribute, int number) {
		AttributeInfo ai = this.getAttribInfo(nameOfAttribute);
		if (ai != null) {
			ai.setNumberOfMissingValues(number);
		}
	}
	
	/**
	 * Returns the maximum of the given attribute.
	 * 
	 * @param nameOfAttribute name of the attribute
	 * @return the maximum; VALUE_DOUBLE_UNKNOWN if it is not known or the
	 *         attribute is not registered
	 * @see edu.udo.cs.miningmart.m4.EstimatedStatistics#getBiggestValue(String)
	 */
	public double getBiggestValue(String nameOfAttribute) {
		AttributeInfo ai = this.getAttribInfo(nameOfAttribute);
		return (ai == null) ? VALUE_DOUBLE_UNKNOWN : ai.getMaximum();
	}

	/**
	 * Sets the maximum of the given attribute. Nothing happens if the
	 * attribute is not registered.
	 * 
	 * @param nameOfAttribute name of the attribute
	 * @param value the new maximum
	 * @see edu.udo.cs.miningmart.m4.EstimatedStatistics#setBiggestValue(String, double)
	 */
	public void setBiggestValue(String nameOfAttribute, double value) {
		AttributeInfo ai = this.getAttribInfo(nameOfAttribute);
		if (ai != null) {
			ai.setMaximum(value);
		}
	}

	/**
	 * Sets the minimum of the given attribute. Nothing happens if the
	 * attribute is not registered.
	 * 
	 * @param nameOfAttribute name of the attribute
	 * @param value the new minimum
	 * @see edu.udo.cs.miningmart.m4.EstimatedStatistics#setLowestValue(String, double)
	 */
	public void setLowestValue(String nameOfAttribute, double value) {
		AttributeInfo ai = this.getAttribInfo(nameOfAttribute);
		if (ai != null) {
			ai.setMinimum(value);
		}
	}
	
	/**
	 * Returns the minimum of the given attribute.
	 * 
	 * @param nameOfAttribute name of the attribute
	 * @return the minimum; VALUE_DOUBLE_UNKNOWN if it is not known or the
	 *         attribute is not registered
	 * @see edu.udo.cs.miningmart.m4.EstimatedStatistics#getLowestValue(String)
	 */
	public double getLowestValue(String nameOfAttribute) {
		AttributeInfo ai = this.getAttribInfo(nameOfAttribute);
		return (ai == null) ? VALUE_DOUBLE_UNKNOWN : ai.getMinimum();
	}

	/**
	 * Takes over statistics that already exist for the current columnset of
	 * the concept, without recomputing anything. This is only done for
	 * concepts that are not of type DB.
	 * 
	 * @return <code>true</code> if existing statistics were taken over;
	 *         <code>false</code> if the concept is of type DB, has no current
	 *         columnset, or no statistics exist for the columnset
	 * @throws M4Exception if reading the statistics fails
	 * @see edu.udo.cs.miningmart.m4.EstimatedStatistics#readAvailableStatisticsFromDb()
	 */
	public boolean readAvailableStatisticsFromDb() throws M4Exception {
		
		// only do this for MINING concepts!!
		if (this.getConcept().getType().equals(Concept.TYPE_DB)) {
			return false;
		}
		
		// it is only possible if there is a columnset:
		Columnset myCs = (Columnset) this.getConcept().getCurrentColumnSet();
		if (myCs == null) {
			return false;
		}
		
		// do not recompute statistics, only use what is stored in M4 schema:
		if ( ! myCs.statisticsExist()) {
			return false;
		}
		this.takePrecomputedStatistics(myCs);
		return true;
	}
	
	// Takes any real statistics that have been computed for the given
	// columnset (and have been stored in M4): the row count from the
	// columnset itself, everything else from the columns of the
	// BaseAttributes of this concept.
	private void takePrecomputedStatistics(Columnset theCs) throws M4Exception {

		// from the columnset statistics use only the COUNT:
		this.numberOfRows = theCs.getStatisticsAll();
	
		// for the rest go through the attributes of this concept,
		// maybe some have a column with statistics:
		Collection bas = this.getConcept().getAllBaseAttributes();
		if (bas == null) {
			return;
		}
		Iterator it = bas.iterator();
		while (it.hasNext()) {
			BaseAttribute myBa = (BaseAttribute) it.next();
			Column myCol = (Column) myBa.getCurrentColumn();
			// some BAs may not be connected to a column:
			if (myCol == null) {
				continue;
			}
			AttributeInfo theAttrInfo = this.getAttribInfo(myBa.getName());
			if (theAttrInfo == null) {
				throw new M4Exception("EstimatedStatistics, constructor: found unknown (?) BaseAttribute '" + myBa.getName() + "'!");
			}
			this.takeBasicStatistics(myCol, theAttrInfo);
			this.takeDistributionStatistics(myCol, theAttrInfo);
		}
	}
	
	// Copies number of missing values, minimum and maximum from the basic
	// statistics of the column, as far as they are given there.
	private void takeBasicStatistics(Column myCol, AttributeInfo theAttrInfo) throws M4Exception {
		Collection colStats1 = myCol.getBasicColStats();
		if (colStats1 == null || colStats1.isEmpty()) {
			return;
		}
		// There should be exactly one ColumnStatistics1 object, but more
		// than one seems to happen sometimes... :-(  Only the first is used.
		ColumnStatistics1 colstat1 = (ColumnStatistics1) colStats1.iterator().next();
		
		Integer noOfMiss = colstat1.getNrOfMissingValuesI();
		if (noOfMiss != null) {
			theAttrInfo.setNumberOfMissingValues(noOfMiss.intValue());
		}
		// minimum and maximum are stored as Strings; whatever is not a
		// number becomes VALUE_DOUBLE_UNKNOWN
		String max = colstat1.getMaximum();
		if (max != null) {
			theAttrInfo.setMaximum(getNumeric(max));
		}
		String min = colstat1.getMinimum();
		if (min != null) {
			theAttrInfo.setMinimum(getNumeric(min));
		}
	}
	
	// Replaces the value list of the attribute by the values found in the
	// distribution statistics of the column, if there are any.
	private void takeDistributionStatistics(Column myCol, AttributeInfo theAttrInfo) throws M4Exception {
		Collection colStats2 = myCol.getDistributionStatistics();
		if (colStats2 == null || colStats2.isEmpty()) {
			return;
		}
		// By taking the actual statistics we overwrite the estimated ones:
		Vector someValues = theAttrInfo.getValues();
		someValues.clear();
		
		Iterator csIt = colStats2.iterator();
		while (csIt.hasNext()) {
			ColumnStatistics2 colstat2 = (ColumnStatistics2) csIt.next();
			String value = colstat2.getDistributionValue();
			int count = colstat2.getDistributionCount();
			if (count == 0) {
				// only for debugging:
				System.out.println("Warning: value '" + value + "' in column '" + 
						myCol.getName() +
						"' occurs 0 times, it's not used in estimated statistics!");
				// a value that does not occur is not stored at all
				continue;
			}
			ValueInfo valInfo = new ValueInfo();
			if (value != null) {
				valInfo.setTheValue(value);
			}
			// other non-positive counts leave the number of occurrences unknown
			if ((count != VALUE_INT_UNKNOWN) && (count > 0)) {
				valInfo.setNumberOfOccurrences(count);
			}
			someValues.add(valInfo);
		}
	}
	
	// Creates the map and registers an empty AttributeInfo for every
	// BaseAttribute of the given concept.
	private void initiateValueLists(edu.udo.cs.miningmart.m4.Concept theConcept) throws M4Exception {
		this.attributesToInfos = new HashMap();
		Collection bas = theConcept.getAllBaseAttributes();
		if (bas == null) {
			return;
		}
		Iterator it = bas.iterator();
		while (it.hasNext()) {
			BaseAttribute ba = (BaseAttribute) it.next();
			this.attributesToInfos.put(ba.getName(), new AttributeInfo());
		}
	}

	/**
	 * Returns the (estimated) number of rows.
	 * 
	 * @return the number of rows, VALUE_INT_UNKNOWN if it is not known
	 * @see edu.udo.cs.miningmart.m4.EstimatedStatistics#getNumberOfRows()
	 */
	public long getNumberOfRows() {
		return numberOfRows;
	}

	/**
	 * Sets the (estimated) number of rows.
	 * 
	 * @param numberOfRows the new number of rows
	 * @see edu.udo.cs.miningmart.m4.EstimatedStatistics#setNumberOfRows(long)
	 */
	public void setNumberOfRows(long numberOfRows) {
		this.numberOfRows = numberOfRows;
	}
	
	/**
	 * Sets the number of occurrences of a value that is already in the
	 * value list of the given attribute. A number of 0 removes the value
	 * from the list. The value is cleaned in the same way as stored values
	 * before it is compared. Nothing happens if the value is
	 * <code>null</code>, the value is not in the list, or the attribute is
	 * not registered.
	 * 
	 * @param nameOfAttribute name of the attribute
	 * @param value the value whose number of occurrences is set
	 * @param number the new number of occurrences
	 */
	public void setNumberOfOccurrences(String nameOfAttribute, String value, int number) {
		AttributeInfo ai = this.getAttribInfo(nameOfAttribute);
		if (ai == null || value == null) {
			return;
		}
		String wanted = cleanValue(value);
		Iterator it = ai.getValues().iterator();
		while (it.hasNext()) {
			ValueInfo vi = (ValueInfo) it.next();
			if ( ! sameValue(vi.getTheValue(), wanted)) {
				continue;
			}
			if (number == 0) {
				it.remove();
			}
			else {
				vi.setNumberOfOccurrences(number);
			}
		}
	}
	
	/**
	 * Returns the number of occurrences of a value of the given attribute.
	 * The value is cleaned in the same way as stored values before it is
	 * compared.
	 * 
	 * @param nameOfAttribute name of the attribute
	 * @param value the value to look for
	 * @return the number of occurrences stored for the value (which may be
	 *         VALUE_INT_UNKNOWN); 0 if the value is <code>null</code> or not
	 *         in the list, or if the attribute is not registered
	 */
	public int getNumberOfOccurrences(String nameOfAttribute, String value) {
		AttributeInfo ai = this.getAttribInfo(nameOfAttribute);
		if (ai != null && value != null) {
			String wanted = cleanValue(value);
			Iterator it = ai.getValues().iterator();
			while (it.hasNext()) {
				ValueInfo vi = (ValueInfo) it.next();
				if (sameValue(vi.getTheValue(), wanted)) {
					return vi.getNumberOfOccurrences();
				}
			}
		}
		return 0; // value not found -> no occurrences of this value
	}
	
	/**
	 * Creates statistics for the given concept and copies the row count and
	 * the information about all attributes of this object to them. The
	 * copied attribute information is stored under the same attribute names
	 * and replaces whatever the new object holds under these names.
	 * 
	 * @param toConcept the concept the copy applies to
	 * @return the new statistics object
	 * @throws M4Exception if the new object cannot be created, in particular
	 *         if <code>toConcept</code> is <code>null</code>
	 */
	public edu.udo.cs.miningmart.m4.EstimatedStatistics copy(
			edu.udo.cs.miningmart.m4.Concept toConcept) 
	throws M4Exception {
		
		EstimatedStatistics copy = new EstimatedStatistics(toConcept);
		copy.setNumberOfRows(this.getNumberOfRows());
		Iterator entryIt = this.attributesToInfos.entrySet().iterator();
		while (entryIt.hasNext()) {
			Map.Entry myEntry = (Map.Entry) entryIt.next();
			String nameOfAttrib = (String) myEntry.getKey();
			AttributeInfo ai = (AttributeInfo) myEntry.getValue();
			copy.addAttribInfo(nameOfAttrib, ai.copy());
		}		
		return copy;
	}

	/**
	 * Adds a value with its number of occurrences to the value list of the
	 * given attribute; if the value is already in the list, only its number
	 * of occurrences is changed. If the value is numeric and the attribute
	 * has a known maximum (minimum), the maximum (minimum) is adjusted where
	 * the new value exceeds it; an unknown maximum or minimum stays unknown.
	 * Nothing happens if the attribute is not registered.
	 * 
	 * @param nameOfAttribute name of the attribute
	 * @param value the value to add; <code>null</code> is stored as
	 *        VALUE_DISCRETE_UNKNOWN
	 * @param noOfOccurrences the number of occurrences of the value
	 */
	public void addValueInformation(String nameOfAttribute, String value, int noOfOccurrences) {
		AttributeInfo ai = this.getAttribInfo(nameOfAttribute);
		if (ai == null) {
			return;
		}
		ai.addValueInfo(value, noOfOccurrences);

		// if the new value is numeric it may be the new MIN or MAX:
		double dVal = getNumeric(value);
		if (doublesAreEqual(dVal, VALUE_DOUBLE_UNKNOWN)) {
			return;
		}
		if ( ! doublesAreEqual(ai.getMaximum(), VALUE_DOUBLE_UNKNOWN)) {
			ai.setMaximum(Math.max(dVal, ai.getMaximum()));
		}
		if ( ! doublesAreEqual(ai.getMinimum(), VALUE_DOUBLE_UNKNOWN)) {
			ai.setMinimum(Math.min(dVal, ai.getMinimum()));
		}
	}
	
	// Compares two doubles like Double.compareTo does: unlike "==" this
	// also regards NaN as equal to NaN.
	private static boolean doublesAreEqual(double a, double b) {
		return Double.compare(a, b) == 0;
	}
	
	// Returns the number the given String represents, or
	// VALUE_DOUBLE_UNKNOWN if it is null, not numeric or NaN (NaN is
	// useless as a minimum or maximum).
	private static double getNumeric(String value) {
		if (value == null) {
			// Double.parseDouble(null) throws a NullPointerException,
			// not a NumberFormatException
			return VALUE_DOUBLE_UNKNOWN;
		}
		try {
			double d = Double.parseDouble(value);
			return Double.isNaN(d) ? VALUE_DOUBLE_UNKNOWN : d;
		}
		catch (NumberFormatException nfe) {
			return VALUE_DOUBLE_UNKNOWN;
		}
	}
	
	// Brings a value into the form in which values are stored and compared:
	// surrounding whitespace is removed, and superfluous zeros at the end
	// of a plain decimal number are cut off ("1.50" -> "1.5", "2.0" -> "2",
	// "3." -> "3", ".0" -> "0"). Everything else is only trimmed; this
	// includes numbers with an exponent or type suffix ("1.5e10"), where
	// cutting off a trailing zero would change the number.
	// null is mapped to VALUE_DISCRETE_UNKNOWN.
	private static String cleanValue(String rawValue) {
		if (rawValue == null) {
			return VALUE_DISCRETE_UNKNOWN;
		}
		String trimmed = rawValue.trim();
		if ( ! isPlainDecimal(trimmed)) {
			return trimmed;
		}
		// The String contains a '.', so both steps stop there at the latest.
		int end = trimmed.length();
		while (trimmed.charAt(end - 1) == '0') {
			end--;
		}
		if (trimmed.charAt(end - 1) == '.') {
			end--;
		}
		String cleaned = trimmed.substring(0, end);
		// values like ".0" or "-.0" have no digit left now
		if (cleaned.length() == 0 || cleaned.endsWith("+") || cleaned.endsWith("-")) {
			cleaned = cleaned + "0";
		}
		return cleaned;
	}
	
	// Tells if the given String is an optional sign followed by digits with
	// exactly one decimal point among them (at least one digit), and
	// nothing else.
	private static boolean isPlainDecimal(String s) {
		int length = s.length();
		int pos = 0;
		if (length > 0 && (s.charAt(0) == '+' || s.charAt(0) == '-')) {
			pos = 1;
		}
		int digits = 0;
		int points = 0;
		for (; pos < length; pos++) {
			char c = s.charAt(pos);
			if (c >= '0' && c <= '9') {
				digits++;
			}
			else if (c == '.') {
				points++;
			}
			else {
				return false;
			}
		}
		return points == 1 && digits > 0;
	}
	
	// Null-safe comparison of a stored value with a cleaned value.
	private static boolean sameValue(String stored, String cleaned) {
		return (stored == null) ? (cleaned == null) : stored.equals(cleaned);
	}
	
	// Registers the given AttributeInfo under the given name, replacing any
	// older one; ignored if one of the arguments is null.
	private void addAttribInfo(String nameOfAttribute, AttributeInfo ai) {
		if (this.attributesToInfos == null) {
			this.attributesToInfos = new HashMap();
		}
		if (ai != null && nameOfAttribute != null) {
			this.attributesToInfos.put(nameOfAttribute, ai);
		}
	}
	
	/**
	 * Registers an attribute with the given name; all its statistics are
	 * unknown and its value list is empty. If an attribute with this name is
	 * already registered, its information is replaced. A <code>null</code>
	 * name is ignored.
	 * 
	 * @param nameOfAttribute name of the new attribute
	 */
	public void addAttribute(String nameOfAttribute) {
		this.addAttribInfo(nameOfAttribute, new AttributeInfo());
	}
	
	/* Objects of this class store information about a single attribute.
	 * The class is static so that copies do not keep a reference to the
	 * EstimatedStatistics object they were copied from.
	 */ 
	private static class AttributeInfo {
		private double minimum;
		private double maximum;
		private int numberOfMissingValues;
		private Vector values; // a Vector of ValueInfo objects, never null
		
		// Creates an AttributeInfo with unknown statistics and no values.
		public AttributeInfo() {
			this.maximum = VALUE_DOUBLE_UNKNOWN;
			this.minimum = VALUE_DOUBLE_UNKNOWN;
			this.numberOfMissingValues = VALUE_INT_UNKNOWN;
			this.values = new Vector();
		}

		public double getMaximum() {
			return maximum;
		}
		public void setMaximum(double maximum) {
			this.maximum = maximum;
		}
		public double getMinimum() {
			return minimum;
		}
		public void setMinimum(double minimum) {
			this.minimum = minimum;
		}
		public int getNumberOfMissingValues() {
			return this.numberOfMissingValues;
		}
		public void setNumberOfMissingValues(int numberOfMissingValues) {
			this.numberOfMissingValues = numberOfMissingValues;
		}
		// Returns the list itself (not a copy), a Vector of ValueInfo objects.
		public Vector getValues() {
			return this.values;
		}
		// Replaces the list of ValueInfo objects; null means no values.
		public void setValues(Vector values) {
			this.values = (values == null) ? new Vector() : values;
		}
		// Changes the number of occurrences of a value that is already in
		// the list: a positive number is stored, 0 removes the value, a
		// negative number changes nothing.
		public void setValueInfo(String value, int noOfOccurrences) {
			if (value == null) {
				return;
			}
			String wanted = cleanValue(value);
			Iterator it = this.values.iterator();
			while (it.hasNext()) {
				ValueInfo vi = (ValueInfo) it.next();
				if ( ! sameValue(vi.getTheValue(), wanted)) {
					continue;
				}
				if (noOfOccurrences > 0) {
					vi.setNumberOfOccurrences(noOfOccurrences);
				}
				else if (noOfOccurrences == 0) {
					it.remove();
				}
			}
		}
		// Stores the number of occurrences for the given value; the value
		// is added to the list if it is not yet present.
		public void addValueInfo(String value, int noOfOccurrences) {
			// compare in cleaned form, otherwise "1.0" would be added again
			// although it is already stored as "1"
			String wanted = cleanValue(value);
			Iterator it = this.values.iterator();
			while (it.hasNext()) {
				ValueInfo known = (ValueInfo) it.next();
				if (sameValue(known.getTheValue(), wanted)) {
					known.setNumberOfOccurrences(noOfOccurrences);
					return;
				}
			}
			ValueInfo vi = new ValueInfo();
			vi.setTheValue(value);
			vi.setNumberOfOccurrences(noOfOccurrences);
			this.values.add(vi);
		}
		// Returns a copy that shares no ValueInfo objects with this object.
		public AttributeInfo copy() {
			AttributeInfo copy = new AttributeInfo();
			copy.setMaximum(this.getMaximum());
			copy.setMinimum(this.getMinimum());
			copy.setNumberOfMissingValues(this.getNumberOfMissingValues());
			Vector copyOfValues = new Vector();
			Iterator it = this.getValues().iterator();
			while (it.hasNext()) {
				ValueInfo vi = (ValueInfo) it.next();
				copyOfValues.add(vi.copy());
			}
			copy.setValues(copyOfValues);
			return copy;
		}
	}
	
	/* Objects of this class store information about a single value of
	 * a single attribute. The class is static for the same reason as
	 * AttributeInfo.
	 */ 
	private static class ValueInfo {

		private String theValue;
		private int numberOfOccurrences;
		
		// Creates a ValueInfo with unknown value and number of occurrences.
		public ValueInfo() {
			this.theValue = VALUE_DISCRETE_UNKNOWN;
			this.numberOfOccurrences = VALUE_INT_UNKNOWN;
		}
		
		public int getNumberOfOccurrences() {
			return numberOfOccurrences;
		}
		public void setNumberOfOccurrences(int numberOfOccurrences) {
			this.numberOfOccurrences = numberOfOccurrences;
		}
		public String getTheValue() {
			return theValue;
		}
		// Stores the value in cleaned form, see cleanValue(String).
		public void setTheValue(String theValue) {
			this.theValue = cleanValue(theValue);
		}
		public ValueInfo copy() {
			ValueInfo copy = new ValueInfo();
			copy.numberOfOccurrences = this.numberOfOccurrences;
			// the stored value is already clean, take it as it is
			copy.theValue = this.theValue;
			return copy;
		}
	}
}
