/*
 * MiningMart Version 1.1
 * 
 * Copyright (C) 2006 Martin Scholz, Timm Euler, 
 *                    Daniel Hakenjos, Katharina Morik
 *
 * Contact: miningmart@ls8.cs.uni-dortmund.de
 *
 * A list of contributing developers (other than the copyright 
 * holders) can be found at
 * http://mmart.cs.uni-dortmund.de/downloads/download.html
 * 
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program, see the file MM_HOME/LICENSE; if not, write
 * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
 * Floor, Boston, MA 02110-1301, USA.
 */
package edu.udo.cs.miningmart.schemamatching;

import java.util.Collection;
import java.util.Iterator;
import java.util.Vector;

import edu.udo.cs.miningmart.exception.M4Exception;
import edu.udo.cs.miningmart.m4.BaseAttribute;
import edu.udo.cs.miningmart.m4.Concept;
import edu.udo.cs.miningmart.m4.ParameterObject;
import edu.udo.cs.miningmart.m4.Relation;
import edu.udo.cs.miningmart.m4.Value;
import edu.udo.cs.miningmart.m4.core.ConceptualDatatypes;

/**
 * Abstract superclass of schema matching methods in the
 * MiningMart environment.
 * 
 * Whenever a concrete subclass of this class is implemented,
 * you can add a name representing its matcher to the String array
 * edu.udo.cs.miningmart.m4.M4Interface.AVAILABLE_MATCHERS,
 * and add a call to an instantiation of that class to the method 
 * edu.udo.cs.miningmart.m4.core.M4InterfaceImpl.findConnection(...)
 * at the place indicated there. Then the matcher implemented by your
 * subclass is automatically available in the GUI.
 * 
 * @author Timm Euler
 * @version $Id: MmSchemaMatcher.java,v 1.10 2006/09/27 15:00:03 euler Exp $
 */
public abstract class MmSchemaMatcher {

	/** Similarity value from which on two objects are treated as matching. */
	protected static final double THRESHOLD = 0.5d;
	
	/** Similarity of two data types that differ by name but are compatible. */
	private static final double COMPATIBLE_DATATYPE_SIMILARITY = 0.9d;
	
	/** Factor applied to a relation similarity when the cardinalities differ. */
	private static final double CARDINALITY_MISMATCH_FACTOR = 0.6;
	
	/** Factor applied to a concept similarity when the concepts themselves do not match directly. */
	private static final double CONCEPT_MISMATCH_FACTOR = 0.75;
	
	/** Base of the penalty applied once per surplus object in the larger collection. */
	private static final double SIZE_DIFFERENCE_PENALTY_BASE = 0.95;
	
	/**
	 * Returns the double value that this matcher considers as
	 * the similarity between ba1 and ba2, based only on information
	 * related to these BaseAttributes.
	 *  
	 * @param ba1 One BaseAttribute
	 * @param ba2 Another BaseAttribute
	 * @return a similarity value between 0 and 1
	 * @throws SchemaMatchException
	 */
	public abstract double getDirectSimilarity(BaseAttribute ba1, BaseAttribute ba2) throws SchemaMatchException;

	/**
	 * Returns the double value that this matcher considers as
	 * the similarity between con1 and con2, based only on the 
	 * information in these concepts but not on dependent attributes
	 * (like BaseAttributes).	 
	 *  
	 * @param con1 One Concept
	 * @param con2 Another Concept
	 * @return a similarity value between 0 and 1
	 * @throws SchemaMatchException
	 */
	public abstract double getDirectSimilarity(Concept con1, Concept con2) throws SchemaMatchException;

	/**
	 * Returns the double value that this matcher considers as
	 * the similarity between ba1 and ba2, taking the similarity
	 * of their respective concepts into account as well.
	 *  
	 * @param ba1 One BaseAttribute
	 * @param ba2 Another BaseAttribute
	 * @return a similarity value between 0 and 1
	 * @throws SchemaMatchException
	 */
	public abstract double getSimilarityInvolvingConcepts(BaseAttribute ba1, BaseAttribute ba2) throws SchemaMatchException;
	
	/**
	 * Returns the similarity based on the conceptual data types of the
	 * given BaseAttributes: 1 if the type names are equal (ignoring case),
	 * 0.9 if the types are compatible in at least one direction, and 0
	 * otherwise. If an attribute is <code>null</code> or has no conceptual
	 * data type name, the similarity is 0.
	 * 
	 * @param ba1 One BaseAttribute
	 * @param ba2 Another BaseAttribute
	 * @return a similarity value between 0 and 1
	 * @throws SchemaMatchException if the data types cannot be read or compared
	 */
	public double getDatatypeSimilarity(BaseAttribute ba1, BaseAttribute ba2) 
	throws SchemaMatchException {
		if (ba1 == null || ba2 == null) {
			return 0d;
		}
		try {
			String type1 = ba1.getConceptualDataTypeName();
			String type2 = ba2.getConceptualDataTypeName();
			// an attribute without a data type cannot be compared by type;
			// without this check the comparison below fails with a NullPointerException
			if (type1 == null || type2 == null) {
				return 0d;
			}
			if (type1.equalsIgnoreCase(type2)) {
				return 1d;
			}
			// compatibility is checked in both directions
			boolean compatible =
				ConceptualDatatypes.checkDatatypeCompatibility(type1, type2)
				|| ConceptualDatatypes.checkDatatypeCompatibility(type2, type1);
			return compatible ? COMPATIBLE_DATATYPE_SIMILARITY : 0d;
		}
		catch (M4Exception m4e) {
			throw new SchemaMatchException("M4 error matching data types of BaseAttributes '" +
					ba1.getName() + "' and '" + ba2.getName() + "': " + m4e.getMessage());
		}
	}
	
	/**
	 * Returns the double value that this matcher considers as
	 * the similarity between the two given Strings.
	 *  
	 * @param str1 One String
	 * @param str2 Another String
	 * @return a similarity value
	 * @throws SchemaMatchException
	 */
	public abstract double getNameSimilarity(String str1, String str2) throws SchemaMatchException;

	
	/**
	 * Returns the double value that this matcher considers as
	 * the similarity between the two given Strings.
	 *  
	 * @param str1 One String
	 * @param str2 Another String
	 * @return a similarity value
	 * @throws SchemaMatchException
	 */
	public abstract double getStringSimilarity(String str1, String str2) throws SchemaMatchException;
	
	/**
	 * Returns the similarity between the given relationships, which
	 * is not computed based on their names but on the similarity
	 * of the concepts they link.
	 * <p>
	 * The "parallel" similarity is the mean of the similarity of the two
	 * From-concepts and the similarity of the two To-concepts. If both
	 * relations are one-to-many, that value is returned. Otherwise the
	 * "crossed" similarity (From of one relation against To of the other)
	 * is computed as well and the larger of the two is used; it is returned
	 * unchanged if both relations are many-to-many, and multiplied by 0.6
	 * in every other case.
	 * 
	 * @param rel1 a relation
	 * @param rel2 another relation
	 * @return a similarity value between 0 and 1; 0 if a relation is <code>null</code>
	 * @throws SchemaMatchException if the relations cannot be read or compared
	 */
	public double getSimilarity(Relation rel1, Relation rel2) throws SchemaMatchException {
		if (rel1 == null || rel2 == null) {
			return 0d;
		}
		try {
			Concept from1 = rel1.getTheFromConcept();
			Concept to1 = rel1.getTheToConcept();
			Concept from2 = rel2.getTheFromConcept();
			Concept to2 = rel2.getTheToConcept();
			
			double parallelSim =
				(this.getSimilarity(from1, from2) + this.getSimilarity(to1, to2)) / 2;
			
			// the direction matters for one-to-many relations, so only
			// the parallel pairing is considered
			if (rel1.isOneToManyRelation() && rel2.isOneToManyRelation()) {
				return parallelSim;
			}
			
			double crossedSim =
				(this.getSimilarity(from1, to2) + this.getSimilarity(from2, to1)) / 2;
			double bestSim = Math.max(parallelSim, crossedSim);
			
			if (rel1.isManyToManyRelation() && rel2.isManyToManyRelation()) {
				return bestSim;
			}
			// malus for difference in cardinalities
			return bestSim * CARDINALITY_MISMATCH_FACTOR;
		}
		catch (M4Exception m4e) {
			throw new SchemaMatchException("M4 error matching Relations '" +
					rel1.getName() + "' and '" + rel2.getName() + "': " + m4e.getMessage());
		}
	}
	
	/**
	 * Returns the global (overall) similarity of the two Concepts,
	 * considering any indirect or direct attributes, for example their
	 * Features.
	 * <p>
	 * The BaseAttributes of both concepts are matched greedily (using the
	 * threshold) and the result is condensed by
	 * {@link #getGlobalSimilarity(Collection, Collection, Collection)}.
	 * If the direct similarity of the concepts is below the threshold,
	 * the value is multiplied by 0.75.
	 * 
	 * @param con1 A concept
	 * @param con2 Another concept
	 * @return a similarity value; 0 if a concept is <code>null</code>
	 * @throws SchemaMatchException if the concepts cannot be read or compared
	 */
	public double getSimilarity(Concept con1, Concept con2) throws SchemaMatchException {
		if (con1 == null || con2 == null) {
			return 0d;
		}
		try {
			boolean conceptsMatchDirectly = (this.getDirectSimilarity(con1, con2) >= THRESHOLD);
			
			Collection<BaseAttribute> basOfCon1 = con1.getAllBaseAttributes();
			Collection<BaseAttribute> basOfCon2 = con2.getAllBaseAttributes();
			
			// the explicit type argument keeps the call unambiguous for every compiler
			MatchingResult<BaseAttribute>[][] baMatrix =
				this.<BaseAttribute>getSimilarityMatrix(basOfCon1, basOfCon2);
			Collection<MatchingResult<BaseAttribute>> baMapping =
				this.getSimilarMatchingsGreedy(baMatrix, true);
			
			double baBasedSim = this.getGlobalSimilarity(basOfCon1, basOfCon2, baMapping);
			
			return conceptsMatchDirectly ? baBasedSim : (CONCEPT_MISMATCH_FACTOR * baBasedSim);
		}
		catch (M4Exception m4e) {
			throw new SchemaMatchException("MmSchemaMatcher.getSimilarity(Concept, Concept): M4Exception caught: " + m4e.getMessage());
		}
	}
	
	/**
	 * Computes a global similarity from a collection of local mappings.
	 * This is applicable for BaseAttributes (local) in Concepts (global),
	 * or for Concepts (local) in a data model (global).
	 * <p>
	 * The result is the sum of the similarities of the given mappings,
	 * divided by the size of the smaller collection, and multiplied by
	 * 0.95 for every object by which the collection sizes differ.
	 * 
	 * @param singleObjects1 the objects of the first schema
	 * @param singleObjects2 the objects of the second schema
	 * @param singleMappings the mappings found between the two collections
	 * @return a similarity value between 0 and 1; 0 if an argument is
	 *         <code>null</code> or if one of the collections is empty
	 * @throws SchemaMatchException if there are more mappings than objects
	 *         in the smaller collection
	 */
	public <U extends ParameterObject> double getGlobalSimilarity(
			Collection<U> singleObjects1,
			Collection<U> singleObjects2,
			Collection<MatchingResult<U>> singleMappings)
	throws SchemaMatchException {

		if (singleObjects1 == null || singleObjects2 == null || singleMappings == null) {
			return 0d;
		}
		
		// very simple approach; compute the sum of similarities of each
		// mapping, divided by the number of objects that could 
		// have been matched:
		int size1 = singleObjects1.size();
		int size2 = singleObjects2.size();
		int numberOfPossibleMatches = Math.min(size1, size2);
		int differenceInNoOfObjects = Math.abs(size1 - size2);
		if (singleMappings.size() > numberOfPossibleMatches) {
			// this would possibly result in a similarity value larger than 1
			throw new SchemaMatchException("MmSchemaMatcher.getGlobalSimilarity: got more mappings than single objects!");
		}
		if (numberOfPossibleMatches == 0) {
			// nothing can be matched; dividing by zero below would yield NaN
			return 0d;
		}
		
		double sum = 0d;
		for (MatchingResult<U> singleMapping : singleMappings) {
			sum += singleMapping.getSimilarity();
		}
		double globalSim = sum / numberOfPossibleMatches;
		
		// we give a malus for differing numbers of single objects,
		// so that in a global search even better-matching collections of
		// single objects may be found:
		if (differenceInNoOfObjects == 0) {
			return globalSim;
		}
		double penaltyFactor = Math.pow(SIZE_DIFFERENCE_PENALTY_BASE, (double) differenceInNoOfObjects);
		return globalSim * penaltyFactor;
	}
	
	/**
	 * Returns a similarity matrix between the objects of the given collections.
	 * Rows follow the iteration order of the first collection, columns the
	 * iteration order of the second. The only object types allowed are 
	 * BaseAttributes, Concepts, Relations and Values, and both objects of a
	 * pair must be of the same of these types.
	 * <p>
	 * For a pair of Values where one has no value string, the similarity of
	 * the entry is <code>Double.NaN</code>.
	 * 
	 * @param oneDimension one collection
	 * @param secondDimension second collection
	 * @return a two-dimensional array of MatchingResult objects, or
	 *         <code>null</code> if a collection is <code>null</code>
	 * @throws SchemaMatchException if a pair of objects is of unknown or
	 *         of differing types, or if a similarity cannot be computed
	 */
	public <U extends ParameterObject> MatchingResult<U>[][] getSimilarityMatrix(
			Collection<U> oneDimension,
			Collection<U> secondDimension)
	throws SchemaMatchException {
		if (oneDimension == null || secondDimension == null) {
			return null;
		}
		
		// generic arrays cannot be created directly; only MatchingResult<U>
		// instances are stored below, so the unchecked conversion is safe
		@SuppressWarnings("unchecked")
		MatchingResult<U>[][] result = new MatchingResult[oneDimension.size()][secondDimension.size()];
		
		int rowIndex = 0;
		for (U myObj1 : oneDimension) {
			int columnIndex = 0;
			for (U myObj2 : secondDimension) {
				MatchingResult<U> entry = new MatchingResult<U>();
				entry.setObjectOfFirstSchema(myObj1);
				entry.setObjectOfSecondSchema(myObj2);
				entry.setSimilarity(this.computePairSimilarity(myObj1, myObj2));
				result[rowIndex][columnIndex] = entry;
				columnIndex++;
			}
			rowIndex++;
		}
		return result;
	}
	
	/**
	 * Computes the similarity of one pair of objects for a similarity
	 * matrix, choosing the similarity method by the type of the objects.
	 * 
	 * @param obj1 object of the first schema
	 * @param obj2 object of the second schema
	 * @return the similarity, or <code>Double.NaN</code> for two Values 
	 *         of which at least one has no value string
	 * @throws SchemaMatchException if the objects are of unknown or of differing types
	 */
	private double computePairSimilarity(ParameterObject obj1, ParameterObject obj2)
	throws SchemaMatchException {
		if (obj1 instanceof BaseAttribute && obj2 instanceof BaseAttribute) {
			// this method involves name and type 
			// if the matcher is instanceof NameAndTypeBasedSchemaMatcher:
			return this.getDirectSimilarity((BaseAttribute) obj1, (BaseAttribute) obj2);
		}
		if (obj1 instanceof Concept && obj2 instanceof Concept) {
			return this.getSimilarity((Concept) obj1, (Concept) obj2);
		}
		if (obj1 instanceof Relation && obj2 instanceof Relation) {
			return this.getSimilarity((Relation) obj1, (Relation) obj2);
		}
		if (obj1 instanceof Value && obj2 instanceof Value) {
			String name1 = ((Value) obj1).getValue();
			String name2 = ((Value) obj2).getValue();
			if (name1 != null && name2 != null) {
				return this.getStringSimilarity(name1, name2);
			}
			return Double.NaN;
		}
		throw new SchemaMatchException("Error trying to create a similarity matrix: unknown or uncompatible objects!");
	}
	
	/**
	 * Returns a matrix (instance of class edu.udo.miningmart.schemamatching.MatchingResult)
	 * of n x m Dimension, where n and m are the number of features of each of the
	 * given concepts. An entry of the matrix represents the direct similarity
	 * that this matcher gives comparing the first concept's feature to the
	 * second concept's feature.
	 *   
	 * @param con1 A concept
	 * @param con2 Another concept
	 * @return A two-dimensional array of <code>MatchingResult</code>s (direct similarity matrix),
	 *         or <code>null</code> if a concept is <code>null</code>
	 * @throws SchemaMatchException if the BaseAttributes cannot be read or compared
	 */
	public MatchingResult<BaseAttribute>[][] getSimilarityMatrix(Concept con1, Concept con2) 
	throws SchemaMatchException {
		if (con1 == null || con2 == null) {
			return null;
		}
		try {
			Collection<BaseAttribute> coll1 = con1.getAllBaseAttributes();
			Collection<BaseAttribute> coll2 = con2.getAllBaseAttributes();
			
			// The type argument is given explicitly because some compilers
			// had problems inferring it for this call.
			return this.<BaseAttribute>getSimilarityMatrix(coll1, coll2);
		}
		catch (M4Exception m4e) {
			throw new SchemaMatchException("MmSchemaMatcher.getSimilarityMatrix(Concept, Concept): M4Exception caught: " + m4e.getMessage());
		}
	}
	
	/**
	 * Returns TRUE if this matcher thinks the two given objects are similar,
	 * that is, if their similarity reaches the threshold.
	 * They are never similar if they are not of the same type. The allowed
	 * types are BaseAttribute, Concept and Relation.
	 * 
	 * @param obj1 one object
	 * @param obj2 another object
	 * @return true if the objects are similar, false otherwise
	 * @throws SchemaMatchException if the similarity cannot be computed
	 */
	public boolean objectsAreSimilar(ParameterObject obj1, ParameterObject obj2)
	throws SchemaMatchException {

		if (obj1 == null || obj2 == null) {
			return false;
		}
		
		double sim;
		if (obj1 instanceof BaseAttribute && obj2 instanceof BaseAttribute) {
			// this method involves name and type 
			// if the matcher is instanceof NameAndTypeBasedSchemaMatcher:
			sim = this.getDirectSimilarity((BaseAttribute) obj1, (BaseAttribute) obj2);
		}
		else if (obj1 instanceof Concept && obj2 instanceof Concept) {
			sim = this.getSimilarity((Concept) obj1, (Concept) obj2);
		}
		else if (obj1 instanceof Relation && obj2 instanceof Relation) {
			sim = this.getSimilarity((Relation) obj1, (Relation) obj2);
		}
		else {
			// other or differing types are never similar
			return false;
		}
		return (sim >= THRESHOLD);
	}
	
	/**
	 * Selects matchings from the given matrix greedily: the entry with the
	 * highest similarity is taken first, then its row and column are
	 * excluded, and the search continues on the remainder. Every object is
	 * thus part of at most one returned matching. Among entries of equal
	 * similarity, the last one in row-by-row order is taken.
	 * <p>
	 * The given matrix is not modified.
	 * 
	 * @param similarityMatrix a (rectangular) similarity matrix
	 * @param useThreshold if TRUE, only matchings whose similarity reaches 
	 *        the threshold are returned, and the search stops at the first
	 *        best entry below the threshold; if FALSE, one matching per
	 *        row or column (whichever are fewer) is returned
	 * @return a collection of MatchingResults in the order of selection, or
	 *         <code>null</code> if the matrix is <code>null</code>
	 */
	public <U extends ParameterObject> Collection<MatchingResult<U>> getSimilarMatchingsGreedy(
			MatchingResult<U>[][] similarityMatrix,
			boolean useThreshold) {
		
		if (similarityMatrix == null) {
			return null;
		}
		
		Collection<MatchingResult<U>> ret = new Vector<MatchingResult<U>>();
		
		int noOfRows = similarityMatrix.length;
		int noOfColumns = (noOfRows == 0) ? 0 : similarityMatrix[0].length;
		
		// Instead of copying the matrix without the matched row and column
		// in every round, the matched rows and columns are only marked.
		boolean[] rowTaken = new boolean[noOfRows];
		boolean[] columnTaken = new boolean[noOfColumns];
		int maximumNoOfMatches = Math.min(noOfRows, noOfColumns);
		
		for (int round = 0; round < maximumNoOfMatches; round++) {
			// find best row and column among the remaining ones; if no
			// similarity is comparable (e.g. NaN), the first remaining
			// entry is used:
			double bestOfAllMatchingSimilarities = -1.0;
			int bestRow = indexOfFirstFree(rowTaken);
			int bestColumn = indexOfFirstFree(columnTaken);
			for (int n = 0; n < noOfRows; n++) {
				if (rowTaken[n]) {
					continue;
				}
				for (int m = 0; m < noOfColumns; m++) {
					if (columnTaken[m]) {
						continue;
					}
					double currentSimilarity = similarityMatrix[n][m].getSimilarity();
					if (currentSimilarity >= bestOfAllMatchingSimilarities) {
						bestOfAllMatchingSimilarities = currentSimilarity;
						bestRow = n;
						bestColumn = m;
					}
				}
			}
			
			// do the match:
			MatchingResult<U> result = similarityMatrix[bestRow][bestColumn];
			double resultSimilarity = result.getSimilarity();
			if ((resultSimilarity >= THRESHOLD) || ( ! useThreshold)) {
				ret.add(result);
			}
			
			// if the threshold is used but not exceeded, it won't be exceeded later:
			if (useThreshold && resultSimilarity < THRESHOLD) {
				return ret;
			}
			
			// remove this match from further consideration:
			rowTaken[bestRow] = true;
			columnTaken[bestColumn] = true;
		}
		return ret;
	}
	
	/**
	 * Returns the smallest index whose flag is not set.
	 * 
	 * @param taken the flags to search
	 * @return the index of the first <code>false</code> entry, or -1 if there is none
	 */
	private static int indexOfFirstFree(boolean[] taken) {
		for (int i = 0; i < taken.length; i++) {
			if ( ! taken[i]) {
				return i;
			}
		}
		return -1;
	}
}
/*
 * $Log: MmSchemaMatcher.java,v $
 * Revision 1.10  2006/09/27 15:00:03  euler
 * New version 1.1
 *
 * Revision 1.9  2006/04/11 14:10:17  euler
 * Updated license text.
 *
 * Revision 1.8  2006/04/06 16:31:16  euler
 * Prepended license remark.
 *
 * Revision 1.7  2006/03/02 16:49:59  euler
 * Many bugfixes
 *
 * Revision 1.6  2006/01/03 16:19:09  euler
 * Bugfixes
 *
 * Revision 1.5  2006/01/03 13:57:28  euler
 * Changed hierarchy of matchers, added Editdistance matcher.
 *
 * Revision 1.4  2006/01/03 13:10:49  euler
 * Added a comment.
 *
 * Revision 1.3  2006/01/03 10:55:53  euler
 * Fixed wrong imports.
 *
 * Revision 1.2  2006/01/03 10:44:20  euler
 * Bugfix
 *
 * Revision 1.1  2006/01/03 09:54:33  hakenjos
 * Initial version!
 *
 */
