/**
 * [DStream.java]
 * 
 * Reference: Chen et al., "Density-Based Clustering for Real-Time Stream Data", KDD 2007
 * 
 * @author Yunsu Kim
 * Data Management and Data Exploration Group, RWTH Aachen University
 */
package moa.clusterers.dstream;

import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;

import moa.cluster.Clustering;
import moa.clusterers.AbstractClusterer;
import moa.core.Measurement;
import moa.options.FloatOption;
import weka.core.Instance;

public class DStream extends AbstractClusterer {

	private static final long serialVersionUID = 1L;
	
	private static boolean debug = true;

	public FloatOption lambdaOption = new FloatOption("lambda", 'l',
			"Decay factor", 0.998, 0, 1);
	public FloatOption denseControlOption = new FloatOption("denseControl", 'd',
			"Controlling parameter for dense grid threshold", 3.0, 1, Double.MAX_VALUE);
	public FloatOption sparseControlOption = new FloatOption("sparseControl", 's',
			"Controlling parameter for sparse grid threshold", 0.8, 0, 1);
	public FloatOption betaOption = new FloatOption("beta", 'b',
			"Controlling parameter for cool time of removed grids", 0.3, 0, Double.MAX_VALUE);
	public FloatOption gridSizeOption = new FloatOption("gridSize", 'g',
			"Interval length of a grid for each dimension" +
			"(assuming that the instances are normalized to [0,1] in advance)", 0.05, 0, 1);
	
	protected long tc;
	protected long gap;
	protected double lambda;
	
	protected double Cm, Cl;
	protected int d;
	protected double len;
	protected int p, N;
	
	protected HashMap<int[], Grid> grid_list;
	protected HashMap<int[], Long> tm_list;
	protected HashMap<Double, GridCluster> cluster_list;
	protected double classLabelCnt = 0;
	
	protected boolean initialized;


	@Override
	public void resetLearningImpl() {
		lambda = lambdaOption.getValue();
		Cm = denseControlOption.getValue();
		Cl = sparseControlOption.getValue();
		len = gridSizeOption.getValue();
		
		tc = 0;
		grid_list = new HashMap<int[], Grid>();
		tm_list = new HashMap<int[], Long>();
		cluster_list = new HashMap<Double, GridCluster>();
		
		initialized = false;
		
		if (debug) {
			System.out.println("----- DEBUG: DStream:resetLearningImpl() -----\n" +
							   "lambda = " + lambda + "\n" +
							   "Cm = " + Cm + "\n" +
							   "Cl = " + Cl + "\n" +
							   "len = " + len + "\n");			
		}
	}

	@Override
	public void trainOnInstanceImpl(Instance inst) {
		if (!initialized) {
			d = inst.numAttributes();
			
			p = (int)Math.ceil(1.0 / len);
			N = p * d;

			if (N <= Cm) {
				throw new RuntimeException("DStream.resetLearningImpl(): N > Cm should be fulfilled");
			}
			
			gap = (int) Math.floor(log(lambda, Math.max(Cl / Cm, ((double)N - Cm) / ((double)N - Cl))));
			if (gap <= 0) {
				throw new RuntimeException("DStream.resetLearningImpl(): gap <= 0");
			}
			
			Grid.setStatics(lambda, N, d, Cm, Cl);
			initialized = true;
			
			if (debug) {
				System.out.println("----- DEBUG: DStream:trainOnInstanceImpl() (Initialization) -----\n" +
								   "d = " + d + "\n" +
								   "p = " + p + "\n" +
								   "N = " + N + "\n" +
								   "gap = " + gap + "\n");			
			}
		}
		
		// Determine the density grid g that contains x
		int[] gridPos = new int[d];
		for (int i = 0; i < d; i++) {
			gridPos[i] = Grid.POSITION_NULL;	// Initial value
			double value_i = inst.value(i);
			
			for (int j = 0; j < p; j++) {
				if (value_i <= 0 + len * (j + 1)) {
					gridPos[i] = j;
					break;
				}
			}
			
			if (gridPos[i] == Grid.POSITION_NULL) {
				throw new RuntimeException("DStream.trainOnInstanceImpl(): the instance cannot be assigned to any grid "
										 + "(check if it's normalized to [0,1])");
			}
		}
		
		// Get the grid g
		Grid g = grid_list.get(gridPos);
		if (g == null) {
			Long tm = tm_list.get(gridPos);
			if (tm != null) {
				g = new Grid(gridPos, len, tc, tm.longValue());		// Previously deleted
			} else {
				g = new Grid(gridPos, len, tc);						// Totally new
			}
			
			grid_list.put(gridPos, g);
		}
		
		// Update g
		g.mapNewRecord(tc);
		
		// Initial point
		if (tc == gap) {
			initial_clustering();
		}
		
		// Interval point
		if (tc % gap == 0) {
			// Remove sporadic grids
			List<int[]> markedToRemove = new ArrayList<int[]>();
			for (int[] pos : grid_list.keySet()) {
				Grid grid = grid_list.get(pos);
				if (grid.isMarkedAsSporadic()) {
					if (grid.getLastDensityUpdateTime() <= tc - gap) {
						markedToRemove.add(pos);		// Mark to remove
					}
				}
			}
			
			for (int[] pos : markedToRemove) {
				Grid gridToRemove = grid_list.get(pos);
				grid_list.remove(pos);					// Remove from grid_list
				
				double label_grid = gridToRemove.getLabel();
				if (label_grid != Grid.NO_CLASS) {		// Remove from its cluster
					cluster_list.get(label_grid).removeGrid(gridToRemove);
				}
				
				tm_list.put(pos, tc);		// Record its removal time				
			}
			
			System.gc();	// Suggested memory free point
			
			// Detect sporadic grids
			for (int[] pos : grid_list.keySet()) {
				grid_list.get(pos).updateStatus(tc);
			}
			
			if (debug) {
				System.out.println("----- DEBUG: DStream (gap " + (tc / gap) + "): " +
								   markedToRemove.size() + " grids removed, ");			
			}
			
			adjust_clustering();
		}
		
		tc++;
	}
	
	@Override
	public Clustering getClusteringResult() {
		Collection<GridCluster> clusterCollection = cluster_list.values();

		if (debug) {
			System.out.println("----- DEBUG: DStream:getClusteringResult() -----\n" +
							   "number of clusters = " + clusterCollection.size() + "\n");			
		}

		if (clusterCollection.isEmpty()) {
			return new Clustering();
		} else {
			GridCluster[] clusterArray = (GridCluster[]) clusterCollection.toArray();
			return new Clustering(clusterArray);
		}
	}
	
	
	/** Clustering **/
	protected void initial_clustering() {
		for (Grid g : grid_list.values()) {
			g.updateDensity(tc);		// Update the density of all grids in grid_list
			
			if (g.isDenseGrid()) {		// Assign each dense grid to a distinct cluster
				cluster_list.put(classLabelCnt, new GridCluster(g, classLabelCnt));
				classLabelCnt++;
			} else {					// Label all other grids as NO_CLASS
				g.setNoClass();
			}
		}
				
		boolean clusterChange = false;
		do {
			for (GridCluster c : cluster_list.values()) {
				for (Grid g : c.getGrids()) {
					if (g.isOutsideGridIn(c)) {
						double label_g = g.getLabel();
						for (Grid h : grid_list.values()) {
							double label_h = h.getLabel();
							if (h.isNeighboringGridOf(g) && label_h != label_g) {
								if (label_h != Grid.NO_CLASS) {
									GridCluster c_ = cluster_list.get(label_h);	
									if (c.size() > c_.size()) {
										c.mergeCluster(c_);
										// Concurrent problem: remove clusters from cluster_list
									} else {
										c_.mergeCluster(c);
									}
									clusterChange = true;	
								} else if (h.isTransitionalGrid()) {
									c.addGrid(h);
									clusterChange = true;
								}
							}
						}
					}
				}
			}
		} while (clusterChange);
	}
	
	protected void adjust_clustering() {
		
		for (Grid g : grid_list.values()) {
			g.updateDensity(tc);		// Update the density of all grids in grid_list
			
			// Foreach grid g whose attribute is changed
			// since last call to adjust_clustering()
			if (g.loadSavedLevel() != g.currentLevel()) {
				if (g.isSparseGrid()) {
					GridCluster c = cluster_list.get(g.getLabel());
					if (c != null) {
						c.removeGrid(g);
						List<Grid> splitGroup = c.getSplitGroup();
						
						if (splitGroup != null) {
							c.removeGrids(splitGroup);
							cluster_list.put(classLabelCnt, new GridCluster(splitGroup, classLabelCnt));
							classLabelCnt++;
						}
					}
				} else if (g.isDenseGrid()) {
					double label_g = g.getLabel();
					GridCluster c = cluster_list.get(label_g);
					
					// Among all neighboring grids of g,
					// find out the grid h whose cluster ch has the largest size
					Grid h = null;
					GridCluster ch = null;
					for (Grid h_ : grid_list.values()) {
						double label_h = h_.getLabel();
						if (h_.isNeighboringGridOf(g) && label_h != label_g && label_h != Grid.NO_CLASS) {
							GridCluster ch_ = cluster_list.get(label_h);
							if (ch == null) {
								h = h_;
								ch = ch_;
							} else if (ch_.size() > ch.size()) {
								h = h_;
								ch = ch_;
							}
						}
					}
					
					if (ch != null) {
						if (h.isDenseGrid()) {
							if (label_g == Grid.NO_CLASS) {
								
							} else if (c.size() > ch.size()) {
								c.mergeCluster(ch);
							} else {
								ch.mergeCluster(c);
							}
						} else if (h.isTransitionalGrid()) {
							if (label_g == Grid.NO_CLASS) {
								c.removeGrid(g);
								ch.addGrid(g);		// Temporarily assign g to ch
								if (!h.isOutsideGridIn(ch)) {
									ch.removeGrid(g);
									c.addGrid(g);	// Roll-back
								}
							} else if (c.size() >= ch.size()) {
								ch.removeGrid(h);
								c.addGrid(h);
							}
						}
					}
					
				} else if (g.isTransitionalGrid()) {
					GridCluster c_ = null;		// Largest neighboring cluster
					for (GridCluster gc : cluster_list.values()) {
						if (gc.isNeighboringClusterOf(g)) {
							if (c_ == null) {
								c_ = gc;
							} else if (gc.size() > c_.size()) {
								c_ = gc;
							}
						}
					}
					
					if (c_ != null) {
						c_.addGrid(g); 			// Temporarily add
						if (!g.isOutsideGridIn(c_)) {
							c_.removeGrid(g);	// Roll-back
						}
					}
				}
				
				// For the next call to adjust_clustering()
				g.saveLevel();
			}
		}
	}
	

	/** Other overrided functions **/
	
	@Override
	public boolean isRandomizable() {
		return false;
	}

	@Override
	public double[] getVotesForInstance(Instance inst) {
		return null;
	}
	
	@Override
	protected Measurement[] getModelMeasurementsImpl() {
		return null;
	}

	@Override
	public void getModelDescription(StringBuilder out, int indent) {

	}
	
	
	/** Misc **/
	
	protected double log(double base, double num) {
		return Math.log(num) / Math.log(base);
	}
}
