/*
 * File:        icm.cpp 
 * Author:      Bram Kuijvenhoven (bkuijvenhoven@student.tudelft.nl)
 * Date:        2005/04/09 [yyyy/mm/dd]
 * Description: ICM solver
 */

#include "icm.h"

// Square of x. Function-like macro: the argument text is expanded twice, so an
// argument with side effects is evaluated twice.
#define SQR(x)   ((x)*(x))
// Smaller of x and y. Function-like macro: the selected argument is expanded
// (and therefore evaluated) a second time.
#define MIN(x,y) (((x)<(y))?(x):(y))

// Sets the solver defaults: output to cout, at most 100 iterations,
// accuracy and epsilon both 1e-3, full diagnostic output switched off.
CICMSolver::CICMSolver() {
	fFullOutput    = false;
	fMaxIterations = 100;
	fAccuracy      = 1e-3;
	fEpsilon       = fAccuracy; // epsilon starts out equal to the accuracy
	fOutput        = &cout;
}

// Destructor. The body is empty: nothing is released here.
CICMSolver::~CICMSolver() {
}

// Intentionally empty in this class.
// NOTE(review): presumably a hook that derived solvers override to prepare for a
// solve - confirm against the declaration in icm.h; Solve() in this file does not call it.
void CICMSolver::BeforeSolve() {
}

// Intentionally empty in this class.
// NOTE(review): presumably a hook that derived solvers override to clean up after a
// solve - confirm against the declaration in icm.h; Solve() in this file does not call it.
void CICMSolver::AfterSolve() {
}

// Allocates storage for a complete distribution: an array of fNumDists row
// pointers, the k-th row holding fDistSize[k] (uninitialized) doubles.
// Returns the array of row pointers.
double **CICMSolver::AllocDist() {

	double **rows = new double*[fNumDists];

	int k = 0;
	while (k < fNumDists) {
		rows[k] = new double[fDistSize[k]];
		k++;
	}

	return rows;

}

// Releases a distribution with the layout produced by AllocDist(): fNumDists rows
// plus the array of row pointers. Passing a null pointer is a no-op.
void CICMSolver::FreeDist(double **dist) {

	if (!dist)
		return;

	// both the rows and the row-pointer array were created with new[],
	// so they must be released with delete[]
	for (int k = 0; k < fNumDists; k++)
		delete[] dist[k];
	delete[] dist;

}

/*
 * Runs the ICM iterations and leaves the final iterate in the caller's dist arrays.
 *
 * dist: fNumDists arrays, the k-th holding fDistSize[k] doubles. It is handed to
 *       InitialEstimate() first and contains the last iterate on return.
 *
 * Each iteration evaluates GradPhi() and stops when FenchelOptimality() holds.
 * Otherwise it evaluates HessianDiagPhi(), builds the cumulative sum diagram of
 * every sub-distribution, takes the left derivatives of its greatest convex
 * minorant (negative values clamped to zero) as candidate newDist, and moves from
 * dist towards newDist with LineSearch(). At most fMaxIterations iterations run.
 *
 * Progress messages go to cerr. The solution and, when fFullOutput is set, the
 * per-iteration diagnostics go to *fOutput.
 */
void CICMSolver::Solve(double **dist) {

	cerr << "CICMSolver::Solve(...) entered" << endl;

	int k, i, j, iteration;

	double **grad, **hdiag, **newDist, **diffDist, **optDist, **tempGrad;
	int      maxDistSize;
	double  *G,*V;
	int     *hull;
	double **originalDist;
	double  *alphas, *phis, *stepSizes; // only used when fFullOutput is enabled

	// alloc grad, hdiag, new/diff/optDist, G, V; determine maxDistSize
	grad     = new double *[fNumDists];
	hdiag    = new double *[fNumDists];
	newDist  = new double *[fNumDists];
	diffDist = new double *[fNumDists];
	optDist  = new double *[fNumDists];
	tempGrad = new double *[fNumDists];
	maxDistSize = 0;
	for (k = 0; k < fNumDists; k++) {
		grad[k]     = new double[fDistSize[k]];
		hdiag[k]    = new double[fDistSize[k]];
		newDist[k]  = new double[fDistSize[k]];
		diffDist[k] = new double[fDistSize[k]];
		optDist[k]  = new double[fDistSize[k]];
		tempGrad[k] = new double[fDistSize[k]];
		if (fDistSize[k] > maxDistSize) maxDistSize = fDistSize[k];
	}
	// the diagram has one point more than the largest sub-distribution; point 0 is the origin
	G = new double[maxDistSize+1];
	V = new double[maxDistSize+1];
	G[0] = 0.0;
	V[0] = 0.0;
	// one extra slot for the -1 terminator written by GreatestConvexMinorant
	hull = new int[maxDistSize+2];

	// save dist because we might swap dist and optDist an odd number of times and need to distinguish them at the end
	originalDist = dist;

	// storage space for alphas, phis and step sizes; value-initialized to zero because
	// the iteration that ends on Fenchel optimality records a phi but takes no step,
	// and its row is still printed in the "conv::" table below
	if (fFullOutput) {
		alphas    = new double[fMaxIterations]();
		phis      = new double[fMaxIterations]();
		stepSizes = new double[fMaxIterations]();
	} else {
		alphas = NULL; phis = NULL; stepSizes = NULL;
	}

	// starting point
	InitialEstimate(dist);

	// the ICM iterations
	for (iteration = 0; iteration < fMaxIterations; iteration++) {

		cerr << "Iteration " << iteration;
		if (fFullOutput) {
			double phi = Phi(dist);
			phis[iteration] = phi;
			cerr << "; phi = " << phi;
		}

		GradPhi(dist, grad);

		if (FenchelOptimality(dist, grad)) {
			// count this iteration so that its phi shows up in the "conv::" table
			iteration++;
			cerr << endl;
			break;
		}

		HessianDiagPhi(dist, hdiag);

		// minimize the approximation of phi using grad and hdiag only
		for (k = 0; k < fNumDists; k++) {
			// calculate the coordinates of the cumulative sum diagram, with coordinates (G[i], V[i])
			for (i = 0; i < fDistSize[k]; i++) {
				G[i+1] = G[i] + hdiag[k][i];
				V[i+1] = V[i] + dist[k][i]*hdiag[k][i] - grad[k][i];
			}
			// calculate the left derivatives of the greatest convex minorant
			GreatestConvexMinorant(G, V, fDistSize[k], hull, newDist[k]);
			// fix negative elements to zero
			for (i = 0; i < fDistSize[k]; i++) {
				if (newDist[k][i] < 0.0)
					newDist[k][i] = 0.0;
			}

			// GCM information output
			if (fFullOutput) {
				*fOutput << "iteration(" << (iteration+1) << ").GCM(" << (k+1) << ")::" << endl;
				*fOutput << setw(14) << "G" << setw(14) << "V" << setw(8) << "inHull" << endl;
				for (i = 0; i <= fDistSize[k]; i++) {
					// hull is a list of point indices terminated by a negative entry
					bool inHull = false;
					for (j = 0; hull[j] >= 0; j++)
						if (hull[j] == i) {
							inHull = true;
							break;
						}
					*fOutput << " " << setw(13) << G[i] << " " << setw(13) << V[i] << " " << setw(7) << inHull << endl;
				}
			}
		}

		// perform a line search
		double alpha = LineSearch(dist, newDist, diffDist, tempGrad, optDist);
		if (fFullOutput) alphas[iteration] = alpha;

		// dist information output
		if (fFullOutput) {
			for (k = 0; k < fNumDists; k++) {
				*fOutput << "iteration(" << (iteration+1) << ").dists(" << (k+1) << ")::" << endl;
				*fOutput << setw(14) << "start" << setw(14) << "new" << setw(14) << "optimal" << setw(14) << "grad" << setw(14) << "hdiag" << endl;
				for (i = 0; i < fDistSize[k]; i++) {
					*fOutput << " " << setw(13) << dist[k][i] << " " << setw(13) << newDist[k][i] << " " << setw(13) << optDist[k][i] << " " << setw(13) << grad[k][i] << " " << setw(13) << hdiag[k][i] << endl;
				}
			}
			// calc stepSize: Euclidean distance between the old and the new iterate
			double stepSize = 0.0;
			for (k = 0; k < fNumDists; k++)
				for(i = 0; i < fDistSize[k]; i++)
					stepSize += SQR(optDist[k][i] - dist[k][i]);
			stepSize = sqrt(stepSize);
			stepSizes[iteration] = stepSize;
		}

		cerr << endl;

		// swap dist and optDist: the line search result becomes the current iterate
		swap<double **>(dist, optDist);
	}

	// correct dist and optDist if they are swapped due to an odd number of iterations
	if (originalDist != dist) {
		swap<double **>(dist, optDist); // swap them once again
		// copy optDist to dist
		for (k = 0; k < fNumDists; k++)
			for (i = 0; i < fDistSize[k]; i++)
				dist[k][i] = optDist[k][i];
	}

	// write alpha, phi, stepSize
	if (fFullOutput) {
		*fOutput << "conv::" << endl;
		*fOutput << setw(8) << "phi" << setw(12) << "alpha" << setw(12) << "stepSize" << endl;
		for (i = 0; i < iteration; i++) {
			*fOutput << setw(8) << phis[i] << " " << setw(11) << alphas[i] << " " << setw(11) << stepSizes[i] << endl;
		}
	}

	// write solution
	for (k = 0; k < fNumDists; k++) {
		*fOutput << setw(14) << "solution{" << (k+1) <<"}:";
		for (i = 0; i < fDistSize[k]; i++)
			*fOutput << setw(14) << dist[k][i] << endl;
	}

	// free up local arrays; everything was allocated with new[], so use delete[]
	for (k = 0; k < fNumDists; k++) {
		delete[] grad[k];
		delete[] hdiag[k];
		delete[] newDist[k];
		delete[] diffDist[k];
		delete[] optDist[k];
		delete[] tempGrad[k];
	}
	delete[] grad;
	delete[] hdiag;
	delete[] newDist;
	delete[] diffDist;
	delete[] optDist;
	delete[] tempGrad;
	delete[] G;
	delete[] V;
	delete[] hull;
	// these are NULL when fFullOutput was off; delete[] on a null pointer does nothing
	delete[] alphas;
	delete[] phis;
	delete[] stepSizes;

	cerr << "CICMSolver::Solve(...) left" << endl;

}

/*
 * Checks the optimality conditions for dist given the gradient grad.
 *
 * Returns false as soon as, within some sub-distribution, the sum of the gradient
 * entries from the last index down to any index drops below -fEpsilon, or when
 * the overall inner product <dist, grad> lies outside [-fEpsilon, fEpsilon].
 * Returns true otherwise. The outcome is reported on cerr in every case.
 */
bool CICMSolver::FenchelOptimality(double **dist, double **grad) {

	double distDotGrad = 0.0; // accumulates <dist, grad> over all sub-distributions

	for (int d = 0; d < fNumDists; d++) {
		// <basedist, grad>: running sum of the gradient, taken from the tail backwards
		double tailSum = 0.0;
		int pos = fDistSize[d];
		while (pos-- > 0) {
			tailSum += grad[d][pos];
			if (tailSum < -fEpsilon) {
				cerr << "; failed <basedist, grad>: " << tailSum << " < " << (-fEpsilon);
				return false;
			}
			distDotGrad += dist[d][pos]*grad[d][pos];
		}
	}

	const bool outsideTolerance = (distDotGrad > fEpsilon) || (distDotGrad < -fEpsilon);
	if (outsideTolerance) {
		cerr << "; failed <dist, grad>: abs(" << distDotGrad << ") > " << fEpsilon;
		return false;
	}

	cerr << "; FenchelOptimality succeeded";
	return true;
}

/*
 * Computes the left derivatives of the greatest convex minorant of the diagram
 * with points (G[0],V[0]), ..., (G[distSize],V[distSize]).
 *
 * G, V:     diagram coordinates, distSize+1 entries each
 * distSize: number of slopes to produce
 * hull:     receives the indices of the diagram points on the minorant, in
 *           increasing order, followed by a terminating -1
 * dist:     receives the distSize slopes; dist[i] belongs to the step from point i to i+1
 */
void CICMSolver::GreatestConvexMinorant(double *G, double *V, int distSize, int *hull, double *dist) {

	// the origin of the diagram, (G[0],V[0]), is always the first hull point
	hull[0] = 0;
	int hullCount = 1;

	int pos = 1;
	while (pos <= distSize) {
		const int groupBegin = pos;
		// advance pos to the first index whose G differs from G[groupBegin] (or past the end)
		for (++pos; pos <= distSize && G[pos]-G[groupBegin] == 0; ++pos) {
		}

		// among the points sharing this G only the lowest one is a candidate
		int lowest = groupBegin;
		for (int c = groupBegin+1; c < pos; ++c) {
			if (V[c] < V[lowest])
				lowest = c;
		}

		// drop hull points from the top for as long as the chord to the candidate
		// is not steeper than the hull segment ending in that point
		int top = hullCount - 1;
		double chord = (V[lowest]-V[hull[top]])/(G[lowest]-G[hull[top]]);
		while (top > 0 && !(chord > dist[hull[top]-1])) {
			--top;
			chord = (V[lowest]-V[hull[top]])/(G[lowest]-G[hull[top]]);
		}

		// the candidate becomes the new top of the hull; remember the slope leading to it
		hull[top+1] = lowest;
		hullCount = top + 2;
		dist[lowest-1] = chord;
	}
	hull[hullCount] = -1;

	// every step between two consecutive hull points gets the slope of that hull segment
	double lastSlope = 0.0;
	for (int h = 1; h < hullCount; ++h) {
		lastSlope = dist[hull[h]-1];
		for (pos = hull[h-1]; pos < hull[h]-1; ++pos)
			dist[pos] = lastSlope;
	}
	// steps beyond the last hull point continue with the last slope
	// (pos deliberately carries over from the loop above)
	for (++pos; pos < distSize; ++pos)
		dist[pos] = lastSlope;

}

/*
 * Line search from dist towards newDist.
 *
 * dist:     current iterate (read only)
 * newDist:  candidate iterate (read only)
 * diffDist: scratch; receives the search direction newDist - dist
 * tempGrad: scratch; receives the gradients evaluated during the bisection
 * optDist:  receives the result dist + alpha * diffDist
 *
 * The step length alpha is found by bisection on the sign of the directional
 * derivative of phi along diffDist, restricted to [0, min(maxAlpha, 2)], where
 * maxAlpha is the largest step that keeps the iterate feasible.
 * Returns the alpha that was used. maxAlpha and alpha are reported on cerr.
 */
double CICMSolver::LineSearch(double **dist, double **newDist, double **diffDist, double **tempGrad, double **optDist) {

	int k;
	int i;

	// calc diffDist = newDist - dist
	for (k = 0; k < fNumDists; k++)
		for (i = 0; i < fDistSize[k]; i++)
			diffDist[k][i] = newDist[k][i] - dist[k][i];

	// determine maximum value for alpha (i.e. such that dist + alpha * diffDist is still feasible)
	double maxAlpha = 1e100; // huge
	for (k = 0; k < fNumDists; k++) {
		if (fDistSize[k] <= 0) continue;

		// make sure the first entry in the k-th subdist is non-negative
		if (diffDist[k][0] < 0)
			maxAlpha = MIN(maxAlpha, -dist[k][0]/diffDist[k][0]);

		// make sure the k-th subdist is monotonically non-decreasing
		for (i = 1; i < fDistSize[k]; i++) {
			double approach = diffDist[k][i-1] - diffDist[k][i];
			if (approach > 0)
				maxAlpha = MIN(maxAlpha, (dist[k][i] - dist[k][i-1])/approach);
		}
	}
	if (!fNeedLagrangian) {
		// make sure that F_{+,p} <= 1
		double Fplus = 0.0;
		double FplusApproach = 0.0;
		for (k = 0; k < fNumDists; k++) {
			// an empty subdist has no last entry; skip it, as the feasibility loop above does
			if (fDistSize[k] <= 0) continue;
			Fplus += dist[k][fDistSize[k]-1];
			FplusApproach += diffDist[k][fDistSize[k]-1];
		}

		if (FplusApproach > 0)
			maxAlpha = MIN(maxAlpha, (1.0 - Fplus)/FplusApproach);
	}

	cerr << "; maxAlpha: " << maxAlpha;

	// bisection algorithm
	double low  = 0.0;
	double high = MIN(maxAlpha, 2.0);
	double alpha = high/2; // used as is when the loop below does not run at all
	double precision = high*1e-4;
	for (; high - low > precision; ) {
		// calc mid stuff
		alpha = (low+high)/2;
		LinearCombination(dist, diffDist, alpha, optDist); // optDist now is the dist in the mid point
		GradPhi(optDist, tempGrad);                      // tempGrad now is the gradient in the mid point
		double midDirectionalDerivative = 0.0;
		for (k = 0; k < fNumDists; k++)
			for (i = 0; i < fDistSize[k]; i++)
				midDirectionalDerivative += diffDist[k][i]*tempGrad[k][i];

		// determine which side to go: phi still decreasing at the mid point means the minimum lies beyond it
		if (midDirectionalDerivative > 0.0) {
			high = alpha;
		} else {
			low = alpha;
		}
	}

	// use found alpha
	LinearCombination(dist, diffDist, alpha, optDist);

	cerr << "; alpha: " << alpha;

	return alpha;

}

// Stores dist + alpha * diffDist in tempDist, entry by entry, for every
// sub-distribution. dist and diffDist are only read.
void CICMSolver::LinearCombination(double **dist, double **diffDist, double alpha, double **tempDist) {

	for (int d = 0; d < fNumDists; d++) {
		const double *base = dist[d];
		const double *step = diffDist[d];
		double       *out  = tempDist[d];
		for (int e = 0; e < fDistSize[d]; e++)
			out[e] = base[e] + alpha*step[e];
	}

}

// Writes dist to os as a table with one column per sub-distribution: a header
// row with the column indices, then one row per entry index. Columns that are
// shorter than the longest sub-distribution are padded with blanks.
void CICMSolver::WriteDist(ostream &os, double **dist) {

	// the table has as many rows as the longest sub-distribution has entries
	int rows = 0;
	for (int c = 0; c < fNumDists; c++) {
		if (fDistSize[c] > rows)
			rows = fDistSize[c];
	}

	// header row
	for (int c = 0; c < fNumDists; c++)
		os << " " << setw(9) << c;
	os << endl;

	// table body
	for (int r = 0; r < rows; r++) {
		for (int c = 0; c < fNumDists; c++) {
			if (r >= fDistSize[c]) {
				// this column has ended: keep the following columns aligned
				os << "          ";
				continue;
			}
			os << " " << setw(9) << dist[c][r];
		}
		os << endl;
	}

}
