#ifndef lint
static char SCCSid[] = "@(#) ./iter/gmres/gmres.c 07/23/93";
#endif

/*
    This implements gmres.  It may be called recurrsively as long as 
    all of the user-supplied routines can. 

    This routine is meant to be compatible with execution on a parallel
    processor.  As such, it expects to be given routines for 
    all operations as well as a user-defined pointer to a distributed
    data structure.  THIS IS A DATA-STRUCTURE NEUTRAL IMPLEMENTATION.
  
    A context variable is used to hold internal data (the Hessenberg
    matrix and various parameters).

    Here are the routines that must be provided.  The generic parameters
    are:
         usrP  - user provided pointer.  This code just passes it
	 itP   - Iterative context.  See the generic iterative method
	         information

    Special routines needed only for gmres:

    void   orthog( usrP, itP, it )
        perform the orthogonalization of the vectors VV to VV+it.  A 
        basic version of this, defined in terms of vdot and maxpy, is
        available (in baseorthog.c) called void BasicOrthog;
	The user may use this routine to try alternate approaches.

    The calling sequence is the same as for all of the iterative methods.
    The special values (specific to GMRES) are:

    Some comments on left vs. right preconditioning, and restarts.
    Left and right preconditioning.
    If right preconditioning is chosen, then the problem being solved
    by gmres is actually
       My =  AB^-1 y = f
    so the initial residual is 
          r = f - Mx
    Note that B^-1 y = x or y = B x, and if x is non-zero, the initial
    residual is
          r = f - A x
    The final solution is then
          x = B^-1 y 

    If left preconditioning is chosen, then the problem being solved is
       My = B^-1 A x = B^-1 f,
    and the initial residual is
       r  = B^-1(f - Ax)

    Restarts:  Restarts are basically solves with x0 not equal to zero.
    Note that we can elliminate an extra application of B^-1 between
    restarts as long as we don't require that the solution at the end
    of a unsuccessful gmres iteration always be the solution x.
 */

#include <math.h>
#include <stdio.h>
#include "tools.h"
#define ITCOUNT it
#define IT it
#include "tools.h"
#include "iter/itctx.h"
#include "iter/itpriv.h"
#include "iter/itfunc.h"
#include "iter/gmresctx.h"
#include "iter/gmres/gmresp.h"
#define GMRES_DELTA_DIRECTIONS 5
#define GMRES_DEFAULT_MAXK 10

/* Forward references */
static double GMRESUpdateHessenberg();
static void   GMRESGetNewVectors();
static void   *GMRESBuildSolution();
static int    GMRESResidual();
       int    ITGMRESDefaultConverged();

/*+
  ITGMRESCreate - Create the iterative context for GMRES.
  Preferred calling sequence ITCreate(ITGMRES).
 +*/
ITCntx *ITGMRESCreate()
{
  ITCntx    *itP;
  ITGMRESCntx *gmresP;
  int  GMRESBasicOrthog();

  itP = NEW(ITCntx); CHKPTRV(itP,0);
  ITSetDefaults( itP );
  gmresP = NEW(ITGMRESCntx); CHKPTRV(gmresP,0);

  itP->MethodPrivate = (void *) gmresP;
  itP->method     = ITGMRES;
  itP->usr_monitor= ITDefaultMonitor;
  itP->converged  = ITGMRESDefaultConverged;
  itP->BuildSolution = GMRESBuildSolution;

  itP->setup      = ITGMRESSetUp;
  itP->solver     = ITGMRESSolve;
  itP->adjustwork = ITGMRESAdjustWork;
  itP->closedown  = ITGMRESDestroy;

  gmresP->haptol    = 1.0e-8;
  gmresP->epsabs    = 1.0e-8;
  gmresP->q_preallocate = 0;
  gmresP->delta_allocate = GMRES_DELTA_DIRECTIONS;
  gmresP->orthog    = GMRESBasicOrthog;
  gmresP->nrs       = 0;
  gmresP->sol_temp  = 0;
  gmresP->max_k     = GMRES_DEFAULT_MAXK;
  
  return itP;
}

/*+
    ITGMRESSetUp - Called after a call to ITGMRESCreate() or 
    ITCREATE(ITGMRES), allocates
    space needed in the GMRES solution. Preferred call sequence is
    ITSetUp(itP,usrP).

    Input Parameters:
.   itP - the iterative context
.   usrP - the user context
+*/
void ITGMRESSetUp( itP, usrP )
ITCntx *itP;
void   *usrP;
{
unsigned int size, hh, hes, rs, cc;
int      max_k, k;
ITGMRESCntx *gmresP = (ITGMRESCntx *)itP->MethodPrivate;

ITCheckDef( itP );

max_k         = gmresP->max_k;
hh            = (max_k + 2) * (max_k + 1);
hes           = (max_k + 1) * (max_k + 1);
rs            = (max_k + 2);
cc            = (max_k + 1);
size          = (hh + hes + rs + 2*cc) * sizeof(double);

gmresP->hh_origin  = (double *) MALLOC( size );
CHKPTR(gmresP->hh_origin);
gmresP->hes_origin = gmresP->hh_origin + hh;
gmresP->rs_origin  = gmresP->hes_origin + hes;
gmresP->cc_origin  = gmresP->rs_origin + rs;
gmresP->ss_origin  = gmresP->cc_origin + cc;

/* Allocate array to hold pointers to user vectors.  Note that we need
   4 + max_k + 1 (since we need it+1 vectors, and it <= max_k) */
gmresP->vecs = (void **) MALLOC( (VEC_OFFSET + 2 + max_k)*sizeof(void *) );
CHKPTR(gmresP->vecs);
gmresP->vecs_allocated = VEC_OFFSET + 2 + max_k;
gmresP->user_work = (void ***)MALLOC( (VEC_OFFSET + 2 + max_k)*sizeof(void *));
CHKPTR(gmresP->user_work);
gmresP->mwork_alloc = (int *) MALLOC( (VEC_OFFSET+2+max_k)*sizeof(int) );
CHKPTR(gmresP->mwork_alloc);

if (gmresP->q_preallocate) {
    gmresP->vv_allocated   = VEC_OFFSET + 2 + max_k;
    gmresP->user_work[0]   = (*itP->vc->obtain_vectors)( 
						 usrP, gmresP->vv_allocated );
    gmresP->mwork_alloc[0] = gmresP->vv_allocated;
    gmresP->nwork_alloc    = 1;
    for (k=0; k<gmresP->vv_allocated; k++)
	gmresP->vecs[k] = gmresP->user_work[0][k];
    }
else {
    gmresP->vv_allocated    = 5;
    gmresP->user_work[0]    = (*itP->vc->obtain_vectors)( usrP, 5 );
    gmresP->mwork_alloc[0]  = 5;
    gmresP->nwork_alloc     = 1;
    for (k=0; k<gmresP->vv_allocated; k++)
	gmresP->vecs[k] = gmresP->user_work[0][k];
    }
}

/*+
   ITGMRESSolve - Run (restarted) gmres solver.
   Preferred calling sequence ITSOLVE( itP, usrP).

   Input Parameters:
.   itP and usrP - pointers to iterative and user context
 
    Returns:
    number of iterations

    Note:
    Currently has no good way to return convergence failure.
 +*/
int ITGMRESSolve( itP, usrP )
ITCntx *itP;
void      *usrP;
{
int maxit, err;
int restart, its, itcount, it;
ITGMRESCntx *gmresP = (ITGMRESCntx *)itP->MethodPrivate;

it      = 0;
restart = 0;
itcount = 0;
/* Save binv*f */
if (!itP->right_inv) {
    /* inv(b)*f */
    PRE( VEC_RHS, VEC_BINVF );
    }
else 
    COPY( VEC_RHS, VEC_BINVF );
/* Compute the initial (preconditioned) residual */
if (!itP->guess_zero) {
    if (err=GMRESResidual( usrP, itP, restart )) return err;
    }
else     
    COPY( VEC_BINVF, VEC_VV(0) );
    
while (err = GMREScycle( usrP, &its, itcount, restart, itP )) {
    restart = 1;
    itcount += its;
    if( err = GMRESResidual( usrP, itP, restart )) return err;
    if (itcount > itP->max_it) break;
    /* need another check to make sure that gmres breaks out 
       at precisely the number of iterations chosen */
    }
itcount += its;      /* add in last call to GMREScycle */
return itcount;
}

/* -----------------------------------------------------------------*/
/*           Allows user to change work vectors                     */
/* This should really be a generic routine that handles workspaces  */
/* -----------------------------------------------------------------*/
void ITGMRESAdjustWork( itP, usrP )
ITCntx *itP;
void      *usrP;
{
ITGMRESCntx *gmresP;
int          i;

if ( itP->adjust_work_vectors ) {
   gmresP = (ITGMRESCntx *) itP->MethodPrivate;
   for (i=0; i<gmresP->vv_allocated; i++) 
       if ( (*itP->adjust_work_vectors)(usrP,gmresP->user_work[i],
					     gmresP->mwork_alloc[i] ) ) 
	   SETERRC(1,"Could not allocate work vectors in GMRES");
   }
}

/*+
    ITGMRESDestroy - Destroys an iterative context variable obtained
    by a call to ITGMRESCreate() or ITCreate(ITGMRES). Preferred calling
    sequence ITDestroy().

    Input Parameters:
.   itP - the iterative context
.   usrP - the user context
+*/
void ITGMRESDestroy(itP,usrP)
ITCntx *itP;
void   *usrP;
{
ITGMRESCntx *gmresP = (ITGMRESCntx *) itP->MethodPrivate;
int          i;

/* Free the matrix */
FREE( gmresP->hh_origin );

/* Free the pointer to user variables */
FREE( gmresP->vecs );

/* free work vectors */
if ( itP->vc->release_vectors ) {
    for (i=0; i<gmresP->nwork_alloc; i++) 
	(*itP->vc->release_vectors)(usrP,gmresP->user_work[i],
					       gmresP->mwork_alloc[i] );
    }
FREE( gmresP->user_work );
FREE( gmresP->mwork_alloc );
if (gmresP->nrs) 
    FREE( gmresP->nrs );
/* free the context variables */
FREE( gmresP ); 
FREE( itP );
}

/*
    Run gmres, possibly with restart.  Return residual history if requested.
    input parameters:
.        usrP    - User context
.        restart - 1 if restarting gmres, 0 otherwise
.	gmresP  - structure containing parameters and work areas
.	itsSoFar- total number of iterations so far (from previous cycles)

    output parameters:
.        nres    - residuals (from preconditioned system) at each step.
                  If restarting, consider passing nres+it.  If null, 
                  ignored
.        itcount - number of iterations used.  nres[0] to nres[itcount]
                  are defined.  If null, ignored.
    Returns:
    0 on success, 1 on failure (did not converge)

    Notes:
    On entry, the value in vector VEC_VV(0) should be the initial residual
    (this allows shortcuts where the initial preconditioned residual is 0).
 */
int GMREScycle( usrP, itcount, itsSoFar, restart, itP )
void         *usrP;
int          *itcount, itsSoFar;
int          restart; 
ITCntx    *itP;
{
double  res_norm, res, rtol, tt, hapbnd, hist_len= itP->res_hist_size, cerr;
double  *nres = itP->residual_history;
/* Note that hapend is ignored in the code */
int     it, hapend, converged;
ITGMRESCntx *gmresP = (ITGMRESCntx *)(itP->MethodPrivate);
int     max_k = gmresP->max_k;
int     max_it = itP->max_it;

/* Question: on restart, compute the residual?  No; provide a restart 
   driver */

it  = 0;

/* dest . dest */
NORM(VEC_VV(0),&res_norm);
res         = res_norm;
*RS(0)      = res_norm;

/* Do-nothing case: */
if (res_norm == 0.0) {
    if (itcount) *itcount = 0;
    return ERR_NONE;
    }
/* scale VEC_VV (the initial residual) */
SCALE( 1.0/res_norm, VEC_VV(0) );

if (!restart) {
    rtol      = itP->rtol * res_norm;
    itP->ttol = (itP->atol > rtol) ? itP->atol : rtol;
    }
rtol= itP->ttol;
gmresP->it = (it-1);  /* For converged */
while ( !(converged = CONVERGED(res,it+itsSoFar)) && it < max_k && 
        it + itsSoFar < max_it) {
    if (nres && hist_len > it + itsSoFar) nres[it+itsSoFar]   = res;
    if (itP->usr_monitor) {
	gmresP->it = (it - 1);
        (*itP->usr_monitor)( itP, usrP, it + itsSoFar, res );
	}
    if (gmresP->vv_allocated <= it + VEC_OFFSET + 1) {
	/* get more vectors */
	GMRESGetNewVectors( usrP, itP, it+1 );
	}
    MATOP( VEC_VV(it), VEC_VV(it+1), VEC_TEMP_MATOP );

    /* update hessenberg matrix and do Gram-Schmidt */
    (*gmresP->orthog)( usrP, itP, it );

    /* vv(i+1) . vv(i+1) */
    NORM(VEC_VV(it+1),&tt);
    /* save the magnitude */
    *HH(it+1,it)    = tt;
    *HES(it+1,it)   = tt;

    /* check for the happy breakdown */
    hapbnd  = gmresP->epsabs * fabs( *HH(it,it) / *RS(it) );
    if (hapbnd > gmresP->haptol) hapbnd = gmresP->haptol;
    if (tt > hapbnd) {
        SCALE( 1.0/tt, VEC_VV(it+1) );
        }
    else {
        /* We SHOULD probably abort the gmres step
           here.  This happens when the solution is exactly reached. */
        hapend  = 1;
        }
    res = GMRESUpdateHessenberg( itP, it );
    it++;
    gmresP->it = (it-1);  /* For converged */
    }
itP->nmatop   += it;
itP->nvectors += 3 + it * (3 + (it - 1));

if (nres && hist_len > it) nres[it]   = res; 
if (nres) itP->res_act_size = (hist_len < it) ? hist_len : it;
if (itP->usr_monitor) {
    gmresP->it = it - 1;
    (*itP->usr_monitor)( itP, usrP, it + itsSoFar, res );
    }
if (itcount) *itcount    = it;

/*
    Down here we have to solve for the "best" coefficients of the Krylov
    columns, add the solution values together, and possibly unwind the
    preconditioning from the solution
 */
if (it == 0) {
    /* exited at the top before doing ANYTHING */
    return 0;
    }

/* Form the solution (or the solution so far) */
BuildGmresSoln( usrP, RS(0), VEC_SOLN, VEC_SOLN, itP, it-1 );

/* Return correct status (Failed on iteration test (failed to converge)) */
return !converged;
}

/*
    BuildGmresSoln - create the solution from the starting vector and the
    current iterates.

    Input parameters:
        nrs - work area of size it + 1.
	vs  - index of initial guess
	vdest - index of result.  Note that vs may == vdest (replace
	        guess with the solution).

     This is an internal routine that knows about the GMRES internals.
 */
BuildGmresSoln( usrP, nrs, vs, vdest, itP, it )
void      *usrP;
double    *nrs;
void      *vs, *vdest;
int       it;
ITCntx *itP;
{
double  tt;
int     ii, k, j;
ITGMRESCntx *gmresP = (ITGMRESCntx *)(itP->MethodPrivate);

/* Solve for solution vector that minimizes the residual */

/* If it is < 0, no gmres steps have been performed */
if (it < 0) {
    if (vdest != vs) {
	COPY( vs, vdest );
	}
    return;
    }
nrs[it] = *RS(it) / *HH(it,it);
for (ii=1; ii<=it; ii++) {
    k   = it - ii;
    tt  = *RS(k);
    for (j=k+1; j<=it; j++)
        tt  = tt - *HH(k,j) * nrs[j];
    nrs[k]   = tt / *HH(k,k);
    }

/* Accumulate the correction to the solution of the preconditioned problem
   in TEMP */
SET( 0.0, VEC_TEMP );
BasicMultiMaxpy( usrP, &VEC_VV(0), it, nrs, VEC_TEMP, itP->vc->axpy );

/* If we preconditioned on the right, we need to solve for the correction to
   the unpreconditioned problem */
if (itP->right_inv) {
    if (vdest != vs) {
	PRE( VEC_TEMP, vdest );
	DAXPY( 1.0, vs, vdest );
	}
    else {
	PRE( VEC_TEMP, VEC_TEMP_MATOP );
	DAXPY( 1.0, VEC_TEMP_MATOP, vdest );
	}
    }
else {
    if (vdest != vs) {
	COPY( VEC_TEMP, vdest );
	DAXPY( 1.0, vs, vdest );
	}
    else 
	DAXPY( 1.0, VEC_TEMP, vdest );
    }
}

/*
   Do the scalar work for the orthogonalization.  Return new residual.
 */
static double GMRESUpdateHessenberg( itP, it )
ITCntx *itP;
int       it;
{
register double *hh, *cc, *ss, tt;
register int    j;
ITGMRESCntx *gmresP = (ITGMRESCntx *)(itP->MethodPrivate);

hh  = HH(0,it);
cc  = CC(0);
ss  = SS(0);

/* Apply all the previously computed plane rotations to the new column
   of the Hessenberg matrix */
for (j=1; j<=it; j++) {
    tt  = *hh;
    *hh = *cc * tt + *ss * *(hh+1);
    hh++;
    *hh = *cc++ * *hh - ( *ss++ * tt );
    }

/*
  compute the new plane rotation, and apply it to:
   1) the right hand side of the Hessenberg system
   2) the new column of the Hessenberg matrix
  thus obtaining the updated value of the residual
*/
tt        = sqrt( *hh * *hh + *(hh+1) * *(hh+1) );
*cc       = *hh / tt;
*ss       = *(hh+1) / tt;
*RS(it+1) = - ( *ss * *RS(it) );
*RS(it)   = *cc * *RS(it);
*hh       = *cc * *hh + *ss * *(hh+1);
return fabs( *RS(it+1) );
}

/* 
    This routine computes the initial residual without making any assumptions
    about the solution.
 */
static int GMRESResidual( usrP, itP, restart )
void   *usrP;
ITCntx *itP;
int    restart;
{
ITGMRESCntx *gmresP = (ITGMRESCntx *)(itP->MethodPrivate);
int          ITCOUNT = 0;

/* compute initial residual: f - M*x */
/* (inv(b)*a)*x or (a*inv(b)*b)*x into dest */
if (itP->right_inv) {
    /* we want a * binv * b * x, or just a * x for the first step */
    /* a*x into temp */
    MM( VEC_SOLN, VEC_TEMP );
    }
else {
    /* else we do binv * a * x */
    MATOP( VEC_SOLN, VEC_TEMP, VEC_TEMP_MATOP );
    }
/* This is an extra copy for the right-inverse case */
COPY( VEC_BINVF, VEC_VV(0) );
DAXPY( -1.0, VEC_TEMP, VEC_VV(0) );
      /* inv(b)(f - a*x) into dest */
return 0;
}

/*@
    ITGMRESSetDirections - Sets the number of search directions 
    for GMRES before restart.

    Input Parameters:
.   itP - the iterative context
.   max_k - the number of directions
@*/
void ITGMRESSetDirections( itP, max_k )
ITCntx *itP;
int    max_k;
{
ITGMRESCntx *gmresP = (ITGMRESCntx *)itP->MethodPrivate;
if (itP->method != ITGMRES) return;
gmresP->max_k = max_k;
}

/*@
  ITGMRESDefaultConverged - Default code to determine convergence 
  for GMRES.

  Input Parameters:
. itP   - iterative context
. usrP  - user's context
. n     - iteration number
. rnorm - 2-norm residual value (may be estimated).

  Returns:
  1 if the iteration has converged, 0 otherwise.
 @*/
int ITGMRESDefaultConverged(itP,usrP,n,rnorm)
ITCntx *itP;
void   *usrP;
int    n;
double rnorm;
{
if ( rnorm <= itP->ttol ) return(1);
else return(0);
}

/*
   This routine allocates more work vectors, starting from VEC_VV(it).
 */
static void GMRESGetNewVectors( usrP, itP, it )
void *usrP;
ITCntx *itP;
int       it;
{
ITGMRESCntx *gmresP = (ITGMRESCntx *)itP->MethodPrivate;
int nwork = gmresP->nwork_alloc;
int k, nalloc;

nalloc = gmresP->delta_allocate;
/* Adjust the number to allocate to make sure that we don't exceed the
   number of available slots */
if (it + VEC_OFFSET + nalloc >= gmresP->vecs_allocated)
    nalloc = gmresP->vecs_allocated - it - VEC_OFFSET;
/* CHKPTR(nalloc); */
if (nalloc == 0) return;

gmresP->vv_allocated += nalloc;
gmresP->user_work[nwork] = 
    (*itP->vc->obtain_vectors)( usrP, nalloc );
gmresP->mwork_alloc[nwork] = nalloc;
for (k=0; k<nalloc; k++)
    gmresP->vecs[it+VEC_OFFSET+k] = gmresP->user_work[nwork][k];
gmresP->nwork_alloc++;
}

/*
  GMRESBuildSolution -Build the solution for GMRES 
 */
static void *GMRESBuildSolution( itP, usrP, ptr )
ITCntx *itP;
void      *usrP, *ptr;
{
ITGMRESCntx *gmresP = (ITGMRESCntx *)itP->MethodPrivate; 

if (ptr == 0) {
    /* if (!gmresP->sol_temp)  need to allocate */
    ptr = gmresP->sol_temp;
    }
if (!gmresP->nrs) {
    /* allocate the work area */
    gmresP->nrs = (double *)
	               MALLOC( (unsigned)(gmresP->max_k * sizeof(double) ) );
    }

BuildGmresSoln( usrP, gmresP->nrs, VEC_SOLN, ptr, itP, gmresP->it );
return ptr;
}
