/* @(#)cypher.c	11.1 13 May 1995 17:27:11 */
/*
 * cypher - Noll Lightning Cypher based on SHS
 *
 * This file was written by:
 *
 *	 Landon Curt Noll  (chongo@toad.com)	chongo <was here> /\../\
 *
 * This code has been placed in the public domain.  Please do not
 * copyright this code.
 *
 * LANDON CURT NOLL DISCLAIMS ALL WARRANTIES WITH  REGARD  TO
 * THIS  SOFTWARE,  INCLUDING  ALL IMPLIED WARRANTIES OF MER-
 * CHANTABILITY AND FITNESS.  IN NO EVENT SHALL  LANDON  CURT
 * NOLL  BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
 * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM  LOSS  OF
 * USE,  DATA  OR  PROFITS, WHETHER IN AN ACTION OF CONTRACT,
 * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR  IN
 * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 *
 * WARNING: This code, and the algorithm that it claims to implement,
 *	    are undergoing peer review.  During peer review, both
 *	    source and algorithm may be modified extensively.
 *
 * WARNING: Certain US Regulations may restrict and/or prohibit 
 *	    the exportation of this software.  Please consult the
 *	    proper Federal authorities and/or seek legal advice
 *	    if you desire to export this software!
 *
 * See krypt.c for version and modification history.
 */

#include <stdio.h>
#include <string.h>
#include "cypher.h"
#include "align.h"
#include "endian.h"

/*
 * nlc_what - identification string for this file
 *
 * The string starts with the same "@(#)" marker that begins the version
 * comment on the first line of this file.
 * NOTE(review): presumably the source control system replaces or extends
 * this string when the file is checked in -- confirm.
 */
char *nlc_what="@(#)";	/* #(@) if checked in */

/*
 * The SHS f()-functions.  The f1 and f3 functions can be optimized
 * to save one boolean operation each - thanks to Rich Schroeppel,
 * rcs@cs.arizona.edu for discovering this.
 *
 * f1: ((x&y) | (~x&z)) == (z ^ (x&(y^z)))
 * f3: ((x&y) | (x&z) | (y&z)) == ((x&y) | (z&(x|y)))
 *
 * Every argument is parenthesized inside the expansion, so an argument
 * that is itself an expression (such as a|b) is always treated as a
 * single operand.
 *
 * NOTE: f1 uses z twice and f3 uses x and y twice, so the arguments
 *	 must not have side effects.
 */
#define f1(x,y,z)       ((z) ^ ((x)&((y)^(z))))		/* Rounds  0-19 */
#define f2(x,y,z)       ((x)^(y)^(z))			/* Rounds 20-39 */
#define f3(x,y,z)       (((x)&(y)) | ((z)&((x)|(y))))	/* Rounds 40-59 */
#define f4(x,y,z)       ((x)^(y)^(z))			/* Rounds 60-79 */

/*
 * The SHS Mysterious Constants
 *
 * Each constant is cast to ULONG, the type of the hash words that it is
 * added to.  A bare 0x...L constant is a long or an unsigned long
 * depending on its value and on the width of a long; the cast gives all
 * four constants the same type and keeps the round arithmetic unsigned.
 */
#define K1      ((ULONG)0x5A827999L)	/* Rounds  0-19 */
#define K2      ((ULONG)0x6ED9EBA1L)	/* Rounds 20-39 */
#define K3      ((ULONG)0x8F1BBCDCL)	/* Rounds 40-59 */
#define K4      ((ULONG)0xCA62C1D6L)	/* Rounds 60-79 */

/*
 * SHS initial values
 *
 * These are the starting values of the five digest words.  Each one is
 * cast to ULONG, the type of the digest words, so that all five have the
 * same unsigned type no matter how wide a long is.
 */
#define h0init  ((ULONG)0x67452301L)
#define h1init  ((ULONG)0xEFCDAB89L)
#define h2init  ((ULONG)0x98BADCFEL)
#define h3init  ((ULONG)0x10325476L)
#define h4init  ((ULONG)0xC3D2E1F0L)

/*
 * 32-bit rotate left, built from a left shift or'ed with a right shift.
 *
 * NOTE: the result is a true rotate only when word has an unsigned type
 *	 that is exactly 32 bits wide and 0 < bits < 32.  The word
 *	 argument is evaluated twice.
 */
#define LEFT_ROT(word,bits)  (((word)<<(bits)) | ((word)>>(32-(bits))))

/*
 * The initial expanding function.  The hash function is defined over an
 * 80-word expanded input array W, where the first 16 are copies of the input
 * data, and the remaining 64 are defined by
 *
 *      W[i] = W[i-16] ^ W[i-14] ^ W[i-8] ^ W[i-3]
 *
 * This implementation generates these values on the fly in a circular
 * buffer - thanks to Colin Plumb (colin@nyx10.cs.du.edu) for this
 * optimization.
 *
 * The slot W[i&15] holds W[i-16] on entry and is replaced by W[i]; the
 * new value is also the value of the expression.  Both W and i are
 * parenthesized in the expansion so that expression arguments (such as
 * 1<<4 for i) index the intended slots.  The arguments are used several
 * times and so must not have side effects.
 */
#define exor(W,i) \
    ((W)[(i)&15] ^= ((W)[((i)-14)&15] ^ (W)[((i)-8)&15] ^ (W)[((i)-3)&15]))

/*
 * The prototype SHS sub-round.  The fundamental sub-round is:
 *
 *      a' = e + LEFT_ROT(a,5) + f(b,c,d) + k + data;
 *      b' = a;
 *      c' = LEFT_ROT(b,30);
 *      d' = c;
 *      e' = d;
 *
 * but this is implemented by unrolling the loop 5 times and renaming the
 * variables ( e, a, b, c, d ) = ( a', b', c', d', e' ) each iteration.
 * This code is then replicated 20 times for each of the 4 functions, using
 * the next 20 values from the W[] array each time.
 *
 * The a, b, c, d and e arguments must be variables: e and b are assigned
 * to.  The f argument is the name of one of the f()-function macros.  The
 * k and data arguments are parenthesized in the expansion so that an
 * expression (such as x ^ y) is added in as a single term.  The data
 * argument is evaluated exactly once, before b is rotated.
 */
#define subRound(a, b, c, d, e, f, k, data) \
    ((e) += LEFT_ROT((a),5) + f((b),(c),(d)) + (k) + (data), \
     (b) = LEFT_ROT((b),30))

/*
 * mod_f{1,2,3,4} - convert 4 digest words into a ring word via modulus
 *
 * input:
 *	dig	pointer to at least 4 (digest) words
 *	a	dig[a] is the 1st arg
 *	b	dig[b] is the 2nd arg
 *	c	dig[c] is the 3rd arg
 *	d	dig[d] is the 4th arg
 *
 * returns:
 *	a ring word
 *
 * These functions use the SHS f()-functions above to convert 3 words into
 * a single word value.  We reduce from 4 to 3 words by computing the
 * modulus of 2 of the 4 input values.  To prevent a modulus by 0 error,
 * we set the low order bit of the modulus.
 *
 * The dig argument is parenthesized in the expansion so that a pointer
 * expression (such as base+1) may be given.  The arguments are used more
 * than once and so must not have side effects.
 */
#define mod_f1(dig, a, b, c, d) \
    f1((((dig)[a])%(((dig)[b])|0x1)), ((dig)[c]), ((dig)[d]))
#define mod_f2(dig, a, b, c, d) \
    f2(((dig)[a]), (((dig)[b])%(((dig)[c])|0x1)), ((dig)[d]))
#define mod_f3(dig, a, b, c, d) \
    f3(((dig)[a]), ((dig)[b]), (((dig)[c])%(((dig)[d])|0x1)))
#define mod_f4(dig, a, b, c, d) \
    f4((((dig)[d])%(((dig)[a])|0x1)), ((dig)[b]), ((dig)[c]))

/*
 * mul_f{1,2,3,4} - convert 4 digest words into a ring word via multiplication
 *
 * input:
 *	dig	pointer to at least 4 (digest) words
 *	a	dig[a] is the 1st arg
 *	b	dig[b] is the 2nd arg
 *	c	dig[c] is the 3rd arg
 *	d	dig[d] is the 4th arg
 *
 * returns:
 *	a ring word
 *
 * These functions use the SHS f()-functions above to convert 3 words into
 * a single word value.  We reduce from 4 to 3 words by computing the
 * product of 2 of the 4 input values.  To avoid products that are a
 * multiple of a significant power of 2 (i.e., low order bits of the
 * product are all 0's), we force the multiplier to be odd by setting
 * its low order bit.
 *
 * The dig argument is parenthesized in the expansion so that a pointer
 * expression (such as base+1) may be given.  The arguments are used more
 * than once and so must not have side effects.
 */
#define mul_f1(dig, a, b, c, d) \
    f1((((dig)[a])*(((dig)[b])|0x1)), ((dig)[c]), ((dig)[d]))
#define mul_f2(dig, a, b, c, d) \
    f2(((dig)[a]), (((dig)[b])*(((dig)[c])|0x1)), ((dig)[d]))
#define mul_f3(dig, a, b, c, d) \
    f3(((dig)[a]), ((dig)[b]), (((dig)[c])*(((dig)[d])|0x1)))
#define mul_f4(dig, a, b, c, d) \
    f4((((dig)[d])*(((dig)[a])|0x1)), ((dig)[b]), ((dig)[c]))

/*
 * external variables
 */
extern char *program;			/* our name */

/*
 * file scope work areas
 */
static ULONG zero[NLC_CHUNKWORDS];	/* chunk of zeros, used as padding */
static ULONG in[NLC_CHUNKWORDS];	/* scratch copy of an input chunk */

/*
 * forward declarations of the static functions defined below
 */
static void nlcTwist P((NLC_INFO*, ULONG*, ULONG*));
static void nlcProcess P((NLC_INFO*, ULONG*, ULONG*));
static void nlcHash P((ULONG*, ULONG*));


/*
 * nlcHash - perform the SHS hash
 *
 * input:
 *	digest		input hash state (words digest[0] thru digest[4])
 *	W		input chunk of data to hash (words W[0] thru W[15])
 *
 * changes:
 *	digest		output state
 *	W		the input chunk is scrambled in a non-useful way
 *
 * NOTE: This code is not a full SHS hash.  It is the internal transform
 *	 used to hash individual blocks (nlcHash).  
 *
 *	 A full SHS hash would chain calls together, making the output 
 *	 state the input state of the next block.  Finally a full SHS 
 *	 hash would append a 0x80 byte and place the 64 bit count on the 
 *	 end of the last chunk.
 *
 * NOTE(review): LEFT_ROT() rotates within 32 bits, so this code presumably
 *	 assumes that a ULONG is exactly 32 bits wide -- confirm.
 */
static void
nlcHash(digest, W)
    ULONG *digest;
    ULONG *W;
{
    ULONG A, B, C, D, E;	/* Local vars */

    /* Set up first buffer and local data buffer */
    A = digest[0];
    B = digest[1];
    C = digest[2];
    D = digest[3];
    E = digest[4];

    /*
     * Heavy mangling, in 4 sub-rounds of 20 iterations each.
     *
     * Instead of moving values between the 5 variables after each
     * iteration, the order of the variables given to subRound() is
     * rotated by one place per line and repeats every 5 lines.
     *
     * The first 16 iterations use the input words directly.  From then
     * on exor() produces the next expanded word by overwriting one of
     * the 16 words of W, which is why W is scrambled on return.
     */
    subRound(A, B, C, D, E, f1, K1, W[ 0]);
    subRound(E, A, B, C, D, f1, K1, W[ 1]);
    subRound(D, E, A, B, C, f1, K1, W[ 2]);
    subRound(C, D, E, A, B, f1, K1, W[ 3]);
    subRound(B, C, D, E, A, f1, K1, W[ 4]);
    subRound(A, B, C, D, E, f1, K1, W[ 5]);
    subRound(E, A, B, C, D, f1, K1, W[ 6]);
    subRound(D, E, A, B, C, f1, K1, W[ 7]);
    subRound(C, D, E, A, B, f1, K1, W[ 8]);
    subRound(B, C, D, E, A, f1, K1, W[ 9]);
    subRound(A, B, C, D, E, f1, K1, W[10]);
    subRound(E, A, B, C, D, f1, K1, W[11]);
    subRound(D, E, A, B, C, f1, K1, W[12]);
    subRound(C, D, E, A, B, f1, K1, W[13]);
    subRound(B, C, D, E, A, f1, K1, W[14]);
    subRound(A, B, C, D, E, f1, K1, W[15]);
    subRound(E, A, B, C, D, f1, K1, exor(W,16));
    subRound(D, E, A, B, C, f1, K1, exor(W,17));
    subRound(C, D, E, A, B, f1, K1, exor(W,18));
    subRound(B, C, D, E, A, f1, K1, exor(W,19));

    subRound(A, B, C, D, E, f2, K2, exor(W,20));
    subRound(E, A, B, C, D, f2, K2, exor(W,21));
    subRound(D, E, A, B, C, f2, K2, exor(W,22));
    subRound(C, D, E, A, B, f2, K2, exor(W,23));
    subRound(B, C, D, E, A, f2, K2, exor(W,24));
    subRound(A, B, C, D, E, f2, K2, exor(W,25));
    subRound(E, A, B, C, D, f2, K2, exor(W,26));
    subRound(D, E, A, B, C, f2, K2, exor(W,27));
    subRound(C, D, E, A, B, f2, K2, exor(W,28));
    subRound(B, C, D, E, A, f2, K2, exor(W,29));
    subRound(A, B, C, D, E, f2, K2, exor(W,30));
    subRound(E, A, B, C, D, f2, K2, exor(W,31));
    subRound(D, E, A, B, C, f2, K2, exor(W,32));
    subRound(C, D, E, A, B, f2, K2, exor(W,33));
    subRound(B, C, D, E, A, f2, K2, exor(W,34));
    subRound(A, B, C, D, E, f2, K2, exor(W,35));
    subRound(E, A, B, C, D, f2, K2, exor(W,36));
    subRound(D, E, A, B, C, f2, K2, exor(W,37));
    subRound(C, D, E, A, B, f2, K2, exor(W,38));
    subRound(B, C, D, E, A, f2, K2, exor(W,39));

    subRound(A, B, C, D, E, f3, K3, exor(W,40));
    subRound(E, A, B, C, D, f3, K3, exor(W,41));
    subRound(D, E, A, B, C, f3, K3, exor(W,42));
    subRound(C, D, E, A, B, f3, K3, exor(W,43));
    subRound(B, C, D, E, A, f3, K3, exor(W,44));
    subRound(A, B, C, D, E, f3, K3, exor(W,45));
    subRound(E, A, B, C, D, f3, K3, exor(W,46));
    subRound(D, E, A, B, C, f3, K3, exor(W,47));
    subRound(C, D, E, A, B, f3, K3, exor(W,48));
    subRound(B, C, D, E, A, f3, K3, exor(W,49));
    subRound(A, B, C, D, E, f3, K3, exor(W,50));
    subRound(E, A, B, C, D, f3, K3, exor(W,51));
    subRound(D, E, A, B, C, f3, K3, exor(W,52));
    subRound(C, D, E, A, B, f3, K3, exor(W,53));
    subRound(B, C, D, E, A, f3, K3, exor(W,54));
    subRound(A, B, C, D, E, f3, K3, exor(W,55));
    subRound(E, A, B, C, D, f3, K3, exor(W,56));
    subRound(D, E, A, B, C, f3, K3, exor(W,57));
    subRound(C, D, E, A, B, f3, K3, exor(W,58));
    subRound(B, C, D, E, A, f3, K3, exor(W,59));

    subRound(A, B, C, D, E, f4, K4, exor(W,60));
    subRound(E, A, B, C, D, f4, K4, exor(W,61));
    subRound(D, E, A, B, C, f4, K4, exor(W,62));
    subRound(C, D, E, A, B, f4, K4, exor(W,63));
    subRound(B, C, D, E, A, f4, K4, exor(W,64));
    subRound(A, B, C, D, E, f4, K4, exor(W,65));
    subRound(E, A, B, C, D, f4, K4, exor(W,66));
    subRound(D, E, A, B, C, f4, K4, exor(W,67));
    subRound(C, D, E, A, B, f4, K4, exor(W,68));
    subRound(B, C, D, E, A, f4, K4, exor(W,69));
    subRound(A, B, C, D, E, f4, K4, exor(W,70));
    subRound(E, A, B, C, D, f4, K4, exor(W,71));
    subRound(D, E, A, B, C, f4, K4, exor(W,72));
    subRound(C, D, E, A, B, f4, K4, exor(W,73));
    subRound(B, C, D, E, A, f4, K4, exor(W,74));
    subRound(A, B, C, D, E, f4, K4, exor(W,75));
    subRound(E, A, B, C, D, f4, K4, exor(W,76));
    subRound(D, E, A, B, C, f4, K4, exor(W,77));
    subRound(C, D, E, A, B, f4, K4, exor(W,78));
    subRound(B, C, D, E, A, f4, K4, exor(W,79));

    /* Build message digest: add the mangled values into the old state */
    digest[0] += A;
    digest[1] += B;
    digest[2] += C;
    digest[3] += D;
    digest[4] += E;
}


/*
 * nlcCypher - feed any number of bytes through the NLC algorithm
 *
 * input:
 *	dig		NLC state (may already hold a partial data chunk)
 *	buffer		input data to append
 *	count		byte length of buffer
 *
 * changes:
 *	dig		new NLC state
 *
 * The count need not be a multiple of NLC_CHUNKSIZE.  Bytes that do not
 * complete a chunk are saved in dig->data, with their number kept in
 * dig->datalen, and are joined with the data of the next call.
 *
 * This function itself only reads from buffer.  A chunk that is completed
 * from saved bytes is given to nlcProcess(), which may write data
 * (plaintext or cyphertext) to dig->stream.  Whole chunks that remain in
 * buffer are handed to nlcFullCypher().
 */
void
nlcCypher(dig, buffer, count)
    NLC_INFO *dig;
    BYTE *buffer;
    ULONG count;
{
    ULONG held = dig->datalen;	/* bytes saved by previous calls */
    ULONG room;			/* bytes needed to complete the saved chunk */

    /*
     * First deal with a partial chunk left over from a previous call
     */
    if (held > 0) {

	room = NLC_CHUNKSIZE - held;

	/* too little new data to complete the chunk, just save it */
	if (count < room) {
	    memcpy((char *)dig->data+held, (char *)buffer, count);
	    dig->datalen = held+count;
	    return;
	}

	/* complete the saved chunk and process it */
	memcpy((char *)dig->data+held, (char *)buffer, room);
	if (dig->output_sex != BYTE_ORDER) {
	    SWAP_BYTE_SEX(in, dig->data);
	    nlcProcess(dig, in, dig->out);
	} else {
	    nlcProcess(dig, dig->data, dig->out);
	}
	dig->datalen = 0;
	buffer += room;
	count -= room;
    }

    /*
     * Next pass along all of the whole chunks and step over them
     */
    if (count >= NLC_CHUNKSIZE) {
	nlcFullCypher(dig, buffer, count);
	buffer += count - (count % NLC_CHUNKSIZE);
	count %= NLC_CHUNKSIZE;
    }

    /*
     * Last, save whatever is left (less than a chunk) for the next call
     */
    if (count > 0) {
	memcpy((char *)dig->data, (char *)buffer, count);
    }
    dig->datalen = count;
}


/*
 * nlcFullCypher - encrypt or decrypt whole chunks via the NLC algorithm
 *
 * input:
 *	dig		NLC state (not holding a partial data chunk)
 *	buffer		input data to process
 *	count		byte length of buffer (must be a NLC_CHUNKSIZE multiple)
 *
 * changes:
 *	dig		new NLC state
 *
 * This function will not modify buffer.  Each chunk is first copied into
 * the scratch chunk `in', because nlcProcess() passes its input chunk to
 * nlcHash() and nlcHash() scrambles the chunk that it is given.  Working
 * on the copy also means that buffer does not need to be aligned for
 * ULONG access, so no special MUST_ALIGN case is required.
 *
 * This function calls nlcProcess() which in turn may write data
 * (plaintext or cyphertext) to dig->stream.
 *
 * This function assumes that count is a multiple of NLC_CHUNKSIZE and that
 * no partial chunk is left over from a previous call.  Any bytes beyond
 * the last whole chunk are not processed.
 */
void
nlcFullCypher(dig, buffer, count)
    NLC_INFO *dig;
    BYTE *buffer;
    ULONG count;
{
    /*
     * Process data in NLC_CHUNKSIZE chunks
     */
    while (count >= NLC_CHUNKSIZE) {

	/* work on a private, aligned copy of the chunk */
	memcpy((char *)in, (char *)buffer, NLC_CHUNKSIZE);

	/* convert the copy to our byte order when the modes differ */
	if (dig->output_sex != BYTE_ORDER) {
	    SWAP_BYTE_SEX(in, in);
	}
	nlcProcess(dig, in, dig->out);

	buffer += NLC_CHUNKSIZE;
	count -= NLC_CHUNKSIZE;
    }
}


/*
 * nlcInitCypher - initialize the NLC state
 *
 * input:
 *	dig		NLC state to initialize
 *	dcrypt		0 => perform encryption, 1 => perform decryption
 *	output_sex	process in LITTLE_ENDIAN or BIG_ENDIAN mode
 *	out		stream on which output chunks are to be written
 *
 * changes:
 *	dig		initialized NLC state
 *
 * The digest is set to the SHS initial values, the data, ring and digest
 * sum areas are zeroed, no data is held and output is turned off.
 */
void
nlcInitCypher(dig, dcrypt, output_sex, out)
    NLC_INFO *dig;
    int dcrypt;			/* 0 => encrypt, 1 => decrypt */
    int output_sex;		/* LITTLE_ENDIAN or BIG_ENDIAN */
    FILE *out;			/* where to write output */
{
    /* start with zeroed work areas and no saved data */
    memset((char *)dig->sum_dig, 0, sizeof(dig->sum_dig));
    memset((char *)dig->ring, 0, sizeof(dig->ring));
    memset((char *)dig->data, 0, sizeof(dig->data));
    dig->datalen = 0;

    /* load the SHS initial values into the digest */
    dig->digest[4] = h4init;
    dig->digest[3] = h3init;
    dig->digest[2] = h2init;
    dig->digest[1] = h1init;
    dig->digest[0] = h0init;

    /* remember the stream, byte sex and cypher direction */
    dig->stream = out;
    dig->output_sex = output_sex;
    dig->dcrypt = dcrypt;

    /* keep output turned off to prevent key disclosure */
    dig->output_ok = 0;
}


/*
 * nlcKey - process our key
 *
 * input:
 *	dig		initialized NLC state by nlcInitCypher()
 *	key_str		pointer to the NLC key (not a string, may contain \0's),
 *			or NULL if there is no key data to process
 *	key_len		byte length of key_str
 *
 * changes:
 *	dig		post key NLC state
 *
 * This function must be called after nlcInitCypher().
 * This call must be made before any real data is digested.
 *
 * The key is always processed in encrypt mode, whatever the cypher
 * direction of dig is; the direction is restored before returning.  A
 * final partial chunk of key data is padded with zeros and then the
 * state is twisted with a chunk of zeros.
 *
 * Output is turned on by this function only after the key has been
 * processed.  It relies on output still being off on entry (as
 * nlcInitCypher() leaves it) while the key chunks are processed.
 */
void
nlcKey(dig, key_str, key_len)
    NLC_INFO *dig;			/* current digest state */
    BYTE *key_str;			/* key or NULL */
    UINT key_len;			/* length of key_str */
{
    ULONG tmp[NLC_CHUNKWORDS];		/* temp data chunk */
    int orig_dcrypt;			/* cypher direction on entry */

    /*
     * always process the key in encrypt mode
     */
    orig_dcrypt = dig->dcrypt;
    dig->dcrypt = 0;

    /*
     * process the key
     */
    if (key_str != NULL) {
	nlcCypher(dig, key_str, key_len);
    }

    /* pad the remainder of the block with zeros */
    if (dig->datalen > 0) {
	nlcCypher(dig, (BYTE *)zero, NLC_CHUNKSIZE - dig->datalen);
    }

    /* twist the cypher state a few turns with a zero chunk */
    memset((char *)tmp, 0, NLC_CHUNKSIZE);
    nlcTwist(dig, tmp, dig->out);

    /* clear the byte count */
    dig->datalen = 0;

    /* note that we will output the next block */
    dig->output_ok = 1;

    /*
     * restore original cypher direction
     */
    dig->dcrypt = orig_dcrypt;
}


/*
 * nlcTwist - twist the NLC state machine a few turns
 *
 * input:
 *	dig		NLC state
 *	inbuf		a chunk (NLC_CHUNKSIZE bytes) of input data
 *	outbuf		where to place the resulting data chunk
 *
 * changes:
 *	dig		twisted NLC state
 *	inbuf		scrambled, do not use its contents after this call
 *	outbuf		result of chained feedback on inbuf
 *
 * This function will twist the NLC state machine by performing 5
 * back to back chained nlcProcess() calls, where the output chunk of
 * one call is the input chunk of the next call.  Output is disabled
 * during this call and the previous output mode is restored on return.
 *
 * On return outbuf contains the last chunk that would have been written,
 * and the NLC state is the state after what would have been the final
 * write.
 *
 * The inbuf chunk is modified: nlcProcess() hands its input chunk to
 * nlcHash(), which scrambles it.  Callers must therefore pass a chunk
 * that they can afford to lose.
 */
static void
nlcTwist(dig, inbuf, outbuf)
    NLC_INFO *dig;		/* current digest state */
    ULONG *inbuf;		/* input buffer (text or cypher) */
    ULONG *outbuf;		/* output buffer (cypher or text) */
{
    ULONG tmp[NLC_CHUNKWORDS];		/* tmp data chunk */
    int orig_output_ok;			/* original output state */

    /*
     * disable output
     */
    orig_output_ok = dig->output_ok;	/* preserve existing state */
    dig->output_ok = 0;

    /*
     * twist the cypher a few times, alternating between outbuf and tmp
     * so that the 5th and final result is left in outbuf
     */
    nlcProcess(dig, inbuf, outbuf);
    nlcProcess(dig, outbuf, tmp);
    nlcProcess(dig, tmp, outbuf);
    nlcProcess(dig, outbuf, tmp);
    nlcProcess(dig, tmp, outbuf);

    /*
     * restore previous output state
     */
    dig->output_ok = orig_output_ok;
}


/*
 * nlcProcess - process a chunk of data via the NLC algorithm
 *
 * input:
 *	dig		NLC state
 *	inbuf		a chunk (NLC_CHUNKSIZE bytes) to input data
 *	output		where to place the resulting data chunk
 *
 * changes:
 *	dig		twisted NLC state
 *	output		result of chained feedback on inbuf
 *
 * If dig->dcrypt is 0, then inbuf is treated as a chunk of plaintext
 * and output is the resulting cyphertext.  If dig->dcrypt is 1, then 
 * inbuf is treated as a chunk of cyphertext and output is the resulting 
 * plaintext.
 *
 * If dig->output_ok is non-zero, then the output chunk will be written
 * to dig->stream.
 */
static void
nlcProcess(dig, inbuf, outbuf)
    NLC_INFO *dig;		/* current digest state */
    ULONG *inbuf;		/* input buffer (text or cypher) */
    ULONG *outbuf;		/* output buffer (cypher or text) */
{
    ULONG tmp;				/* swap holder */
    ULONG *digest = dig->digest;	/* current digest state */
    ULONG *ring = dig->ring;		/* current ring state */
    ULONG *sum_digest = dig->sum_dig;	/* sum of previous digest states */
    ULONG xor_digest[NLC_DIGESTWORDS];	/* digest which expands into ring */
    int i;
    int j;
    int k;

    /*
     * We will pre-process the digest from the previous nlcHash()
     * of the previous chunk.
     */
    for (i=0; i < NLC_DIGESTWORDS; ++i) {
	sum_digest[i] = (digest[i] += sum_digest[i]);
    }

    /*
     * reverse feedback cypher
     */
    if (dig->dcrypt) {

	/* xor buffer */
	for (i=0; i < NLC_CHUNKWORDS; ++i) {
	    outbuf[i] = inbuf[i] ^ ring[NLC_CHUNKWORDS+i];
	}

	/* reverse-shuffle bytes */
	if (dig->output_sex == BYTE_ORDER) {
	    for (i=NLC_CHUNKSIZE-1; i >= 0; --i) {
		j = (((BYTE *)ring)[i] & (NLC_CHUNKSIZE-1));
		tmp = ((BYTE *)outbuf)[i];
		((BYTE *)outbuf)[i] = ((BYTE *)outbuf)[j];
		((BYTE *)outbuf)[j] = tmp;
	    }
	} else {
	    for (i=NLC_CHUNKSIZE-1; i >= 0; --i) {
		k = (i & ~0x3) | (3 - (i & 0x3));
		j = (((BYTE *)ring)[k] & (NLC_CHUNKSIZE-1));
		j = (j & ~0x3) | (3 - (j & 0x3));
		tmp = ((BYTE *)outbuf)[k];
		((BYTE *)outbuf)[k] = ((BYTE *)outbuf)[j];
		((BYTE *)outbuf)[j] = tmp;
	    }
	}

	/* transform a copy of our reconstituted text */
	memcpy((char *)inbuf, (char *)outbuf, NLC_CHUNKSIZE);
	nlcHash(digest, inbuf);

    /*
     * forward feedback cypher
     */
    } else {

        /* obtain a copy of input data that we can modify */
	memcpy((char *)outbuf, (char *)inbuf, NLC_CHUNKSIZE);

        /* shuffle bytes */
	if (dig->output_sex == BYTE_ORDER) {
	    for (i=0; i < NLC_CHUNKSIZE; ++i) {
		j = (((BYTE *)ring)[i] & (NLC_CHUNKSIZE-1));
		tmp = ((BYTE *)outbuf)[i];
		((BYTE *)outbuf)[i] = ((BYTE *)outbuf)[j];
		((BYTE *)outbuf)[j] = tmp;
	    }
	} else {
	    for (i=0; i < NLC_CHUNKSIZE; ++i) {
		k = (i & ~0x3) | (3 - (i & 0x3));
		j = (((BYTE *)ring)[k] & (NLC_CHUNKSIZE-1));
		j = (j & ~0x3) | (3 - (j & 0x3));
		tmp = ((BYTE *)outbuf)[k];
		((BYTE *)outbuf)[k] = ((BYTE *)outbuf)[j];
		((BYTE *)outbuf)[j] = tmp;
	    }
	}

	/* xor buffer */
	for (i=0; i < NLC_CHUNKWORDS; ++i) {
	    outbuf[i] ^= ring[NLC_CHUNKWORDS+i];
	}

	/* transform our original text */
	nlcHash(digest, inbuf);
    }

    /*
     * xor the new digest with our previous sum
     */
    for (i=0; i < NLC_DIGESTWORDS; ++i) {
	sum_digest[i] ^= digest[i];
    }

    /*
     * expand the sum digest for next round
     */
    ring[0] = mod_f1(sum_digest, 4, 3, 2, 1);
    ring[1] = mul_f1(sum_digest, 2, 1, 0, 4);
    ring[2] = mod_f2(sum_digest, 0, 4, 3, 2);
    ring[3] = mul_f2(sum_digest, 3, 2, 1, 0);
    ring[4] = mod_f3(sum_digest, 1, 0, 4, 3);
    ring[5] = mul_f3(sum_digest, 4, 3, 2, 1);
    ring[6] = mod_f4(sum_digest, 2, 1, 0, 4);
    ring[7] = mul_f4(sum_digest, 0, 4, 3, 2);
    for (i=8; i < 2*NLC_CHUNKWORDS; ++i) {
        ring[i] = ring[i-8] ^ ring[i-7] ^ ring[i-6];
    }

    /*
     * output our result, if needed
     */
    if (dig->output_ok) {
	if (dig->output_sex != BYTE_ORDER) {
	    SWAP_BYTE_SEX(outbuf, outbuf);
	}
	fwrite((char *)outbuf, NLC_CHUNKSIZE, 1, dig->stream);
    }
    return;
}


/*
 * nlcFinalCypher - perform final cypher processing
 *
 * input:
 *	dig		NLC state (with a possible partial input chunk)
 *
 * changes:
 *	dig		the final NLC state
 *
 * This function will deal with any partial chunk of data that has
 * not yet been processed.  When decrypting, a partial chunk is filled
 * out with zeros.  When encrypting, a partial chunk is filled out with
 * bytes produced by twisting an independent copy of the state.  In both
 * cases the completed chunk is then processed by way of nlcCypher().
 *
 * Output is turned off on return.
 */
void
nlcFinalCypher(dig)
    NLC_INFO *dig;		/* current digest */
{
    ULONG tmp[NLC_CHUNKWORDS];		/* tmp data chunk */
    NLC_INFO indep;			/* independent state machine */

    /*
     * reverse cypher
     */
    if (dig->dcrypt) {

	/*
	 * zero fill the partial chunk in case of missing input data
	 */
	if (dig->datalen != 0) {
	    nlcCypher(dig, (BYTE *)zero, NLC_CHUNKSIZE-dig->datalen);
	}

    /*
     * forward cypher
     */
    } else {

	/*
	 * If we are left with a final partial data chunk, we form
	 * an independent cypher state machine identical to the
	 * current state except that our partial data chunk is
	 * zero filled (and thus is a full data chunk).
	 *
	 * Next we twist the independent cypher state machine
	 * and load our original final partial data chunk
	 * with the final output state of the independent
	 * cypher state machine.
	 *
	 * Finally we process this, now full chunk.
	 */
        if (dig->datalen > 0) {

	    /* form an independent zero filled copy of the partial chunk */
	    memcpy((char *)tmp, (char *)dig->data, dig->datalen);
	    memset(((char *)tmp)+dig->datalen, 0, NLC_CHUNKSIZE-dig->datalen);

	    /* form an independent cypher state machine with no data */
	    indep = *dig;
	    indep.datalen = 0;

	    /* twist the independent cypher state machine */
	    nlcTwist(&indep, tmp, indep.out);

	    /* fill the real chunk with the expanded data and cypher it */
#if BYTE_ORDER == BIG_ENDIAN
	    /* force the fill data to be little endian */
	    SWAP_BYTE_SEX(indep.out, indep.out);
#endif
	    /* the fill bytes come from the same offsets as the missing bytes */
	    nlcCypher(dig, (BYTE *)(indep.out)+dig->datalen,
	      NLC_CHUNKSIZE-dig->datalen);
	}

    }

    /*
     * all done with output
     */
    dig->output_ok = 0;
}
