/* Jan 2006. optimisation attempts for string arithmetic. use macros.
   use malloc directly in places. take stuff out of loops.
   March 2006   add more functions for large integers.
*/

#include <fdata.h>
#include <na.h>
#include <dt.h>

typedef struct SDESC US;

#define ss_free(X)      free((X)->ADDR)
#define ss_clear(X)     memset((X)->ADDR,0,(X)->SDLEN);
#define ss_rdrop(D,S,N) {(D)->ADDR=(S)->ADDR;(D)->SDLEN=(S)->SDLEN-(N);}
#define ss_take(D,S,N)  {(D)->ADDR=(S)->ADDR;(D)->SDLEN=N;}

#define ss_slice_obj(D,S)  {(D)->ADDR=TEXT_PTR((S));(D)->SDLEN=MSIZE((S)->ADDR);}
#define ss_eat1(D)         {((D)->ADDR)++;((D)->SDLEN)--;}
#define ss_is_scalar(S,VAL)     ((S)->SDLEN==1 && *(S)->ADDR==VAL)

#define us_strip_bits(S, T, N) \
    { for (T=(S)->ADDR, N=(S)->SDLEN; 0<N--;) *T++ &= 0X0F; }

#define us_ascii_bits(S, T, N) \
    { for (T=(S)->ADDR, N=(S)->SDLEN; 0<N--;) *T++ |= 0X30; }

#define ds_plus(D,L,H)  ds_add('+',D,L,H)
#define ds_minus(D,L,H) ds_add('-',D,L,H)

/* general function for add, subtract. Destroys arguements */
/*
  To perform the '+' and '-' operations on large integers A, B there
are three logical conditions on the operands A,B. These are
A<0, B<0, and  |A|>|B|. Here |A| is the absolute value of A.

  If H = Max(|A|,|B|) and L = Min {|A|,|B|) then it is possible
to tabulate the results in terms of H+L, H-L and an appropriate sign.

    A+B         Result   A-B
    A<0 B<0 |A|>|B|      A<0 B<0 |A|>|B|
    0   0   0   H+L,+    0   0   0   H-L,-
    0   0   1   H+L,+    0   0   1   H-L,+
    0   1   0   H-L,-    0   1   0   H+L,+
    0   1   1   H-L,+    0   1   1   H+L,+
    1   0   0   H-L,+    1   0   0   H+L,-
    1   0   1   H-L,-    1   0   1   H+L,+
    1   1   0   H+L,-    1   1   0   H-L,+
    1   1   1   H+L,-    1   1   1   H-L,-

    These numbers may be encoded (base 2) to give lists:
    f_dt = ++----++ --++++--
    s_dt = ++-++--- -+++-++-
                                                                  */



ds_add(int op, MATRIX dst, MATRIX lo, MATRIX hi)
{
char   *f_dt = "++----++--++++--";
char   *s_dt = "++-++----+++-++-";
US      x, y, z, *zmax, *zmin;
BYTE   *s, *t;
int     fc, bb, cc, n, qadd, qs;

ss_slice_obj(&x, lo);
ss_slice_obj(&y, hi);
if (x.SDLEN == 0 || y.SDLEN == 0)
    return mkobj(dst, TYPT, 0, 0);

if (op == '-') fc = 8;
else if (op == '+') fc = 0;
else return ERROR;

if (*x.ADDR == '-') {    /* sign of LO */
    fc += 4;
    ss_eat1 (&x);
    }

if (*y.ADDR == '-') {    /* sign of HI */
    fc += 2;
    ss_eat1 (&y);
    }

if (ss_comp(&x, &y)) {  /* test |LO|>|HI| */
    fc += 1;
    zmax = &x;
    zmin = &y;
    }
else {                  /* |LO| <= |HI| */
    zmax = &y;
    zmin = &x;
    }

qadd = ('+' == f_dt[fc]);       /* read dispatch tables */
qs   = ('-' == s_dt[fc]);

if (qadd) {   /* add slice */
    cc = ss_add_to (&zmax, &zmin, 0);
    n = zmax->SDLEN + (cc>0) + qs;
    mkobj(dst, TYPT, n, n);
    t = TEXT_PTR(dst);
    if (qs) *t++ = '-';
    if (cc) *t++ = (0X30 | cc);
    for (s = zmax->ADDR, n = zmax->SDLEN; 0<n--; s++, t++) *t = 0X30 | *s;
    }
else {          /* subtract slice */
    bb = ss_sub_from (&zmax, &zmin, 0);
    if (bb) return ERROR;       /* this should not happen */
    ss_dropz(&z, zmax);
    if (z.SDLEN == 1 && 0 == *z.ADDR)
        return mk_ck(dst, 0);
    n = z.SDLEN + qs;
    mkobj(dst, TYPT, n, n);
    t = TEXT_PTR(dst);
    if (qs) *t++ = '-';
    for (s = z.ADDR, n = z.SDLEN; 0<n--; s++, t++) *t = 0X30 | *s;
    }
return OK;
}

/*      z<-z+x*10^offset */

int     ss_add_to(US *z, US *x, int offset)
{
int     a, cc, j;
BYTE   *s, *t;
if (x->SDLEN+offset > z->SDLEN) return ERROR;
t= z->ADDR + z->SDLEN - 1 - offset;
for (s = x->ADDR+x->SDLEN-1, j = cc = 0; j < z->SDLEN-offset ; j++) {
    a = cc + *t;
    if (j< x->SDLEN) a += *s--;
    cc = (a >= 10);
    if (cc) a -= 10;
    *t-- = a;
    }
return cc;
}

/*      z <- z- x/10^n        */

int     ss_sub_from(US *z, US *x, int offset)
{
int     a, bb, j;
BYTE   *s, *t;
if (x->SDLEN-offset > z->SDLEN) return ERROR;
t = z->ADDR + z->SDLEN -1;
for (s = x->ADDR+x->SDLEN-1-offset, j = bb = 0; j < z->SDLEN; j++) {
    a = *t - bb;
    if (j < x->SDLEN-offset) a -= *s--;
    bb = (a < 0);
    if (bb) a += 10;
    *t-- = a;
    }
return bb;
}

int     ss_dropz(US *z, US *x)  /* drop zeros. OK to use z=x */
{
int     i;
US      zz;
for (zz.ADDR = x->ADDR, i = 0; i < x->SDLEN; i++) {
    if (*zz.ADDR & 0X0F) break;
    else zz.ADDR++;
    }
zz.SDLEN = x->SDLEN -i;

if (zz.SDLEN == 0) {
    zz.ADDR = x->ADDR+x->SDLEN-1;
    zz.SDLEN = 1;
    }
*z = zz;
return OK;
}

mk_ck(MATRIX dst, int c)        /* one digit scalar */
{
BYTE   *t;
int     qs =(c<0);
if (mkobj(dst, TYPT, 1+qs, 1+qs)) return ERROR;
t=TEXT_PTR(dst);
if (qs) { *t++ = '-'; c = -c; }
*t = 0X30 | c;
return OK;
}

ss_strip_bits(BYTE *si, int n)  /* strip bits from ASCII digits */
{
for (; 0 < n--; si++) *si &= 0X0F;
return OK;
}

ds_trim(MATRIX dst, MATRIX src) /* delete leading zeros */
{
BYTE   *si = TEXT_PTR(src);
int     nx = MSIZE(src->ADDR);
BYTE   *di;
int     cnt, i, n;

for (cnt = 0, i= nx; 0 < i--; si++) {
    if (*si & 0X0F) break;
    else cnt++;
    }

n = nx - cnt;
if (n == 0) return mk_ck(dst, 0);

if (mkobj(dst, TYPT, n, n)) return ERROR;
for (di =TEXT_PTR(dst), si = TEXT_PTR(src) + cnt; 0 < n--; di++, si++)
    *di = *si | 0X30;

return OK;
}

ds_uminus(MATRIX dst, MATRIX src)    /* unary minus */
{
US      x;
BYTE   *s, *t;
int     n, qs;
ss_slice_obj(&x, src);
if (x.SDLEN == 0) return mkobj(dst, TYPT, 0, 0);
qs = ('-' == *x.ADDR);
if (qs) {
    ss_eat1(&x);
    return mk_sk(dst, TYPT, &x);
    }
mkobj(dst, TYPT, 1+x.SDLEN, 1+x.SDLEN);
t = TEXT_PTR(dst);
*t++ = '-';
for (n = x.SDLEN, s = x.ADDR; 0<n--; s++, t++) *t = *s | 0X30;
return OK;
}

int as_comp(MATRIX lo, MATRIX hi)       /* test lo > hi */
{
extern  int     memcmp();
int     nx = MSIZE(lo->ADDR);
int     ny = MSIZE(hi->ADDR);
BYTE   *si = TEXT_PTR(lo);
BYTE   *zi = TEXT_PTR(hi);
int     k;

if (nx > ny)      k =  1;       /* compute sign lo-hi */
else if (nx < ny) k = -1;
else {
    k = memcmp(si, zi, nx);
    if (k > 0) k = 1;
    else if (k < 0) k = -1;
    }
return k;
}

ds_mult(MATRIX dst, MATRIX lo, MATRIX hi)       /* multiply */
{
extern  BYTE* malloc();
US      x, y, z, tmp;
BYTE   *t, *xi, *yi;
int     j, cc, k, nd, n, sx;
int     n_sign = 1, minz, maxz;

ss_slice_obj(&x, lo);
ss_slice_obj(&y, hi);
if (*x.ADDR == '-') {       /* sign of lo */
    n_sign = -n_sign;
    ss_eat1(&x);
    }

if (*y.ADDR == '-') {       /* sign of hi */
    n_sign = -n_sign;
    ss_eat1(&y);
    }

if (x.SDLEN*y.SDLEN == 0) return mkobj(dst, TYPT, 0, 0);
if (ss_is_scalar(&x, '0') || ss_is_scalar(&y, '0')) return mk_ck(dst, 0);

us_strip_bits(&x, t, n);
us_strip_bits(&y, t, n);

z.SDLEN = n = x.SDLEN+y.SDLEN;
z.ADDR = (BYTE*) malloc(n);
if (z.ADDR == NULL) return ERR_MEM;
ss_clear(&z);
t  = z.ADDR + n-1;

if (x.SDLEN > y.SDLEN)  { minz = y.SDLEN; maxz = x.SDLEN;}
else                    { minz = x.SDLEN; maxz = y.SDLEN;}

/* optimise inner loop */

for (k = 1, cc = 0; k <= n-1; k++) {
    if (k < minz) nd = k;
    else if (k > maxz) nd = n-k;
    else nd = minz;
    xi = x.ADDR + x.SDLEN-1;
    if (k > y.SDLEN) xi -= (k - y.SDLEN);
    yi = y.ADDR;
    if (k < y.SDLEN) yi += (y.SDLEN - k);
    for (sx = cc, j = 0; j < nd; j++) {
        sx += (*xi--) * (*yi++);
        }
    cc = sx/10;
    *t-- = '0' + sx-10*cc;
    }
*t = '0' + cc;

x = z;          /* create object for result */
if (cc == 0) ss_eat1(&x);
n = x.SDLEN + (n_sign < 0);
if (mkobj(dst, TYPT, n, n)) return ERR_MEM;
t = TEXT_PTR(dst);
if (n_sign < 0) *t++ = '-';
memcpy(t, x.ADDR, x.SDLEN);
free(z.ADDR);           /* must do so here */
return OK;
}

/* modified March 2006 to handle negative values of LO */

ds_divmod(int op, MATRIX dst, MATRIX lo, MATRIX hi)     /* Z<-LO op HI */
{
extern  MATRIX  pm_sym;
MATRIX  sys_nlink();
MATRIX  ds_t9();
MATRIX  x9 = NULL;      /* multiplication table */
MATRIX  xr = NULL;      /* remainder */
MATRIX  xq = NULL;      /* quotient */
int     nx = MSIZE(lo->ADDR);
int     ny = MSIZE(hi->ADDR);
BYTE   *di, *si, *t9, *zq;
int     dx, i, ip, k, kz, n, nc9, nt, nq;
int     sl, sh;

if (nx == 0 || ny == 0)
    return mkobj (dst, TYPT, 0, 0);

if (op == '%') {
    xq = dst;
    xr = sys_nlink(pm_sym, "$REMAINDER", NULL, 0);
    }
else if (op == '#') {
    xq = sys_nlink(pm_sym, "$QUOTIENT", NULL, 0);
    xr = dst;
    }
else return ERROR;

sl = ('-' == *TEXT_PTR(lo));
sh = ('-' == *TEXT_PTR(hi));
if (sl) {               /* truncate sign */
    nx--;
    fx_take(dst, lo, 1, -nx);
    sa_swap(lo, dst);
    }
if (sh) {               /* truncate sign */
    if (op == '#') return ERROR;
    ny--;
    fx_take(dst, hi, 1, -ny);
    sa_swap(hi, dst);
    }

k = as_comp(lo, hi);

if (k == 0) {
    return mk_ck(xr, 0) || mk_ck(xq, (sh == sl) ? 1: -1);
    }
else if (k < 0) {
    mk_ck(xq, 0);
    if (sl) ds_minus(xr, hi, lo);
    else sasa (xr, lo);
    return OK;
    }

if (ny == 1) {          /* trivial case. divide by 0 or 1 */
    si = TEXT_PTR(hi);
    if (*si == '0') return ERROR;
    else if (*si == '1') {
        return mk_ck(xr, 0) ||
        (sl == sh) ? sasa(xq, lo) : ds_uminus(xq, lo);
        }
    else return ds_idiv(xq, lo, *si, &k, (sh != sl)) || mk_ck(xr, k);
    }

/* really difficult stuff */

ss_strip_bits(TEXT_PTR(lo), nx);
ss_strip_bits(TEXT_PTR(hi), ny);

x9  = ds_t9("$QR_T9", hi);
t9  = TEXT_PTR(x9);
nc9 = MCOLS(x9->ADDR);
nq  = nx-ny+1;

if (mkobj(xq, TYPT, nq, nq)) return ERROR;
zq = TEXT_PTR(xq);
for (i = 0; i < nq; i++) zq[i] = '0';

for (di = TEXT_PTR(lo);;) {
    si = TEXT_PTR(hi);
    dx = MINDX(lo->ADDR);
    kz = (nx - dx) - ny;
    if (kz < 0) break;          /* test hi < lo */
    else if (kz == 0 && 0 > memcmp(di + dx, si, ny)) break;
    kz -= (0 < memcmp(si, di + dx, ny));        /* decimal place */

    for (ip = 9, i = 0; i < 9; i++) {   /* get the correct digit */
        si = t9 + i*nc9;
        nt = nc9;
        if (*si == 0) {
            si++;
            nt--;
            }
        if ((nx-dx == kz+nt && 0 < memcmp(si, di + dx, nt)) ||
             nx-dx <  kz+nt) {
            ip = i;
            break;
            }
        }

    si = t9 + (ip-1)*nc9;
    nt = nc9;
    if (*si == 0) {
        si++;
        nt--;
        }
    zq[nq-(kz+1)] = '0'+ip;
    ds_decz(lo, si, nt, kz);
    }

k = MINDX(lo->ADDR);
n = nx - k;
if (n == 0)  mk_ck(xr, 0);
else {
     if (mkobj(xr, TYPT, n, n)) return ERROR;
     for (si = TEXT_PTR(xr), di += k; 0 < n--; si++, di++) *si = 0X30 | *di;
     }
sa_swap(lo, xq);
ds_trim(xq, lo);
if (sl != sh) { /* compute xr = hi -xr */
     sa_swap(lo, xr);
     ds_minus(xr, hi, lo);
     ds_uminus(lo, xq);
     sa_swap(xq, lo);
     }
return OK;
}

/* dst<- dst-(10**nz)*(s, ns) */

ds_decz(MATRIX dst, BYTE *src, int n, int nz)  /* compute in situ */
{
int     nx = MSIZE(dst->ADDR);
int     dx = MINDX(dst->ADDR);
BYTE   *si, *di;
int     bb, j, x;

nx = nx - dx;                                   /* precision of dst */
di = TEXT_PTR(dst) + dx + (nx - 1) - nz;        /* skip nz zeros */
si = src + n - 1;

for (nx -= nz, bb = 0; 0 < nx--; di--) {
    x = (*di) - bb;
    if (0 < n--) x -= (*si--);
    if (x < 0) {
        bb = 1;
        x += 10;
        }
    else bb = 0;
    *di = x;
    }

/*      count leading zeros */

for (j = 0, di = TEXT_PTR(dst), nx = MSIZE(dst->ADDR); 0 < nx--; di++) {
    if (*di) break;
    else j++;
    }

MINDX(dst->ADDR) = j;
return OK;
}

MATRIX  ds_t9 (BYTE *name, MATRIX src)  /* multiplication table */
{
extern  MATRIX  pm_sym;
MATRIX  sys_nlink();
MATRIX  p;
int     nx = MSIZE(src->ADDR);
BYTE   *si, *di, *zi;
int     cc, j, k, n, x;

p = sys_nlink(pm_sym, name, NULL, 0);
n = nx+1;
if (p == NULL || mkobj(p, 'T', 9*n, n))  return NULL;
si = TEXT_PTR(src);
di = TEXT_PTR(p);
memcpy(di+1, si, nx);
ss_strip_bits(di, n);
for (k = 2, di += n; k < 10; k++, di += n) {    /* src * 'k' */
    zi = di + n  - 1;
    si = TEXT_PTR(src) + nx - 1;
    for (cc = j = 0; j < nx; j++, si--, zi--) {
        x = cc + k * (*si & 0X0F);
        *zi =  x % 10;
        cc = x / 10;
        }
    *zi = cc;
    }
return p;
}

/* dst <- src % val */
ds_idiv(MATRIX dst, MATRIX src, int k, int *r, int qs)
{
BYTE   *si = TEXT_PTR(src);
int     nx = MSIZE(src->ADDR);
int     x  = si[0] & 0X0F;
BYTE   *di;
int     cc, n;

k = k & 0X0F;
n = nx - (k > x);
if (mkobj(dst, TYPT, n+qs, n+qs)) return ERROR;

di = TEXT_PTR(dst);
if (qs) *di++ = '-';

if (k > x) {
    cc = x;
    si++;
    nx--;
    }
else    cc = 0;

for (; 0 < nx--; di++, si++) {
    x = 10*cc + (*si & 0X0F);
    *di = '0' + x / k;
    cc  = x % k;
    }
*r = (qs) ? k-cc : cc;   /* remainder */
return OK;
}

int     ds_vmod(MATRIX dst, MATRIX lo, MATRIX hi)
{
BYTE   *s  = TEXT_PTR(lo);
int     nc = MSIZE(lo->ADDR);
int    *iy = INT_PTR(hi);
int     n  = MSIZE(hi->ADDR);
BYTE   *t;
int     j, sgn = 0, z, *iz;
if (sasa(dst, hi)) return ERROR;
if ('-' == *s) {s++; nc--; sgn = 1;}
for (iz = INT_PTR(dst); 0 < n--; iy++, iz++) {
    for (t = s, j = nc, z = 0; 0 < j--; t++) {
        z = (10 * z + (*t & 0X0F)) % *iy;
        }
    *iz = (z && sgn) ? *iy - z : z;
    }
return OK;
}








