/*
 * Copyright 1996,97 Thierry Bousch
 * Licensed under the Gnu Public License, Version 2
 *
 * $Id: Apoly.c,v 2.7 1997/04/17 21:11:27 bousch Exp $
 *
 * An alternative implementation of polynomials on a ring, maybe with
 * a denominator.
 */

#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include "saml.h"
#include "saml-errno.h"
#include "saml-util.h"
#include "mnode.h"
#include "builtin.h"

/*
 * One term of a polynomial: a coefficient followed by its exponents.
 * The exponent array is a zero-length trailing member; the code in this
 * file indexes it with one entry per literal of the owning polynomial
 * and steps from one term to the next with MONOFFSET(nlit).
 */
typedef struct {
	s_mnode *coeff;		/* Coefficient */
	__u32 e[0];		/* Exponents */
} monomial;

/*
 * In-memory layout of an Apoly node: a fixed part, then `nlit' literal
 * pointers, then `length' monomials (the first one is found with MONO1).
 */
typedef struct {
	struct mnode_header hdr;
	__u16 flags;		/* Flag bits; this file always stores IGNORE_DEN */
	__u16 nlit;		/* Number of entries in lit[] */
	int length;		/* Number of monomials stored after lit[] */
	s_mnode *den;		/* Denominator node */
	s_mnode *lit[0];	/* Literals; the monomials follow in memory */
} apoly_mnode;

/*
 * MONO1(p):     address of the first monomial of polynomial p, i.e. the
 *               location just past its nlit literal pointers.
 * MONOFFSET(n): number of `monomial'-sized slots taken by one term that
 *               carries n exponents (coefficient plus exponents, rounded
 *               up to a whole number of slots).
 * The argument of MONO1 is parenthesized so that any pointer expression
 * (not only a plain identifier) can be passed.
 */
#define MONO1(p)	((monomial*) &(p)->lit[(p)->nlit])
#define MONOFFSET(n)	((2*sizeof(monomial)-1+(n)*sizeof(__u32)) / sizeof(monomial))
#define IGNORE_DEN	0x1	/* the flag value stored by every constructor here */
#define DENOM_ONE	0x2	/* not referenced by the code visible in this file */

/* Handlers local to this file; they are installed in MathType_Apoly below */
static void apoly_free (apoly_mnode*);
static gr_string* apoly_stringify (apoly_mnode*);
static s_mnode* apoly_make (s_mnode*);
static s_mnode* apoly_add (apoly_mnode*, apoly_mnode*);
static s_mnode* apoly_mul (apoly_mnode*, apoly_mnode*);
static s_mnode* apoly_div (apoly_mnode*, apoly_mnode*);
static s_mnode* apoly_gcd (apoly_mnode*, apoly_mnode*);
static int apoly_notzero (apoly_mnode*);
static s_mnode* apoly_zero (apoly_mnode*);
static s_mnode* apoly_negate (apoly_mnode*);
static s_mnode* apoly_one (apoly_mnode*);
/* Conversion routines, registered by init_MathType_Apoly() */
static s_mnode* literal2apoly (s_mnode*, apoly_mnode*);
static s_mnode* apoly2apoly (apoly_mnode*, apoly_mnode*);

/* Entry points with external linkage */
s_mnode* apoly_diff (apoly_mnode*, apoly_mnode*);
s_mnode* apoly_subs (apoly_mnode*, apoly_mnode*, apoly_mnode*);
s_mnode* apoly_sylvester (apoly_mnode*, apoly_mnode*, apoly_mnode*);
int apoly_length (apoly_mnode*);
/* Called below; its definition is not among those visible in this file */
s_mnode* upoly_eval (std_mnode*, apoly_mnode*);

/*
 * Operation table registered for ST_APOLY by init_MathType_Apoly().
 * After the type name come the handlers implemented in this file; NULL
 * entries leave a slot without an Apoly-specific handler, and
 * mn_std_sub / mn_std_differ are routines defined elsewhere.
 * NOTE(review): the meaning of each slot is fixed by unsafe_s_mtype,
 * which is declared outside this file -- check it before reordering.
 */
static unsafe_s_mtype MathType_Apoly = {
	"Alternative Polynomial",
	apoly_free, NULL, apoly_stringify,
	apoly_make, NULL,
	apoly_add, mn_std_sub, apoly_mul, apoly_div, apoly_gcd,
	apoly_notzero, NULL, NULL, mn_std_differ, NULL,
	apoly_zero, apoly_negate, apoly_one, NULL, NULL
};

/*
 * Copy n 32-bit words from `from' to `to'.  The two areas must not
 * overlap; every caller in this file copies between distinct objects.
 *
 * The former i386 inline assembly named its input registers in the
 * clobber list as well, which GCC rejects, and it modified operands that
 * were declared input-only.  memcpy() is portable and compilers expand
 * it inline for small sizes anyway.
 */
static inline void u32copy (__u32* to, __u32* from, size_t n)
{
	/* Skip the call entirely for an empty exponent list */
	if (n != 0)
		memcpy(to, from, n * sizeof(__u32));
}

/*
 * Register the Apoly type: install its operation table under ST_APOLY,
 * then register the two conversion routines that produce an Apoly
 * (from a literal, and from another Apoly).
 */
void init_MathType_Apoly (void)
{
	register_mtype(ST_APOLY, &MathType_Apoly);
	register_CV_routine(ST_LITERAL, ST_APOLY, literal2apoly);
	register_CV_routine(ST_APOLY, ST_APOLY, apoly2apoly);
}

/*
 * Report how many monomials P currently holds.
 * Exported as a back door for matrix_determinant().
 */
int apoly_length (apoly_mnode *P)
{
	const int nterms = P->length;

	return nterms;
}

/*
 * Number of bytes needed for an Apoly node with `nlit' literals and
 * `terms' monomials: the fixed part, the literal pointers, then the
 * monomials at MONOFFSET(nlit) slots each.
 */
static inline size_t apoly_size (int nlit, int terms)
{
	size_t bytes = sizeof(apoly_mnode);

	bytes += nlit * sizeof(mn_ptr);
	bytes += terms * MONOFFSET(nlit) * sizeof(monomial);
	return bytes;
}

/*
 * Allocate an ST_APOLY node sized for `nlit' literals and `terms'
 * monomials and record both counts.  flags, den, the literal pointers
 * and the monomials are left for the caller to fill in.
 */
static inline apoly_mnode* apoly_new (int nlit, int terms)
{
	size_t bytes = apoly_size(nlit, terms);
	apoly_mnode *node = (apoly_mnode*) __mnalloc(ST_APOLY, bytes);

	node->length = terms;
	node->nlit = nlit;
	return node;
}

/*
 * Destructor: drop the references held on the denominator, on every
 * coefficient and on every literal, then release the node itself.
 */
static void apoly_free (apoly_mnode *P)
{
	const int step = MONOFFSET(P->nlit);
	monomial *term = MONO1(P);
	int k;

	unlink_mnode(P->den);
	for (k = 0; k < P->length; k++) {
		unlink_mnode(term->coeff);
		term += step;
	}
	for (k = 0; k < P->nlit; k++)
		unlink_mnode(P->lit[k]);
	free(P);
}

/*
 * Build a polynomial without literals from a constant: a single term
 * when the constant is non-zero, no term at all otherwise.  The
 * denominator is mnode_one(constant).
 */
static s_mnode* apoly_make (s_mnode *constant)
{
	const int has_term = mnode_notzero(constant) ? 1 : 0;
	apoly_mnode *poly = apoly_new(0, has_term);

	poly->flags = IGNORE_DEN;
	poly->den = mnode_one(constant);
	if (has_term) {
		/* The single monomial carries no exponents */
		MONO1(poly)->coeff = copy_mnode(constant);
	}
	return (mn_ptr) poly;
}

/*
 * Render P as text.  An empty polynomial gives "0".  Otherwise every
 * term is written as its signed coefficient followed by "*literal" or
 * "*literal^exponent" for each literal with a non-zero exponent; a
 * coefficient printed as "+1" or "-1" loses its "1*" in front of the
 * first literal.
 *
 * Returns a gr_string built with new_gr_string()/grs_append*().
 */
static gr_string* apoly_stringify (apoly_mnode *P)
{
	gr_string *grs, *grsc;
	monomial *m;
	int i, left, nlit, swallow_the_one;
	__u32 expo;
	char *sexpo;

	grs = new_gr_string(0);
	if (P->length == 0) {
		grs = grs_append1(grs, '0');
		return grs;
	}
	left = P->length;
	nlit = P->nlit;
	m = MONO1(P);
	while (left) {
		grsc = mnode_stringify(m->coeff);
		/*
		 * Add a leading sign, if none is present.  An empty string
		 * has no first character to look at, so it simply gets the
		 * '+' as well.
		 */
		if (grsc->len == 0 ||
		    (grsc->s[0] != '+' && grsc->s[0] != '-'))
			grsc = grs_prepend1(grsc, '+');
		/*
		 * If the coefficient is "+1" or "-1", remember to
		 * collapse the "1*" sequence when we meet a star.
		 */
		swallow_the_one = 0;
		if (grsc->len == 2 && grsc->s[1] == '1')
			swallow_the_one = 1;

		grs = grs_append(grs, grsc->s, grsc->len);
		free(grsc);
		for (i = 0; i < nlit; i++) {
			if ((expo = m->e[i]) == 0)
				continue;
			grs = grs_append1(grs, '*');
			if (swallow_the_one) {
				/* Drop the "1*" that was just written */
				swallow_the_one = 0;
				grs->len -= 2;
			}
			grsc = mnode_stringify(P->lit[i]);
			grs = grs_append(grs, grsc->s, grsc->len);
			free(grsc);
			if (expo == 1)
				continue;
			grs = grs_append1(grs, '^');
			/*
			 * NOTE(review): the buffer returned by u32toa() is
			 * not released here -- presumably it is static;
			 * confirm against its definition.
			 */
			sexpo = u32toa(expo);
			grs = grs_append(grs, sexpo, strlen(sexpo));
		}
		m += MONOFFSET(nlit);
		--left;
	}
	return grs;
}

/*
 * Merge two literal lists, each sorted by pointer value, into `list'
 * without duplicates.  newpos1[i] (resp. newpos2[i]) receives the index
 * in `list' of the i-th entry of list1 (resp. list2).
 * Returns the number of entries written to `list'.
 */
static int sorted_union_literals (
	s_mnode **list1, int len1,
	s_mnode **list2, int len2,
	int *newpos1, int *newpos2, s_mnode **list)
{
	int a = 0, b = 0, out = 0;

	while (a < len1 || b < len2) {
		int from1, from2;

		if (a >= len1) {
			/* Only list2 has entries left */
			from1 = 0;
			from2 = 1;
		} else if (b >= len2) {
			/* Only list1 has entries left */
			from1 = 1;
			from2 = 0;
		} else if (list1[a] == list2[b]) {
			/* Shared literal: consume it from both lists */
			from1 = 1;
			from2 = 1;
		} else {
			from1 = (list1[a] < list2[b]);
			from2 = !from1;
		}

		list[out] = from1 ? list1[a] : list2[b];
		if (from1)
			newpos1[a++] = out;
		if (from2)
			newpos2[b++] = out;
		out++;
	}
	return out;
}

/*
 * Sum of two polynomials.
 *
 * The literal lists are merged with sorted_union_literals(); the terms
 * of both operands are then merged in increasing order of their exponent
 * lists (compared entry by entry from index 0), adding the coefficients
 * of terms with identical exponents and dropping sums that are zero.
 * The result takes its denominator from P1.
 *
 * Returns a new node, a new reference to the other operand when one of
 * them is empty, or an SE_OVERFLOW error node when the merged literal
 * list would not fit in the 16-bit nlit field.
 */
static s_mnode* apoly_add (apoly_mnode *P1, apoly_mnode *P2)
{
	s_mnode **llist, *csum;
	int i, left1, left2, nlit, nocopy1, nocopy2;
	int *newpos1, *newpos2;
	__u32 *exponents1, *exponents2;
	apoly_mnode *P;
	monomial *m, *m1, *m2;

	if (P1->length == 0)
		return copy_mnode((mn_ptr)P2);
	if (P2->length == 0)
		return copy_mnode((mn_ptr)P1);
	newpos1 = alloca(P1->nlit * sizeof(int));
	newpos2 = alloca(P2->nlit * sizeof(int));
	llist = alloca((P1->nlit + P2->nlit) * sizeof(mn_ptr));
	nlit = sorted_union_literals(P1->lit, P1->nlit, P2->lit, P2->nlit,
		newpos1, newpos2, llist);
	if (nlit >= 65536)
		return mnode_error(SE_OVERFLOW, "apoly_add");
	/* An operand that already has every literal needs no remapping */
	nocopy1 = (P1->nlit == nlit);
	nocopy2 = (P2->nlit == nlit);
	/*
	 * If P is constant (nlit=0) then it will contain at most one term.
	 * Otherwise its length is bounded by P1->length+P2->length.
	 */
	P = apoly_new(nlit, nlit ? (P1->length + P2->length) : 1);
	P->flags = IGNORE_DEN;
	P->den = copy_mnode(P1->den);
	for (i = 0; i < nlit; i++)
		P->lit[i] = copy_mnode(llist[i]);
	m  = MONO1(P);
	m1 = MONO1(P1); left1 = P1->length;
	m2 = MONO1(P2); left2 = P2->length;
	exponents1 = exponents2 = NULL;
	if (!nocopy1) {
		/* Not all literals appear in P1 */
		exponents1 = alloca(nlit * sizeof(__u32));
		memset(exponents1, 0, nlit * sizeof(__u32));
	}
	if (!nocopy2) {
		/* Not all literals appear in P2 */
		exponents2 = alloca(nlit * sizeof(__u32));
		memset(exponents2, 0, nlit * sizeof(__u32));
	}
	while (left1 && left2) {
		/* Express both current terms over the merged literal list */
		if (nocopy1)
			exponents1 = m1->e;
		else {
			for (i = P1->nlit-1; i >= 0; i--)
				exponents1[newpos1[i]] = m1->e[i];
		}
		if (nocopy2)
			exponents2 = m2->e;
		else {
			for (i = P2->nlit-1; i >= 0; i--)
				exponents2[newpos2[i]] = m2->e[i];
		}
		/* Compare lexicographically the two exponent lists */
		for (i = 0; i < nlit; i++)
			if (exponents1[i] != exponents2[i])
				break;

		if (i == nlit) {
			/* Proportional terms */
			csum = mnode_add(m1->coeff, m2->coeff);
			if (mnode_notzero(csum)) {
			    m->coeff = csum;
			    u32copy(m->e, exponents1, nlit);
			    m += MONOFFSET(nlit);
			} else
			    unlink_mnode(csum);
			m1 += MONOFFSET(P1->nlit);
			m2 += MONOFFSET(P2->nlit);
			--left1; --left2;
		}
		else if (exponents1[i] < exponents2[i]) {
			m->coeff = copy_mnode(m1->coeff);
			u32copy(m->e, exponents1, nlit);
			m  += MONOFFSET(nlit);
			m1 += MONOFFSET(P1->nlit);
			--left1;
		}
		else {
			m->coeff = copy_mnode(m2->coeff);
			u32copy(m->e, exponents2, nlit);
			m  += MONOFFSET(nlit);
			m2 += MONOFFSET(P2->nlit);
			--left2;
		}
	}
	/* At most one of the two loops below has anything left to copy */
	while (left1) {
		m->coeff = copy_mnode(m1->coeff);
		if (nocopy1)
			u32copy(m->e, m1->e, nlit);
		else {
			memset(m->e, 0, nlit * sizeof(__u32));
			for (i = P1->nlit-1; i >= 0; i--)
				m->e[newpos1[i]] = m1->e[i];
		}
		m  += MONOFFSET(nlit);
		m1 += MONOFFSET(P1->nlit);
		--left1;
	}
	while (left2) {
		m->coeff = copy_mnode(m2->coeff);
		if (nocopy2)
			u32copy(m->e, m2->e, nlit);
		else {
			memset(m->e, 0, nlit * sizeof(__u32));
			for (i = P2->nlit-1; i >= 0; i--)
				m->e[newpos2[i]] = m2->e[i];
		}
		m  += MONOFFSET(nlit);
		m2 += MONOFFSET(P2->nlit);
		--left2;
	}
	/* Number of terms actually written */
	i = (m - MONO1(P)) / MONOFFSET(nlit);
	if (i < P->length) {
		/*
		 * Give the unused tail back.  If realloc() fails the
		 * original block is still valid (merely larger than
		 * needed), so keep using it.
		 */
		apoly_mnode *shrunk = realloc(P, apoly_size(nlit,i));
		if (shrunk)
			P = shrunk;
		P->length = i;
	}
	return (mn_ptr) P;
}

/*
 * Multiply polynomial P by a single monomial m2 whose exponents refer
 * to the literal list lit2[0..nlit2-1].
 *
 * Returns a new node over the merged literal list, with P's
 * denominator.  Terms whose coefficient product is zero are dropped.
 * An SE_OVERFLOW error node is returned when the merged literal list
 * would not fit in the 16-bit nlit field or when the sum of two
 * exponents wraps around.
 */
static s_mnode* apoly_mulmono (apoly_mnode *P,
		monomial *m2, s_mnode **lit2, int nlit2)
{
	apoly_mnode *Q;
	int i, nlit, left, overflow;
	s_mnode **llist;
	int *newpos1, *newpos2;
	__u32 *expo1, *expo2;
	monomial *m, *m1;

	if ((left = P->length) == 0)
		return copy_mnode((mn_ptr)P);
	if (!mnode_notzero(m2->coeff))
		return apoly_zero(P);
	newpos1 = alloca(P->nlit * sizeof(int));
	newpos2 = alloca(nlit2   * sizeof(int));
	llist = alloca((P->nlit + nlit2) * sizeof(mn_ptr));
	nlit = sorted_union_literals(P->lit, P->nlit, lit2, nlit2,
		newpos1, newpos2, llist);
	if (nlit >= 65536)
		return mnode_error(SE_OVERFLOW, "apoly_mul");
	Q = apoly_new(nlit, left);
	Q->flags = IGNORE_DEN;
	Q->den = copy_mnode(P->den);
	for (i = 0; i < nlit; i++)
		Q->lit[i] = copy_mnode(llist[i]);
	/* Exponents of m2, spread over the merged literal list */
	expo2 = alloca(nlit * sizeof(__u32));
	memset(expo2, 0, nlit * sizeof(__u32));
	for (i = 0; i < nlit2; i++)
		expo2[newpos2[i]] = m2->e[i];
	m = MONO1(Q);
	overflow = 0;
	for (m1 = MONO1(P); left; --left, m1 += MONOFFSET(P->nlit)) {
		m->coeff = mnode_mul(m1->coeff, m2->coeff);
		if (!mnode_notzero(m->coeff)) {
			/* Zero product: the slot is reused by the next term */
			unlink_mnode(m->coeff);
			continue;
		}
		expo1 = m->e;
		u32copy(expo1, expo2, nlit);
		for (i = P->nlit-1; i >= 0; i--) {
			int i1 = newpos1[i];
			__u32 e1, e2, e3;

			e1 = expo1[i1];
			e2 = m1->e[i];
			e3 = e1 + e2;
			/* Unsigned wrap-around shows up as a smaller sum */
			if (e3 < e1)
				overflow = 1;
			expo1[i1] = e3;
		}
		m += MONOFFSET(nlit);
	}
	/*
	 * Record how many terms were really written before Q can be
	 * released: apoly_free() unlinks Q->length coefficients, and the
	 * slots past `m' were never initialized.
	 */
	Q->length = (m - MONO1(Q)) / MONOFFSET(nlit);
	if (overflow) {
		unlink_mnode((mn_ptr)Q);
		return mnode_error(SE_OVERFLOW, "apoly_mul");
	}
	return (mn_ptr) Q;
}

/*
 * Multiply P1 by the `length' monomials starting at `list' (expressed
 * over lit2[0..nlit2-1]).  A single monomial is handed to
 * apoly_mulmono(); a longer list is cut in two halves whose products
 * are computed recursively and then added.
 */
static s_mnode* apoly_split_mul (apoly_mnode *P1, monomial *list,
	int length, s_mnode **lit2, int nlit2)
{
	s_mnode *low, *high, *sum;
	monomial *second_half;
	int half;

	if (length == 1)
		return apoly_mulmono(P1, list, lit2, nlit2);

	half = length / 2;
	second_half = list + half * (int) MONOFFSET(nlit2);
	low  = apoly_split_mul(P1, list, half, lit2, nlit2);
	high = apoly_split_mul(P1, second_half, length - half, lit2, nlit2);
	sum = mnode_add(low, high);
	unlink_mnode(low);
	unlink_mnode(high);
	return sum;
}

/*
 * Product of two polynomials.  An empty operand is returned as is (new
 * reference).  Otherwise the operand with more terms is multiplied by
 * the monomials of the other one through apoly_split_mul().
 */
static s_mnode* apoly_mul (apoly_mnode *P1, apoly_mnode *P2)
{
	apoly_mnode *longer, *shorter;

	if (P1->length == 0)
		return copy_mnode((mn_ptr)P1);
	if (P2->length == 0)
		return copy_mnode((mn_ptr)P2);

	if (P1->length < P2->length) {
		longer = P2;
		shorter = P1;
	} else {
		longer = P1;
		shorter = P2;
	}
	return apoly_split_mul(longer, MONO1(shorter), shorter->length,
		shorter->lit, shorter->nlit);
}

/*
 * Divide P1 by P2, where P2 consists of exactly one monomial.
 *
 * Each term of P1 whose exponents are all at least those of the divisor
 * contributes a term to the quotient, provided the quotient of the
 * coefficients is non-zero; the other terms are dropped.  If the
 * divisor uses a literal (with non-zero exponent) that P1 does not
 * have, the result is the zero polynomial.
 *
 * Returns a new node over P1's literals, with P1's denominator.
 */
static s_mnode* apoly_divmono (apoly_mnode *P1, apoly_mnode *P2)
{
	int i, j, nlit=P1->nlit, left, moff;
	/* A zero-length VLA is undefined, so reserve at least one slot */
	__u32 dexpo[nlit ? nlit : 1];
	apoly_mnode *Q;
	monomial *m, *m1;
	s_mnode *lit, *q;

	/* dexpo[] = exponents of the divisor, indexed by P1's literals */
	memset(dexpo, 0, nlit * sizeof(__u32));
	m = MONO1(P2);
	for (i = j = 0; i < P2->nlit; i++) {
		if (m->e[i] == 0)
			continue;
		/* j is not reset: the search resumes where it stopped */
		for (lit = P2->lit[i]; j < nlit; j++)
			if (lit == P1->lit[j])
				goto literal_found;

		/* The literal doesn't appear in P1 */
		return apoly_zero(P1);
literal_found:
		dexpo[j] = m->e[i];
	}
	left = P1->length;
	Q = apoly_new(nlit, P1->length);
	Q->flags = IGNORE_DEN;
	Q->den = copy_mnode(P1->den);
	for (i = 0; i < nlit; i++)
		Q->lit[i] = copy_mnode(P1->lit[i]);
	m = MONO1(Q);
	m1 = MONO1(P1);
	moff = MONOFFSET(nlit);
	while (left) {
		for (i = 0; i < nlit; i++) {
			/* Term not divisible: its slot in Q is reused */
			if (m1->e[i] < dexpo[i])
				goto next_term;
			m->e[i] = m1->e[i] - dexpo[i];
		}
		q = mnode_div(m1->coeff, MONO1(P2)->coeff);
		if (!mnode_notzero(q)) {
			unlink_mnode(q);
			goto next_term;
		}
		m->coeff = q;
		m += moff;
next_term:
		m1 += moff;
		--left;
	}
	/* Number of terms actually written */
	i = (m - MONO1(Q)) / moff;
	if (i < Q->length) {
		/*
		 * Give the unused tail back.  If realloc() fails the
		 * original block is still valid (merely larger than
		 * needed), so keep using it.
		 */
		apoly_mnode *shrunk = realloc(Q, apoly_size(nlit, i));
		if (shrunk)
			Q = shrunk;
		Q->length = i;
	}
	return (mn_ptr) Q;
}

/*
 * Quotient of P1 by P2.
 *
 * Dividing by the empty polynomial yields an SE_DIVZERO error node; an
 * empty dividend is returned unchanged (new reference); a one-term
 * divisor is handled by apoly_divmono().  Otherwise the last stored
 * term of P1 is divided by the last stored term of P2, giving a
 * one-term polynomial Q, and the result is (P1 - P2*Q)/P2 + Q, computed
 * through the generic mnode_sub/mnode_div/mnode_add entry points.
 * Whenever that leading division is impossible (missing literal,
 * exponent too small, zero coefficient quotient) the zero polynomial is
 * returned.
 */
static s_mnode* apoly_div (apoly_mnode *P1, apoly_mnode *P2)
{
	int i1, i2, nlit1, nlit2;
	monomial *m1, *m2;
	s_mnode *tmpa, *tmpb, *lit, *q;
	apoly_mnode *Q;
	__u32 *dexpo;

	if (P2->length == 0)
		return mnode_error(SE_DIVZERO, "apoly_div");
	if (P1->length == 0)
		return copy_mnode((mn_ptr)P1);
	if (P2->length == 1)
		return apoly_divmono(P1, P2);
	/*
	 * General case; we divide the most-significant terms of P1, P2
	 * and recurse.
	 */
	nlit1 = P1->nlit;
	nlit2 = P2->nlit;
	/* m1, m2: last stored term of each operand */
	m1 = MONO1(P1) + (P1->length-1)*MONOFFSET(nlit1);
	m2 = MONO1(P2) + (P2->length-1)*MONOFFSET(nlit2);
	/* dexpo starts as the exponents of m1 and ends as those of m1/m2 */
	dexpo = alloca(nlit1 * sizeof(__u32));
	u32copy(dexpo, m1->e, nlit1);
	for (i1 = i2 = 0; i2 < nlit2; i2++) {
		if (m2->e[i2] == 0)
			continue;
		/* i1 is not reset: the search resumes where it stopped */
		for (lit = P2->lit[i2]; i1 < nlit1; i1++)
			if (P1->lit[i1] == lit && dexpo[i1] >= m2->e[i2])
				goto literal_found;
		/* Not found, or exponent greater in m2 */
		return apoly_zero(P1);
literal_found:
		dexpo[i1] -= m2->e[i2];
	}
	q = mnode_div(m1->coeff, m2->coeff);
	if (!mnode_notzero(q)) {
		/* Not perfect, but... */
		unlink_mnode(q);
		return apoly_zero(P1);
	}
	/* Q = the one-term quotient of the two leading terms */
	Q = apoly_new(nlit1, 1);
	Q->flags = IGNORE_DEN;
	Q->den = copy_mnode(P1->den);
	for (i1 = 0; i1 < nlit1; i1++)
		Q->lit[i1] = copy_mnode(P1->lit[i1]);
	m1 = MONO1(Q);
	m1->coeff = q;
	u32copy(m1->e, dexpo, nlit1);

	/* result = (P1 - P2*Q) / P2 + Q */
	tmpa = apoly_mul(P2, Q);
	tmpb = mnode_sub((mn_ptr)P1, tmpa);
	unlink_mnode(tmpa);
	tmpa = mnode_div(tmpb, (mn_ptr)P2);
	unlink_mnode(tmpb);
	tmpb = mnode_add(tmpa, (mn_ptr)Q);
	unlink_mnode(tmpa);
	unlink_mnode((mn_ptr)Q);
	return tmpb;
}

/*
 * Return 1 when P has at least one term, 0 when it is empty.
 */
static int apoly_notzero (apoly_mnode *P)
{
	if (P->length == 0)
		return 0;
	return 1;
}

/*
 * Build the empty polynomial (no literal, no term) sharing the
 * denominator of P.
 */
static s_mnode* apoly_zero (apoly_mnode *P)
{
	apoly_mnode *empty = apoly_new(0, 0);

	empty->den = copy_mnode(P->den);
	empty->flags = IGNORE_DEN;
	return (mn_ptr) empty;
}

/*
 * Return -P: same literals, same exponents, same denominator, every
 * coefficient replaced by its negation.
 */
static s_mnode* apoly_negate (apoly_mnode *P)
{
	const int nlit = P->nlit;
	const int nterms = P->length;
	const int step = MONOFFSET(nlit);
	apoly_mnode *neg = apoly_new(nlit, nterms);
	monomial *term;
	int k;

	neg->flags = IGNORE_DEN;
	neg->den = copy_mnode(P->den);
	for (k = 0; k < nlit; k++)
		neg->lit[k] = copy_mnode(P->lit[k]);

	/* Clone all the terms in one go ... */
	term = MONO1(neg);
	memcpy(term, MONO1(P), nterms * step * sizeof(monomial));
	/* ... then swap each borrowed coefficient for its negation */
	for (k = 0; k < nterms; k++, term += step)
		term->coeff = mnode_negate(term->coeff);
	return (mn_ptr) neg;
}

/*
 * Build the constant polynomial whose single coefficient, and whose
 * denominator, are both mnode_one(P->den).
 */
static s_mnode* apoly_one (apoly_mnode *P)
{
	apoly_mnode *unit = apoly_new(0, 1);
	s_mnode *one = mnode_one(P->den);

	unit->den = one;
	unit->flags = IGNORE_DEN;
	/* The monomial takes a second reference on the same node */
	MONO1(unit)->coeff = copy_mnode(one);
	return (mn_ptr) unit;
}

/*
 * Conversion routine ST_LITERAL -> ST_APOLY: build the polynomial
 * 1*lit^1, taking the unit from mnode_one(model->den).  Without a model
 * there is nothing to derive the unit from, and an SE_ICAST error node
 * is returned.
 */
static s_mnode* literal2apoly (s_mnode *lit, apoly_mnode* model)
{
	apoly_mnode *poly;
	monomial *term;

	if (model == NULL)
		return mnode_error(SE_ICAST, "literal2poly");

	poly = apoly_new(1, 1);
	poly->flags = IGNORE_DEN;
	poly->lit[0] = copy_mnode(lit);
	poly->den = mnode_one(model->den);
	term = MONO1(poly);
	term->e[0] = 1;
	/* The coefficient shares the node used as denominator */
	term->coeff = copy_mnode(poly->den);
	return (mn_ptr) poly;
}

/*
 * Conversion routine ST_APOLY -> ST_APOLY.  Without a model, P is
 * returned as is (new reference).  Otherwise every coefficient is
 * promoted to model->den; terms whose promoted coefficient has a
 * different type than model->den, or is zero, are left out.  The result
 * uses model->den as its denominator.
 */
static s_mnode* apoly2apoly (apoly_mnode *P, apoly_mnode* model)
{
	apoly_mnode *conv;
	monomial *src, *dst;
	s_mnode *target_den, *coeff;
	int k, nlit, nterms, step;

	if (model == NULL)
		return copy_mnode((mn_ptr)P);

	target_den = model->den;
	nlit = P->nlit;
	nterms = P->length;
	step = MONOFFSET(nlit);

	conv = apoly_new(nlit, nterms);
	conv->flags = IGNORE_DEN;
	conv->den = copy_mnode(target_den);
	for (k = 0; k < nlit; k++)
		conv->lit[k] = copy_mnode(P->lit[k]);

	dst = MONO1(conv);
	for (src = MONO1(P), k = 0; k < nterms; k++, src += step) {
		coeff = mnode_promote(src->coeff, target_den);
		if (coeff->type != target_den->type || !mnode_notzero(coeff)) {
			/* Unusable coefficient: skip this term */
			unlink_mnode(coeff);
			continue;
		}
		dst->coeff = coeff;
		u32copy(dst->e, src->e, nlit);
		dst += step;
	}
	/* Only the terms that were kept count */
	conv->length = (dst - MONO1(conv)) / step;
	return (mn_ptr) conv;
}

/*
 * If alit is a single term in which exactly one literal has exponent 1
 * and all the others have exponent 0, return that literal (borrowed
 * pointer, no new reference).  Return NULL in every other case.  The
 * coefficient of the term is not examined.
 */
static s_mnode* extract_literal (apoly_mnode* alit)
{
	monomial *only;
	int k, found = -1;

	if (alit->length != 1)
		return NULL;
	only = MONO1(alit);
	for (k = 0; k < alit->nlit; k++) {
		switch (only->e[k]) {
		case 0:
			break;
		case 1:
			/* A second literal with exponent one disqualifies */
			if (found >= 0)
				return NULL;
			found = k;
			break;
		default:
			/* Exponent of two or more */
			return NULL;
		}
	}
	/* found < 0 means the term is a plain constant */
	return (found < 0) ? NULL : alit->lit[found];
}

/*
 * Compute n*x using additions only (binary method): `pow2' runs through
 * x, 2x, 4x, ... and is added to the accumulator for every set bit of n.
 * Returns a new reference.
 */
static s_mnode* multiply_u32 (s_mnode* x, __u32 n)
{
	s_mnode *acc, *pow2, *next;

	pow2 = copy_mnode(x);
	if (n == 1)
		return pow2;

	acc = mnode_zero(x);
	for (;;) {
		if (n & 1) {
			next = mnode_add(acc, pow2);
			unlink_mnode(acc);
			acc = next;
		}
		n >>= 1;
		if (n == 0)
			break;
		/* Double the running power for the next bit */
		next = mnode_add(pow2, pow2);
		unlink_mnode(pow2);
		pow2 = next;
	}
	unlink_mnode(pow2);
	return acc;
}

/*
 * Partial derivative of P with respect to a literal.
 *
 * alit is either an ST_LITERAL node or an Apoly accepted by
 * extract_literal(); otherwise an error node is returned (SE_OODOMAIN
 * for an Apoly that is not a literal, SE_TCONFL for any other type).
 * If the literal does not occur in P the result is the zero polynomial.
 * Each term c*x^e with e > 0 becomes (e*c)*x^(e-1); terms whose new
 * coefficient is zero are dropped.
 *
 * Returns a new node over P's literals, with P's denominator.
 */
s_mnode* apoly_diff (apoly_mnode* P, apoly_mnode* alit)
{
	apoly_mnode *Q;
	monomial *m, *md;
	s_mnode *lit;
	int i, pos, nlit, left, moff, noncst;

	/*
	 * First check that alit is indeed a literal
	 */
	if (alit->hdr.type == ST_LITERAL)
		lit = (mn_ptr)alit;
	else if (alit->hdr.type == ST_APOLY) {
		lit = extract_literal(alit);
		if (lit == NULL)
			return mnode_error(SE_OODOMAIN, "apoly_diff");
	} else
		return mnode_error(SE_TCONFL, "apoly_diff");
	/*
	 * Find its position in P
	 */
	nlit = P->nlit;
	for (pos = nlit - 1; pos >= 0; pos--)
		if (P->lit[pos] == lit)
			break;
	if (pos < 0)
		return apoly_zero(P);
	/*
	 * Count how many terms contain this literal
	 */
	moff = MONOFFSET(nlit);
	m = MONO1(P);
	noncst = 0;
	for (left = P->length; left; left--, m+=moff) {
		if (m->e[pos])
			++noncst;
	}
	Q = apoly_new(nlit, noncst);
	Q->flags = IGNORE_DEN;
	Q->den = copy_mnode(P->den);
	for (i = nlit-1; i >= 0; i--)
		Q->lit[i] = copy_mnode(P->lit[i]);
	m = MONO1(P);
	md = MONO1(Q);
	for (left = P->length; left; left--, m+=moff) {
		__u32 e = m->e[pos];
		if (e == 0)
			continue;
		u32copy(md->e, m->e, nlit);
		md->e[pos] = e - 1;
		md->coeff = multiply_u32(m->coeff, e);
		if (!mnode_notzero(md->coeff)) {
			/* e*c vanished: the slot is reused by the next term */
			unlink_mnode(md->coeff);
			continue;
		}
		md += moff;
	}
	/* Number of terms actually written */
	i = (md - MONO1(Q)) / moff;
	if (i < Q->length) {
		/*
		 * Give the unused tail back.  If realloc() fails the
		 * original block is still valid (merely larger than
		 * needed), so keep using it.
		 */
		apoly_mnode *shrunk = realloc(Q, apoly_size(nlit, i));
		if (shrunk)
			Q = shrunk;
		Q->length = i;
	}
	return (mn_ptr) Q;
}

/*
 * Rewrite P as a dense ST_UPOLY in the literal `lit': entry i of the
 * result is an Apoly over the remaining literals, holding the terms of
 * P in which `lit' has exponent i (with that exponent removed).
 *
 * If `lit' does not occur in P, the result has P as its only entry.
 * Returns NULL when the largest exponent of `lit' exceeds 1000000.
 */
static std_mnode* apoly_to_upoly (apoly_mnode* P, s_mnode* lit)
{
	std_mnode* up;
	monomial *m, *m1;
	int i, j, nlit, left, moff, pos, *nbterms, last_zero;
	__u32 maxexp;
	apoly_mnode *term;

	nlit = P->nlit;
	for (i = nlit - 1; i >= 0; i--)
		if (P->lit[i] == lit)
			break;
	if (i < 0) {
		/* Literal not found */
		up = mstd_alloc(ST_UPOLY, 1);
		up->x[0] = copy_mnode((mn_ptr)P);
		return up;
	}
	pos = i;
	left = P->length;
	moff = MONOFFSET(nlit);
	/* First pass: highest exponent of the literal */
	maxexp = 0;
	for (m = MONO1(P); left; --left, m += moff) {
		if (m->e[pos] > maxexp)
			maxexp = m->e[pos];
	}
	if (maxexp > 1000000) {
		/*
		 * Ouch, the polynomial is horribly sparse. At a minimum,
		 * the dense representation would require more than
		 * eight megabytes of heap on a 32-bit machine. Give up.
		 */
		return NULL;
	}
	/* Second pass: nbterms[d] = number of terms with exponent d */
	nbterms = calloc(maxexp+1, sizeof(int));
	if (!nbterms)
		panic_out_of_memory();
	left = P->length;
	for (m = MONO1(P); left; --left, m += moff)
		++nbterms[m->e[pos]];
	up = mstd_alloc(ST_UPOLY, maxexp+1);
	last_zero = -1;
	for (i = 0; i <= maxexp; i++) {
		if (!nbterms[i]) {
			/*
			 * If we have already encountered a "hole", just
			 * copy the reference, instead of creating a new
			 * Apoly. This optimization should be particularly
			 * interesting on very sparse polynomials.
			 */
			if (last_zero >= 0) {
				up->x[i] = copy_mnode(up->x[last_zero]);
				continue;
			}
			last_zero = i;
		}
		/* The chosen literal is removed from the literal list */
		term = apoly_new(nlit-1, nbterms[i]);
		term->flags = IGNORE_DEN;
		for (j = 0; j < pos; j++)
			term->lit[j] = copy_mnode(P->lit[j]);
		for (j = pos+1; j < nlit; j++)
			term->lit[j-1] = copy_mnode(P->lit[j]);
		term->den = copy_mnode(P->den);
		/*
		 * HACK ALERT! We steal term->length to count the number
		 * of terms reaped so far. Hopefully, the counter will
		 * eventually stabilize to nbterms[i].
		 */
		term->length = 0;
		up->x[i] = (s_mnode*) term;
	}
	free(nbterms);
	/* Third pass: append every term of P to the bucket of its exponent */
	left = P->length;
	for (m = MONO1(P); left; --left, m += moff) {
		term = (apoly_mnode*) up->x[m->e[pos]];
		m1 = MONO1(term) + (term->length++) * MONOFFSET(nlit-1);
		m1->coeff = copy_mnode(m->coeff);
		for (j = 0; j < pos; j++)
			m1->e[j] = m->e[j];
		for (j = pos+1; j < nlit; j++)
			m1->e[j-1] = m->e[j];
	}
	return up;
}

/*
 * Rewrite P as a dense ST_UPOLY in the monomial e1 (which must consist
 * of exactly one term): entry d of the result is an Apoly, over the
 * same literals as P, holding the terms of P for which d is the largest
 * power of e1 dividing them, with that power divided out.
 *
 * Returns (std_mnode*)(-1) when e1 is constant, NULL when the largest
 * power exceeds 1000000, and a one-entry result holding P itself when
 * e1 uses a literal that P does not have.
 *
 * Literals of e1 whose exponent is zero take no part in the
 * decomposition: they have no position in P (newpos is -1) and would
 * otherwise cause a division by zero.
 */
static std_mnode* apoly_to_upoly_2 (apoly_mnode* P, apoly_mnode* e1)
{
	std_mnode *up;
	s_mnode *lit;
	apoly_mnode *term;
	__u32 maxexp, deg1, tmpd, *term_exp, *te_ptr;
	monomial *m, *m1, *m2;
	int i, j, left, moff, last_zero, *newpos, *nbterms;

	assert(e1->length == 1);
	m1 = MONO1(e1);
	/*
	 * First step: check that e1 is non-constant.
	 * Return (-1) otherwise.
	 */
	for (i = 0; i < e1->nlit; i++)
		if (m1->e[i])
			break;
	if (i == e1->nlit)
		return (std_mnode*)(-1);	/* how ugly. */
	/*
	 * Then, verify that all literals which appear in e1
	 * also appear in P, and store the positions.
	 */
	newpos = alloca((e1->nlit) * sizeof(int));
	for (i = 0; i < e1->nlit; i++) {
		newpos[i] = -1;
		if (m1->e[i] == 0)
			continue;
		lit = e1->lit[i];
		for (j = 0; j < P->nlit; j++)
			if (P->lit[j] == lit)
				break;
		if (j == P->nlit) {
			/*
			 * Hmm. So this literal doesn't appear in P, thus
			 * the decomposition is trivial in this case.
			 */
			up = mstd_alloc(ST_UPOLY, 1);
			up->x[0] = copy_mnode((mn_ptr)P);
			return up;
		}
		newpos[i] = j;
	}
	/*
	 * Now, determine the maximum power of e1 which divides a term of P
	 */
	maxexp = 0;
	left = P->length;
	moff = MONOFFSET(P->nlit);
	/* malloc(0) may return NULL, so never ask for zero bytes */
	te_ptr = term_exp = malloc((left ? left : 1) * sizeof(__u32));
	if (!term_exp)
		panic_out_of_memory();
	for (m = MONO1(P); left; left--, m += moff, te_ptr++) {
		/*
		 * Start from the largest value; at least one literal of e1
		 * has a non-zero exponent, so deg1 is always lowered.
		 */
		deg1 = ~0;
		for (i = 0; i < e1->nlit; i++) {
			if (m1->e[i] == 0)
				continue;	/* literal unused by e1 */
			tmpd = m->e[newpos[i]] / m1->e[i];
			if (tmpd < deg1)
				deg1 = tmpd;
		}
		*te_ptr = deg1;
		if (deg1 > maxexp)
			maxexp = deg1;
	}
	if (maxexp > 1000000) {
		/* Too sparse! Give up. */
		free(term_exp);
		return NULL;
	}
	up = mstd_alloc(ST_UPOLY, maxexp+1);
	/*
	 * Count the number of terms in each bucket, and allocate them
	 */
	nbterms = calloc(maxexp+1, sizeof(int));
	if (!nbterms)
		panic_out_of_memory();
	for (i = 0; i < P->length; i++)
		++nbterms[term_exp[i]];
	last_zero = -1;
	for (i = 0; i <= maxexp; i++) {
		if (nbterms[i] == 0) {
			/* All empty buckets share the first empty Apoly */
			if (last_zero >= 0) {
				up->x[i] = copy_mnode(up->x[last_zero]);
				continue;
			}
			last_zero = i;
		}
		term = apoly_new(P->nlit, nbterms[i]);
		term->flags = IGNORE_DEN;
		for (j = 0; j < P->nlit; j++)
			term->lit[j] = copy_mnode(P->lit[j]);
		term->den = copy_mnode(P->den);
		/*
		 * Gross hack again: term->length counts the terms stored
		 * so far and grows back to nbterms[i] in the loop below.
		 */
		term->length = 0;
		up->x[i] = (s_mnode*) term;
	}
	free(nbterms);
	/* Distribute the terms of P, dividing out e1^(*te_ptr) */
	left = P->length;
	te_ptr = term_exp;
	for (m = MONO1(P); left; left--, m += moff, te_ptr++) {
		term = (apoly_mnode*) up->x[*te_ptr];
		m2 = MONO1(term) + (term->length++) * moff;
		m2->coeff = copy_mnode(m->coeff);
		memcpy(m2->e, m->e, (P->nlit)*sizeof(__u32));
		for (i = e1->nlit-1; i >= 0; i--) {
			if (m1->e[i] == 0)
				continue;	/* newpos[i] is -1 here */
			m2->e[newpos[i]] -= (*te_ptr) * m1->e[i];
		}
	}
	free(term_exp);
	return up;
}

/*
 * Substitute e2 for the monomial e1 in P.
 *
 * Both e1 and e2 must be Apoly nodes (SE_TCONFL otherwise) and e1 must
 * have exactly one term (SE_OODOMAIN otherwise).  P is first turned
 * into a dense polynomial in e1 -- by apoly_to_upoly() when e1 is
 * accepted by extract_literal(), by apoly_to_upoly_2() otherwise --
 * which is then evaluated at e2 with upoly_eval().
 */
s_mnode* apoly_subs (apoly_mnode* P, apoly_mnode* e1, apoly_mnode* e2)
{
	std_mnode *dense;
	s_mnode *var, *value;

	if (e1->hdr.type != ST_APOLY || e2->hdr.type != ST_APOLY)
		return mnode_error(SE_TCONFL, "apoly_subs");
	if (e1->length != 1)
		return mnode_error(SE_OODOMAIN, "apoly_subs");

	/* A plain literal takes the simpler decomposition */
	var = extract_literal(e1);
	dense = var ? apoly_to_upoly(P, var) : apoly_to_upoly_2(P, e1);

	if (dense == NULL)
		return mnode_error(SE_TOO_SPARSE, "apoly_subs");
	if (dense == (std_mnode*)(-1))
		return mnode_error(SE_OODOMAIN, "apoly_subs");

	value = upoly_eval(dense, e2);
	unlink_mnode((mn_ptr)dense);
	return value;
}

/*
 * Fold mnode_gcd() over `cst' and every coefficient of P, then promote
 * the result to the type of P.
 */
static s_mnode* apoly_gcd_cst (apoly_mnode *P, s_mnode *cst)
{
	const int step = MONOFFSET(P->nlit);
	monomial *term = MONO1(P);
	s_mnode *acc = copy_mnode(cst);
	s_mnode *next;
	int k;

	for (k = P->length; k > 0; k--, term += step) {
		next = mnode_gcd(acc, term->coeff);
		unlink_mnode(acc);
		acc = next;
	}
	next = mnode_promote(acc, (mn_ptr)P);
	unlink_mnode(acc);
	return next;
}

/*
 * GCD of two polynomials.
 *
 * An empty operand yields the other one; an operand without literals is
 * handled by apoly_gcd_cst().  Otherwise both operands are rewritten as
 * dense polynomials in the first literal of P1, mnode_gcd() is applied
 * to those, and the result is evaluated back at that literal.
 * SE_TOO_SPARSE is reported when a dense rewrite is refused.
 */
static s_mnode* apoly_gcd (apoly_mnode *P1, apoly_mnode *P2)
{
	s_mnode *var, *var_poly, *dense1, *dense2, *dense_gcd, *result;

	if (P1->length == 0)
		return copy_mnode((mn_ptr)P2);
	if (P2->length == 0)
		return copy_mnode((mn_ptr)P1);

	if (P1->nlit == 0) {
		assert(P1->length == 1);
		return apoly_gcd_cst(P2, MONO1(P1)->coeff);
	}
	if (P2->nlit == 0) {
		assert(P2->length == 1);
		return apoly_gcd_cst(P1, MONO1(P2)->coeff);
	}

	/* Pick a literal. Any literal. We'll optimize later. */
	var = P1->lit[0];
	dense1 = (mn_ptr) apoly_to_upoly(P1, var);
	if (dense1 == NULL)
		return mnode_error(SE_TOO_SPARSE, "apoly_gcd");
	dense2 = (mn_ptr) apoly_to_upoly(P2, var);
	if (dense2 == NULL) {
		unlink_mnode(dense1);
		return mnode_error(SE_TOO_SPARSE, "apoly_gcd");
	}

	dense_gcd = mnode_gcd(dense1, dense2);
	unlink_mnode(dense1);
	unlink_mnode(dense2);
	/* An ST_VOID result is handed back to the caller untouched */
	if (dense_gcd->type == ST_VOID)
		return dense_gcd;

	var_poly = mnode_promote(var, (mn_ptr)P1);
	result = upoly_eval((std_mnode*)dense_gcd, (apoly_mnode*)var_poly);
	unlink_mnode(dense_gcd);
	unlink_mnode(var_poly);
	return result;
}

/*
 * Apply upoly_sylvester() to P1 and P2 seen as dense polynomials in one
 * literal.
 *
 * alit is either an ST_LITERAL node or an Apoly accepted by
 * extract_literal(); otherwise an error node is returned (SE_OODOMAIN
 * for an Apoly that is not a literal, SE_TCONFL for any other type).
 * SE_TOO_SPARSE is reported when a dense rewrite is refused.
 */
s_mnode* apoly_sylvester (apoly_mnode* P1, apoly_mnode* P2, apoly_mnode *alit)
{
	extern s_mnode* upoly_sylvester(std_mnode*,std_mnode*);
	std_mnode *dense1, *dense2;
	s_mnode *var, *answer;

	if (alit->hdr.type == ST_LITERAL) {
		var = (mn_ptr) alit;
	} else if (alit->hdr.type == ST_APOLY) {
		var = extract_literal(alit);
		if (var == NULL)
			return mnode_error(SE_OODOMAIN, "apoly_sylvester");
	} else {
		return mnode_error(SE_TCONFL, "apoly_sylvester");
	}

	dense1 = apoly_to_upoly(P1, var);
	if (dense1 == NULL)
		return mnode_error(SE_TOO_SPARSE, "apoly_to_upoly");
	dense2 = apoly_to_upoly(P2, var);
	if (dense2 == NULL) {
		unlink_mnode((mn_ptr)dense1);
		return mnode_error(SE_TOO_SPARSE, "apoly_to_upoly");
	}

	answer = upoly_sylvester(dense1, dense2);
	unlink_mnode((mn_ptr)dense1);
	unlink_mnode((mn_ptr)dense2);
	return answer;
}
