/*
 * PCPARC.C - parallel communications routines for PPC for distributed
 *          - or multitasked configuration
 *
 * Source Version: 2.0
 * Software Release #92-0043
 *
 */

#include "cpyright.h"

#include "ppc.h"

#ifdef HAVE_MPI

#include <mpi.h>

static MPI_Request
 *_PC_reqs = NULL;

static int
 nrq = 0,
 nrqx = 0;

#endif

static char
 server[MAXLINE];

static int
 server_port = 0,
 PC_n_nodes = 0;

static PROCESS
 *_PC_server_link = NULL;

#ifdef HAVE_MPI
# define HAVE_DPE
#endif

/*--------------------------------------------------------------------------*/

#ifdef HAVE_DPE

/*--------------------------------------------------------------------------*/

/* PC_PUSH_PENDING - push an I/O transaction onto the pending list */

static void PC_push_pending(pp, op, bf, type, ni, vr, req)
   PROCESS *pp;
   int op;
   char *bf, *type;
   size_t ni;
   byte *vr;
   byte *req;
   {PC_pending_msg *pm;

    pm = FMAKE(PC_pending_msg, "PC_PUSH_PENDING:pm");

    pm->bf     = bf;
    pm->type   = SC_strsavef(type, "PC_PUSH_PENDING:type");
    pm->nitems = ni;
    pm->vr     = vr;
    pm->oper   = op;
    pm->req    = pp->n_pending++;
    pm->nxt    = pp->pending;

    pp->pending = pm;

#ifdef HAVE_MPI

    {MPI_Request requ;

     requ = *(MPI_Request *) req;

     SC_REMEMBER(MPI_Request, requ, _PC_reqs, nrq, nrqx, 10);};

#endif

    return;}

/*--------------------------------------------------------------------------*/

#endif

/*--------------------------------------------------------------------------*/

/* PC_POP_PENDING - pop a completed pending message */

static void PC_pop_pending(pp, po, pbf, pty, pni, pvr)
   PROCESS *pp;
   int *po;
   char **pbf, **pty;
   size_t *pni;
   byte **pvr;
   {PC_pending_msg *pm;

    pm = pp->pending;

    if (pm != NULL)
       {pp->pending = pm->nxt;
	pp->n_pending--;

	*po  = pm->oper;
	*pbf = pm->bf;
	*pty = pm->type;
	*pni = pm->nitems;
	*pvr = pm->vr;

	SFREE(pm);};

#ifdef HAVE_MPI

    _PC_reqs[--nrq] = 0;

#endif

    return;}

/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/

/* PC_OPEN_GROUP - open a copy of the named executable on each available
 *               - node
 */

int PC_open_group(argv, pn)
   char **argv;
   int *pn;
   {int i, argc, offs;
    char **args, s[MAXLINE], *t;
    PROCESS *pp;

#ifdef HAVE_PROCESS_CONTROL

    PC_n_nodes = 0;

    offs = 3;
    argc = 0;
    while (argv[argc++] != NULL);

    args = FMAKE_N(char *, argc + offs, "PCPARC.C:args");

    args[0] = SC_strsavef("pcexec", "char*:PC_OPEN_GROUP:args0");
    args[1] = SC_strsavef("-r", "char*:PC_OPEN_GROUP:args1");
    args[2] = SC_strsavef("1", "char*:PC_OPEN_GROUP:args2");

    for (i = 0; i < argc; i++)
        args[i + offs] = argv[i];

    pp = PC_open(args, NULL, "rb+");

    PC_block(pp);
    PC_gets(s, MAXLINE, pp);
    strcpy(server, SC_strtok(s, ",", t));
    server_port = SC_stoi(SC_strtok(NULL, ",\n", t));
    PC_n_nodes  = SC_stoi(SC_strtok(NULL, ",\n", t));
    _PC_debug   = SC_stoi(SC_strtok(NULL, ",\n", t));

    for (i = 0; i < offs; i++)
        SFREE(args[i]);
    SFREE(args);

    if (pn != NULL)
       *pn = PC_n_nodes;

    _PC_server_link = pp;

#endif

    return(TRUE);}

/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/

/* PC_OPEN_MEMBER - open a copy of the specified executable on this node */

PROCESS *PC_open_member(argv, pnn)
   char **argv;
   int *pnn;
   {PROCESS *pp;
    char *tok, t[MAXLINE], srvr[MAXLINE], *s;
    int port, argc;

#ifdef HAVE_PROCESS_CONTROL

/* if the server is the parent of this process, argv will have the
 * server name and port number just after the first NULL item in argv
 */
    srvr[0] = '\0';
    port    = -1;
    for (argc = 0; argv[argc] != NULL; argc++);

    strncpy(t, argv[--argc], MAXLINE-1);
    t[MAXLINE-1] = '\0';
    tok = SC_strtok(t, ":", s);
    if (strcmp(tok, "HOST") == 0)
       {strcpy(srvr, SC_strtok(NULL, ",\n", s));
	port = SC_stoi(SC_strtok(NULL, ",\n", s));
        argv[argc] = NULL;};

    pp = PC_mk_process(argv, "rb+", PC_CHILD);

# ifdef HAVE_MPI

/* initialize MPI */
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &SC_comm_rank);
    MPI_Comm_size(MPI_COMM_WORLD, &SC_comm_size);

# else

    if (pnn != NULL)
       {if ((srvr[0] == '\0') || (port < 0))
	   {PC_open_group(argv, pnn);

	    strcpy(srvr, server);
	    port = server_port;

	    pp->in  = _PC_server_link->in;
	    pp->out = _PC_server_link->out;}

       else
	  {pp->in  = 0;
	   pp->out = 1;};

	PC_block(pp);
	PC_gets(t, MAXLINE, pp);

	sscanf(t, "%d,%d,%d\n", &pp->acpu, pnn, &_PC_debug);

	PC_printf(pp, "%s,%d,%d\n", srvr, port, getpid());
	pp->data = PC_init_client(srvr, port);

	SC_comm_size = *pnn;};

# endif

/* conditional diagnostic messages */
    if (_PC_debug)
       {sprintf(t, "PC_clnt_log.%d", (int) getpid());
	_PC_diag = fopen(t, "w");
	fprintf(_PC_diag, "\n\n   Node #%d at %s:%d.%d\n",
		pp->acpu, srvr, port, pp->data);
	fflush(_PC_diag);};

#else
	pp = NULL;
#endif

    return(pp);}

/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/

/* PC_CLOSE_MEMBER - close the member process */

void PC_close_member(pp)
   PROCESS *pp;
   {

#ifdef HAVE_PROCESS_CONTROL

# ifdef HAVE_MPI

    MPI_Finalize();
    PC_close(pp);

# else

    PC_close(pp);

# endif

/* conditional diagnostic messages */
    if (_PC_debug)
       {fclose(_PC_diag);};

#endif

    return;}

/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/

/* _PC_PUT_DATA - put the data out and return the confirmation info */

static int _PC_put_data(pp, bf, type, ni, nb, dn, pni, types, pi)
   PROCESS *pp;
   char *bf, *type;
   size_t ni;
   int nb, dn;
   long *pni;
   char *types;
   int *pi;
   {char *pbf, reply[MAXLINE];
    int nbo, data;

    PC_printf(pp, "%c,%s,%ld,%d\n", PC_FWRITE, type, ni, dn);

    data = pp->data;
    pbf  = bf;
    while (nb > 0)
       {PC_buffer_data_in(pp);
	nbo = write(data, pbf, nb);
	if (nbo < 0)
	   continue;
	else if (_PC_debug)
	   {fprintf(_PC_diag, ".%d", nbo);
	    fflush(_PC_diag);};
	nb  -= nbo;
	pbf += nbo;};

    PC_block(pp);
    PC_gets(reply, MAXLINE, pp);
    sscanf(reply, "%ld,%s,%d\n", pni, types, pi);

    return(TRUE);}

/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/

/* _PC_EXTRACT_FILTER_INFO - extract the message description information
 *                         - from the message filter array
 */

static void _PC_extract_filter_info(filt, pti, pit, phn, pdn, pdi,
				    pbs, pbz, pnn, pnl, pnp, ppl)
   int *filt, *pti, *pit, *phn, *pdn, *pdi;
   int *pbs, *pbz, *pnn, **pnl, *pnp, **ppl;
   {int type_index, ityp, host_node, default_node, default_id;
    int block_state, buffer_size, more, nn, np;
    int *nl, *pl, *p;
			    
    type_index   = 0;
    default_node = -1;
    default_id   = -1;
    host_node    = -1;
    block_state  = TRUE;
    buffer_size  = 0;

    ityp = 0;
    nl   = NULL;
    pl   = NULL;
    if (filt != NULL)
       {p = filt;
	more = TRUE;
	while (more)
	   {switch (*p++)
	       {case PC_MATCH_TYPE :
		     ityp |= *p++;
		     ityp |= ((type_index << 16) & PC_TYPE_MASK);
		     break;

	        case PC_MATCH_TAG :
		     ityp |= (*p++ & PC_TAG_MASK);
		     break;

	        case PC_MATCH_NODE :
                     nl = p;
                     nn = *p++;
                     p += nn;
		     break;

	        case PC_MATCH_PID :
		     pl = p;
                     np = *p++;
                     p += np;
		     break;

	        case PC_BLOCK_STATE :
                     block_state = *p++;
                     break;

	        case PC_BUFFER_SIZE :
                     buffer_size = *p++;
                     break;

	        default :
		     more = FALSE;
		     break;};};};

    *pti = type_index;
    *pit = ityp;
    *pbs = block_state;
    *pbz = buffer_size;
    *pnn = nn;
    *pnl = nl;
    *pnp = np;
    *ppl = pl;

    if (phn != NULL)
       *phn = host_node;

    if (phn != NULL)
       *phn = default_node;

    if (pdi != NULL)
       *pdi = default_id;

    return;}

/*--------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/

/* PC_SIZE_MESSAGE - return the number of items of the specified type
 *                 - in a message 
 *                 -   SP   - destination processor index
 *                 -   TYPE - type of items
 *                 -   TAG  - message tag
 */

long PC_size_message(sp, type, tag)
   int sp;
   char *type;
   int tag;
   {int ni;

#ifdef HAVE_MPI

    MPI_Status stat;

    MPI_Probe(sp, tag, MPI_COMM_WORLD, &stat);
    MPI_Get_count(&stat, MPI_CHAR, &ni);

#else

    ni = 0;

#endif

    return((long) ni);}

/*---------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/

/* PC_GLMN_MESSAGE - return the global minimum of some quantity */

REAL PC_glmn_message(vi)
   double vi;
   {REAL vo;

#ifdef HAVE_MPI

    MPI_Allreduce(&vi, &vo, 1, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD);

#else

   {int i, ip, np;
    REAL vt;
    static int sp[] = {PC_MATCH_NODE, -1,
		       PC_MATCH_TAG, 0,
		       0};

    ip = PC_get_processor_number();
    np = PC_get_number_processors();

    sp[1] = -1;
    PC_out(&vi, SC_REAL_S, 1, NULL, sp);

    vo = vi;

    for (i = 0; i < np; i++)
        {if (i == ip)
	    continue;

	 sp[1] = i;
	 PC_in(&vt, SC_REAL_S, 1, NULL, sp);

	 vo = min(vo, vt);};};

#endif

    return(vo);}

/*---------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/

/* PC_OUT - write data out to the filtered list of nodes
 *        - this does a message passing system's SEND command
 *        - or a LINDA-like PUT
 */

long PC_out(vr, type, ni, pp, filt)
   byte *vr;
   char *type;
   size_t ni;
   PROCESS *pp;
   int *filt;
   {int i, ityp, dn, nb, nbr, is;
    int block, buf_siz;
    int type_index, default_node, default_id, host_node;
    int *nl, *pnl, *pl, nn, np;
    long nis, nib;
    char reply[MAXLINE], types[MAXLINE], *bf;
    PDBfile *vif, *tf;

    PC_ERR_TRAP(-1L);

    vif = pp->vif;

    _PC_extract_filter_info(filt,
			    &type_index, &ityp,
			    &host_node, &default_node, &default_id,
			    &block, &buf_siz,
			    &nn, &nl, &np, &pl);
			    
    if (nl == NULL)
       {nn = 1;
	pnl = &default_node;}
    else
       {pnl = nl;
	nn  = *pnl++;};

/* conditional diagnostic messages */
    if (_PC_debug)
       {fprintf(_PC_diag, "   Write");
	fprintf(_PC_diag, " Attempt(%d,%s,%d)",	(int) ni, type, pp->acpu);
	fflush(_PC_diag);};

/* get the buffer size */
    nbr = PD_sizeof(vif, type, ni, vr);
    nb  = (buf_siz > 0) ? buf_siz : nbr;
    if (nb < nbr)
       PC_error("SPECIFIED BUFFER SIZE TOO SMALL - PC_OUT");

/* allocate the buffer */
    bf = FMAKE_N(char, nb, "PC_OUT:bf");

/* convert the data into a message buffer */
    tf  = PN_open(vif, bf);
    nis = PN_write(tf, type, ni, vr) ? ni : 0;
    PN_close(tf);

/* send the message now */

#ifdef HAVE_MPI

   {MPI_Request requ;

    for (i = 0; i < nn; i++)
        {dn = *pnl++;

	 if (dn == -1)
	    {dn = PC_get_processor_number();
	     MPI_Bcast(bf, nb, MPI_CHAR, dn, MPI_COMM_WORLD);
	     break;}

	 else if (block)
	    {MPI_Send(bf, nb, MPI_CHAR, dn, ityp,
		      MPI_COMM_WORLD);
	     SFREE(bf);}

	 else
	    {MPI_Isend(bf, nb, MPI_CHAR, dn, ityp,
		       MPI_COMM_WORLD, &requ);

	     PC_push_pending(pp, PC_WRITE_MSG,
			     bf, type, ni, vr, &requ);};};};

#else

    PC_unblock(pp);
    while (PC_gets(reply, MAXLINE, pp) != NULL);
    PC_block(pp);

    if (nn == -1)
       {for (dn = 0; dn < PC_n_nodes; dn++)
            _PC_put_data(pp, bf, type, ni, nb, dn, &nib, types, &is);}

    else
       {for (i = 0; i < nn; i++)
            {dn = *pnl++;
	     if (dn == PC_GROUP_LEADER)
  	        dn = host_node;

             _PC_put_data(pp, bf, type, ni, nb, dn, &nib, types, &is);};};

#endif

/* conditional diagnostic messages */
    if (_PC_debug)
       {fprintf(_PC_diag, " Sent(%ld,%s,%d)\n", nis, types, dn);
	fflush(_PC_diag);};

    return(nis);}

/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/

/* PC_IN - read data in from the filtered list of nodes
 *       - this does a message passing system's RECV command
 *       - or a LINDA-like GET
 *       - return the number of items successfully read in
 */

long PC_in(vr, type, ni, pp, filt)
   byte *vr;
   char *type;
   size_t ni;
   PROCESS *pp;
   int *filt;
   {long nir, nb, nbt, nbr;
    int ip, nis, ityp, nn, np, *nl, *pl;
    int type_index, block, buf_siz, bs;
    char reply[MAXLINE], types[MAXLINE], *bf, *pbf;
    PDBfile *vif, *tf;

    vif = pp->vif;
    nir = ni;

    _PC_extract_filter_info(filt,
			    &type_index, &ityp,
			    NULL, NULL, NULL,
			    &block, &buf_siz,
			    &nn, &nl, &np, &pl);

    if (nl == NULL)
       ip = 0;
    else
       ip  = nl[1];

/* conditional diagnostic messages */
    if (_PC_debug)
       {fprintf(_PC_diag, "   Read");
	fflush(_PC_diag);};

/* get the buffer size and allocate it */
    if (buf_siz > 0)
       nb = buf_siz;
    else
       nb = PC_size_message(ip, type, ityp);

    bf = FMAKE_N(char, nb, "PC_IN:bf");

#ifdef HAVE_MPI

    if (block)
       {MPI_Status stat;

	MPI_Recv(bf, nb, MPI_CHAR, ip, ityp,
		 MPI_COMM_WORLD, &stat);}

    else
       {MPI_Request requ;

	MPI_Irecv(bf, nb, MPI_CHAR, ip, ityp,
		  MPI_COMM_WORLD, &requ);

	PC_push_pending(pp, PC_READ_MSG,
			bf, type, ni, vr, &requ);};

#else

    strcpy(types, type);

    if (block)
       PC_block(pp);
    else
       PC_unblock(pp);

    pbf = bf;
    nbr = nb;
    for (nbr = nb, bs = FALSE; nbr > 0; pbf += nbt, bs = block)
        {nbt = PC_buffer_data_out(pp, pbf, nbr, bs);
	 if (nbt > 0)
	    {if (_PC_debug)
	        {fprintf(_PC_diag, " Recv(%ld,%s)", nbt, types);
		 fflush(_PC_diag);};};

         nbr -= nbt;
         if (nbr > 0)
	    {PC_printf(pp, "%c,%s,%ld,%d\n", PC_FREAD,
		       SC_CHAR_S, nbr, block);

	     PC_gets(reply, MAXLINE, pp);
	     sscanf(reply, "%d,%s\n", &nis, types);

	     if (_PC_debug && block && (nis > 0))
	        {fprintf(_PC_diag, " Expect(%d,%s)", nis, types);
		 fflush(_PC_diag);};};

         if (!block)
            break;};

#endif

/* convert the message to the requested output data */
    if (block)
       {tf  = PN_open(vif, bf);
	nir = PN_read(tf, type, ni, vr);
	PN_close(tf);

	SFREE(bf);};

/* conditional diagnostic messages */
    if (_PC_debug)
       {if (nir > 0)
	   fprintf(_PC_diag, "\n");
	else
           fprintf(_PC_diag, " Nothing\n");
	fflush(_PC_diag);};

    return(nir);}

/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/

/* PC_WAIT - wait for all pending communications to finish
 *         - complete the work, free the buffers, and
 *         - return the number of pendings honored
 */

long PC_wait(pp)
   PROCESS *pp;
   {int i, np, oper;
    char *type, *bf;
    byte *vr;
    size_t ni;
    PDBfile *vif, *tf;

    vif = pp->vif;

    np = pp->n_pending;

/* conditional diagnostic messages */
    if (_PC_debug)
       {fprintf(_PC_diag, "   Wait");
	fflush(_PC_diag);};

#ifdef HAVE_MPI

   {MPI_Status *stats;

    stats = FMAKE_N(MPI_Status, np, "PC_WAIT:stats");

    MPI_Waitall(np, _PC_reqs, stats);

    SFREE(stats);}

#endif

/* convert the message to the requested output data */
    for (i = 0; i < np; i++)
        {PC_pop_pending(pp, &oper, &bf, &type, &ni, &vr);

	 if (oper == PC_READ_MSG)
	    {tf = PN_open(vif, bf);
	     PN_read(tf, type, ni, vr);
	     PN_close(tf);};

	 SFREE(type);
	 SFREE(bf);};

/* conditional diagnostic messages */
    if (_PC_debug)
       fprintf(_PC_diag, "\n");

    return(np);}

/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/

/* PC_SYNC_EXECUTION - synchronize the execution of the tasks */

void PC_sync_execution()
   {

    return;}

/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/

/* PC_GET_NUMBER_PROCESSORS - return the number of processors in the 
 *                       - current simulation
 */

int PC_get_number_processors()
   {int n;

    n = SC_comm_size;

    return(n);}

/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/

/* PC_GET_PROCESSOR_NUMBER - return the index of the current processor */

int PC_get_processor_number()
   {int n;

    n = SC_comm_rank;

    return(n);}

/*--------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/
