/* 
** Copyright (C) 1994, 1995 Enterprise Integration Technologies Corp.
**         VeriFone Inc./Hewlett-Packard. All Rights Reserved.
** Kevin Hughes, kev@kevcom.com 3/11/94
** Kent Landfield, kent@landfield.com 4/6/97
** 
** This program and library is free software; you can redistribute it and/or 
** modify it under the terms of the GNU (Library) General Public License 
** as published by the Free Software Foundation; either version 2 
** of the License, or any later version. 
** 
** This program is distributed in the hope that it will be useful, 
** but WITHOUT ANY WARRANTY; without even the implied warranty of 
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 
** GNU (Library) General Public License for more details. 
** 
** You should have received a copy of the GNU (Library) General Public License
** along with this program; if not, write to the Free Software 
** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA 
*/

/*
** All the nasty string functions live here.
*/

#include "hypermail.h"

#include <ctype.h>
#include <stdio.h>
#include <string.h>

/*
** Returns a freshly allocated copy of the string s, terminator included.
** The memory is obtained from emalloc() and is handed over to the caller.
*/

char *strsav(char *s)
{
    char *copy = (char *) emalloc(strlen(s) + 1);

    return strcpy(copy, s);
}

/*
** strreplace() - replaces the heap string 'present' with a copy of 'new'.
**
** present - pointer previously returned by malloc()/realloc(), or NULL.
** new     - the replacement text, or NULL to just release 'present'.
**
** Returns the (possibly moved) buffer, now holding a copy of 'new'.
** Returns NULL when 'new' is NULL or when the buffer could not be resized;
** in both of those cases 'present' has been freed and must not be used
** again by the caller.
*/

char *strreplace(char *present, char *new)
{
    char *retval;
    size_t len;

    if (new == NULL) {
        free(present);
        return NULL;
    }

    /* replacing a string with itself: nothing to resize or copy */
    if (new == present)
        return present;

    len = strlen(new) + 1;
    retval = realloc(present, len);
    if (retval == NULL) {
        /* realloc() keeps the old block alive on failure; release it so a
        ** NULL return always means "the old string is gone" */
        free(present);
        return NULL;
    }

    /* the buffer was sized from 'new', so the copy always fits */
    memcpy(retval, new, len);
    return retval;
}

/*
** strcpymax() - bounded string copy. At most n-1 bytes of src are copied
** into dest and the result is always zero terminated, so n should be the
** full size of the destination buffer. Note that the terminator is written
** even when n is 0.
*/

void strcpymax(char *dest, char *src, int n)
{
  if (n != 0) {
    int room = n - 1; /* one byte stays reserved for the terminator */

    while (room > 0 && *src != '\0') {
      *dest++ = *src++;
      room--;
    }
  }
  *dest = '\0';
}

/*
** strcasestr() - strstr() that ignores case. Returns a pointer to the
** first position in haystack at which needle occurs, or NULL when there
** is no such position. An empty needle matches at the start of haystack.
*/

char *strcasestr(char *haystack, char *needle)
{
  int needle_len = strlen(needle);
  int hay_len = strlen(haystack);
  int last_start = hay_len - needle_len; /* last offset where needle fits */
  int offset = 0;

  while (offset <= last_start) {
    if (strncasecmp(haystack + offset, needle, needle_len) == 0)
      return haystack + offset;
    offset++;
  }
  return NULL;
}


/*
** Strips the timezone information from long date strings, so more correct
** comparisons can be made between dates when looking for article replies.
**
** Everything that follows the two characters after the last ':' is cut
** off. A date equal to NODATE, or one without any ':', is returned as is
** (the caller's own pointer). Otherwise the result lives in a static
** buffer of DATESTRLEN bytes that is overwritten by the next call; dates
** longer than that are truncated to fit.
*/

char *stripzone(char *date)
{
    static char tmpdate[DATESTRLEN];
    char *colon;
    size_t keep;

    if (!strcmp(date, NODATE))
        return (date);

    if (!strchr(date, ':'))
        return (date);

    /* bounded copy: the date comes from mail headers of arbitrary length */
    snprintf(tmpdate, sizeof(tmpdate), "%s", date);

    colon = strrchr(tmpdate, ':');
    if (colon == NULL)        /* the only ':' was lost to truncation */
        return (tmpdate);

    /* keep the ':' plus the two characters after it, but never step
    ** beyond the terminator when fewer than two characters follow */
    keep = (size_t) (colon - tmpdate) + 3;
    if (keep < strlen(tmpdate))
        tmpdate[keep] = '\0';

    return (tmpdate);
}

/*
** Counts the occurrences of the character c in the string s.
** The terminating zero is never counted.
*/

int numstrchr(char *s, char c)
{
    int count = 0;
    char *p = s;

    while (*p != '\0') {
        if (*p == c)
            count++;
        p++;
    }
    return count;
}

/*
** Assure a string is nulled out before using it: the first len bytes of
** str are set to zero. Nothing is written when len is zero or negative.
*/

void zero_out(char *str, int len)
{
     char *p = str;
     int left = len;

     while (left > 0) {
          *p++ = '\0';
          left--;
     }
}


/*
** Grabs whatever sits between the first and the last double quote of a
** line. This is for grabbing the values in comments.
**
** The result is held in a static buffer (at most MAXLINE-1 characters)
** that is reused by every call. A line without any double quote gives an
** empty string; a line with a single double quote gives everything that
** follows that quote.
*/

char *getvalue(char *line)
{
	static char tmpline[MAXLINE];
	char *first, *last;
	int n = 0;

	tmpline[0] = '\0';
	first = (char *) strchr(line, '\"');
	last = (char *) strrchr(line, '\"');
	if (first == NULL)
		return tmpline;

	/* copy from just after the opening quote up to the closing one */
	first++;
	while (*first != '\0' && first != last && n < MAXLINE - 1) {
		tmpline[n] = *first;
		n++;
		first++;
	}
	tmpline[n] = '\0';

	return tmpline;
}

/*
** Grabs the value for a particular variable in a line - really,
** just about anything after an equals sign.
**
** line  - the configuration line to inspect.
** var   - variable name, located case-insensitively anywhere in line.
** value - receives a copy of the value; the value can be up to MAXLINE
**         bytes including the terminator, so the buffer must be at least
**         that large. May be NULL when only the return value is wanted.
**
** Returns a pointer to a static buffer holding the value, or NULL when
** 'var' is not in the line, is not followed by '=' (only whitespace may
** sit in between), or has nothing after the '='. The static buffer is
** overwritten by the next call.
*/

char *getconfvalue(char *line, char *var, char *value)
{
    int i;
    char *c;
    static char tmpvalue[MAXLINE];

    if ((c = strcasestr(line, var)) == NULL)
        return NULL;

    /* Must be able to correctly handle vars which are
    ** substrings of other vars (like hm_dir and hm_dirmode).
    ** Make sure that there is only whitespace between the
    ** end of 'var' and the equals sign.
    */

    c += strlen(var);

    /* the casts keep isspace() defined for bytes above 127 */
    while (isspace((unsigned char) *c))
        c++;
    if (*c != '=')
        return NULL;
    c++;

    while (isspace((unsigned char) *c) || *c == '\"')
        c++;

    if (*c == '\0')
        return NULL;

    /* Embedded '"' characters are allowed: an opening quote, if there was
    ** one, has already been skipped, so grab the rest of the line and then
    ** strip trailing whitespace and closing quotes from the end.
    */

    /* stop one short of the buffer size so the terminator always fits */
    for (i = 0; *c != '\0' && *c != '\n' && i < MAXLINE - 1; c++)
        tmpvalue[i++] = *c;
    tmpvalue[i] = '\0';

    while (i > 0 && (isspace((unsigned char) tmpvalue[i - 1]) ||
                     tmpvalue[i - 1] == '"'))
        tmpvalue[--i] = '\0';

    if (value != NULL)
        strcpy(value, tmpvalue);

    return tmpvalue;
}

/*
** Get rid of Re:'s in a subject and strips spaces at the end
** of subjects. Make the subject index much less cluttered.
**
** Everything up to and including the last "Re:" (any case, wherever it
** appears in the subject) is skipped, then the same is done for "Re[2]:",
** each time together with the whitespace that follows. The result is
** returned in a static buffer that the next call overwrites; it holds at
** most SUBJSTRLEN-2 characters.
*/

char *unre(char *subject)
{
    int i;
    char *c, *s;
    static char tmpsubject[SUBJSTRLEN];

    s = subject;

    /* Get rid of the normal Re:s */

    while ((c = strcasestr(s, "Re:")) != NULL) {
        c += 3;

        while (isspace((unsigned char) *c))
            c++;

        s = c;
    }

    /* Get rid of the weird Re:s */

    while ((c = strcasestr(s, "Re[2]:")) != NULL) {
        c += 6;

        while (isspace((unsigned char) *c))
            c++;

        s = c;
    }

    /* s is now the first position after the last re: */

    strcpymax(tmpsubject, s, SUBJSTRLEN-1);

    /* Trim trailing whitespace (newlines included). Working with the
    ** length instead of the last index keeps an empty or all-blank
    ** subject from reading in front of the buffer. */
    i = (int) strlen(tmpsubject);
    while (i > 0 && isspace((unsigned char) tmpsubject[i - 1]))
        tmpsubject[--i] = '\0';

    return tmpsubject;
}

/*
** Only gets rid of one re: in a subject, so messages the subject is a reply to
** can be guessed.
**
** When the subject contains "Re: " or "Re[2]: " (any case), the text that
** starts two characters after the first ':' of the subject - skipping one
** further space if present - is copied into inreply, up to a newline or
** MAXLINE-1 characters. Otherwise inreply is left untouched.
*/

void oneunre(char *inreply, char *subject)
{
        char *src;
        int n;

        if (!strcasestr(subject, "Re: ") && !strcasestr(subject, "Re[2]: "))
            return;

        src = (char *) strchr(subject, ':') + 2;
        if (*src == ' ')
            src++;

        for (n = 0; *src != '\0' && *src != '\n' && n < MAXLINE - 1; n++)
            inreply[n] = *src++;
        inreply[n] = '\0';
}

/*
** Removes the newline from a line.
**
** Every '\n' met while scanning is overwritten with a zero byte in the
** caller's own buffer (so line must be writable), and the scanned bytes
** are copied into a static buffer of MAXLINE bytes. As a C string the
** returned buffer therefore ends at the first newline. The buffer is
** reused by each call.
*/

char *rmcr(char *line)
{
	static char tmpline[MAXLINE];
	char *p;
	int n = 0;

	for (p = line; *p != '\0' && n < MAXLINE - 1; p++) {
		if (*p == '\n')
			*p = '\0';
		tmpline[n++] = *p;
	}
	tmpline[n] = '\0';

	return tmpline;
}

/*
** Is a line in an article body part of a quoted passage?
**
** Returns 1 when the line starts with '>' or when its first character
** after leading blanks and tabs is '>'; returns 0 otherwise, including
** for a NULL pointer. With RECOGNIZE_SUPERCITE_QUOTES defined, a leading
** ':' that does not look like a smiley and citations of the form
** "  Jane>" count as quotes too.
*/

int isquote(char *line)
{
    char *p;

    if (line == NULL)
        return 0;

    if (line[0] == '>')
        return 1;

#ifdef RECOGNIZE_SUPERCITE_QUOTES
    /*
    ** A ":" in the first column marks quoted text, unless the
    ** character after it makes it look like a smiley.
    */
    if (line[0] == ':') {
        switch (line[1]) {
        case '\0':
        case '-':
        case ']':
        case '>':
        case '(':
        case ')':
        case '^':
            break;
        default:
            return 1;
        }
    }
#endif

    /* skip the leading blanks and tabs */
    for (p = line; *p == ' ' || *p == '\t'; p++)
        ;

    if (*p == '\0')
        return 0;

#ifdef RECOGNIZE_SUPERCITE_QUOTES
    /*
    ** step over a name made of letters, as in "  Jane>"
    */
    while ((*p >= 'A' && *p <= 'Z') || (*p >= 'a' && *p <= 'z'))
        p++;
#endif

    return (*p == '>') ? 1 : 0;
}

/*
** Converts <, >, & and " to &lt;, &gt;, &amp; and &quot;.
**
** The result is returned in a static buffer of MAXLINE*4 bytes which is
** overwritten by the next call. Conversion stops as soon as the next
** character (or its complete entity) would no longer fit, so the result
** is always zero terminated and never ends in a partial entity.
*/

char *convchars(char *line)
{
	static char tmpline[MAXLINE*4];
	const size_t limit = sizeof(tmpline) - 1; /* room left for the NUL */
	const char *entity;
	size_t i, j;

	for (i = j = 0; line[i] != '\0'; i++) {
		switch (line[i]) {
		case '<':
			entity = "&lt;";
			break;
		case '>':
			entity = "&gt;";
			break;
		case '&':
			entity = "&amp;";
			break;
		case '"':
			entity = "&quot;";
			break;
		default:
			entity = NULL;
			break;
		}

		if (entity == NULL) {
			if (j >= limit)
				break;
			tmpline[j++] = line[i];
		}
		else {
			/* the bound is on the output, which grows faster
			** than the input index */
			if (strlen(entity) > limit - j)
				break;
			while (*entity != '\0')
				tmpline[j++] = *entity++;
		}
	}
	tmpline[j] = '\0';

	return tmpline;
}

/*
** Just the opposite of convchars(): turns &lt;, &gt;, &amp; and &quot;
** back into <, >, & and ".
**
** An '&' that does not start one of those four entities is copied
** through unchanged. The result is returned in a static buffer of MAXLINE
** bytes that is overwritten by the next call; longer input is truncated.
*/

char *unconvchars(char *line)
{
    static const struct {
        const char *text;
        char ch;
    } entities[] = {
        { "&lt;",   '<' },
        { "&gt;",   '>' },
        { "&amp;",  '&' },
        { "&quot;", '"' },
    };
    static char tmpline[MAXLINE];
    const size_t count = sizeof(entities) / sizeof(entities[0]);
    size_t i = 0, j = 0;

    while (line[i] != '\0' && j < sizeof(tmpline) - 1) {
        if (line[i] == '&') {
            size_t k, n = 0;

            for (k = 0; k < count; k++) {
                n = strlen(entities[k].text);
                /* strncmp() stops at the terminator of line, so a
                ** truncated entity at the end is never read past */
                if (strncmp(line + i, entities[k].text, n) == 0)
                    break;
            }
            if (k < count) {
                tmpline[j++] = entities[k].ch;
                i += n;
                continue;
            }
        }
        tmpline[j++] = line[i++];
    }
    tmpline[j] = '\0';

    return tmpline;
}

/*
** Given a string, replaces all instances of "oldpiece" with "newpiece".
**
** The string is processed in one pass from left to right; text inserted
** from newpiece is not searched again, so a newpiece that itself contains
** oldpiece is handled without looping.
**
** Returns 'string' itself when oldpiece is empty or does not occur.
** Otherwise returns a static buffer of MAXLINE bytes that is overwritten
** by the next call. When the result does not fit, it is cut off: either
** in the middle of copied text or just before a replacement that would
** not fit completely.
*/

char *replace(char *string, char *oldpiece, char *newpiece)
{
    static char newstring[MAXLINE];
    char work[MAXLINE];          /* built here first, so the arguments may
                                 ** point into newstring from an earlier call */
    const size_t limit = sizeof(work) - 1;
    size_t oldlen, newlen, chunk;
    size_t out = 0;
    char *src, *hit;

    oldlen = strlen(oldpiece);
    if (oldlen == 0 || strstr(string, oldpiece) == NULL)
        return string;
    newlen = strlen(newpiece);

    src = string;
    for (;;) {
        hit = strstr(src, oldpiece);

        /* copy the text in front of the next match (or the whole rest) */
        chunk = (hit != NULL) ? (size_t) (hit - src) : strlen(src);
        if (chunk > limit - out)
            chunk = limit - out;
        memcpy(work + out, src, chunk);
        out += chunk;

        if (hit == NULL || out == limit)
            break;

        /* never emit only part of the replacement text */
        if (newlen > limit - out)
            break;
        memcpy(work + out, newpiece, newlen);
        out += newlen;

        src = hit + oldlen;
    }
    work[out] = '\0';

    memcpy(newstring, work, out + 1);
    return newstring;
}

/*
** Replaces every occurrence of the character 'old' in 'string' with the
** string 'new'.
**
** The result is returned in a static buffer of MAXLINE bytes that is
** overwritten by the next call, so 'string' must not be the buffer
** returned by an earlier call. When the result does not fit, it is cut
** off; a replacement is only ever written completely or not at all.
*/

char *replacechar(char *string, char old, char *new)
{
    static char newstring[MAXLINE];
    const size_t limit = sizeof(newstring) - 1; /* room left for the NUL */
    const size_t newlen = strlen(new);
    size_t out = 0;
    char *sp;

    for (sp = string; *sp != '\0'; sp++) {
        if (*sp == old) {
            if (newlen > limit - out)
                break;
            memcpy(newstring + out, new, newlen);
            out += newlen;
        }
        else {
            if (out >= limit)
                break;
            newstring[out++] = *sp;
        }
    }
    newstring[out] = '\0';

    return newstring;
}

/*
** Generates the mail command to use from the default mail command,
** the current recipient's email address, the current ID of the
** message, and the current subject.
**
** In mailcommand, "$TO" is replaced with email, "$ID" with id and
** "$SUBJECT" with the subject after convchars(), prefixed with "Re: "
** unless it already starts with "Re:" or "Re[2]:". Afterwards '%', ' '
** and '+' are rewritten as "%25", "%20" and "%2B".
**
** Returns a static buffer that is overwritten by the next call. The
** command is limited to SUBJSTRLEN-1 characters (or MAXLINE-1 if that is
** smaller) and the subject part to SUBJSTRLEN-1 characters.
*/

char *makemailcommand(char *mailcommand,char *email,char *id,char *subject)
{
    static char tmpsubject[SUBJSTRLEN], tmpstring[MAXLINE];
    /* never let the copy limit exceed the real size of tmpstring */
    const int limit = (SUBJSTRLEN < MAXLINE) ? SUBJSTRLEN : MAXLINE;
    int hasre;

    hasre = !strncasecmp(subject, "Re:", 3) ||
            !strncasecmp(subject, "Re[2]:", 6);

    /* convchars() can return far more than SUBJSTRLEN bytes, so the
    ** formatting has to be bounded */
    snprintf(tmpsubject, sizeof(tmpsubject), "%s%s",
             hasre ? "" : "Re: ", convchars(subject));

    strcpymax(tmpstring, replace(mailcommand, "$TO", email), limit);
    strcpymax(tmpstring, replace(tmpstring, "$ID", id), limit);
    strcpymax(tmpstring, replace(tmpstring, "$SUBJECT", tmpsubject), limit);

    /* '%' has to be escaped first, otherwise the '%' of the escapes
    ** added below would be escaped again */
    strcpymax(tmpstring, replacechar(tmpstring, '%', "%25"), limit);
    strcpymax(tmpstring, replacechar(tmpstring, ' ', "%20"), limit);
    strcpymax(tmpstring, replacechar(tmpstring, '+', "%2B"), limit);

    return tmpstring;
}
 


/*
** RFC 1738
** Thus, only alphanumerics, the special characters "$-_.+!*'(),", and
** reserved characters used for their reserved purposes may be used
** unencoded within a URL.
**
**
**        //<user>:<password>@<host>:<port>/<url-path>
** 
** Some or all of the parts "<user>:<password>@", ":<password>",
** ":<port>", and "/<url-path>" may be excluded.  The scheme specific
** data start with a double slash "//" to indicate that it complies with
** the common Internet scheme syntax.
**
*/

/*
** scanf-style descriptions of the characters accepted on either side of
** the '@' of an e-mail address. They stay defined as macros because they
** are visible to the rest of the file; the helpers below implement the
** same sets.
*/
#define VALID_IN_EMAIL_USERNAME   "a-zA-Z0-9_.%-"
#define VALID_IN_EMAIL_DOMAINNAME "a-zA-Z0-9.-"

/* Non-zero when c is an ASCII letter or digit. */
static int pe_is_ascii_alnum(int c)
{
  return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') ||
         (c >= '0' && c <= '9');
}

/* Non-zero when c may appear in the part of an address before the '@'. */
static int pe_is_user_char(int c)
{
  return pe_is_ascii_alnum(c) || c == '_' || c == '.' || c == '%' || c == '-';
}

/* Non-zero when c may appear in the part of an address after the '@'. */
static int pe_is_domain_char(int c)
{
  return pe_is_ascii_alnum(c) || c == '.' || c == '-';
}

/*
** parseemail() - copies input to output, turning every e-mail address
** accepted by valid_root_domain() into an <A HREF="..."> link whose
** target is built by makemailcommand().
**
** input    - text to scan.
** output   - receives the converted text; always zero terminated.
** outsize  - size of output in bytes.
** mid      - message ID handed to makemailcommand().
** msubject - message subject handed to makemailcommand().
**
** When a link no longer fits into output, linking stops and as much of
** the remaining text as fits is copied unchanged. Always returns NULL.
*/

char *parseemail(char *input,    /* string to parse */
                 char *output,   /* output buffer */
                 int outsize,    /* size of output buffer */
                 char *mid,      /* message ID */
                 char *msubject) /* message subject */
{
  char mailbuff[URLSTRLEN];   /* the domain part */
  char mailaddr[MAILSTRLEN];  /* the complete address */
  char tempbuff[MAXLINE];     /* the complete anchor */
  char *ptr;
  char *lastpos;              /* first input byte not yet written out */
  int out = 0;                /* bytes written to output so far */
  size_t rest, room;

  if (output == NULL || outsize <= 0)
      return NULL;
  *output = '\0';
  if (input == NULL)
      return NULL;
  lastpos = input;

  while ((ptr = strchr(input, '@')) != NULL) {
      char *email = ptr;
      size_t domlen = 0;
      size_t prefix;
      int n;

      /* check left side; never look in front of the current scan start */
      while (email > input && pe_is_user_char((unsigned char) email[-1]))
          email--;

      /* check right side, limited to 255 bytes and to what mailbuff holds */
      while (domlen < 255 && domlen < sizeof(mailbuff) - 1 &&
             pe_is_domain_char((unsigned char) ptr[1 + domlen]))
          domlen++;

      /* unless a link gets written below, go on right after this '@' */
      input = ptr + 1;

      if (email == ptr || domlen == 0)
          continue;               /* nothing usable on one of the sides */

      memcpy(mailbuff, ptr + 1, domlen);
      mailbuff[domlen] = '\0';

      n = snprintf(mailaddr, sizeof(mailaddr), "%.*s@%s",
                   (int) (ptr - email), email, mailbuff);
      if (n < 0 || (size_t) n >= sizeof(mailaddr))
          continue;               /* too long to be handled as an address */

      if (!valid_root_domain(mailaddr))
          continue;

      n = snprintf(tempbuff, sizeof(tempbuff), "<A HREF=\"%s\">%s</A>",
                   makemailcommand(mailcommand, mailaddr, mid, msubject),
                   mailaddr);
      if (n < 0 || (size_t) n >= sizeof(tempbuff))
          continue;               /* anchor too long: leave as plain text */

      /* The text in front of the address and the anchor are written
      ** together or not at all, so that no text is ever emitted twice. */
      prefix = (size_t) (email - lastpos);
      if (prefix + (size_t) n >= (size_t) (outsize - out))
          break;

      memcpy(output, lastpos, prefix);
      memcpy(output + prefix, tempbuff, (size_t) n);
      output += prefix + (size_t) n;
      out += (int) (prefix + (size_t) n);
      *output = '\0';

      input = ptr + 1 + domlen;
      lastpos = input;
  }

  /* whatever follows the last link is copied as is, cut to what fits */
  rest = strlen(lastpos);
  room = (size_t) (outsize - out) - 1;
  if (rest > room)
      rest = room;
  memcpy(output, lastpos, rest);
  output[rest] = '\0';

  return NULL;
}

/*
** parseurl() - copies input to output, turning every URL that starts with
** one of the known schemes into an <A HREF="..."> link.
**
** input   - text to scan.
** output  - receives the converted text; always zero terminated.
** outsize - size of output in bytes.
**
** The part of a URL after the scheme ends at the first of the characters
** ] ) > & " , [ \ or at a space, tab, newline or the end of the string,
** and is limited to 255 bytes. A scheme with nothing after it, or a link
** that does not fit, is copied as plain text. Copying stops when output
** is full. Always returns NULL.
*/

char *parseurl(char *input, char *output, int outsize)
{
  static const char *const url[] = {
         "http://", "gopher://", "file://",
         "ftp://", "wais://", "telnet://",
         "nntp://", "prospero://", "news:",
        /* "mailto:",*//* Can't have mailto: as it will be converted twice */
  };
  static const char url_end[] = "] )>&\"\n,[\t\\";
  char tempbuff[MAXLINE];
  int out = 0; /* the size of the current built string, stays below outsize */

  if (output == NULL || outsize <= 0)
      return NULL;
  *output = '\0';
  if (input == NULL)
      return NULL;

  while (*input) {
      size_t plain = 1;  /* input bytes to copy unchanged in this round */
      size_t i;

      for (i = 0; i < sizeof(url)/sizeof(url[0]); i++) {
           size_t schemelen = strlen(url[i]);
           size_t bodylen;
           int n;

           if (strncasecmp(url[i], input, schemelen) != 0)
               continue;

           /* if no link can be made, the scheme is copied as it is */
           plain = schemelen;

           bodylen = strcspn(input + schemelen, url_end);
           if (bodylen > 255)
               bodylen = 255;
           if (bodylen == 0)
               break;      /* not a valid url */

           n = snprintf(tempbuff, sizeof(tempbuff),
                        "<A HREF=\"%s%.*s\">%s%.*s</A>",
                        url[i], (int) bodylen, input + schemelen,
                        url[i], (int) bodylen, input + schemelen);
           if (n < 0 || (size_t) n >= sizeof(tempbuff) || n >= outsize - out)
               break;      /* the link does not fit */

           memcpy(output, tempbuff, (size_t) n); /* append the tag buffer */
           output += n; /* move output pointer */
           out += n;    /* add this size */
           input += schemelen + bodylen;
           plain = 0;
           break;
      }

      if (plain == 0)
          continue;    /* a link was written; look at what follows it */

      /* one byte has to remain for the terminator */
      if (plain >= (size_t) (outsize - out))
          break;
      memcpy(output, input, plain);
      output += plain;
      input += plain;
      out += (int) plain; /* increase size counter */
  }
  *output = '\0';

  return NULL;
}


/*
** Converts URLs, email addresses, and message IDs in a line to links,
** mail commands, and articles respectively.
**
** line        - the line to convert.
** mailid      - Message-Id of the mail, passed on to parseemail().
** mailsubject - Subject of the mail, passed on to parseemail().
**
** In-Reply-To:/References: lines (and indented lines containing '<', '>'
** and '@') are run through convchars(); if hashreplylookup() finds the
** referenced message, the text after the header name is wrapped in a link
** to "NNNN.html". All other lines are run through convchars() and
** parseurl(), and through parseemail() too when use_mailcommand is set.
**
** Returns one of several static buffers, each of which is overwritten by
** later calls.
*/

char *convurls(char *line, char *mailid, char *mailsubject)
{
    int status, subjmatch;
    char inreply[REPYSTRLEN], name[NAMESTRLEN], subject[SUBJSTRLEN];
    static char tmpline2[MAXLINE*12], tmpline3[MAXLINE*12], tmpline4[MAXLINE];

    /* Daniel's note, is this kludge thing actually ever used? */
    if (!strncasecmp(line, "In-Reply-To:", 12) ||
          !strncasecmp(line, "References:", 11) ||
          ((!strncmp(line, "    ", 4) || line[0] == '\t')
          && strchr(line, '<') && strchr(line, '>') && strchr(line, '@'))) {
        /* always leave room for the closing tag and the terminator */
        const size_t limit = sizeof(tmpline3) - sizeof("</A>\n");
        size_t i = 0, j = 0;
        int n;

        strcpymax(inreply, (char *) getreply(line), REPYSTRLEN);
        strcpymax(tmpline4, convchars(line), MAXLINE);

        if (inreply[0] == '\0')
            return tmpline4;

        status = hashreplylookup(inreply, name, subject, &subjmatch);
        if (status == -1)
            return tmpline4;

        /* the header name: everything up to the first blank or tab */
        while (tmpline4[i] != '\0' && tmpline4[i] != ' ' &&
               tmpline4[i] != '\t' && j < limit)
            tmpline3[j++] = tmpline4[i++];

        /* each whitespace character after it becomes one space */
        while (isspace((unsigned char) tmpline4[i]) && j < limit) {
            tmpline3[j++] = ' ';
            i++;
        }

        /* The tag is appended where the copy stopped; the buffer is not
        ** used as its own sprintf() argument, which is undefined. */
        n = snprintf(tmpline3 + j, limit - j + 1,
                     "<A HREF=\"%.4d.html\">", status);
        if (n > 0)
            j += ((size_t) n > limit - j) ? limit - j : (size_t) n;

        /* the rest of the line, without its newline, is the link text */
        while (tmpline4[i] != '\0' && tmpline4[i] != '\n' && j < limit)
            tmpline3[j++] = tmpline4[i++];

        strcpy(tmpline3 + j, "</A>\n");
        return tmpline3;
    }

    strcpymax(tmpline2, convchars(line), sizeof(tmpline2));

    parseurl(tmpline2, /* source */
             tmpline3, /* destination */
             sizeof(tmpline3)); /* size of destination */

    if (use_mailcommand) {
        /* Exclude headers that are not mail type headers */
        if (strcasestr(tmpline3, "Message-ID") ||
            strcasestr(tmpline3, "Supersedes"))
            return tmpline3;

        if (!*tmpline3)
            return tmpline3;

        parseemail(tmpline3,     /* source */
                   tmpline2,     /* dest */
                   sizeof(tmpline2), /* size of destination buffer */
                   mailid,       /* mail's Message-Id: */
                   mailsubject); /* mail's Subject: */

        return tmpline2;
    }
    return tmpline3;
}
