/*
    Wn: A Server for the HTTP
    File: wndex/content.c
    Version 2.0.4
    
    Copyright (C) 1996-1998  <by John Franks>

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 1, or (at your option)
    any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

*/

#define WNDEX

#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>
#include "wndex.h"
#include "content.h"

/*
 * Values held by the httpequiv selector in dometa(): which kind of
 * <meta> value, if any, the current tag carries.
 */
#define NOTHING		(0)
#define EXPIRES		(1)
#define KEYWORDS	(2)

/* Forward declarations of the helpers private to this file. */
static void	dometa();

static char	*findword();



/*
 * void getcontent( ep)  Derive the "content" and "encoding" pairs for an
 * entry from the suffix(es) of ep->file.
 *
 * Nothing is done for URL entries.  The lower-cased file name is split at
 * its last '.'; if that suffix is listed in enclist[] (and the entry has
 * no encoding yet) an "encoding" pair is added and the next suffix to the
 * left is used for the content lookup.  A content type already present on
 * the entry is never overridden.  Otherwise the suffix is looked up in
 * mimelist[], then checked for "map" (sets WN_ISMAP) and for CGI_EXT
 * (sets WN_CGI); failing all of these ep->defaultcontent is used.
 */
void
getcontent( ep)
Entry	*ep;
{

	register char	*cp;

	char	suffix[SMALLBUF],
		dotsuffix[SMALLBUF + 1],
		buf[SMALLBUF];
	int	i = 0;

	if ( ep->flag & WN_ISURL)
		return;
	mystrncpy( buf, ep->file, SMALLBUF);
	strlower( buf);

	cp = strrchr( buf, '.');

	if ( cp == NULL ) { /* There's no suffix */
		if ( !hascontent( ep))
			addpair( "content", ep->defaultcontent, ep);
		return;
	}
	*cp++ = '\0';
	strcpy( suffix, cp);


	while ( (!hasencoding( ep)) && enclist[i][0] != NULL) {
		if ( streq( enclist[i][0], suffix)) {
			addpair( "encoding", enclist[i][1], ep);
			cp = strrchr( buf, '.');
			if ( cp == NULL	) {
				/*
				 * Only an encoding suffix was present.  As on
				 * the other paths, leave an already assigned
				 * content type alone.
				 */
				if ( !hascontent( ep)) {
					addpair( "content",
						ep->defaultcontent, ep);
					strcpy( ep->content,
						ep->defaultcontent);
				}
				return;
			}
			strcpy( suffix, ++cp);
			break;
		}
		i++;
	}

	if ( hascontent( ep))
		return;

	i = 0;
	while ( mimelist[i][0] != NULL) {
		if ( streq( mimelist[i][0], suffix)) {
			addpair( "content", mimelist[i][1], ep);
			strcpy( ep->content, mimelist[i][1]);
#ifdef PARSE_EXT
			if ( streq( suffix, PARSE_EXT) && 
					!(ep->attributes  & WN_NOPARSE))
				ep->attributes |= WN_PARSE;
#endif
			return;
		}
		i++;
	}


	if ( strcasecmp( suffix, "map") == 0) {
		ep->attributes |=  WN_ISMAP;
		addpair( "content", "text/plain", ep);
		strcpy( ep->content, "text/plain");
		return;
	}

	/*
	 * The CGI test compares against the suffix with its leading dot.
	 * suffix[] is a separate array, so the dot has to be put back
	 * explicitly; "suffix - 1" would point outside the array.
	 */
	dotsuffix[0] = '.';
	strcpy( dotsuffix + 1, suffix);

	if ( strcasecmp( dotsuffix, CGI_EXT) == 0) {
		ep->attributes |=  WN_CGI;
		addpair( "content", "text/html", ep);
		strcpy( ep->content, "text/html");
		return;
	}

	addpair( "content", ep->defaultcontent, ep);
	strcpy( ep->content, ep->defaultcontent);
	return;
}


/*
 * void loadmime()  Fill mimelist[] from the file MIME_TYPE_FILE.
 *
 * Empty lines and lines starting with '#' are skipped.  Two line formats
 * are accepted: "type<space>suffix<space>suffix..." and the old style
 * "suffix<tab>type".  Each suffix gets its own mimelist[] slot with
 * mimelist[i][0] the suffix and mimelist[i][1] the MIME type; the table
 * is terminated by a pair of NULLs.  If the file cannot be opened the
 * table is left untouched.  The program exits with status 2 when memory
 * runs out or more than MAXMIME slots would be needed.
 */
void
loadmime()
{
	
	register char	*cp, *cp2, *slash;
	char	buf[2*SMALLBUF],
		*line;
	FILE	*mimefp;
	int	i = 0;

	if ( (mimefp = fopen( MIME_TYPE_FILE, "r")) == (FILE *) NULL) {
		if ( verboseflg)
			fprintf(stderr, ERRMSG9, MIME_TYPE_FILE );
		return;
	}

	while ( fgets( buf, 2*SMALLBUF, mimefp)) {
		chop( buf);
		if ( !buf[0] || buf[0] == '#')
			continue;

		/*
		 * The table entries point into this copy, so it must
		 * outlive the loop.  Size it from the line actually read:
		 * buf can hold more than SMALLBUF characters.
		 */
		if ( (line = malloc( strlen( buf) + 1)) == NULL) {
			fprintf(stderr, ERRMSG10);
			exit( 2);	
		}
		strcpy( line, buf);
		cp = line;
		while ( isspace( (unsigned char) *cp))
			cp++;
		cp2 = cp;

		while ( *cp2 && !isspace( (unsigned char) *cp2))
			cp2++;
		if ( (slash = strchr( cp, '/')) == NULL) {
			fprintf(stderr, ERRMSG11, MIME_TYPE_FILE );
			fprintf(stderr, "\tLine = %s\n", buf);
			free( line);	/* nothing references it */
			continue;
		}

		if ( !*cp2) {
			/* There are no suffixes, so ignore it */
			free( line);
			continue;
		}

		if ( slash < cp2) {
			/* Format is "type<space>suffix<space>suffix..." */
				*cp2++ = '\0';
		}
		else {
			/* It's old style "suffix<tab>type" */
			cp2 = cp; /* cp2 now points to suffix list */
			cp = slash;
			while ( (cp > cp2) && !isspace( (unsigned char) *cp))
				cp--;
			*cp++ = '\0';  /* cp now points to MIME type */
		}

		while ( isspace( (unsigned char) *cp2))
			cp2++;

	/* mimelist[i][0] is the suffix, mimelist[i][1] is the MIME type */

		mimelist[i][0] = cp2;
		mimelist[i][1] = cp;

		/* Handle multiple suffixes for one type */

		while ( *cp2 ) {
			while ( *cp2 && !isspace( (unsigned char) *cp2))
				cp2++;
			if ( *cp2 ) {
				*cp2++ = '\0';
				while ( *cp2 && isspace( (unsigned char) *cp2))
					cp2++;
				if ( !*cp2)
					break;
				i++;
				if ( i >= MAXMIME) {
					fprintf(stderr, ERRMSG12);
					exit( 2);
				}
				mimelist[i][0] = cp2;
				mimelist[i][1] = mimelist[i-1][1];
			}
		}
		i++;
		if ( i >= MAXMIME) {
			fprintf(stderr, ERRMSG12);
			exit( 2);
		}
	}
	mimelist[i][0] = mimelist[i][1] = NULL;
	fclose( mimefp);
}

/*
 * static void filenametitle( ep)  Give the entry the fallback title
 * "File <ep->file>", cut to fit a MIDLEN buffer.
 */

static void
filenametitle( ep)
Entry	*ep;
{
	char	fallback[MIDLEN];

	strcpy( fallback, "File ");
	mystrncpy( fallback + 5, ep->file, MIDLEN - 5);
	addpair ("title", fallback, ep);
	strcpy( ep->title, fallback);
}

/*
 * void getkeytitle( ep) Read the HTML file to get the keywords and/or 
 * title.
 *
 * Nothing is done for URL entries.  The file is looked for in the
 * directory part of ep->cachefpath.  Reading stops once the title,
 * keywords and expires values have all been found, when "</head>" is
 * seen, or when the NUM_TITLE_LINES limit is reached.  The text between
 * "<title" and "</title>" (possibly spread over several lines, which are
 * joined with single blanks) becomes the title; "<meta" tags are handed
 * to dometa().  If the file cannot be opened, or no title is found, the
 * title "File <name>" is used.
 */

void
getkeytitle( ep)
Entry	*ep;
{

	char		*cp,
			*cp2 = NULL;

	int	i = 0;

	FILE	*fp;
	char	filepath[MIDLEN],
		tbuf[MIDLEN],
		buf[MIDLEN];

	if ( ep->flag & WN_ISURL)
		return;

	/* Bounded copies: an over-long path is cut, not written past the end */
	mystrncpy( filepath, ep->cachefpath, MIDLEN);
	if ( (cp = strrchr( filepath, '/')) == NULL) {
		strcpy( filepath, "./");
		cp = filepath + 1;
	}
	cp++;
	mystrncpy( cp, ep->file, MIDLEN - (int) (cp - filepath));

	if ( (fp = fopen( filepath, "r")) == (FILE *) NULL ) {
		if ( (!quiet) && (!ep->foundtitle) 
				&& (!strstr( ep->cacheline, "&redirect=")) ) {
			fprintf( stderr, ERRMSG14, filepath);
		}
		if ( !ep->foundtitle)
			filenametitle( ep);
		return;
	}

	while ( fgets( buf, MIDLEN, fp) && i < NUM_TITLE_LINES ) {
		chop( buf);
		i++;
		if ( (!ep->foundtitle) && (cp = findword( buf, "<title")) ) {
			/*
			 * Step over "<title" and the character after it
			 * (normally '>'), but never past the end of the
			 * line.
			 */
			cp += 6;
			if ( *cp)
				cp++;
			tbuf[0] = '\0';
			while ( cp && !(cp2 = findword( cp, "</title>")) ) {
				/* room is needed for text, a blank and NUL */
				if ( strlen( tbuf) + strlen( cp) + 2
								> MIDLEN) {
					fprintf( stderr, ERRMSG17, tbuf);
					cp = cp2 = NULL;
					break;
				}
				strcat( tbuf, cp);
				if ( *cp)
					strcat( tbuf, " ");
				if ( (cp = fgets( buf, MIDLEN, fp)) == NULL)
					break;
				chop( buf);
				i++;
			}
			if ( cp2 && cp && (cp2 >= cp) ) {
				*cp2 = '\0';
				/* append what fits; strncat always terminates */
				strncat( tbuf, cp, MIDLEN - 1 - strlen( tbuf));
				/*
				 * Move the remainder of the line to the
				 * front of buf.  Source and destination
				 * overlap, so strcpy() must not be used.
				 */
				cp2 += 8;
				memmove( buf, cp2, strlen( cp2) + 1);
			}

			addpair( "title", tbuf, ep);
			strcpy( ep->title, tbuf);
			ep->foundtitle = TRUE;
			
		}

		if ( (cp = findword( buf, "<meta")) )
			dometa( cp, ep);

		if ( ep->foundkey && ep->foundtitle && ep->foundexp)
			break;
		if ( findword( buf, "</head>") ) 
			break;

	}
	if ( !*ep->title && !quiet && verboseflg) {
		fprintf( stderr, ERRMSG15, filepath);
	}
	if ( !*ep->title )
		filenametitle( ep);
	fclose( fp);
}


/*
 * static void dometa( linebuf, ep)  Record the value of a "keywords" or
 * "expires" <meta> tag.
 *
 * linebuf points at the "<meta" of a line of the form
 *	<meta xxx="keywords" yyy="value">
 * The text after the first '=' selects the kind of value; the text after
 * the second '=' up to the next '"' is the value, which is added to the
 * entry as a "keywords" or "expires" pair unless one was already found.
 * For a recognised tag linebuf is cut off at the closing quote of the
 * value; any other tag leaves linebuf untouched.
 */
static void
dometa( linebuf, ep)
char	*linebuf;
Entry	*ep;
{
	register char	*cp,
			*cp2;

	int		httpequiv = NOTHING;

	if ( (cp = strchr( linebuf, '=')) == NULL )
		return;
	cp++;
	/* the cast keeps isspace() defined for 8-bit characters */
	while ( isspace( (unsigned char) *cp) || *cp == '"')
		cp++;
	if ( strncasecmp( cp, "keywords", 8) == 0 )
		httpequiv = KEYWORDS;
	if ( strncasecmp( cp, "expires", 7) == 0 )
		httpequiv = EXPIRES;
	if ( httpequiv == NOTHING )
		return;		/* not recorded: don't modify the line */

	if ( (cp = strchr( cp, '=')) == NULL )
		return;
	cp++;
	while ( isspace( (unsigned char) *cp) || *cp == '"')
		cp++;
	if ( (cp2 = strchr( cp, '"')) == NULL )
		return;
	*cp2 = '\0';

	if ( httpequiv == KEYWORDS ) {
		if ( !ep->foundkey) {
			addpair( "keywords", cp, ep);
			ep->foundkey = TRUE;
		}
	}
	else {	/* EXPIRES */
		if ( !ep->foundexp) {
			addpair( "expires", cp, ep);
			ep->foundexp = TRUE;
		}
	}
}

/*
 * static char *findword( line, word)  Case-insensitive search for word
 * in line.
 *
 * Both strings are copied (line cut to MIDLEN - 1 characters, word to
 * SMALLLEN - 1), lower-cased with strlower() and compared with strstr().
 * Returns a pointer to the match inside the caller's line, or NULL when
 * there is none.
 */
static char *
findword( line, word)
char	*line,
	*word;
{
	char	lowline[MIDLEN],
		lowword[SMALLLEN],
		*hit;

	mystrncpy( lowline, line, MIDLEN);
	mystrncpy( lowword, word, SMALLLEN);
	strlower( lowline);
	strlower( lowword);

	hit = strstr( lowline, lowword);
	if ( hit != NULL)
		/* same offset, but in the original line */
		return ( line + ( hit - lowline));
	return NULL;
}





/*
 * void getmd5( ep) Read the file and calculate base64( MD5(file)).
 *
 * Does nothing when ep->md5_attrib is zero or has UNDO_MD5 set.
 * Otherwise ep->md5 is first emptied.  Entries that are CGI, dynamic,
 * parsed, image maps, URLs or marked NO_MD5 get no digest (ERRMSG32 is
 * printed unless quiet).  For the rest the file is opened in the
 * directory part of ep->cachefpath, the string returned by md5digest()
 * is stored in ep->md5 and added as the "md5" pair.  If the file cannot
 * be opened or md5digest() returns NULL, ep->md5 stays empty and no pair
 * is added.
 */

void
getmd5( ep)
Entry	*ep;
{

	register char	*cp;

	char	filepath[MIDLEN];
	FILE	*fp;


	if ( (ep->md5_attrib == 0 ) || (ep->md5_attrib & UNDO_MD5 ) )
		return;

	*ep->md5 = '\0';

	if ( (ep->attributes & (WN_CGI + WN_DYNAMIC + WN_PARSE + WN_ISMAP)) ||
			(ep->flag & WN_ISURL) || (ep->md5_attrib & NO_MD5) ) {
		if ( !quiet)
			fprintf( stderr, ERRMSG32, ep->file);
		return;
	}

	/* Bounded copies: an over-long path is cut, not written past the end */
	mystrncpy( filepath, ep->cachefpath, MIDLEN);
	if ( (cp = strrchr( filepath, '/')) == NULL) {
		strcpy( filepath, "./");
		cp = filepath + 1;
	}
	cp++;
	mystrncpy( cp, ep->file, MIDLEN - (int) (cp - filepath));

	if ( (fp = fopen( filepath, "r")) == (FILE *) NULL ) {
		if ( (!quiet) && (!strstr( ep->cacheline, "&redirect=")) ) {
			fprintf( stderr, ERRMSG14, filepath);
		}
		return;
	}
	cp = md5digest( fp);
	fclose( fp);

	/* Check the digest before using it, not afterwards */
	if ( cp == NULL)
		return;

	mystrncpy( ep->md5, cp, SMALLLEN/2);
	addpair( "md5", ep->md5, ep);
	free( cp);

}

/*
 * mystrncpy( s1, s2, n) is an strncpy() which guarantees a null
 * terminated string in s1.  At most (n-1) chars are copied.
 *
 * n is the size of the buffer s1 points to.  When n is not positive
 * (or s1 is NULL) nothing is written at all, since there is no room
 * even for the terminator.  A NULL s2 is treated as the empty string.
 * Returns s1.
 */

char *
mystrncpy( s1, s2, n)
char	*s1,
	*s2;
int	n;
{
	register char	*cp1,
			*cp2;

	if ( s1 == NULL || n <= 0)
		return s1;

	cp1 = s1;
	cp2 = s2;
	if ( cp2 != NULL) {
		/* --n reserves the last byte for the terminator */
		while ( *cp2 && (--n > 0))
			*cp1++ = *cp2++;
	}
	*cp1 = '\0';
	return s1;
}

#ifdef NEED_STRCASECMP

/*
 *  Case insensitive comparison of two strings
 *
 *  Returns zero when s1 and s2 are equal ignoring case, otherwise the
 *  difference between the first pair of characters that differ.
 *  Characters are treated as unsigned char, which keeps tolower()
 *  well defined for 8-bit characters and makes a longer string always
 *  compare greater than its own prefix.
 */

int
strcasecmp( s1, s2)
char	*s1,
	*s2;

{
	int	r;

	while ( *s1 && *s2 ) {
		r = tolower( (unsigned char) *s1)
				- tolower( (unsigned char) *s2);
		if ( r != 0 )
			return r;
		s1++;
		s2++;
	} 
	/* at least one string has ended here, so one operand is NUL */
	return ( (unsigned char) *s1 - (unsigned char) *s2);
}
#endif
