;;; -*- Syntax: Ansi-Common-Lisp; Base: 10; Mode: lisp; Package: odcl -*-
;;; $Id: parsing.lisp,v 1.26 2003/09/09 17:20:42 craig Exp $
;;;
;;; Copyright (c) 1999 - 2003 onShore Development, Inc.

(in-package :odcl)

(defun read-chunque (stream length)
  (let ((str (make-string length)))
    (dotimes (i length)
      (setf (aref str i)
            (read-char stream)))
    str))

(defun lightly-tokenize (bag string)
  (let ((results nil)
        (last 0))
    (dotimes (i (length string))
      (when (member (aref string i) bag :test #'char=)
        (if (= last i)
            (incf last)
            (push (subseq string last i) results))
        (setf last (1+ i))))
    (push (subseq string last) results)))

(defun str-y-or-n (str)
  (not (characterp (or (find #\n str)
		       (find #\N str)))))

(defun str-n-or-y (str)
  (characterp (or (find #\y str)
		  (find #\Y str))))

(defun read-number-or-nil (str)
  (let ((num (read-from-string str)))
    (if (numberp num)
	num nil)))

(defun read-list-or-nil (str)
  (let ((num (read-from-string str)))
    (if (listp num)
	num nil)))

;; ------------------------------------------------------------
;; Parsing and processing utilities

(defun read-comma-sep (line format)
  "Read a line of comma-separated text and parse it into fields as
specified by the format.  Double-quoted field values are returned
without their containing quotes.  If the line has fewer fields than
specified in the format, only the fields found are returned."
  (let ((splitted (parse-csv-string line))
        fields)
    (dotimes (x (length format))
      (let ((field (nth x splitted)))
        (if (not (null field))
            (setq fields (cons field fields))
            (format t "~&Field ~A of type ~A not found in line:~%  ~A~%"
                    (cadr (nth x format))
                    (car (nth x format))
                    line))))
    (nreverse fields)))

(defun parse-boolean (string)
  (not (null (position-if (lambda (x) (position x "Yy")) string))))

(let ((str (make-string 80 :element-type 'base-char)))
  (defun read-delimited (del stream)
    "read from a stream, stopping at the given delimiter.  delimiter
can be a single char, or a list of chars."
    (declare (optimize (speed 3) #+cmu (extensions:inhibit-warnings 3))
	     (type (simple-array * (*)) str))
    (let ((dellist (etypecase del
		     (character (list del))
		     (list del))))
      (do ((i 0 (incf i))
	   (c (read-char stream) (read-char stream)))
	  ((member c dellist :test #'char=) (adjust-array str i))
	(if (= i (length str))
	    (setf str (adjust-array str (ash (length str) 1))))
	(setf (aref str i) c)))))

(let ((str (make-string 80 :element-type 'base-char)))
  (defun read-comma-delimited (stream)
    "read from a stream, stopping at the given delimiter."
    ;; (declare (optimize (speed 3)))
    (do ((i 0 (incf i))
	 (c (read-char stream) (read-char stream)))
	((char= c #\,) (adjust-array str i))
      (if (= i (length str))
	  (setf str (adjust-array str (ash (length str) 1))))
      (setf (aref str i) c))))

(defun loop-over-lines (stream func &key max &aux (ctr 0))
  (loop as line = (read-line stream nil)
	while (and (not (null line))
                   (or (null max) (<= (incf ctr) max)))
	do (apply func (list line))))

(defmacro nostr (val)
  `(or (null ,val) (string= ,val "")))

(defun %read-line (delim stream &optional (eof-error t))
  "simple stupid delimted line reader; returns a list of strings from
a stream"
  (let ((str nil))
    (do ((ch (read-char stream eof-error :eof)
	     (read-char stream eof-error :eof)))
	((or (equal ch :eof)
	     (equal ch delim)) str)
      (push ch str))
    (if str (concatenate 'string (nreverse str)) nil)))

(defun parse-csv-string (string &key (quote #\") (delim #\,) (escape #\\) (trim #\Space))
  "Misnamed function for reading delimited text.
FIXME: key args should take lists."
  (let ((len (length string))
	(vals (list ""))
	(inq nil)
        (escaped nil))
    (do ((i 0 (incf i)))
	((= i len)
         (progn 
           (when inq
                (warn "unbalanced quoting"))
           (reverse
            (mapcar #'(lambda (tok)
                        (string-trim (list trim)
                                     (coerce (reverse tok) 'string)))
                    vals))))
      (let ((ch (elt string i)))
	(cond
          (escaped
           (progn
             (setq escaped nil)
             (push ch (car vals))))
          ((char= ch escape)
           (setq escaped t))
	  ((char= ch quote)
           (setq inq (not inq)))
	  ((char= ch delim)
	   (if inq
	       (push ch (car vals))
	       (push nil vals)))
           (t
	    (push ch (car vals))))))))

(defun read-csv-record (stream format &key (output-format :list)
                        (quote-char #\") (sep-char #\,) (escape-char #\\)
                        &aux char field result in-record in-field did-quote quoting? escaping?)
  "Consume and tokenize one input record from STREAM, returning a list
of the field values found.  If OUTPUT-FORMAT (keyword argument, default :LIST)
is :ALIST, an alist of field values, indexed by field property name is
returned.

The list FORMAT specifies each input field's property name and value function.
Each element of FORMAT is a list containing a keyword (the property name)
and a symbol denoting the function to supply the field's value.  If the value
function is omitted, NIL will be returned for that field.

Each input record is terminated by either a single Newline character, a single
Return character, or a Return character followed by a Newline character.

Input fields are delimited by SEP-CHAR (keyword argument, default ',').

Any sequence of characters that starts and ends with a QUOTE-CHAR (keyword
argument, default '\"') is quoted, and any single character that is
preceded by an ESCAPE-CHAR (keyword argument, default '\\') is quoted.

Quoting may be used arbitrarily, and in particular, removes the special
meaning of any quoted SEP-CHAR.  Quoting also removes the special meaning
of the end-of-line characters.  Quoting protects whitespace at the
beginning and end of a field (unquoted leading and trailing whitespace is
removed."				      
  (declare (optimize (speed 3))
           (type (or cons null) field))
  (flet ((add-char ()
           (push char field)
           (setf in-record t)
           (setf in-field t))
         (add-field ()
           (when in-record
             (when field
               (while (char= (car field) #\Space)
                 (pop field)))
             (let* ((len (length field))
                    (nfield nil)
                    (sfield (make-string len)))
               (when (or did-quote
                         (< 0 len))
                 (dotimes (i len)
                   (setf (aref sfield (- len i 1))
                         (pop field))))
	       (if format
		   (destructuring-bind (&optional property value-fn)
		       (pop format)
		     (declare (optimize (speed 1)))
		     (when property
		       (when value-fn
			 (setf nfield (funcall (symbol-function value-fn) sfield)))
		       (ecase output-format
			 (:list
			  (push nfield result))
			 (:alist
			  (push (cons property nfield) result)))))
		   (push sfield result)))
               (setf did-quote nil)
               (setf in-field nil))))
    (while (setf char (read-char stream nil #\Null))
      (cond ((char= char #\Null)
             (add-field)
             (return-from read-csv-record (nreverse result)))
            ((or (char= char #\Newline)
                 (char= char #\Return))
	     ;; Line Termination conventions:
	     ;;   mac: CR  win: CR/LF  unix: LF
	     (cond
               (escaping?
                (add-char)
                (setf escaping? nil))
               (quoting?
                (add-char))
               (t (let ((peek (peek-char nil stream nil)))
                    (when (and peek
                               (or (char= peek #\Newline)
                                   (char= peek #\Return)))
                      (read-char stream nil nil))
                    (add-field)
                    (return-from read-csv-record (nreverse result))))))
            (escaping?
             (add-char)
	     (setf escaping? nil))
            ((char= char escape-char)
             (setf escaping? t))
            ((char= char quote-char)
             (setf did-quote t
                   quoting? (not quoting?)))
            ((and (char= char sep-char)
                  (not quoting?))
             (add-field))
            ((char= char #\Space)
             (when in-field
               (add-char)))
            (t
             (add-char))))))

(defun split-tok (string char)
  (labels ((split (list string char)
	     (let ((pos (position char string)))
	       (if (not pos)
		   (nreverse (cons string list))
		   (split (cons :replace (cons (subseq string 0 pos) list))
			  (subseq string (+ 1 pos))
			  char)))))
    (split nil string char)))

(defun map-csv-file (file &key (function #'identity))
  "Map FUNCTION over the values in a comma-separated values FILE and return
a list of the results.

The first line of the input file specifies the column headers.  Each column
header is interned in the keyword package.

Each subsequent line is converted to an alist indexed by the keywords from
the column headers.  If there are more values than keywords, the extra values
are indexed by NIL.

FUNCTION is called once for each input line except the first, with the alist
as its sole argument.  Its value is accumulated and returned in a fresh list."
  (with-open-file (stream file :direction :input)
    (labels ((ensure-length (list desired-length)
		 (append list (let (mt)
				(dotimes (v (- desired-length (length list)))
					  (push nil mt))
				   mt)))
	     (keywords (list)
	       (mapcar #'(lambda (s) (when s (intern (string-upcase s) :keyword))) list))
	     (next ()
	       (odcl::read-csv-record stream nil)))
	(do ((accumulator '())
	     (header (keywords (next)))
	     (record (next) (next)))
	    ((not record) (nreverse accumulator))
	  (when (< (length header) (length record))
	    (setf header (ensure-length header (length record))))
	  (push (funcall function (mapcar #'cons header record))
		accumulator)))))
