;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;                                                                       ;;
;;;                Centre for Speech Technology Research                  ;;
;;;                     University of Edinburgh, UK                       ;;
;;;                       Copyright (c) 1996,1997                         ;;
;;;                        All Rights Reserved.                           ;;
;;;                                                                       ;;
;;;  Permission to use, copy, modify, distribute this software and its    ;;
;;;  documentation for research, educational and individual use only, is  ;;
;;;  hereby granted without fee, subject to the following conditions:     ;;
;;;   1. The code must retain the above copyright notice, this list of    ;;
;;;      conditions and the following disclaimer.                         ;;
;;;   2. Any modifications must be clearly marked as such.                ;;
;;;   3. Original authors' names are not deleted.                         ;;
;;;  This software may not be used for commercial purposes without        ;;
;;;  specific prior written permission from the authors.                  ;;
;;;                                                                       ;;
;;;  THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK        ;;
;;;  DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING      ;;
;;;  ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT   ;;
;;;  SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE     ;;
;;;  FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES    ;;
;;;  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN   ;;
;;;  AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,          ;;
;;;  ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF       ;;
;;;  THIS SOFTWARE.                                                       ;;
;;;                                                                       ;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;
;;;                Author: Alan W Black and Kurt Dusterhoff
;;;                Date: November 1996
;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;
;;;   Tilt intonation modules, accent/boundary preditions and F0 generation
;;;   The F0 generation is done using models as described in 
;;;   Dusterhoff, K. and Black, A. (1997). "Generating F0 contours for 
;;;   speech synthesis using the Tilt intonation theory"
;;;   (http://www.cstr.ed.ac.uk/awb/papers/esca-int97.ps) 
;;;   Proceedings of ESCA Workshop of Intonation, pp 107-110, September, 
;;;   Athens, Greece.
;;;
;;;   Intonation_Tilt assigns accents and boundaries by a CART tree
;;;   the c and sil nodes are derived directly duration creation
;;;
;;;   Int_Targets_Tilt generates the F0 using the CART trees as
;;;   described in the paper referenced above.
;;;

(defvar int_tilt_params nil
  "int_tilt_params
Parameters for tilt intonation model.")

(Parameter.def 'tilt_method 'cart)

(define (Intonation_Tilt utt)
  "(Intonation_Tilt utt)
Assign accent and boundary IntEvents to each syllable, and fill in
spaces with silence and connections."
 (let (accent boundary
       (firstseg (utt.relation.first utt 'Segment))
       (lastseg (utt.relation.last utt 'Segment)))
   (if (and firstseg (phone_is_silence (item.name firstseg)))
       (tilt_add_silence utt firstseg))
  (mapcar
   (lambda (syl)
     (set! accent (wagon_predict syl tilt_accent_cart_tree))
     (set! boundary (wagon_predict syl tilt_boundary_cart_tree))
;;     (format t "%s %s: accent %s boundary %s\n"
;;	     (utt.item.feat utt syl "R:SylStructure.parent.name")
;;	     (utt.item.feat utt syl "name")
;;	     accent
;;	     boundary)
     (cond
      ((and (equal? 'NONE accent) (equal? 'NONE boundary))
       t)  ;; do nothing
      ((equal? 'NONE accent)
       (tilt_add_intevent utt syl boundary))
      ((equal? 'NONE boundary)
       (tilt_add_intevent utt syl accent))
      (t   ;; accent and boundary
       (tilt_add_intevent utt syl (string-append accent boundary)))))
   (utt.relation.items utt 'Syllable))
  (if (and lastseg (phone_is_silence (item.name lastseg)))
      (tilt_add_silence utt lastseg))
  utt))

(define (tilt_add_intevent utt syl name)
"(tilt_add_intevent utt syl name)
Add a new IntEvent related to syl with name.  This doesn't yet deal
with adding sil and c, name is assumed to be accents or boundary."
  (let ((isyls (tilt_find_intervening_syls utt syl))
	ie)
    ;;; Check if a silence and/or c is required
    ;;; these are heuristics
    (if (and (not isyls)  ;; might need a silence
	     (item.relation.prev syl "Syllable")
	     (phone_is_silence 
	      (item.feat syl "R:SylStructure.daughter1.R:Segment.p.name")))
	(tilt_add_silence 
	 utt (item.relation.prev 
	      (item.relation.daughter1 syl 'SylStructure)
	      'Segment)))
    (tilt_fill_sil_c utt isyls)

    (set! ie (utt.relation.append utt 'IntEvent (list name)))
    (if (not (item.relation syl 'Intonation))
	(utt.relation.append utt 'Intonation syl))
    (item.relation.append_daughter syl 'Intonation ie)))

(define (tilt_find_intervening_syls utt syl)
"(tilt_find_intervening_syls utt syl)
Find the syllables from the last IntEvent upto this one."
  (tilt_find_syls_upto 
   (tilt_find_last_intevent_syl utt (utt.relation.last utt 'IntEvent))
   syl)
)

(define (tilt_find_syls_upto isyl syl)
  (cond
   ((null isyl)
    nil)
   ((equal? isyl syl)
    nil)
   (t
    (cons isyl (tilt_find_syls_upto 
		(item.relation.next isyl 'Syllable) syl)))))

(define (tilt_find_last_intevent_syl utt ie)
"(tilt_find_last_intevent_syl utt ie)
Search back in the IntEvent stream to find an event related to
a syllable, return that syllable or the first syllable if none found."
  (cond
   ((null ie) (utt.relation.first utt 'Syllable))
   ((item.relation.parent ie 'Intonation)
    (item.relation.next
     (item.relation.parent ie 'Intonation)
     'Syllable))
   (t
    (tilt_find_last_intevent_syl 
     utt (item.relation.prev ie 'IntEvent)))))

(define (tilt_fill_sil_c utt syls)
  "(tilt_fill_sil_c utt syls)
Fill in silences and syls in these syllables"
  (cond
   ((null syls) 
    nil)
   (t
    (let ((last_lab (if (utt.relation.last utt 'IntEvent)
			(item.name (utt.relation.last utt 'IntEvent))
			"none")))
    (cond 
     ((and (not (string-equal "sil" last_lab))
	   (phone_is_silence 
	    (item.feat (car syls) "R:Syllable.daughter1.R:Segment.p.name")))
      (tilt_add_silence 
       utt (item.relation.prev 
	    (item.relation.daughter1 (car syls) 'SylStructure)
	    'Segment))
      (tilt_fill_sil_c utt syls))
     ((not (string-equal "c" last_lab))
      (utt.relation.append utt 'IntEvent (list "c"))
      (tilt_fill_sil_c utt (cdr syls)))
     (t 
      (tilt_fill_sil_c utt (cdr syls))))))))

(define (tilt_add_silence utt silseg)
"(tilt_add_silence utt silseg)
Ass a silence IntEvent linked to given silence segment."
  (let ((ie (utt.relation.append utt 'IntEvent)))
    (item.set_name ie "sil")
    (if (not (item.relation silseg 'Intonation))
	(utt.relation.append utt 'Intonation silseg)
    (item.relation.append_daughter silseg 'Intonation ie)))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;
;;;  Fo generate through tilt parameters and F0 rendering
;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(define (Int_Targets_Tilt utt)
  "(Int_Targets_Tilt utt)
Assign Tilt parameters to each IntEvent and then generate the 
F0 contour and assign targets."
  (tilt_assign_parameters utt)
  (tilt_fix_ends utt)
  (tilt_F0_and_targets utt)  ;; this has to be C++, sorry
  (tilt_map_f0_range utt)
  utt
)

(define (tilt_map_f0_range utt)
  "(tilt_map_f0_range utt)
In order fo better trained models to be used for voices which don't
have the necessary data to train models from the targets may be mapped
to a different pitch range.  Note this is not optimal as pitch ranges
don't map that easily, but the the results can sometimes be better than
using a less sophisticated F0 generation model.  The method used
is to define the mean and standard deviation of the speaker the
model was trained on and the mean and standard deciation of the
desired speaker.  Mapping is by converting the actual F0 value
to zscores (distance from mean in number of stddev) and back into
the other domain.  The variable int_tilt_params is used to find
the values."
  (let ((target_f0_mean (car (cdr (assoc 'target_f0_mean int_tilt_params))))
	(target_f0_std (car (cdr (assoc 'target_f0_std int_tilt_params))))
	(model_f0_std (car (cdr (assoc 'model_f0_std int_tilt_params))))
	(model_f0_mean (car (cdr (assoc 'model_f0_mean int_tilt_params)))))
    (if target_f0_mean  ;; only if one is specified
	 (lambda (targ)
	   (item.set_name
	    targ
	    (+ target_f0_mean
	       (* target_f0_std
		  (/ (- (parse-number (item.name targ))
			model_f0_mean)
		     model_f0_std)))))
	 (utt.relation.leafs utt 'Target)))))

(define (tilt_fix_ends utt)
"(tilt_fix_ends utt)
Correct the end points on the IntEvents."
  (mapcar
   (lambda (ie) (tilt_fix_end utt ie))
   (reverse (utt.relation.items utt 'IntEvent))))

(define (tilt_fix_end utt ie)
  "(tilt_fix_end utt ie)
Correct the end time on this IntEvent."
  (let ((name (item.name ie)))
    (cond
     ((string-equal name "sil") 
      (item.set_feat
       ie 'end (item.feat ie "R:Intonation.parent.segment_end")))
     ((string-equal name "c")
      (let ((nname (item.feat utt ie "R:IntEvent.n.name"))
	    (nie (item.relation.next ie "IntEvent")))
	(cond
	 ((null nie)
	  (item.set_feat
	   ie 'end (item.feat (utt.relation.last utt 'Segment) 'end))
	  ((string-equal nname "sil")
	   ;; the start of this silence
	   (let ((silend (item.end nie))
		 (ss (item.relation.daughter1 nie 'Intonation))
		 (starttime 0))
	     (set! starttime (item.feat ss 'segment_start))
	     (while (and ss 
			 (phone_is_silence 
			  (item.feat utt ss "R:Segment.p.name")))
		    (set! starttime (item.feat ss 'segment_start))
		    (set! ss (item.relation.prev ss "Segment")))
	     (item.set_feat ie 'end starttime)))
	  (t;; all other types ?
	   (item.set_feat
	    ie
	    'end
	    (- (item.feat nie 'end)
	       (item.feat nie 'tilt_duration)))))))
      (t;; all other types
       (item.set_feat
	ie
	'end
	(+ (syl_vowel_start (item.relation.parent ie "Intonation"))
	   (item.feat ie 'tilt_peak_pos)
	   (* (- 1.0 (item.feat ie 'tilt_tilt))
	      (item.feat ie 'tilt_duration) 
	      0.5))))))))
       
(define (tilt_assign_parameters utt) 
  "(tilt_assign_parameters utt)
Assigned tilt parameters to IntEvents, depending on the value
of the Parameter tilt_method uses wagon trees (cart) or linear
regression models (lr)."
  (let ((method (Parameter.get 'tilt_method)))
  (cond
   ((equal? method 'cart)
    (tilt_assign_parameters_wagon utt))
   ((equal? method 'lr)
    (tilt_assign_parameters_lr utt))
   (t
    (error "Tilt: unknown tilt param prediction method: " tilt_method)))))

(define (tilt_assign_parameters_wagon utt)
 "(tilt_assign_parameters_wagon utt)
Assing parameters (start_f0, tilt, amplitude, peak_pos and duration)
to each IntEvent.  Uses Wagon trees to predict values"
  (mapcar
   (lambda (ie)
     (let ((param_trees (cdr (assoc (item.name ie)
				    tilt_param_trees))))
       (cond
	((null param_trees)  
	 (format stderr "Tilt: unknown IntEvent type %s, ignored\n"
		 (item.name ie))
	 ;; *need* to assign default values 
	 (item.set_feat ie "tilt_start_f0" 100)
	 (item.set_feat ie "tilt_amplitude" 7.0)
	 (item.set_feat ie "tilt_duration" 0.2)
	 (item.set_feat ie "tilt_tilt" 0.0)
	 (item.set_feat ie "tilt_peak_pos" 0.050)
	 )
	(t
	 (tilt_assign_params_wagon ie param_trees)))))
   (utt.relation.items utt 'IntEvent)))

(define (tilt_assign_params_wagon ie trees)
  "(tilt_assign_params_wagon ie trees)
Assign the names parameters to ie using the trees and names in
trees."
  (mapcar
   (lambda (tree)
     (let ((val (wagon_predict ie (car (cdr tree)))))
       (item.set_feat ie (car tree) val)))
   trees))

(define (tilt_assign_parameters_lr utt)
  "(tilt_assign_parameters_lr utt)
Assing parameters (start_f0, tilt, amplitude, peak_pos and duration)
to each IntEvent. Prediction by linear regression models"
  (mapcar
   (lambda (ie)
     (let ((param_lrmodels (cdr (assoc (item.name ie)
				    tilt_param_lrmodels))))
       (cond
	((null param_lrmodels)  
	 (format stderr "Tilt: unknown IntEvent type %s, ignored\n"
		 (item.name ie))
	 ;; *need* to assign default values 
	 (item.set_feat ie "tilt_start_f0" 100)
	 (item.set_feat ie "tilt_amplitude" 7.0)
	 (item.set_feat ie "tilt_duration" 0.2)
	 (item.set_feat ie "tilt_tilt" 0.0)
	 (item.set_feat ie "tilt_peak_pos" 0.050)
	 )
	(t
	 (tilt_assign_params_lr ie param_lrmodels)))))
   (utt.relation.items utt 'IntEvent)))

(define (tilt_assign_params_lr ie lrmodels)
  "(tilt_assign_params_lr ie lrmodels)
Assign the names parameters to ie using the trees and names in
trees."
  (mapcar
   (lambda (lrm)
     (let ((val (lr_predict ie (cdr lrm))))
       (item.set_feat ie (car lrm) val)))
   lrmodels))

(define (utt.save.tilt_events utt filename)
"(utt.save.til_events UTT FILENAME)
Save tilt events in UTT to FILENAME in a format suitable for
ev_synth."
  (let ((fd (fopen filename "w")))
    (format fd "#\n")
    (mapcar
     (lambda (ie)
       (let ((name (item.name ie)))
	 (cond
	  ((or (string-equal name "sil")
	       (string-equal name "c"))
	   (format fd "   %2.4f   100 %s; tilt: %2.6f\n" 
		   (item.feat ie 'end)
		   name 
		   (item.feat ie "tilt_start_f0")))
	  (t ;; accent or boundary
	   (format fd "   %2.4f   100 %s; tilt: %2.6f %2.6f %2.6f %2.6f %2.6f\n" 
		   (item.feat ie 'end)
		   name 
		   (item.feat ie "tilt_start_f0")
		   (item.feat ie "tilt_amplitude")
		   (item.feat ie "tilt_duration")
		   (item.feat ie "tilt_tilt")
		   (item.feat ie "tilt_peak_pos"))))))
     (utt.relation.items utt 'IntEvent))
    (fclose fd)
    utt))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;;  Some feature functions used in the tilt trees
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(def_feature_docstring 'Syllable.lisp_syl_vowel_start
  "Syllable.lisp_syl_vowel_start
  The start time of the vowel in this syllable.")

(define (syl_vowel_start syl)
"(syl_vowel_start utt syl)
Start time of vowel in syl."
  (let ((sttime (item.feat syl 'syllable_start)))
    (mapcar
     (lambda (s)
       (if (string-equal '+ (item.feat s 'ph_vc))
	   (set! sttime (item.feat s 'segment_start))))
     (item.relation.daughters syl 'SylStructure))
    sttime))

(def_feature_docstring 'Syllable.lisp_next_stress
  "Syllable.lisp_next_stress
  Number of syllables to next stressed syllable. 0 if this syllable is
  stressed.  It is effectively assumed the syllable after the last syllable
  is stressed.")

(define (next_stress syl)
  (cond 
   ((null syl) 0)
   ((string-equal (item.feat syl 'stress) "1")
    0)
   (t
    (+ 1 (next_stress (item.relation.next syl 'Syllable))))))

(def_feature_docstring 'Syllable.lisp_last_stress
  "Syllable.lisp_last_stress
  Number of syllables from previous stressed syllable.  0 if this syllable
  is stressed.  It is effectively assumed that the syllable before the 
  first syllable is stressed.")
(define (last_stress syl)
  (cond 
   ((null syl) 0)
   ((string-equal (item.feat syl 'stress) "1")
    0)
   (t
    (+ 1 (last_stress (item.relation.prev syl 'Syllable))))))

(def_feature_docstring 'Syllable.lisp_get_rhyme_length
  "Syllable.lisp_get_rhyme_length
  Length from start of the vowel to end of syllable.")
(define (get_rhyme_length syl)
  (- (item.feat syl 'syllable_end)
     (syl_vowel_start syl)))

(def_feature_docstring 'Syllable.lisp_get_onset_length
  "Syllable.lisp_get_onset_length
  Length from start of syllable to start of vowel.")
(define (get_onset_length syl)
  (cond
   ((< (- (syl_vowel_start syl)
	  (item.feat syl 'syllable_start))
       0.000)
    0.000)  ;; just in case
   (t
    (- (syl_vowel_start syl)
       (item.feat syl 'syllable_start)))))

(def_feature_docstring 'Syllable.lisp_tilt_accent
  "Syllable.lisp_tilt_accent
  Returns \"a\" if there is a tilt accent related to this syllable, 0 
  otherwise.")
(define (tilt_accent syl)
  (let ((events (item.relation.daughters syl 'Intonation))
	(r "0"))
    (mapcar
     (lambda (i)
       (if (member_string (item.name i) tilt_accent_list)
	   (set! r "a")))
     events)
    r))

(def_feature_docstring 'Syllable.lisp_tilt_boundary
  "Syllable.lisp_tilt_boundary
  Returns boundary label if there is a tilt boundary related to this 
syllable, 0 otherwise.")
(define (tilt_boundary syl)
  (let ((events (item.relation.daughters syl 'Intonation))
	(r "0"))
    (mapcar
     (lambda (i)
       (let ((name (item.name i)))
       (if (member_string name tilt_boundary_list)
	   (cond
	    ((string-matches name "a.*")
	     (set! r (string-after name "a")))
	    ((string-matches name "m.*")
	     (set! r (string-after name "m")))
	    (t
	     (set! r name))))))
     events)
    r))

(def_feature_docstring 'Syllable.lisp_tilt_accented
  "Syllable.lisp_tilt_accented
  Returns 1 if there is a tilt accent related to this syllable, 0 
  otherwise.")
(define (tilt_accented syl)
  (if (string-equal (tilt_accent syl) "a")
      "1"
      "0"))

(def_feature_docstring 'Syllable.lisp_tilt_boundaried
  "Syllable.lisp_tilt_boundaried
  Returns 1 if there is a tilt boundary related to this syllable, 0 
  otherwise.")
(define (tilt_boundaried syl)
  (if (string-equal (tilt_boundary syl) "0")
      "0"
      "1"))

;;The classification is based on the most high portion of the vowel
;;content (because I want it to be).
(define (vowel_class syl)
  (let ((vh (phone_feature (item.feat syl 'syl_vowel) 'vheight)))
    (cond
     ((string-equal "1" vh) "H")
     ((string-equal "3" vh) "L")
     (t "M")))
)

(defvar tilt_accent_list '("a" "arb" "afb" "m" "mfb" "mrb")
  "tilt_accent_list
List of events containing accents in tilt model.")
(defvar tilt_boundary_list '("rb" "arb" "afb" "fb" "mfb" "mrb")
  "tilt_boundary_list
List of events containing boundaries in tilt model.")

(def_feature_docstring 'IntEvent.lisp_last_tilt_accent
  "IntEvent.lisp_last_tilt_accent
  Returns the most recent tilt accent.")
(define (last_tilt_accent intev)
  (let ((pie (item.relation.prev intev 'IntEvent)))
    (cond
     ((not pie)
      "0")
     ((member_string (item.name pie) tilt_accent_list)
      (item.name pie))
     (t (last_tilt_accent pie)))))

(def_feature_docstring 'IntEvent.lisp_next_tilt_accent
  "IntEvent.lisp_next_tilt_accent
  Returns the next tilt accent.")
(define (next_tilt_accent intev)
  (let ((nie (item.relation.next intev 'IntEvent)))
    (cond
     ((not nie) "0")
     ((member_string (item.name nie) tilt_accent_list)
      (item.name nie))
     (t (next_tilt_accent nie)))))

(def_feature_docstring 'IntEvent.lisp_last_tilt_boundary
  "IntEvent.lisp_last_tilt_boundary
  Returns the most recent tilt boundary.")
(define (last_tilt_boundary intev)
  (let ((pie (item.relation.prev intev 'IntEvent)))
    (cond
     ((not pie) "0")
     ((member_string (item.name pie) tilt_boundary_list)
      (item.name pie))
     (t (last_tilt_boundary pie)))))

(def_feature_docstring 'IntEvent.lisp_next_tilt_boundary
  "IntEvent.lisp_next_tilt_boundary
  Returns the next tilt boundary.")
(define (next_tilt_boundary intev)
  (let ((nie (item.relation.next intev 'IntEvent)))
    (cond
     ((not nie) "0")
     ((member_string (item.name nie) tilt_boundary_list)
      (item.name nie))
     (t (next_tilt_boundary nie)))))

(provide 'tilt)
