/************************************************************************/
/*                                                                      */
/*                Centre for Speech Technology Research                 */
/*                     University of Edinburgh, UK                      */
/*                       Copyright (c) 1996,1997                        */
/*                        All Rights Reserved.                          */
/*                                                                      */
/*  Permission to use, copy, modify, distribute this software and its   */
/*  documentation for research, educational and individual use only, is */
/*  hereby granted without fee, subject to the following conditions:    */
/*   1. The code must retain the above copyright notice, this list of   */
/*      conditions and the following disclaimer.                        */
/*   2. Any modifications must be clearly marked as such.               */
/*   3. Original authors' names are not deleted.                        */
/*  This software may not be used for commercial purposes without       */
/*  specific prior written permission from the authors.                 */
/*                                                                      */
/*  THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK       */
/*  DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING     */
/*  ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT  */
/*  SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE    */
/*  FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES   */
/*  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN  */
/*  AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,         */
/*  ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF      */
/*  THIS SOFTWARE.                                                      */
/*                                                                      */
/************************************************************************/
/*                                                                      */
/*                 Author: Paul Taylor (pault@cstr.ed.ac.uk)            */
/*                   Date: Fri May  9 1997                              */
/* -------------------------------------------------------------------  */
/* Examples of Generation of Acoustic Feature Vectors from Waveforms    */
/*                                                                      */
/************************************************************************/

#include <stdlib.h>
#include "EST_sigpr.h"
#include "EST_cmd_line.h"
#include "EST_inline_utils.h"
#include "sigpr/EST_sig2fv.h"

/**@name Signal processing example code
  * 
  * Some examples of signal processing: generating acoustic feature
  * vectors (stored in tracks) from waveforms.
  *
  * @see EST_Track
  * @see EST_TrackMap:example
  * @see EST_TMatrix
  * @see EST_TMatrix:example
  */
//@{

// File-scope string list.
// NOTE(review): not referenced anywhere in the visible part of this file.
EST_StrList empty;

// Helper routines called from main() with an EST_TrackMap / EST_Track
// (presumably to print the channel map - confirm against their definitions).
// They are only declared here; the definitions are not in the visible part
// of this file.
void print_map(EST_TrackMap &t);
void print_track_map(EST_Track &t);

// Worked example for the signal processing library.
//
// Takes no arguments.  Loads the waveform "data/kdt_001.wav" and walks
// through a sequence of documented examples (fixed frame and
// pitch-synchronous analysis, heterogeneous tracks, track allocation,
// delta and acceleration coefficients), printing "position N" progress
// markers to cout and saving a number of tracks in "est" format.
//
// The program terminates through exit(0) at the end of the
// delta/acceleration section; the sections after that point (sig2fv,
// windowing, frame based processing, filtering) are compiled but not run.
int main(void)

{
    EST_StrList base_list; // decl
    EST_StrList delta_list; // decl
    EST_StrList acc_list; // decl
    EST_Option op, al; // decl
    init_lib_ops(al, op); 
    EST_Wave sig; // decl
    EST_Track fv, part; // decl
    float shift; // decl
    EST_TrackMap *map; // decl
    int i;


    cout << "position 1\n";

    /**@name Producing_a_single_type_of_feature_vector_for_an_utterance.

      The core of the signal processing library comprises functions
      which take an input \Ref{EST_FVector} and produce a single
      output float value or produce an output \Ref{EST_FVector} (for
      example the \Ref{sig2pow} function takes a \Ref{EST_FVector} of speech
      and outputs a single power value, while \Ref{sig2lpc} outputs
      a set of coefficients in a \Ref{EST_FVector}.)

      In speech processing, we nearly always want to do this type of
      analysis several times for a single waveform, to find how the
      analysis values vary with time. To do this, the speech signal
      is divided into small {\it frames} of speech at certain time 
      points. Analysis is then performed on each, and the output 
      is stored in the frame position corresponding to that time point
      in an \Ref{EST_Track} object.

      The following code demonstrates a simple case of calculating the
      linear prediction (LP) coefficients for a waveform.

      */
    //@{ code

    // First set the order of the lpc analysis to 16 (this entails 17 actual
    // coefficients) and then load in the waveform to be analysed.

    int lpc_order = 16;
    sig.load("data/kdt_001.wav");

    // now allocate enough space in the track to hold the analysis,
    // and name the channels in the track appropriately.
    // The following command resizes fv to have enough frames for
    // analysis frames at 0.01 intervals up to the end of the waveform
    // (sig.end()), and enough channels to store lpc_order + 1 coefficients.
    // The channels are named so as to take lpc coefficients.

    make_fixed_timed_track(fv, "lpc", sig.end(), lpc_order + 1, 0.01);
    
    // the simplest way to do the actual analysis is as follows, which
    // will fill the track with the values from the LP analysis using the
    // default processing controls.

    sig2coef(sig, fv, "lpc");

    // In this style of analysis, default values are used to control the
    // windowing mechanisms which split the whole signal into frames.
    // Specifically, each frame is defined to start a certain distance 
    // before the time interval, and to extend the same distance after.
    // This distance is calculated as a function of the local window
    // spacing and can be adjusted as follows:
    
    // extending one time period before and one time period after the
    // current time mark:

    sig2coef(sig, fv, "lpc", 2.0);

    // extending 1.5 time periods before and after the
    // current time mark, etc;
    sig2coef(sig, fv, "lpc", 3.0);

    // The type of windowing function may be changed also as this
    // can be passed in as an optional argument. First we 
    // create a window function (This is explained more in \Ref{Windowing}).

    EST_WindowFunc *wf =  EST_Window::creator("hamming");
    // and then pass it in as the last argument
    sig2coef(sig, fv, "lpc", 3.0, wf);



    //@}

    cout << "position 2\n";

    /**@name Pitch-Synchronous vs fixed frame analysis.

      Most of the core signal processing functions operate on individual
      frames of speech and are oblivious as to how these frames were
      extracted from the original speech. This allows us to take the frames
      from anywhere in the signal: specifically, this facilitates two
      common forms of analysis: 

      \begin{description}
      \item[fixed frame] The time points are spaced at even intervals
      throughout the signal.
      \item[pitch-synchronous] The time points represent {\em pitchmarks}
      and correspond to a specific position in each pitch period,
      e.g. the instant of glottal closure.
      \end{description}

      Functions such as \Ref{sig2coef} take a pre-allocated
      \Ref{EST_Track}, and the time array in this should be set at
      whatever time intervals are desired (it is standard to consider
      a time point as representing the middle of the time frame).

      It is a simple matter to fill the time array, but normally 
      pitchmarks are read from a file or taken from another signal
      processing algorithm (see \Ref{Pitchmark functions.}).
      */

    //@{ code

    // There are many ways to fill the time array for fixed frame
    // analysis:

    // manually:

    int num_frames = 300;
    fv.resize(num_frames, EST_ALL);
    shift = 0.01; // time interval in seconds

    for (i = 0; i < num_frames; ++i)
        fv.t(i) = shift * (float) i;

    // or by use of the member function \Ref{EST_Track::fill_time}

    fv.fill_time(0.01);
    
    // Or by allocating the appropriate space as well with the
    // \Ref{make_fixed_times} function:
    make_fixed_times(fv, sig.end(), shift);

    // Or by allocating the appropriate space {\em and} setting the trackmap
    // with the \Ref{make_fixed_timed_track}:

    make_fixed_timed_track(fv, "lpc", sig.end(), lpc_order + 1, 0.01, 0);

    // Pitch synchronous values can simply be read from pitchmark
    // files:
    fv.load("data/kdt_001.pm");
    make_track(fv, "lpc", lpc_order + 1);

    // Regardless of how the time points were obtained, the analysis
    // function call is just the same:

    sig2coef(sig, fv, "lpc");
    //@}

    cout << "position 3\n";

    /**@name Hetrogeneous Tracks

      Multiple types of feature vector can be stored in the same Track.
      Imagine that we want lpc, cepstrum and power
      coefficients in that order in a track. This can be achieved by using
      the \Ref{sig2coef} function multiple times, or by the wrap
      around \Ref{sigpr_base} function.

      It is vitally important here to ensure that before passing the
      track to the signal processing functions that it has the correct
      number of channels and that these are appropriately named. This is
      most easily done using the \Ref{add_to_track_map} function, explained
      in \Ref{Track Allocation.}.

      */
    //@{ code

    // For each call, we only use the part of the track that is relevant.
    // The sub_track member function of \Ref{EST_Track} is used to get
    // this. In the following example, fv is resized from a track map
    // holding entries for "lpc", "cep" and "power", in that order.
    //
    // NOTE(review): the original description of this example speaks of
    // 17 lpc coefficients and 8 cepstrum coefficients, whereas the code
    // uses cep_order = 16 and creates the "lpc" map entry with lpc_order
    // (16) rather than lpc_order + 1. The sub_track ranges below may
    // therefore not line up with the map - confirm against
    // add_to_track_map before relying on the channel layout.

    int c = 0;
    int cep_order = 16;
    map = new EST_TrackMap;

    add_to_track_map("lpc", *map, lpc_order, 0, c);
    add_to_track_map("cep", *map, cep_order, 0, c);
    add_to_track_map("power", *map, 1, 0, c);

    fv.resize(EST_CURRENT, *map);

    // After allocating the right number of frames and channels 
    // in {\tt fv}, we extract a sub_track, which has all the frames
    // (i.e. between 0 and EST_ALL) and the channels between 0 and
    // lpc_order + 1. For lpc_order = 16, this extracts channels
    // 0 - 16 inclusive

    fv.sub_track(part, 0, EST_ALL, 0, lpc_order + 1);

    // now call the signal processing function on this part:
    sig2coef(sig, part, "lpc");

    // We repeat the procedure for the cepstral coefficients, this time
    // taking cep_order channels starting at channel lpc_order.
    // NOTE(review): assuming the last two sub_track arguments are
    // (first channel, number of channels), this is channels 16 - 31,
    // which overlaps the last channel of the lpc part (0 - 16) - confirm
    // whether the start should be lpc_order + 1.

    fv.sub_track(part, 0, EST_ALL, lpc_order, cep_order);

    // and calculate the coefficients:
    sig2coef(sig, part, "cep");

    // Extract a single channel, starting at lpc_order + cep_order,
    // for power:
    fv.sub_track(part, 0, EST_ALL, lpc_order + cep_order, 1);

    // and call the power function:
    power(sig, part, 0.01);

    // While the above technique is adequate for our needs and is
    // a useful demonstration of sub_track extraction, the
    // \Ref{sigpr_base} function is normally easier to use as it does
    // all the sub track extraction itself. To perform the lpc, cepstrum
    // and power analysis, we put these names into a StrList and
    // call \Ref{sigpr_base}.

    base_list.clear(); // empty the list, just in case
    base_list.append("lpc");
    base_list.append("cep");
    base_list.append("power");

    sigpr_base(sig, fv, op, base_list);

    // This will call \Ref{sigpr_track} as many times as is necessary.

    //@}

    cout << "position 4\n";
    /**@name Track Allocation.
      Virtually all the signal processing functions require that the
      fv track be the correct size before calling the function
      that fills it with values. Furthermore, it is normally a wise thing
      to make sure that the channels are named properly.
      */
    //@{ code

    // The \Ref{add_to_track_map} function takes a list of fv names,
    // and produces a \Ref{EST_TrackMap} which describes the channel
    // names for that list. This map can then be assigned to the
    // fv track, ensuring that everything has the correct number of channels
    // and that they are named appropriately.

    base_list.clear(); // empty the list, just in case
    base_list.append("lpc");
    base_list.append("cep");
    base_list.append("power");

    int channel_num = 0;

    // the order of the lpc, cepstrum etc is stored in the op variable.
    op.add_iitem("lpc_order", 16);
    op.add_iitem("cep_order", 8);

    map = new EST_TrackMap;

    // Now we create the track map;
    add_to_track_map(base_list, *map, op);

    // The \Ref{EST_Track} can be resized using a version of the
    // \Ref{make_fixed_times} function, which works out the number
    // of channels from the trackmap and assigns the track map
    // to the track.
    print_map(*map);

    make_fixed_times(fv, *map, sig.end(), 0.01);
    
    // If the time values are already filled (e.g. in ps-analysis)
    // the resize function can be used directly:

    fv.resize(EST_CURRENT, *map);

    // \Ref{add_to_track_map} can be called multiple times if we
    // aren't sure exactly how many channels we need, or if delta
    // coefficients are needed. In this case however, the full
    // version must be called, whose fourth argument specifies the 
    // delta number (0 is base) and whose fifth argument indicates
    // the last channel that was used.

    // In the following example, we make the set of coefficients
    // according to base_list, and then make trackmap entries for
    // their delta equivalents.

    map = new EST_TrackMap;

    add_to_track_map(base_list, *map, op, 0, channel_num);
    add_to_track_map(base_list, *map, op, 1, channel_num);

    cout << "position 4g\n";
    
    print_map(*map);
    fv.resize(EST_CURRENT, *map);

    //@} code

    cout << "position 5\n";

    /**@name Producing_delta_and_acceleration_coefficients.

      Delta coefficients represent the numerical differentiation of a
      track, and acceleration coefficients represent the second
      order numerical differentiation.
      */
    //@{ code


    // first some setting up - put a set of normal lpc coefficients in fv,
    // and name and size {\tt del} and {\tt acc} so that they have
    // delta and acc names and have timing info.
    c = 0;
    map = new EST_TrackMap;
    add_to_track_map("lpc", *map, 17, 0, c);
    fv.resize(EST_CURRENT, *map);

    EST_Track del, acc;
    make_timed_track(fv, del, "lpc", 17, 1);
    make_timed_track(fv, acc, "lpc", 17, 2);

    // Given a EST_Track of coefficients {\tt fv}, the \Ref{delta} function
    // is used to produce the delta equivalents {\tt del}:


    delta(fv, del);

    del.save("del1", "est");

    // This can be called again on {\tt del} to get the acceleration 
    // coefficients {\tt acc}:

    cout << "position 5\n";
    print_track_map(acc);
    delta(del, acc);

    acc.save("acc1", "est");


    // It is possible to directly calculate the delta coefficients of
    // a type of coefficient, even if we don't have the base type.
    // \Ref{sigpr_delta_track} will process the waveform, make a temporary
    // track of type "lpc", calculate the delta of this and pass it
    // back in the track it is given (here {\tt del}).

    cout << "position 5z\n";
    make_fixed_times(del, sig.end(), 0.01);
    make_fixed_times(acc, sig.end(), 0.01);

    sigpr_delta_track(sig, del, "lpc", op);
    del.save("del2", "est");
    // Alternatively, the order of the delta can be set explicitly:

    cout << "position 5a\n";

    // 1 = delta
    sigpr_delta_track(sig, del, "lpc", op, 1);
    // 2 = acceleration
    sigpr_delta_track(sig, acc, "lpc", op, 2);

    del.save("del3", "est");
    acc.save("acc2", "est");

    // Sometimes, when dealing with a very complex track, we want a set
    // of delta coefficients of a particular type, but are not sure
    // if the track already contains the base type. While it would in
    // principle be possible to search the track to find out, the
    // \Ref{sigpr_delta_track} function can do this itself if the
    // full track is passed as the last argument:


    sigpr_delta_track(sig, del, "lpc", op, 1, fv);
    del.save("del4", "est");

    // In a similar manner to \Ref{sigpr_base} the \Ref{sigpr_delta} and
    // \Ref{sigpr_acc} functions allow for a full set of delta and 
    // acceleration coefficients to be set in one function call:

    delta_list.clear(); // empty the list, just in case
    delta_list.append("cep");
    delta_list.append("lpc");
    delta_list.append("power");

    op.add_iitem("lpc_order", 16);
    op.add_iitem("cep_order", 8);

    map = new EST_TrackMap;

    // make \Ref{EST_TrackMap} and allocate space:
    add_to_track_map(delta_list, *map, op, 1);
    fv.resize(EST_CURRENT, *map);

    print_track_map(fv);

    // now calculate delta coefficients.
    sigpr_delta(sig, fv, op, delta_list);


    // The clever thing about the preceding piece of code is that
    // fv may already contain the base coefficients - the track map
    // allocation, resizing won't affect this. When \Ref{sigpr_delta}
    // is called, it is aware of this possibility and hence searches for
    // the base coefficients before calculating temporary ones.

    // Acceleration works in a similar way. The name goes into acc_list,
    // which is the list handed to add_to_track_map and sigpr_acc below
    // (previously the name was appended to delta_list by mistake, leaving
    // acc_list empty).

    acc_list.clear(); // empty the list, just in case
    acc_list.append("power");

    // NOTE(review): c was last reset to 0 before the
    // add_to_track_map("lpc", *map, 17, 0, c) call above and is passed on
    // unchanged here - confirm it is the intended starting channel.
    add_to_track_map(acc_list, *map, op, 2, c);
    cout << "position 5b3\n";
    fv.resize(EST_CURRENT, *map);

    cout << "position 5b4\n";
    sigpr_acc(sig, fv, op, acc_list);
    cout << "position 5b5\n";
    fv.save("acc4", "est");

    // The program ends here: none of the sections below are executed,
    // they are only compiled.
    exit(0);

    //@} code



    /**@name Producing Hetrogeneous base, delta and acceleration tracks.

      The \Ref{sigpr_base}, \Ref{sigpr_delta} and \Ref{sigpr_acc} functions
      take lists of desired coefficients and fill the track with the
      appropriate values. However, they still require a \Ref{EST_Track}
      that has the appropriate channels and \Ref{EST_TrackMap}.

      The \Ref{sig2fv} function, by comparison, takes a list of
      coefficient names, makes the track map and performs the necessary
      resizing on the \Ref{EST_Track}. The \Ref{sig2fv_fixed} goes further
      and ensures the track has the correct number of frames and spaces
      them at even intervals.
      */

    //@{ code
    // read in pitchmarks from file
    fv.load("data/kdt_001.pm");

    // construct base coefficient types
    base_list.clear(); // empty the list, just in case
    base_list.append("lpc");
    base_list.append("power");

    // calculate coefficients
    sig2fv(sig, fv, op, base_list);

    // \Ref{sig2fv} can take lists of delta and acceleration coefficients
    // also:

    delta_list.clear(); // empty the list, just in case
    delta_list.append("f0");

    acc_list.clear(); // empty the list, just in case
    acc_list.append("power");

    // calculate coefficients
    sig2fv(sig, fv, op, base_list, delta_list, acc_list);

    // And now save all our good work :-)
    fv.save("/tmp/lpc.fv", "est");

    //@} code

    /**@name Windowing

      The \Ref{EST_Window} class provides a variety of means to
      divide speech into frames using windowing mechanisms.

     */
    
    //@{ code

    // A window function can be created from a window name using the
    // \Ref{EST_Window::creator} function:

    EST_WindowFunc *hamm =  EST_Window::creator("hamming");
    EST_WindowFunc *rect =  EST_Window::creator("rectangular");

    // This function can then be used to create a EST_TBuffer of 
    // window values. In the following example the values from a
    // 256 point hamming window are stored in the buffer win_vals:

    // frame is given 256 elements up front because the loop below
    // writes frame[0] .. frame[255] directly.
    EST_FVector frame(256);
    EST_TBuffer<float> win_vals;

    hamm(256, win_vals);

    // this can then be used to make a frame of speech from the main EST_Wave
    // sig. The following example extracts speech starting at sample 1000

    for (i = 0; i < 256; ++i)
        frame[i] = (float)sig.a(i + 1000) * win_vals[i];

    // Alternatively, exactly the same operation can be performed in a
    // single step by passing the window function to the
    // \Ref{EST_Window::window_signal} function which takes a
    // \Ref{EST_Wave} and performs windowing on a section of it,
    // storing the output in the \Ref{EST_FVector} {\tt frame}.

    EST_Window::window_signal(sig, hamm, 1000, 256, frame, 1);

    // The window function need not be explicitly created, the window
    // signal can work on just the name of the window type:

    EST_Window::make_window(win_vals, 256, "hamming");

    //@} code

    /**@name Frame_based_signal_processing

      The signal processing library provides an extensive set of functions
      which operate on a single frame of coefficients.

      */

    //@{ code

    // The following example shows one method of splitting the signal
    // into frames and calling a signal processing algorithm.


    // First set up the track for 16 order LP analysis

    make_fixed_timed_track(fv, "lpc", sig.end(), 17, 0.01);

    // In this example, we take the analysis frame length to be 256 samples
    // long, and the shift in samples is just the shift in seconds times the
    // sampling frequency.

    int s_length = 256;
    int s_shift =  int(shift * float(sig.sample_rate()));
    EST_FVector coefs;

    // Now we set up a loop which calculates the frames one at a time.
    // {\tt start} is the start position in samples of each frame.
    // The \Ref{EST_Window::window_signal} function is called which
    // makes a \Ref{EST_FVector} frame of the speech via a hamming window. 

    // Using the \Ref{EST_Track::frame} function, the EST_FVector 
    // {\tt coefs} is set to frame {\tt k} in the track. It is important
    // to understand that this operation involves setting an internal
    // smart pointer in {\tt coefs} to the memory of frame {\tt k}. This
    // allows the signal processing function \Ref{sig2lpc} to operate
    // on an input and output \Ref{EST_FVector}, without any copying to or
    // from the main track. After the \Ref{sig2lpc} call, the kth frame
    // of {\tt fv} is now filled with the LP coefficients.

    for (int k1 = 0; k1 < fv.num_frames(); ++k1)
    {
        int start = (k1 * s_shift) - (s_length/2);
        EST_Window::window_signal(sig, "hamming", start, s_length, frame, 1);

        fv.frame(coefs, k1);    // Extract a single frame
        sig2lpc(frame, coefs);  // Pass this to actual algorithm
    }

    // A slightly different tack can be taken for pitch-synchronous analysis.

    // Setting up fv with the pitchmarks and channels (the same pitchmark
    // file as used in the earlier sections):

    fv.load("data/kdt_001.pm");
    make_track(fv, "lpc", 17, 0);

    // Set up as before, but this time calculate the window starts and 
    // lengths from the time points. In this example, the length is a 
    // {\tt factor} (twice) the local frame shift.
    // Note that the only difference between this function and the fixed
    // frame one is in the calculation of the start and end points - the

    // windowing, frame extraction and call to \Ref{sig2lpc} are exactly
    // the same.

    float factor = 2.0;

    for (int k2 = 0; k2 < fv.num_frames(); ++k2)
    {
        s_length = irint(get_frame_size(fv, k2, sig.sample_rate())* factor);
        int start = (irint(fv.t(k2) * sig.sample_rate()) - (s_length/2));

        EST_Window::window_signal(sig, wf, start, s_length, frame, 1);

        fv.frame(coefs, k2);
        sig2lpc(frame, coefs);
    }
    //@} code

    /**@name Filtering 

      Signal processing filtering involves altering
      the frequency characteristics of a signal. In the EST library we
      so far have two main types of filter, {\bf finite impulse
      response (FIR)} filters and {\bf linear prediction (LP)}
      filters. {\bf infinite impulse response (IIR)} filters are not yet
      implemented, though LP filters are a special case of these.

      Filtering involves 2 stages: the design of the filter and the
      use of this filter on the waveform.
      */

    //@{ code

    // First we examine a simple low-pass filter which attempts to suppress
    // all frequencies above a cut-off. Imagine we want to low pass filter
    // a signal at 400Hz. First we design the filter:

    EST_FVector filter;
    int freq = 400;
    int filter_order = 99;

    filter = design_lowpass_FIR_filter(sig.sample_rate(), freq, filter_order);

    // And now use this filter on the signal
    
    FIRfilter(sig, filter);

    // For one-off filtering operations, the filter design can be
    // done in the filter function itself. The \Ref{FIRlowpass_filter}
    // function takes the signal, cut-off frequency and order as
    // arguments and designs the filter on the fly. Because of the
    // overhead of filter design, this function is expensive and
    // should only be used for one-off operations.

    FIRlowpass_filter(sig, freq, filter_order);

    // The equivalent operations exist for high-pass filtering:

    filter = design_highpass_FIR_filter(sig.sample_rate(), 50, filter_order);
    FIRfilter(sig, filter);
    FIRhighpass_filter(sig, 50, filter_order);

    // Filters of arbitrary frequency response can also be designed using
    // the \Ref{design_FIR_filter} function. This function takes a
    // EST_FVector of order $2^{N}$ which specifies the desired frequency
    // response up to 1/2 the sampling frequency. The function returns
    // a set of filter coefficients that attempt to match the desired
    // response.

    EST_FVector response(16);
    response[0] = 1;
    response[1] = 1;
    response[2] = 1;
    response[3] = 1;
    response[4] = 0;
    response[5] = 0;
    response[6] = 0;
    response[7] = 0;
    response[8] = 1;
    response[9] = 1;
    response[10] = 1;
    response[11] = 1;
    response[12] = 0;
    response[13] = 0;
    response[14] = 0;
    response[15] = 0;

    filter = design_FIR_filter(response, 15);

    // Filter with the designed coefficients ({\tt filter}), not with the
    // desired frequency response itself.
    FIRfilter(sig, filter);

    sig.save("sig.filt", "nist");

    // The normal filtering functions can cause a time delay in the
    // filtered waveform. To attempt to eliminate this, a set of
    // double filter functions are provided which guarantee
    // zero phase differences between the original and filtered waveform.

    FIRlowpass_double_filter(sig, 400);
    FIRhighpass_double_filter(sig, 40);


    // Sometimes it is undesirable to have the input signal overwritten.
    // For these cases, a set of parallel functions exist which take
    // an input waveform for reading and an output waveform for writing to.

    EST_Wave sig_out;

    FIRfilter(sig, sig_out, filter);
    FIRlowpass_filter(sig, sig_out, 400);
    FIRhighpass_filter(sig, sig_out, 40);
    FIRlowpass_double_filter(sig_out, 400);
    FIRhighpass_double_filter(sig_out, 40);
    
    //@}

    /**@name Fourier Transforms.

      ffts etcs.
     */
    //@{ code

    

    //@}

    return 0;
}

    //@}
