/* savutil.h - types and externs for libsavutil.a */
#ifndef _SAVUTIL_H_
#define _SAVUTIL_H_

#include <parsedate/parsedate.h>

/* Global flags; only declared here, so the definitions live in some
   other source file.  Presumably a nonzero value turns on verbose /
   debugging output -- confirm against the defining file.  */
extern int SavantVerbose, SavantDebug;

/*** Wordcodes are unsigned int arrays of width WORD_ENCODE_WIDTH ****/
/* A Wordcode is a pointer to the first unsigned int of such an array.
   The typedef itself carries no length, so code receiving a Wordcode
   has to rely on the WORD_ENCODE_WIDTH convention stated above.  */
typedef unsigned int * Wordcode;

#define WORD_ENCODE_WIDTH 3  /* number of unsigned int elements in one wordcode array */
/* The warning below is about CHARACTER_ENCODE_WIDTH (defined right
   after this comment), not about WORD_ENCODE_WIDTH above.

   DO NOT CHANGE THIS FROM 6.  It's not known if the algorithm
   actually -works- if this isn't 6, and there may be places in the
   code that use some knowledge of the fact that this is 6 without
   actually referring to CHARACTER_ENCODE_WIDTH.  */
#define CHARACTER_ENCODE_WIDTH 6  /* number of bits per character in a packed wordvec */


/*** types of vector-fields ****/
/* The enumerators take their implicit values: BODY_FIELD is 0 and
   each following name is one larger, so LAST_FIELD is 7 and
   ERROR_FIELD is 8.  NUM_FIELD_TYPES (8) therefore equals the number
   of enumerators that precede ERROR_FIELD.  The define and the enum
   are kept in step by hand; update both when adding a field.
   NOTE(review): LAST_FIELD looks like an end marker rather than a real
   field type, in which case only 7 real types exist -- confirm how
   NUM_FIELD_TYPES is used before relying on it as a count.  */
#define NUM_FIELD_TYPES 8
enum Field_Types {BODY_FIELD, LOCATION_FIELD, SUBJECT_FIELD, SOURCE_FIELD, 
		  TIME_FIELD, DATE_FIELD, DAY_FIELD, LAST_FIELD, ERROR_FIELD};




/*** DocVecs ***/

/* this is the flat, array rep of a DV (document vector).  It holds no
   wordcodes of its own: a word's weight is located through the index
   stored for that word in an IndexVec (see the IndexVec comment).  */
typedef struct {  
  int num_entries;        /* presumably the length of weights[] -- confirm */
  unsigned int *weights;  /* weight values, one per slot */
} DocVec;

/* this is the dense version (no zero entries).  Unlike DocVec it
   carries the wordcodes alongside the weights.  */
typedef struct {  
  int num_entries;          /* presumably the number of (wordcode, weight) pairs -- confirm */
  unsigned int *wordcodes;  /* presumably num_entries wordcodes packed back to back,
                               WORD_ENCODE_WIDTH unsigned ints each -- confirm */
  unsigned int *weights;    /* weights, presumably parallel to wordcodes -- confirm */
} DenseDocVec;

/* and this is a tree rep of a DV, with wordcodes as sorting keys.
   Each node is one word: its code is stored inline (not via a
   Wordcode pointer) together with its weight and two child links.
   Which child holds the smaller keys is decided by the code that
   builds the tree and is not visible in this header.  */
typedef struct _DV_Tree {
  unsigned int wordcode[WORD_ENCODE_WIDTH];  /* sorting key */
  unsigned int weight;                       /* weight of this word */
  struct _DV_Tree *left;                     /* child subtree */
  struct _DV_Tree *right;                    /* child subtree */
} DV_Tree;

/* and of course the linked-list rep.  Each node stores one word's
   code inline plus its weight, and links to both neighbours.  */
typedef struct _DV_List {
  unsigned int wordcode[WORD_ENCODE_WIDTH];  /* code of this word */
  unsigned int weight;                       /* weight of this word */
  struct _DV_List *prev;                     /* previous node */
  struct _DV_List *next;                     /* next node */
} DV_List;

/* IndexVec is really the same, but the values it contains are used
   for something else.  Rather than being the weight of a word in a 
   particular DocVec, the value is the index into any DocVec where the
   weight of the word for that DocVec can be found.  */
typedef struct {  
  int num_entries;          /* presumably the number of (wordcode, index) pairs -- confirm */
  unsigned int *wordcodes;  /* presumably packed wordcodes, WORD_ENCODE_WIDTH
                               unsigned ints each -- confirm */
  int *indices;             /* positions into a DocVec's weights[] */
} IndexVec;

/* Here is the linked list rep of an IndexVec: one node per word,
   holding the word's code inline, its index value, and links to both
   neighbours.  */
typedef struct _IV_List {
  unsigned int wordcode[WORD_ENCODE_WIDTH];  /* code of this word */
  int index;                                 /* index value for this word */
  struct _IV_List *prev;                     /* previous node */
  struct _IV_List *next;                     /* next node */
} IV_List;

/* not to be confused with DV_List (hopefully), this may be useful 
   for building up a collection of DV's.  Each node refers to one
   whole DocVec and has a forward link only.  */
typedef struct _List_of_DocVecs {
  DocVec *vec;                    /* the document vector held by this node */
  struct _List_of_DocVecs *next;  /* next node */
} List_of_DocVecs;




/*** Misc ***/

/* Pairs an integer with a float.  Presumably vecnum identifies a
   document vector and sim is its similarity score for some query --
   confirm against the code that fills this in.  */
typedef struct {
  int vecnum;
  float sim;
} DocSim;



/*** Extern declarations ***/

/* from docvec.c */
/* The prototypes give parameter types only; see docvec.c for what
   each argument means.
   NOTE(review): FILE, size_t and ssize_t are used below, but the only
   header included here is parsedate.h.  Presumably that header or the
   including source file supplies <stdio.h> and <sys/types.h> --
   confirm, or include them here so this header is self-contained.  */
extern int init_docvecs(void);
/* build a DV_Tree from a file / from an in-memory buffer */
extern DV_Tree *vectorize_file(FILE *, size_t, ssize_t, int, size_t *, enum Field_Types);
extern DV_Tree *vectorize_buffer(char *, enum Field_Types);
/* takes the tree root by address, so the root pointer may be replaced */
extern int dvtree_increment(DV_Tree **, Wordcode);
extern DV_Tree *merge_dvtrees(DV_Tree *, DV_Tree *);
extern DocVec *merge_with_global(DV_Tree *);
/* conversions from the tree rep to the other representations */
extern DocVec *dvtree_to_dv(DV_Tree *, IndexVec *);
extern DV_List *dvtree_to_dvlist(DV_Tree *);
extern DenseDocVec *dvtree_to_ddv(DV_Tree *);
extern IndexVec *flatten_ivlist(void);
/* presumably release the given structure -- confirm ownership rules in docvec.c */
extern int destroy_dv(DocVec *);
extern int destroy_ddv(DenseDocVec *);
extern int destroy_dvlist(DV_List *);
extern int destroy_dvtree(DV_Tree *);
/* presumably write a readable dump to the given stream -- confirm */
extern int print_docvec(FILE *, DocVec *);
extern int print_dvtree(FILE *, DV_Tree *);
extern int print_ivlist(FILE *, IV_List *);
extern int print_dvlist(FILE *, DV_List *);
extern int print_givl(FILE *);

/* from big.c */
/* Both take the same parameter types, in the same order, as the
   standard fread/fwrite (buffer, size, count, stream), except that
   fwrite_big's buffer is not const-qualified.  What "big" refers to is
   not visible here -- see big.c.  */
extern size_t fread_big(void *, size_t, size_t, FILE *);
extern size_t fwrite_big(void *, size_t, size_t, FILE *);

/* from docvecio.c */
/* Stream I/O and text conversion for document vectors.  Parameter
   meanings are not shown by these prototypes; the notes below are
   inferred from the names and should be confirmed in docvecio.c.  */

/* presumably write / read one DenseDocVec on the given stream */
extern int save_ddv(FILE *, DenseDocVec *);
extern DenseDocVec *load_ddv(FILE *);
/* presumably an array of vector magnitudes plus its length; the
   loader's int * is presumably where the length is returned */
extern int write_dv_mags(FILE *, float *, int);
extern float *load_dv_mags(FILE *, int *);
/* presumably an array of vector sizes plus its length */
extern int write_dv_szs(FILE *, int *, int);
extern int *load_dv_szs(FILE *);
/* presumably text renderings of a DenseDocVec */
extern char *ddv2string(DenseDocVec *, int);
extern int ddv2description(DenseDocVec *, float, char ***, int **);

/* from opendie.c */
/* Returns a FILE * built from three strings.  Judging by the name it
   presumably terminates the program instead of returning on failure;
   the role of each string argument is not visible here -- see
   opendie.c.  */
extern FILE *open_or_die(char *, char *, char *);

/* from stem.c */
/* Takes a non-const char *, so it presumably stems the word in place;
   the meaning of the int result is not visible here -- see stem.c.  */
extern int Stem(char *);

/* from words.c */
/* by_alpha has the comparator signature expected by qsort/bsearch */
extern int by_alpha(const void *, const void *);
/* presumably common-word ("stop word") handling -- confirm in words.c */
extern int core_comm_local(void);
extern int core_comm(char *);
extern int is_common(char *);
/* conversion between text and Wordcode; a Wordcode argument refers to
   WORD_ENCODE_WIDTH unsigned ints */
extern int encode_word(unsigned char *, Wordcode, enum Field_Types);
extern int decode_word(Wordcode, char *);
/* comparison of two wordcodes: one with an int result, one with a double result */
extern int wordcode_cmp(Wordcode, Wordcode);
extern double wordcode_diff(Wordcode, Wordcode);
/* maps an unsigned int to a field type; presumably the argument is one
   element of a wordcode -- confirm */
extern enum Field_Types word_type(unsigned int);



#endif /* _SAVUTIL_H_ */
