/* wcomp.c
 * Written by Dr. Tom <tomh@po.crl.go.jp>
 * 
 * Word completion for GTKeyboard - see also splay.c
 * Tweaked a bit by David Allen <s2mdalle@titan.vcu.edu> to handle different
 * file existence cases, error cases and such.
 */
/* GTKeyboard - A Graphical Keyboard For X
 * Copyright (C) 1999, 2000
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with this library; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA  02111-1307, USA.
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <../include/macro-fu.h>

#ifndef BUFFER_WORDLIST
#  include <sys/types.h>
#  include <sys/stat.h>
#  include <unistd.h>
#  include <fcntl.h>
#endif /* BUFFER_WORDLIST */

#include "config.h"
#include "wcomp.h"

/* Capacity of the first[] array in wcomp_open(), which remembers the
 * earliest words read so that about lg(nwords) of them can be splayed
 * once the whole list has been loaded.
 */
#define FIRST 30     /* increase this if you have more than 2^30 words :-) */

/* DictType is the handle wcomp_open() reads the wordlist through: a stdio
 * stream when BUFFER_WORDLIST is defined (words are read line by line and
 * strdup()ed), otherwise a plain file descriptor (the file is read into
 * one buffer).
 */
#ifdef BUFFER_WORDLIST
/* NOTE(review): strdup is declared by hand here, presumably because a
 * strict-ANSI <string.h> does not declare it -- confirm.
 */
char *strdup(const char *);
typedef FILE *DictType;
#else
typedef int DictType;
#endif

/* Helpers defined further down in this file. */
static void topnlist(WComp *wcomp, WCompCursor *new);
static int is_prefix(char *s, char *t);
static void initial_match(char *s, char *t);
/* malloc() wrapper that prints a message and exits on failure. */
char *wcomp_new(int size);

/* Open a wordlist and read it in, one word per line.  If name is NULL or "",
 * use a default wordlist.  Return a pointer to a WComp structure, or NULL
 * if the wordlist could not be read. 
 */
WComp *wcomp_open(char *name)
{
     int i, l, len, maxlen;
     unsigned int n;
     char *s, *w, *first[FIRST];
     DictType dict;
     WComp *wcomp;
     WCompTree *root;
     
#ifdef BUFFER_WORDLIST
     char buf[100];
#else
     char *wordlist;
     struct stat statbuf;
#endif
     
     /* Open the wordlist and read it in. */
     
     if(name == NULL || *name == '\0')
     {
          if(file_exists(DEFAULT_WORDLIST))
               name = DEFAULT_WORDLIST;
          else if(file_exists(ALTERNATE_WORDLIST)) 
               name = ALTERNATE_WORDLIST;
          else 
          {
               /* I have no idea what file I should open as the cache,
                * and that's pretty bad.  Get out.
                */
               fprintf(stderr,"wcomp_open: no legal filename to open.\n");
               fflush(stderr);
               return NULL;
          } /* End else */
     } /* End if */
     
     maxlen = 0;
     n = 0;
     root = NULL;
     
#ifdef BUFFER_WORDLIST
     
     if ((dict = fopen(name, "r")) == NULL)
     {
          fprintf(stderr,"wcomp_open: failed to open \"%s\": %s\n",
                  name, g_strerror(errno));
          fflush(stderr);
          return NULL;
     } /* End if */

     while (fgets(buf, sizeof(buf), dict)) 
     {
          len = strlen(buf) - 1;
          buf[len] = '\0';
          if (len > maxlen) maxlen = len;
          w = strdup(buf);
          
          root = wcomp_insert(w, root);
          root->count = 0;
          if (n < FIRST) 
               first[n] = w;
          n++;
     } /* End while */
     
     fclose(dict);
     
#else /* BUFFER_WORDLIST */
     
     if ((dict = open(name, O_RDONLY)) < 0)
     {
          fprintf(stderr,"wcomp_open: failed to open \"%s\": %s\n",
                  name, g_strerror(errno));
          fflush(stderr);
          return NULL;
     } /* End if */

     fstat(dict, &statbuf);
     l = statbuf.st_size;
     s = wordlist = (char *)wcomp_new(l + 1);
     i = read(dict, s, l);
     s[l] = '\0';
     
     i = 0;
     while (i < l)
     {
          /* Find the end of this word and nul terminate it. */
          
          w = s;
          while (*s && *s != '\n')
               s++;
          
          len = s - w;
          *s++ = '\0';
          if (len > maxlen) 
               maxlen = len;
          i += len + 1;
          
          root = wcomp_insert(w, root);
          root->count = 0;
          if (n < FIRST)
               first[n] = w;
          
          n++;
     } /* End while */
     
     close(dict);
     
#endif /* BUFFER_WORDLIST */
     
     /* Create a WComp structure and fill it in. */
     
     wcomp = (WComp *)wcomp_new(sizeof(WComp));
     wcomp->topnlist = NULL;
     wcomp_set_topnsize(wcomp, TOPLEN);
     wcomp->maxwordlen = maxlen;
     wcomp->nwords = n;
     wcomp->unambig = (char *)wcomp_new(maxlen + 1);
     wcomp->word = (char *)wcomp_new(maxlen + 1);
#ifndef BUFFER_WORDLIST
     wcomp->wordlist = wordlist;
#endif
     
     /* Get lg n. */
     
     i = 0;
     while (n) 
     {
          i++;
          n >>= 1;
     } /* End while */

     /* Now splay the first lg n.  Assuming the wordlist was sorted
      * to begin with, this balances the tree.  If the wordlist is not
      * sorted, this won't do anything, and the above insertion will
      * be slower (since it has to sort the list). 
      */
     
     for (n = 0; n < i; n++) 
     {
          root = wcomp_splay(first[n], root);
     } /* End for */

     wcomp->root = root;
     
     return wcomp;
} /* End wcomp_open */

/* Call this to free everything (including the wcomp pointer).
 * Passing NULL is a no-op, like free(); wcomp_open() can return NULL, so
 * callers may hand that straight back here.
 */

void wcomp_free(WComp *wcomp)
{
     WCompTree *t, *x;
     
     if (wcomp == NULL)
          return;
     
     /* Free aux structs (free(NULL) is harmless). */
     
     free(wcomp->topncurs);
     free(wcomp->topnlist);
     free(wcomp->unambig);
     free(wcomp->word);
     
     /* Start at the root; free each node.  Each pass removes the current
      * root t and continues with a tree x holding the remaining nodes.
      */
     
     t = wcomp->root;
     while (t != NULL) 
     {
          if (t->left == NULL)
          {
               x = t->right;
          } /* End if */
          else
          {
               /* Splay t's own key within its left subtree, then hang
                * t's right subtree off the new subtree root.
                * NOTE(review): this presumably relies on the splay leaving
                * a root with no right child (every key on the left sorts
                * before t->key) -- confirm against splay.c.
                */
               x = wcomp_splay(t->key, t->left);
               x->right = t->right;
               if (x->right != NULL) x->right->parent = x;
          } /* End else */

#  ifdef BUFFER_WORDLIST
          /* Keys were strdup()ed one by one in this mode. */
          free(t->key);
#  endif
          free(t);
          t = x;
     } /* End while */
#ifndef BUFFER_WORDLIST
     /* Keys point into this single buffer, so it is freed exactly once. */
     free(wcomp->wordlist);
#endif
     free(wcomp);
} /* End wcomp_free() */

/* Set up the "top ten list".  We keep an internal array of cursors
 * for doing the search, and a parallel array to return to the user. 
 *
 * size is the wanted number of completions; it is clamped to the range
 * 0..MAXTOPLEN.  Any previous arrays are freed and the list is reset to
 * "no completions".  Always returns 1: there is no failure path, since
 * wcomp_new() exits on allocation failure.
 */
int wcomp_set_topnsize(WComp *wcomp, int size)
{
     /* Free the old one, if any.  topnlist doubles as the "already
      * allocated" flag for both arrays.
      */
     if(wcomp->topnlist) 
     {
          free(wcomp->topnlist);
          free(wcomp->topncurs);
     } /* End if */
     
     /* Limit the size to a sane range; a negative value must never reach
      * the allocation size arithmetic below.
      */
     if (size > MAXTOPLEN) 
          size = MAXTOPLEN;
     if (size < 0)
          size = 0;
     
     /* Now allocate new ones.  topnlist is two bigger because we
      * return any unambiguous completion in the first slot, and the
      * list is NULL terminated.  At least one cursor slot is always
      * allocated so that a size of zero never asks for zero bytes.
      */
     wcomp->topncurs = (WCompCursor *)
          wcomp_new((size > 0 ? size : 1) * sizeof(WCompCursor));
     wcomp->topnlist = (char **)wcomp_new((size + 2) * sizeof(char *));
     wcomp->topnsize = size;
     wcomp->topn = 0;
     wcomp->topnlist[0] = wcomp->unambig;
     wcomp->topnlist[1] = NULL;
     return(1);
} /* End wcomp_set_topnsize() */

/* Return TRUE (non-zero) if the word w is in the tree, FALSE (0) if not.
 * An empty tree contains no words.
 */
int wcomp_lookup(WComp *wcomp, char *w)
{
     WCompCursor p;
     
     /* wcomp_open() leaves root NULL for an empty wordlist. */
     if (wcomp->root == NULL)
          return 0;
     
     /* The cursor may end up on a different word, so the key itself
      * still has to be compared.
      */
     wcomp_find(w, wcomp->root, &p);
     return (p.t != NULL && CMP(w, p.t->key) == 0);
} /* End wcomp_lookup() */

/* Look for the (prefix) w in the tree and return a NULL terminated list
 * of pointers to possible completions.  The first completion is the
 * unambiguous completion, if any, stored as just the characters which
 * need to be appended; thus if "abc" is a word in the tree, and no other
 * word begins with "ab", then if the query is "ab", the unambiguous
 * completion is "c".  If there is no unambiguous completion, the
 * pointer points to a nul.  The rest of the completions are full words,
 * at most topnsize of them, ordered by topnlist(): higher use count
 * first, ties broken by smaller cursor depth.
 *
 * The returned array and the strings it points to belong to wcomp and are
 * overwritten by the next call.  An empty tree yields a list holding only
 * the empty unambiguous completion.
 */
char **wcomp_complete(WComp *wcomp, char *w)
{
     int i, n;
     char *u;
     WCompCursor p;
     
     /* Start from "no completions". */
     
     wcomp->topn = 0;
     u = wcomp->unambig;
     u[0] = '\0';
     
     /* wcomp_open() leaves root NULL for an empty wordlist; there is
      * nothing to search then.
      */
     if (wcomp->root == NULL)
     {
          wcomp->topnlist[0] = u;
          wcomp->topnlist[1] = NULL;
          return wcomp->topnlist;
     } /* End if */
     
     /* First find the prefix in the tree. */
     
     wcomp_find(w, wcomp->root, &p);
     
     /* If w is not a prefix of the current word, skip to the next word. */
     
     i = 0;
     if (p.t)
     {
          i = is_prefix(w, p.t->key);
          if (!i) 
          {
               wcomp_next(&p);
               if (p.t) 
               {
                    i = is_prefix(w, p.t->key);
               } /* End if */
          } /* End if */
     } /* End if */
     
     /* We initialize the unambiguous match to the current word (if
      * w is a prefix of that word).  Also insert that word into topncurs. 
      */
     if (i) 
     {
          strcpy(u, p.t->key);
          topnlist(wcomp, &p);
          wcomp_next(&p);
     } /* End if */
     
     /* Look through all the words with this prefix.  Keep track
      * of the longest initial match and the top n. 
      */
     while (p.t && is_prefix(w, p.t->key)) 
     {
          initial_match(u, p.t->key);
          topnlist(wcomp, &p);
          wcomp_next(&p);
     } /* End while */
     
     /* Copy the topncurs pointers into the topnlist, and set the
      * topnlist[0] (unambiguous) pointer. 
      */
     n = wcomp->topn;
     for (i = 0; i < n; i++) 
     {
          wcomp->topnlist[i + 1] = wcomp->topncurs[i].t->key;
     } /* End for */

     wcomp->topnlist[n + 1] = NULL;
     
     /* u holds the common prefix of all matches, which starts with w
      * itself; skip over w so that only the characters to append are
      * reported.  If u is empty there is nothing to skip.
      */
     n = 0;
     if (u[0] != '\0')
          n = (int)strlen(w);
     wcomp->topnlist[0] = u + n;
     
     return wcomp->topnlist;
} /* End wcomp_complete() */

/* Offer the cursor *new to the top-n table in wcomp->topncurs.
 *
 * The table is kept ordered by descending use count, and by ascending
 * depth among entries with the same count.  The newcomer is slotted in
 * below every entry that outranks or ties it; when the table is already
 * full the last entry drops off, or the newcomer itself is discarded if
 * it ranks below everything present.
 */
static void topnlist(WComp *wcomp, WCompCursor *new)
{
     int pos, slot;
     
     /* Walk up from the bottom of the table until an entry is found
      * that the newcomer must stay below.  pos ends up as the index the
      * newcomer should occupy.
      */
     pos = wcomp->topn;
     while (pos > 0)
     {
          WCompCursor *above = &wcomp->topncurs[pos - 1];
          int diff = above->t->count - new->t->count;
          
          if (diff > 0 || (diff == 0 && above->depth <= new->depth))
               break;
          pos--;
     }
     
     /* Falls off the end of a full table: nothing to do. */
     if (pos == wcomp->topnsize)
          return;
     
     /* Grow the table by one unless it is full, in which case the
      * last entry gets overwritten by the shift below.
      */
     slot = wcomp->topn;
     if (slot != wcomp->topnsize)
          wcomp->topn++;
     else
          slot--;
     
     /* Open a gap at pos by shifting the tail down one place. */
     while (slot > pos)
     {
          wcomp->topncurs[slot] = wcomp->topncurs[slot - 1];
          slot--;
     }
     
     wcomp->topncurs[pos] = *new;
}

/* Return 1 if string s is a prefix of string t (every string, including
 * the empty one, is a prefix of itself), otherwise 0.
 */

static int is_prefix(char *s, char *t)
{
     for (;; s++, t++)
     {
          /* Ran out of s without a mismatch: s is a prefix. */
          if (*s == '\0')
               return 1;
          
          /* Mismatch, which also covers t ending before s does. */
          if (*s != *t)
               return 0;
     }
}

/* Cut s down, in place, to the longest leading part it shares with t. */

static void initial_match(char *s, char *t)
{
     size_t same = 0;
     
     /* Count the leading characters the two strings have in common. */
     while (s[same] != '\0' && s[same] == t[same])
          same++;
     
     s[same] = '\0';
}

/* Mark a word as "used".  This splays it to the root and increments
 * the count.  Does nothing when the tree is empty.
 *
 * NOTE(review): the count of whatever node ends up at the root is
 * incremented; if w is not in the tree that is presumably a neighbouring
 * word -- confirm against wcomp_splay() in splay.c.
 */
void wcomp_use(WComp *wcomp, char *w)
{
     /* wcomp_open() leaves root NULL for an empty wordlist. */
     if (wcomp->root == NULL)
          return;
     
     wcomp->root = wcomp_splay(w, wcomp->root);
     if (wcomp->root != NULL)
          wcomp->root->count++;
} /* End wcomp_use() */

/* Write the usage cache to out: one "<count> <word>" line for every word
 * whose count is non-zero, in cursor order.
 */
void wcomp_write_cache(WComp *wcomp, FILE *out)
{
     WCompCursor cur;
     
     /* Looking up the empty string gets us to the first word; from
      * there, step through the tree until the cursor runs out.
      */
     for (wcomp_find("", wcomp->root, &cur); cur.t; wcomp_next(&cur))
     {
          WCompTree *node = cur.t;
          
          if (node->count)
               fprintf(out, "%d %s\n", node->count, node->key);
     }
}

/* Read a usage cache in the "<count> <word>" line format written by
 * wcomp_write_cache() from in, and set the count of each listed word.
 *
 * Lines without a space are ignored, and so are entries whose word is
 * not in the tree (for instance a cache left over from another
 * wordlist); previously such an entry overwrote the count of whichever
 * word the lookup happened to land on.  Nothing is read when the tree is
 * empty.
 */
void wcomp_read_cache(WComp *wcomp, FILE *in)
{
     int count;
     char *s, buf[100];
     WCompCursor p;
     
     /* wcomp_open() leaves root NULL for an empty wordlist. */
     if (wcomp->root == NULL)
          return;
     
     /* Read the cache file and update the tree. */
     
     while (fgets(buf, sizeof(buf), in)) 
     {
          /* The word starts right after the first space. */
          s = strchr(buf, ' ');
          if (s == NULL)
               continue;
          s++;
          
          count = atoi(buf);
          
          /* Drop the trailing newline if there is one.  Unlike indexing
           * with strlen(s) - 1, this is safe for an empty word and for
           * a final line that has no newline.
           */
          s[strcspn(s, "\n")] = '\0';
          
          /* Only touch the node if it really is this word. */
          wcomp_find(s, wcomp->root, &p);
          if (p.t != NULL && CMP(s, p.t->key) == 0)
               p.t->count = count;
     } /* End while */
} /* End wcomp_read_cache() */

/* Checking for malloc() errors irritates me, so do it in one place.
 *
 * Allocate size bytes and return the (uninitialized) block; the caller
 * releases it with free().  Never returns NULL: on failure, or if size is
 * negative, a message is printed on stderr and the program exits with
 * status 1.
 */
char *wcomp_new(int size)
{
     char *p;
     
     /* A negative size would silently turn into a huge size_t. */
     if (size < 0)
     {
          fprintf(stderr, "wcomp_new: invalid allocation size %d\n", size);
          exit(1);
     } /* End if */
     
     /* malloc(0) is allowed to return NULL without being out of memory,
      * so always ask for at least one byte.
      */
     p = (char *)malloc(size > 0 ? (size_t)size : 1);
     if (p == NULL) 
     {
          fprintf(stderr, "out of memory\n");
          exit(1);
     } /* End if */
     return (p);
} /* End wcomp_new() */
