/* invent.c: project tree inventory library routines
 *
 ****************************************************************
 * Copyright (C) 2002, 2003 Tom Lord
 *
 * See the file "COPYING" for further information about
 * the copyright and warranty status of this work.
 */


#include "hackerlab/bugs/panic.h"
#include "hackerlab/os/errno.h"
#include "hackerlab/mem/mem.h"
#include "hackerlab/char/char-class.h"
#include "hackerlab/char/str.h"
#include "hackerlab/arrays/ar.h"
#include "hackerlab/fs/file-names.h"
#include "hackerlab/vu/safe.h"
#include "tla/libarch/inv-tags.h"
#include "tla/libarch/invent.h"


/* __STDC__ prototypes for static functions */
static void source_inventory_callback (t_uchar * path,
                                       struct stat * statb,
                                       enum arch_inventory_category category,
                                       t_uchar * tag,
                                       void * closure);
static void source_inventory_files_callback (t_uchar * path,
                                             struct stat * statb,
                                             enum arch_inventory_category category,
                                             t_uchar * tag,
                                             void * closure);
static int cmp_files (const void * va, const void * vb);
static int right_order_for_recursion (char * a, char * b);
static int contains_illegal_character (char * filename);
static int filename_matches (regex_t * pattern, char * filename);
static int is_control_file (char * rel_file, char * filename);
static int is_nested_tree (char * path);
static int is_comment_line (t_uchar * line, long len);
static int sets_re (char * kw, char ** re, t_uchar * line, long len);
static int sets_tagging_method (char * kw, enum arch_tagging_method * method_var, enum arch_tagging_method method, t_uchar * line, long len);



/* Collect the source inventory of the tree rooted at `tree_root'.
 *
 * The working directory is switched to `tree_root' for the duration
 * of the traversal and restored afterwards through a descriptor
 * opened on "." beforehand.  Each path reported by the traversal is
 * appended to the result as a (path, tag) record.
 *
 *   include_ctl       non-zero: names matching the excludes pattern
 *                     are not skipped
 *   include_precious  non-zero: precious files are reported as well
 *   include_nested    stored in `options.nested' for the traversal
 *
 * Returns the accumulated table; it is still 0 when no record was
 * ever added.
 */
rel_table
arch_source_inventory (t_uchar * tree_root, int include_ctl, int include_precious, int include_nested)
{
  struct arch_inventory_options opts;
  rel_table inventory = 0;
  int saved_cwd_fd;

  /* remember where we are, then work relative to the tree root.
   */
  saved_cwd_fd = safe_open (".", O_RDONLY, 0);
  safe_chdir (tree_root);

  mem_set0 ((t_uchar *)&opts, sizeof (opts));

  opts.categories = arch_inventory_source;
  if (include_precious)
    opts.categories |= arch_inventory_precious;

  opts.want_tags = 1;
  opts.method = arch_names_tagging; /* default only */
  opts.nested = include_nested;
  opts.include_excluded = (include_ctl != 0);

  arch_get_inventory_naming_conventions (&opts, ".");
  arch_inventory_traversal (&opts, ".", source_inventory_callback, (void *)&inventory);
  arch_free_inventory_naming_conventions (&opts);

  /* return to the directory we started from.
   */
  safe_fchdir (saved_cwd_fd);
  safe_close (saved_cwd_fd);

  return inventory;
}

/* Like a source inventory of `tree_root', but directories are left
 * out of the result and nested trees are never entered
 * (`options.nested' is forced to 0).
 *
 * The working directory is changed to `tree_root' while traversing
 * and restored before returning.
 *
 *   include_ctl       non-zero: names matching the excludes pattern
 *                     are not skipped
 *   include_precious  non-zero: precious files are reported as well
 *
 * Returns the accumulated table of (path, tag) records (0 if none
 * were added).
 */
rel_table
arch_source_files_inventory (t_uchar * tree_root, int include_ctl, int include_precious)
{
  struct arch_inventory_options opts;
  rel_table file_list = 0;
  int origin_fd;

  origin_fd = safe_open (".", O_RDONLY, 0);
  safe_chdir (tree_root);

  mem_set0 ((t_uchar *)&opts, sizeof (opts));

  if (include_precious)
    opts.categories = arch_inventory_source | arch_inventory_precious;
  else
    opts.categories = arch_inventory_source | 0;

  opts.want_tags = 1;
  opts.method = arch_names_tagging; /* default only */
  opts.nested = 0;
  opts.include_excluded = (include_ctl != 0);

  arch_get_inventory_naming_conventions (&opts, ".");
  arch_inventory_traversal (&opts, ".", source_inventory_files_callback, (void *)&file_list);
  arch_free_inventory_naming_conventions (&opts);

  safe_fchdir (origin_fd);
  safe_close (origin_fd);

  return file_list;
}


/* Traversal callback: append a (path, tag) record to the rel_table
 * that `closure' points at.  `statb' and `category' are not used.
 */
static void
source_inventory_callback (t_uchar * path,
                           struct stat * statb,
                           enum arch_inventory_category category,
                           t_uchar * tag,
                           void * closure)
{
  rel_add_records ((rel_table *)closure, rel_make_record (path, tag, 0), 0);
}



/* Traversal callback: append a (path, tag) record to the rel_table
 * that `closure' points at, unless `statb' describes a directory.
 * `category' is not used.
 */
static void
source_inventory_files_callback (t_uchar * path,
                                 struct stat * statb,
                                 enum arch_inventory_category category,
                                 t_uchar * tag,
                                 void * closure)
{
  /* directories are not part of a files-only inventory.
   */
  if (S_ISDIR (statb->st_mode))
    return;

  rel_add_records ((rel_table *)closure, rel_make_record (path, tag, 0), 0);
}




/* Return a newly saved copy (via str_save) of the default extended
 * regexp for inventory category `cat'.
 *
 * Panics for a category that has no default pattern.
 *
 * NOTE(review): the result is presumably owned by the caller and
 * released with lim_free -- confirm against callers.
 */
t_uchar *
arch_default_naming_conventions_regexp (enum arch_inventory_category cat)
{
  switch (cat)
    {
    default:
      {
        panic ("unrecognized inventory category (arch_default_naming_conventions_regexp)");
        return 0;                 /* not reached */
      }

    case arch_inventory_source:
      {
        return str_save (0, "^[_=a-zA-Z0-9].*$");
      }

    case arch_inventory_precious:
      {
        return str_save (0, "^(\\+.*|\\.gdbinit|\\.#ckpts-lock|=build\\.*|=install\\.*|CVS|CVS\\.adm|RCS|RCSLOG|SCCS|TAGS)$");
      }

    case arch_inventory_backup:
      {
        return str_save (0, "^.*(~|\\.~[0-9]+~|\\.bak|\\.orig|\\.rej|\\.original|\\.modified|\\.reject)$");
      }

    case arch_inventory_junk:
      {
        return str_save (0, "^(,.*)$");
      }

    case arch_inventory_unrecognized:
      {
        return str_save (0, "^(.*\\.(o|a|so|core|so(\\.[[:digit:]]+)*)|core)$");
      }

    case arch_inventory_excludes:
      {
        /* The leading dot must be escaped: an unescaped `.' matches
         * any character, which would also exclude names such as
         * "xarch-ids".
         */
        return str_save (0, "^(\\.arch-ids|\\{arch\\})$");
      }
    }
}



/* Return a newly saved copy (via str_save) of the "ancient" default
 * extended regexp for inventory category `cat'.
 *
 * These are the patterns arch_get_inventory_naming_conventions
 * starts from before reading a tree's tagging-method file; that
 * caller releases each result with lim_free.
 *
 * Panics for a category that has no default pattern.
 */
t_uchar *
arch_ancient_default_naming_conventions_regexp (enum arch_inventory_category cat)
{
  switch (cat)
    {
    default:
      {
        panic ("unrecognized inventory category (arch_ancient_default_naming_conventions_regexp)");
        return 0;                 /* not reached */
      }

    case arch_inventory_source:
      {
        return str_save (0, "^([_=a-zA-Z0-9].*|\\.arch-ids|\\{arch\\}|\\.arch-project-tree)$");
      }

    case arch_inventory_precious:
      {
        return str_save (0, "^(\\+.*|\\.gdbinit|\\.#ckpts-lock|=build\\.*|=install\\.*|CVS|CVS\\.adm|RCS|RCSLOG|SCCS|TAGS)$");
      }

    case arch_inventory_backup:
      {
        return str_save (0, "^.*(~|\\.~[0-9]+~|\\.bak|\\.orig|\\.rej|\\.original|\\.modified|\\.reject)$");
      }

    case arch_inventory_junk:
      {
        return str_save (0, "^(,.*)$");
      }

    case arch_inventory_unrecognized:
      {
        return str_save (0, "^(.*\\.(o|a|so|core)|core)$");
      }

    case arch_inventory_excludes:
      {
        /* The leading dot must be escaped (as it already is in the
         * source pattern above): an unescaped `.' matches any
         * character, which would also exclude names such as
         * "xarch-ids".
         */
        return str_save (0, "^(\\.arch-ids|\\{arch\\})$");
      }
    }
}


/* Fill in the tagging method and the six compiled naming-convention
 * patterns of `options'.
 *
 * The patterns start out as the ancient defaults.  When `tree_root'
 * is non-0 and the tree's tagging-method file is accessible, every
 * non-comment line of that file may select a tagging method
 * ("implicit", "tagline", "explicit", "names") or replace one
 * regexp ("exclude", "junk", "backup", "precious", "unrecognized",
 * "source").  At most one keyword is applied per line; lines that
 * match no keyword are ignored.
 *
 * Panics if any resulting regexp fails to compile.  The compiled
 * patterns are released with arch_free_inventory_naming_conventions.
 */
void
arch_get_inventory_naming_conventions (struct arch_inventory_options * options,
                                       char * tree_root)
{
  char * excludes;
  char * junk;
  char * backup;
  char * precious;
  char * unrecognized;
  char * source;

  /* start from the built-in conventions.
   */
  excludes = arch_ancient_default_naming_conventions_regexp (arch_inventory_excludes);
  junk = arch_ancient_default_naming_conventions_regexp (arch_inventory_junk);
  backup = arch_ancient_default_naming_conventions_regexp (arch_inventory_backup);
  precious = arch_ancient_default_naming_conventions_regexp (arch_inventory_precious);
  unrecognized = arch_ancient_default_naming_conventions_regexp (arch_inventory_unrecognized);
  source = arch_ancient_default_naming_conventions_regexp (arch_inventory_source);

  if (tree_root)
    {
      t_uchar * method_path;

      method_path = arch_tree_tagging_method_file (tree_root);

      if (!safe_access (method_path, F_OK))
        {
          int method_fd;
          t_uchar * text;
          long text_len;

          method_fd = safe_open (method_path, O_RDONLY, 0);

          for (;;)
            {
              safe_next_line (&text, &text_len, method_fd);
              if (!text_len)
                break;

              /* The first handler that recognizes the line wins;
               * the remaining ones are not consulted.
               */
              if (is_comment_line (text, text_len))
                continue;

              if (sets_tagging_method ("implicit", &options->method, arch_implicit_tagging, text, text_len))
                continue;
              if (sets_tagging_method ("tagline", &options->method, arch_tagline_tagging, text, text_len))
                continue;
              if (sets_tagging_method ("explicit", &options->method, arch_explicit_tagging, text, text_len))
                continue;
              if (sets_tagging_method ("names", &options->method, arch_names_tagging, text, text_len))
                continue;

              if (sets_re ("exclude", &excludes, text, text_len))
                continue;
              if (sets_re ("junk", &junk, text, text_len))
                continue;
              if (sets_re ("backup", &backup, text, text_len))
                continue;
              if (sets_re ("precious", &precious, text, text_len))
                continue;
              if (sets_re ("unrecognized", &unrecognized, text, text_len))
                continue;

              (void)sets_re ("source", &source, text, text_len);
            }

          safe_close (method_fd);
        }

      lim_free (0, method_path);
    }

  /* compile each convention; a pattern that does not compile is fatal.
   */
  if (regcomp (&options->excludes_pattern, excludes, REG_EXTENDED))
    panic ("bogus tagging-method regexp for `excludes'");

  if (regcomp (&options->junk_pattern, junk, REG_EXTENDED))
    panic ("bogus tagging-method regexp for `junk'");

  if (regcomp (&options->backup_pattern, backup, REG_EXTENDED))
    panic ("bogus tagging-method regexp for `backup'");

  if (regcomp (&options->precious_pattern, precious, REG_EXTENDED))
    panic ("bogus tagging-method regexp for `precious'");

  if (regcomp (&options->unrecognized_pattern, unrecognized, REG_EXTENDED))
    panic ("bogus tagging-method regexp for `unrecognized'");

  if (regcomp (&options->source_pattern, source, REG_EXTENDED))
    panic ("bogus tagging-method regexp for `source'");

  /* the regexp sources are no longer needed once compiled.
   */
  lim_free (0, excludes);
  lim_free (0, junk);
  lim_free (0, backup);
  lim_free (0, precious);
  lim_free (0, unrecognized);
  lim_free (0, source);
}


/* Release the six compiled patterns held in `options' (excludes,
 * junk, backup, precious, unrecognized, source), in that order.
 */
void
arch_free_inventory_naming_conventions (struct arch_inventory_options * options)
{
  regex_t * compiled[6];
  int i;

  compiled[0] = &options->excludes_pattern;
  compiled[1] = &options->junk_pattern;
  compiled[2] = &options->backup_pattern;
  compiled[3] = &options->precious_pattern;
  compiled[4] = &options->unrecognized_pattern;
  compiled[5] = &options->source_pattern;

  for (i = 0; i < 6; ++i)
    regfree (compiled[i]);
}


/* Classify every entry of directory `root' and report it through
 * `callback', recursing into source directories.
 *
 * The entries of `root' are read, sorted with cmp_files and then
 * examined one by one ("." and ".." are always skipped).  Each entry
 * is lstat'ed and assigned the first category that applies:
 *
 *   1. name contains an illegal character        -> unrecognized
 *   2. name matches the excludes pattern          -> skipped silently
 *      (unless options->include_excluded)
 *   3. relative path is an arch control file     -> source
 *   4. name starts with "++"                      -> precious
 *   5. name starts with ",," or matches junk      -> junk
 *   6. name matches the backup pattern            -> backup
 *   7. name matches the precious pattern          -> precious
 *   8. name matches the unrecognized pattern      -> unrecognized
 *   9. name matches the source pattern            -> source
 *  10. anything else                              -> unrecognized
 *
 * `callback' is invoked for an entry only when its category bit is
 * set in options->categories.  `closure' is passed through untouched.
 *
 * A source directory that contains "{arch}" is a nested tree: it is
 * reported as arch_inventory_tree and entered only if
 * options->nested is set.  Other source directories are always
 * entered.  Source entries get a tag (when options->want_tags is set
 * or the method is explicit tagging); an entry without a tag falls
 * back to unrecognized unless options->include_untagged is set.
 */
void
arch_inventory_traversal (struct arch_inventory_options * options,
                          t_uchar * root,
                          inv_callback callback,
                          void * closure)
{
  DIR * dir;
  char ** files = 0;
  int n_files;
  int n_deferred;
  int * deferred_recursions = 0;
  int * is_deferred_nested = 0;
  char * rel_file = 0;
  int x;


  safe_opendir (&dir, root);

  files = 0;
  n_files = 0;

  while (1)
    {
      char * file;

      safe_readdir (&file, dir);
      if (!file)
        break;
      *(char **)ar_push ((void **)&files, 0, sizeof (char *)) = file;
      ++n_files;
    }

  safe_closedir (dir);

  qsort ((void *)files, n_files, sizeof (char *), cmp_files);

  /* We want to invoke `callback' on a lexically sorted list of paths.
   * Suppose that "foo" is a directory, but "foo-bar" also exists.
   * That means we have to invoke callbacks in the order:
   *
   *                foo
   *                foo-bar
   *                foo/xyzzy
   *
   * When we detect that "foo" is a directory, we can't
   * necessarily recurse immediately. Instead, we keep a stack
   * of deferred directories, recursing on them at the right time.
   *
   * The deferred directories must be resumed last-in, first-out.
   * While "foo" is deferred, every entry that is examined has the
   * form "foo<c>..." with <c> sorting before '/'.  A directory
   * "foo-bar" deferred in that state therefore satisfies
   * "foo-bar/..." < "foo/...", so its contents have to be reported
   * before those of "foo".
   */

  rel_file = 0;
  n_deferred = 0;
  deferred_recursions = 0;
  is_deferred_nested = 0;

  /* Each entry is deferred at most once, so n_files slots suffice.
   */
  ar_setsize ((void **)&deferred_recursions, 0, n_files, sizeof (int));
  ar_setsize ((void **)&is_deferred_nested, 0, n_files, sizeof (int));

  x = 0;
  while ((x < n_files) || (n_deferred > 0))
    {
      int is_deferred;
      int deferred_nested = 0;
      char * file;
      struct stat stat_buf;

      /* Resume the most recently deferred directory once no entry
       * remains, or once its contents sort before the next entry.
       */
      if ((n_deferred > 0)
          && ((x >= n_files)
              || right_order_for_recursion (files[deferred_recursions[n_deferred - 1]], files[x])))
        {
          is_deferred = 1;
          --n_deferred;
          file = files[deferred_recursions[n_deferred]];
          deferred_nested = is_deferred_nested[n_deferred];
        }
      else
        {
          is_deferred = 0;
          file = files[x];
          ++x;
        }

      rel_file = file_name_in_vicinity (0, root, file);

      /* A deferred directory was already classified and reported
       * when it was first seen; only the recursion is still owed.
       * `stat_buf' is not valid on this path and is not used by the
       * code at the target labels.
       */
      if (is_deferred)
        {
          if (deferred_nested)
            goto handle_deferred_nested;
          else
            goto handle_deferred;
        }

      /* . and .. are mandatory exclude files
       */
      if (!str_cmp (".", file) || !str_cmp ("..", file))
        {
        next_file:
          lim_free (0, rel_file);
          rel_file = 0;
          continue;
        }

      safe_lstat (rel_file, &stat_buf);


      /* non-printing characters, spaces, and glob characters are
       * mandatory unrecognized files
       */
      if (contains_illegal_character (file))
        {
        unrecognized_file:
          if (options->categories & arch_inventory_unrecognized)
            {
              callback (rel_file, &stat_buf, arch_inventory_unrecognized, 0, closure);
            }
          goto next_file;
        }

      /* callers can specify a pattern for additional files to
       * exclude from consideration.
       */
      if (!options->include_excluded && filename_matches (&options->excludes_pattern, file))
        goto next_file;

      /* arch control files that get past the exclude pattern are
       * always source.
       */
      if (is_control_file (rel_file, file))
        {
          goto handle_source_file;
        }

      /* file names beginning with "++" are always precious.
       */
      if ((file[0] == '+') && (file[1] == '+'))
        {
          if (options->categories & arch_inventory_precious)
            {
              callback (rel_file, &stat_buf, arch_inventory_precious, 0, closure);
            }
          goto next_file;
        }

      /* callers can specify a pattern for "junk" files -- files
       * presumed safe-to-be-removed by automatic tools, barring
       * concurrent tools.
       *
       * file names beginning with ",," are always considered junk files.
       */
      if ((file[0] == ',' && file[1] == ',') || filename_matches (&options->junk_pattern, file))
        {
          if (options->categories & arch_inventory_junk)
            {
              callback (rel_file, &stat_buf, arch_inventory_junk, 0, closure);
            }
          goto next_file;
        }

      /* callers can specify a pattern for "backup" files -- files
       * that are created by editors and similar programs to save old
       * versions
       */
      if (filename_matches (&options->backup_pattern, file))
        {
          if (options->categories & arch_inventory_backup)
            {
              callback (rel_file, &stat_buf, arch_inventory_backup, 0, closure);
            }
          goto next_file;
        }

      /* callers can specify a pattern for "precious" files -- files
       * that are not part of the source, but which should never be
       * automatically removed.
       */
      if (filename_matches (&options->precious_pattern, file))
        {
          if (options->categories & arch_inventory_precious)
            {
              callback (rel_file, &stat_buf, arch_inventory_precious, 0, closure);
            }
          goto next_file;
        }

      /* callers can specify a pattern for explicitly "unrecognized" files --
       * files that should be flagged as errors in tree-lint reports.
       */
      if (filename_matches (&options->unrecognized_pattern, file))
        {
          goto unrecognized_file;
        }

      /* finally, a pattern for "source" files -- files which are expected
       * to be source files.  Note that if the tagging method is "explicit" and
       * an apparent source file lacks a tag and is not a nested tree, it reverts
       * to being an unrecognized file.
       *
       * if a directory appears to be a source directory, but contains a rules
       * directory of its own, then it is in fact the root of a nested tree -- not
       * a regular source file.
       */
      if (filename_matches (&options->source_pattern, file))
        {
        handle_source_file:
          if (S_ISDIR (stat_buf.st_mode) && is_nested_tree (rel_file))
            {
              if (options->categories & arch_inventory_tree)
                {
                  callback (rel_file, &stat_buf, arch_inventory_tree, 0, closure);
                }

              if (options->nested)
                {
                  /* `x' was already advanced past this entry, so
                   * `x - 1' is its index in `files'.
                   */
                  if ((x < n_files) && !right_order_for_recursion (file, files[x]))
                    {
                      deferred_recursions[n_deferred] = x - 1;
                      is_deferred_nested[n_deferred] = 1;
                      ++n_deferred;
                      lim_free (0, rel_file);
                      rel_file = 0;
                      continue;
                    }

                handle_deferred_nested:
                  arch_inventory_traversal (options, rel_file, callback, closure);
                }
              goto next_file;
            }
          else
            {
              t_uchar * tag;

              tag = 0;
              if (options->want_tags || (options->method == arch_explicit_tagging))
                {
                  tag = arch_inventory_tag (options->method, rel_file);

                  if (!tag)
                    {
                      if (!options->include_untagged)
                        goto unrecognized_file;
                    }
                }

              if (options->categories & arch_inventory_source)
                {
                  callback (rel_file, &stat_buf, arch_inventory_source, tag, closure);
                }

              if (tag)
                {
                  lim_free (0, tag);
                  tag = 0;
                }

              if (S_ISDIR (stat_buf.st_mode))
                {
                  if ((x < n_files) && !right_order_for_recursion (file, files[x]))
                    {
                      deferred_recursions[n_deferred] = x - 1;
                      is_deferred_nested[n_deferred] = 0;
                      ++n_deferred;
                      lim_free (0, rel_file);
                      rel_file = 0;
                      continue;
                    }

                handle_deferred:
                  arch_inventory_traversal (options, rel_file, callback, closure);
                }
              goto next_file;
            }
        }
      else
        goto unrecognized_file;
    }

  for (x = 0; x < n_files; ++x)
    {
      lim_free (0, files[x]);
    }

  ar_free ((void **)&files, 0);
  lim_free (0, rel_file);
  ar_free ((void **)&deferred_recursions, 0);
  ar_free ((void **)&is_deferred_nested, 0);
}




/* qsort comparison function for an array of `char *' file names:
 * orders the two names with str_cmp.
 */
static int
cmp_files (const void * va, const void * vb)
{
  return str_cmp (*(char **)va, *(char **)vb);
}

/* Decide whether the contents of directory `a' (paths of the form
 * "a/...") should be reported before the sibling entry `b'.
 *
 * Returns non-zero when recursion into `a' may happen now, 0 when
 * `b' has to be handled first.
 *
 * Bytes are compared as unsigned values so that the answer does not
 * depend on whether plain `char' is signed on the host.  With a
 * signed `char', bytes >= 0x80 compared as negative, which could
 * trip the invariant below and gave the wrong answer against '/'.
 *
 * NOTE(review): this assumes the caller's sort (str_cmp) orders
 * bytes as unsigned values -- confirm against hackerlab/char/str.h.
 */
static int
right_order_for_recursion (char * a, char * b)
{
  const unsigned char * ua = (const unsigned char *)a;
  const unsigned char * ub = (const unsigned char *)b;

  /* a and b are already in lexical order (a < b)
   */
  while (*ua && (*ua == *ub))
    {
      ++ua;
      ++ub;
    }

  if (!*ua)
    {
      /* Does "A/" come before "B" in an alphabetical listing?
       */
      return (*ub > '/');
    }
  else if (!*ub)
    {
      /* Does "B/" come after "A" in an alphabetical listing?
       */
      return (*ua < '/');
    }
  else
    {
      /* The names differ at this byte; since a < b, anything
       * under "a/" also sorts before b.
       */
      invariant (*ua < *ub);
      return 1;
    }
}


/* Return 1 if `filename' contains a glob character (`*', `?', `[',
 * `]'), a backslash, a space, a tab, or any character for which
 * char_is_printable is false; 0 otherwise.
 *
 * NOTE(review): bytes are passed to char_is_printable as plain
 * `char'; whether that macro copes with negative values on
 * signed-char hosts is not visible here -- confirm.
 */
static int
contains_illegal_character (char * filename)
{
  char * p;

  for (p = filename; *p; ++p)
    {
      switch (*p)
        {
        case '*':
        case '?':
        case '[':
        case ']':
        case '\\':
        case ' ':
        case '\t':
          return 1;

        default:
          if (!char_is_printable (*p))
            return 1;
          break;
        }
    }

  return 0;
}

/* Return 1 if `filename' matches the compiled `pattern', 0 if it
 * does not.  Any other regexec result is treated as fatal.
 */
static int
filename_matches (regex_t * pattern, char * filename)
{
  int status;

  status = regexec (pattern, filename, 0, 0, 0);

  if (status != REG_NOMATCH)
    {
      if (status == REG_NOERROR)
        return 1;

      panic ("unexpected regexec error in arch_inventory_traversal");
      return -1;
    }

  return 0;
}

/* Return non-zero if the relative path `rel_file' names an arch
 * control file, as decided by a fixed regexp.  `filename' is not
 * used.
 *
 * The regexp is compiled on the first call and kept in static
 * storage for all later calls.
 */
static int
is_control_file (char * rel_file, char * filename)
{
  static regex_t control_pattern = {0,};
  static int compiled = 0;

  if (!compiled)
    {
      char * control_re = "^(.*/?(\\.arch-ids(/(=id|[^/]*\\.id))?|\\./\\{arch\\}(/[a-zA-Z=][^/~]*)*|\\./\\{arch\\}/\\.arch-project-tree))$";
      int re_error;

      re_error = regcomp (&control_pattern, control_re, REG_EXTENDED);
      invariant (!re_error);
      compiled = 1;
    }

  return filename_matches (&control_pattern, rel_file);
}


/* Return 1 if directory `path' contains an entry named "{arch}"
 * (checked with lstat), 0 if that entry does not exist.  Any lstat
 * failure other than ENOENT is fatal.
 */
static int
is_nested_tree (char * path)
{
  t_uchar * ctl_file;
  struct stat stat_buf;
  int errn;
  int found;

  ctl_file = file_name_in_vicinity (0, path, "{arch}");

  if (0 <= vu_lstat (&errn, ctl_file, &stat_buf))
    found = 1;
  else if (errn == ENOENT)
    found = 0;
  else
    {
      panic ("i/o error in arch_inventory_traversal/is_nested_tree");
      return 0;             /* notreached */
    }

  lim_free (0, ctl_file);
  return found;
}

/* Return 1 if the `len'-byte `line' should be ignored: it is empty,
 * starts with whitespace, or starts with `#'.  Otherwise return 0.
 */
static int
is_comment_line (t_uchar * line, long len)
{
  if (!len)
    return 1;

  if (char_is_space (line[0]))
    return 1;

  return (line[0] == '#');
}

/* If the `len'-byte `line' begins with keyword `kw' followed by a
 * whitespace character, replace `*re' with a newly saved copy of
 * the rest of the line (leading and trailing whitespace removed)
 * and return 1.  The previous `*re', if any, is freed.
 *
 * Returns 0, leaving `*re' untouched, when the line does not start
 * with `kw' plus whitespace.
 */
static int
sets_re (char * kw, char ** re, t_uchar * line, long len)
{
  int kw_len;
  t_uchar * value;
  long value_len;

  kw_len = str_length (kw);

  /* need the keyword plus at least one separator character.
   */
  if (len <= kw_len)
    return 0;

  if (str_cmp_prefix (kw, line))
    return 0;

  if (!char_is_space (line[kw_len]))
    return 0;

  if (*re)
    lim_free (0, *re);

  value = line + kw_len;
  value_len = len - kw_len;

  /* drop whitespace on both sides of the value.
   */
  for (; value_len && char_is_space (value[0]); ++value)
    --value_len;

  while (value_len && char_is_space (value[value_len - 1]))
    --value_len;

  *re = str_save_n (0, value, value_len);

  return 1;
}

/* If the `len'-byte `line' begins with keyword `kw' followed by a
 * whitespace character, store `method' in `*method_var' and return
 * 1.  Otherwise return 0 and leave `*method_var' unchanged.
 */
static int
sets_tagging_method (char * kw, enum arch_tagging_method * method_var, enum arch_tagging_method method, t_uchar * line, long len)
{
  int kw_len;

  kw_len = str_length (kw);

  /* the line must hold the keyword and one separator at least.
   */
  if ((len >= (kw_len + 1))
      && !str_cmp_prefix (kw, line)
      && char_is_space (line[kw_len]))
    {
      *method_var = method;
      return 1;
    }

  return 0;
}



/* tag: Tom Lord Wed May 14 09:47:16 2003 (invent.c)
 */
