/* ftag.c: 
 *
 ****************************************************************
 * Copyright (C) 2002 Tom Lord
 * 
 * See the file "COPYING" for further information about
 * the copyright and warranty status of this work.
 */


#include "hackerlab/os/errno.h"
#include "hackerlab/os/sys/stat.h"
#include "hackerlab/char/char-class.h"
#include "hackerlab/char/str.h"
#include "hackerlab/mem/alloc-limits.h"
#include "hackerlab/mem/mem.h"
#include "hackerlab/fs/file-names.h"
#include "hackerlab/vu/vu.h"
#include "liblarch/ftag.h"


/* __STDC__ prototypes for static functions */

/* Build a tag from the first line of `id_file', placed between
 * `prefix' and the optional `postfix'.
 */
static t_uchar * explicit_tag (int * errn,
			       struct alloc_limits * limits,
			       t_uchar * arg_file,
			       t_uchar * id_file,
			       t_uchar * prefix,
			       t_uchar * postfix);

/* Build a tag from a tag line embedded in the contents of `file';
 * `basename' selects which tag-line syntax is recognized.
 */
static t_uchar * implicit_tag (int * errn,
			       struct alloc_limits * limits,
			       t_uchar * file,
			       t_uchar * basename,
			       t_uchar * prefix);

/* Trim trailing non-graphical characters from `buf' and replace the
 * remaining ones with '_'; returns the trimmed length.
 */
static long smash_non_graphical (t_uchar * buf, long amt);



/*(c file_tag)
 * static t_uchar * file_tag (int * errn,
 *			      struct alloc_limits * limits,
 *			      enum ftag_method method,
 *			      t_uchar * path);
 * 
 * Return a newly allocated string containing the inventory tag
 * of the file `path' using inventory method `method'.
 * 
 * Return 0 and set `*errn' if the tag can not be computed.
 * If no I/O error occurs, the file does not have an explicit 
 * tag, but `method' is `ftag_explicit', `*errn' is set to 0.
 */
t_uchar *
file_tag (int * errn,
	  struct alloc_limits * limits,
	  enum ftag_method method,
	  t_uchar * path)
{
  t_uchar * answer;
  t_uchar * as_file;
  t_uchar * basename;
  t_uchar * dir;
  t_uchar * dir_as_file;
  t_uchar * dir_basename;
  t_uchar * id_file;
  struct stat stat_buf;
  int is_dir;
  int is_symlink;


  answer = 0;
  as_file = 0;
  basename = 0;
  dir = 0;
  dir_as_file = 0;
  dir_basename = 0;
  id_file = 0;

  if (!path)
    {
      *errn = EINVAL;

    return_answer:

      if (as_file)
	lim_free (limits, as_file);
      if (basename)
	lim_free (limits, basename);
      if (dir)
	lim_free (limits, dir);
      if (dir_as_file)
	lim_free (limits, dir_as_file);
      if (dir_basename)
	lim_free (limits, dir_basename);
      if (id_file)
	lim_free (limits, id_file);

      return answer;
    }

  as_file = file_name_from_directory (limits, path);
  if (!as_file)
    {
    enomem_error:
      *errn = ENOMEM;
      goto return_answer;
    }

  if (method == ftag_names)
    {
      answer = str_alloc_cat (limits, "?", as_file);
      if (!answer)
	goto enomem_error;
      else
	goto return_answer;
    }


  basename = file_name_tail (limits, as_file);
  dir = file_name_directory (limits, as_file);
  if (!dir)
    dir = str_save (limits, ".");
  if (!(basename && dir))
    goto enomem_error;
  
  dir_as_file = file_name_from_directory (limits, dir);
  if (!dir_as_file)
    goto enomem_error;

  dir_basename = file_name_tail (limits, dir_as_file);
  if (!dir_basename)
    goto enomem_error;

  /* Explicit tag files use their contents as tag, with the
   * prefix 'E'.
   */
  if (!str_cmp (dir_basename, ".arch-ids"))
    {
      answer = explicit_tag (errn, limits, path, as_file, "E_", 0);
      goto return_answer;
    }

  /* Explicit tag file directories:
   */
  if (!str_cmp (basename, ".arch-ids"))
    {
      long amt;
      answer = str_alloc_cat (limits, "D_", as_file);
      if (!answer)
	goto enomem_error;
      amt = smash_non_graphical (answer, str_length (answer));
      answer[amt] = 0;
      goto return_answer;
    }

  /* Paths beginning with "./{arch}" are tagged with their own
   * path name, with the prefix "A_".  The presumptions are that these
   * files never move, and that if a file is present, its contents are
   * invariant.
   */
  if (   !str_cmp_prefix ("./{arch}/", as_file)
	 || !str_cmp ("./{arch}", as_file))
    {
      long amt;
      answer = str_alloc_cat (limits, "A_", as_file);
      if (!answer)
	goto enomem_error;
      amt = smash_non_graphical (answer, str_length (answer));
      answer[amt] = 0;
      goto return_answer;
    }
      

  /* Try for an explicit tag:
   */
  if (0 > vu_lstat (errn, as_file, &stat_buf))
    goto return_answer;

  if (S_ISDIR (stat_buf.st_mode))
    {
      is_dir = 1;
      is_symlink = 0;
      id_file = file_name_in_vicinity (limits, as_file, ".arch-ids/=id");
      if (!id_file)
	goto enomem_error;
    }
  else
    {
      is_dir = 0;
      is_symlink = S_ISLNK (stat_buf.st_mode);

      id_file = file_name_in_vicinity (limits, dir, ".arch-ids/");
      if (!id_file)
	goto enomem_error;
      id_file = str_realloc_cat (limits, id_file, basename);
      if (!id_file)
	goto enomem_error;
      id_file = str_realloc_cat (limits, id_file, ".id");
      if (!id_file)
	goto enomem_error;
    }

  answer = explicit_tag (errn, limits, path, id_file, "x_", 0);
  if (answer || (*errn != ENOENT))
    goto return_answer;
  else
    {
      /* Is there a .arch-ids/=all file here?
       */
      lim_free (limits, id_file);
      if (is_dir)
	id_file = file_name_in_vicinity (limits, as_file, ".arch-ids/=all");
      else
	id_file = file_name_in_vicinity (limits, dir, ".arch-ids/=all");

      if (!id_file)
	goto enomem_error;

      answer = explicit_tag (errn, limits, path, id_file, "a_", (is_dir ? (t_uchar *)"./." : basename));
      if (answer || (*errn != ENOENT))
	goto return_answer;
	
      if ((method == ftag_implicit) && !is_dir && !is_symlink)
	{
          answer = implicit_tag (errn, limits, path, basename, "i_");
	  if (answer || *errn)
	    goto return_answer;
	}

      if ((method == ftag_tagline) && !is_dir && !is_symlink)
	{
          answer = implicit_tag (errn, limits, path, 0, "i_");
	  if (answer || *errn)
	    goto return_answer;
	}


      /* is there an "=default" tag?
       */
      lim_free (limits, id_file);
      if (is_dir)
	id_file = file_name_in_vicinity (limits, as_file, ".arch-ids/=default");
      else
	id_file = file_name_in_vicinity (limits, dir, ".arch-ids/=default");

      if (!id_file)
	goto enomem_error;
      
      answer = explicit_tag (errn, limits, path, id_file, "w_", (is_dir ?  (t_uchar *)"./." : basename));
      if (answer || (*errn != ENOENT))
	goto return_answer;

      /* no explicit, =all, implicit, tagline, or =default tag.
       */
      if ((method == ftag_implicit) || (method == ftag_tagline))
	{
	  lim_free (limits, id_file);
	  id_file = file_name_in_vicinity (limits, dir, ".arch-ids/=dont-care");
	  if (!id_file)
	    goto enomem_error;

	  if (0 <= vu_lstat (errn, id_file, &stat_buf))
	    {
	      long amt;
	      answer = str_alloc_cat (limits, "k_", as_file);
	      if (!answer)
		goto enomem_error;
	      amt = smash_non_graphical (answer, str_length (answer));
	      answer[amt] = 0;
	      goto return_answer;
	    }
	  else if (*errn == ENOENT)
	    {
	      long amt;
	      answer = str_alloc_cat (limits, "?_", as_file);
	      if (!answer)
		goto enomem_error;
	      amt = smash_non_graphical (answer, str_length (answer));
	      answer[amt] = 0;
	      goto return_answer;
	    }
	  else
	    goto return_answer;
	}
      else
	{
	  *errn = 0;
	  goto return_answer;
	}
    }
  
}



/* Compute a tag from the contents of the id file `id_file'.
 *
 * The answer is a newly allocated string: `prefix', then the first
 * line of `id_file' (everything before the first newline, or the
 * whole file if it contains none), then `postfix' if it is not 0.
 * The file is read in pieces of up to 1024 bytes; each piece is
 * passed through `smash_non_graphical' before being appended.
 *
 * Return 0 with `*errn' set if `id_file' can not be opened or read,
 * or with `*errn' set to `ENOMEM' if an allocation fails.  The
 * descriptor opened here is closed on every return path.
 *
 * `arg_file' is not used.
 */
static t_uchar *
explicit_tag (int * errn,
	      struct alloc_limits * limits,
	      t_uchar * arg_file,
	      t_uchar * id_file,
	      t_uchar * prefix,
	      t_uchar * postfix)
{
  int id_fd;
  t_uchar * answer;
  char buf[1024];
  long amt;
  int ign;


  id_fd = vu_open (errn, id_file, O_RDONLY, 0);

  if (id_fd < 0)
    return 0;

  answer = str_save (limits, prefix);
  if (!answer)
    goto enomem_error;

  while (1)
    {
      t_uchar * eol;

      amt = vu_read_retry (errn, id_fd, buf, sizeof (buf));

      if (amt < 0)
	{
	  /* Errors from the close are ignored so that `*errn' keeps
	   * describing the failed read.
	   */
	  lim_free (limits, answer);
	  vu_close (&ign, id_fd);
	  return 0;
	}

      if (!amt)
	break;

      /* Only the text before the first newline belongs to the tag.
       */
      eol = str_chr_index_n (buf, amt, '\n');
      if (eol)
	amt = eol - (t_uchar *)buf;

      amt = smash_non_graphical (buf, amt);

      /* NOTE(review): when this fails the previous value of `answer'
       * is no longer reachable from here.  Whether it has already been
       * released depends on how str_realloc_cat_n treats its argument
       * on failure -- confirm against its implementation.
       */
      answer = str_realloc_cat_n (limits, answer, buf, amt);
      if (!answer)
	goto enomem_error;

      if (eol)
	break;
    }

  answer = str_realloc_cat (limits, answer, (postfix ? postfix : (t_uchar *)""));
  if (!answer)
    goto enomem_error;

  vu_close (&ign, id_fd);
  return answer;


 enomem_error:
  /* `answer' is always 0 here; only the descriptor needs releasing.
   */
  vu_close (&ign, id_fd);
  *errn = ENOMEM;
  return 0;
}


/* Compute a tag from a tag line embedded in the contents of `file'.
 *
 * On success the answer is a newly allocated string: `prefix'
 * followed by the text of the tag (passed through
 * `smash_non_graphical').
 *
 * Return 0 with `*errn' set to 0 if the file contains no usable tag
 * line.  Return 0 with `*errn' set to an error code if the file can
 * not be opened, examined, read or closed, or (`ENOMEM') if the
 * answer can not be allocated.
 *
 * The descriptor opened here is closed exactly once on every return
 * path.
 */
static t_uchar *
implicit_tag (int * errn,
	      struct alloc_limits * limits,
	      t_uchar * file,
	      t_uchar * basename,
	      t_uchar * prefix)
{
  int file_fd;
  char buf[1025];
  int amt;
  int line;
  int bottom;
  int ign;

  /* This is a slightly screwy, historic interface.
   *
   * Passing `basename != 0' means the old, larch-style tag syntax.
   * 
   * Passing `basename == 0' means tagline syntax.
   */

  /* Search the file itself (last, then first 1K) for a line beginning:
   *
   * tla-style tagline tagging (basename == 0)
   * -----------------------------------------
   * 
   * <punct>arch-tag:<blanks>
   *
   *
   * larch-style implicit tagging (basename != 0)
   * --------------------------------------------
   *
   * <punct>basename<blanks>-
   * 
   * or
   * 
   * <punct>tag:<blanks>
   *
   * after the dash, skip any blanks -- the rest is the tag.
   */

  file_fd = vu_open (errn, file, O_RDONLY, 0);
  if (file_fd < 0)
    return 0;
	    
  /* Two passes: the end of the file (bottom == 1), then its start.
   */
  for (bottom = 1; bottom >= 0; --bottom)
    {
      if (!bottom)
	{
	  if (0 > vu_lseek (errn, file_fd, 0, SEEK_SET))
	    goto error_return;
	  /* One byte of `buf' is kept free for the terminating 0.
	   */
	  amt = vu_read_retry (errn, file_fd, buf, sizeof (buf) - 1);
	  if (amt < 0)
	    goto error_return;
	}
      else
	{
	  struct stat file_stat_buf;
	  char * x;

	  if (0 > vu_fstat (errn, file_fd, &file_stat_buf))
	    goto error_return;

	  /* Small files are handled entirely by the pass over the
	   * start of the file.
	   */
	  if (!(file_stat_buf.st_size > sizeof (buf)))
	    continue;

	  /* NOTE(review): seeking to 1026 bytes before the end and
	   * reading at most sizeof (buf) == 1025 bytes means this
	   * window stops one byte short of end-of-file.  Left as is,
	   * since changing it could change the tags that are computed.
	   */
	  if (0 > vu_lseek (errn, file_fd, -1026, SEEK_END))
	    goto error_return;
	  amt = vu_read_retry (errn, file_fd, buf, sizeof (buf));
	  if (amt < 0)
	    goto error_return;

	  /* The window probably starts in the middle of a line; throw
	   * away everything up to and including the first newline.
	   * That also makes room for the terminating 0.
	   */
	  x = str_chr_index_n (buf, amt, '\n');
	  if (!x)
	    continue;
	  amt = amt - (1 + x - buf);
	  mem_move (buf, x + 1, amt);
	}

      buf[amt] = 0;
      line = 0;

      while (1)
	{
	  int is_inventory_tag;

	  /* skip punctuation and blanks at the start of the line
	   */
	  while ((line < amt) && (char_is_punct (buf[line]) || char_is_blank (buf[line])))
	    ++line;

	  if (line == amt)
	    break;

	  if (buf[line] == '\n')
	    {
	      ++line;
	      continue;
	    }

	  is_inventory_tag = (basename ? !str_cmp_prefix ("tag:", buf + line) : !str_cmp_prefix ("arch-tag:", buf + line));
		
	  if (   !is_inventory_tag 
	      && (!basename || str_cmp_prefix (basename, buf + line)))
	    {
	      t_uchar * eol;

	    not_this_line:
	      /* Move to the newline that ends this line; the top of
	       * the loop steps over it.
	       */
	      eol = str_chr_index_n (buf + line, amt - line, '\n');
	      if (!eol)
		break;
	      line = eol - (t_uchar *)buf;
	    }
	  else
	    {
	      t_uchar * eol;
		    
	      if (is_inventory_tag)
		line += (basename ? str_length ("tag:") : str_length ("arch-tag:"));
	      else
		line += str_length (basename);

	      if (!is_inventory_tag)
		{
		  /* larch style: the basename must be followed by
		   * optional blanks and a dash.
		   */
		  while ((line < amt) && char_is_blank (buf[line]))
		    ++line;
			
		  if (line == amt)
		    break;
			
		  if (buf[line] != '-')
		    goto not_this_line;

		  ++line;
		}

	      if (line == amt)
		break;
		    
	      /* This is the tag line.
	       */
	      while ((line < amt) && char_is_blank (buf[line]))
		++line;

	      eol = str_chr_index_n (buf + line, amt - line, '\n');
	      if (!eol)
		eol = buf + amt;

	      if (0 == (eol - (t_uchar *)(buf + line)))
		{
		  /* an empty tag
		   */
		  break;
		}

	      {
		long size;
		t_uchar * answer;

		size = smash_non_graphical (buf + line, eol - (t_uchar *)(buf + line));
		answer = str_alloc_cat_n (limits, prefix, buf + line, size);
		if (0 > vu_close (errn, file_fd))
		  {
		    /* The close was already attempted, so it must not
		     * be repeated; the answer is discarded because the
		     * caller only sees the error.
		     */
		    if (answer)
		      lim_free (limits, answer);
		    return 0;
		  }
		if (!answer)
		  *errn = ENOMEM;
		return answer;
	      }
	    }
	}
    }

  /* No tag line was found.  A failed close is reported as is, without
   * closing the descriptor a second time.
   */
  if (0 > vu_close (errn, file_fd))
    return 0;
  *errn = 0;
  return 0;


 error_return:
  /* Errors from this close are ignored so that `*errn' keeps
   * describing the original failure.
   */
  vu_close (&ign, file_fd);
  return 0;
}



/* Clean up the first `amt' characters of `buf' in place.
 *
 * Trailing characters for which `char_is_graph' is false are dropped
 * from the count; every other such character in the remaining part
 * is overwritten with '_'.
 *
 * Return the number of characters that remain.  No terminating 0 is
 * stored; that is left to the caller.
 */
static long
smash_non_graphical (t_uchar * buf, long amt)
{
  long kept;
  long pos;

  /* Shrink the count past the trailing non-graphical run.
   */
  kept = amt;
  while ((kept > 0) && !char_is_graph (buf[kept - 1]))
    kept -= 1;

  /* Whatever non-graphical characters are left become underscores.
   */
  pos = 0;
  while (pos < kept)
    {
      if (!char_is_graph (buf[pos]))
	buf[pos] = '_';
      pos += 1;
    }

  return kept;
}


/* tag: Tom Lord Thu Feb 21 15:50:48 2002 (ftag.c)
 */
