/*
 *   Copyright (C) 1997, 1998
 *   	Free Software Foundation, Inc.
 *
 *   This program is free software; you can redistribute it and/or modify it
 *   under the terms of the GNU General Public License as published by the
 *   Free Software Foundation; either version 2, or (at your option) any
 *   later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program; if not, write to the Free Software
 *   Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
 *
 */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif /* HAVE_CONFIG_H */

#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif /* HAVE_STDLIB_H */
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <string.h>
#include <sys/types.h>

#include <salloc.h>
#include <getopttools.h>
#include <file_exists.h>
#include <rm.h>
#include <file_size.h>
#include <logfile.h>

#include <crawl.h>
#include <webbase.h>

/*
 * Capacity of the long_options table declared in init(): the array holds
 * MAX_OPTIONS + 1 entries, and the same value is stored in the second slot
 * of the table's terminating entry.
 */
#define MAX_OPTIONS 100
/*
 * Value stored in the last slot of the terminating long_options entry.
 * NOTE(review): presumably a marker interpreted by getopt_merge() and the
 * other getopttools helpers -- confirm against getopttools.h.
 */
#define APPLICATION_OPTIONS		0x8000000

/*
 * Run-time settings of this program, filled in by init() from the
 * command line and kept in the file-level `params' object.
 */
typedef struct crawler_params {
  /* Crawl context returned by crawl_alloc(); released with crawl_free(). */
  crawl_params_t* crawl;
  /* Not referenced anywhere in this file. */
  int home_pages;
  /* Argument of -where (strdup'ed); main() forwards it prefixed with "where ". */
  char* where;
  /* Argument of -log (strdup'ed); handed to logfile() by init(). */
  char* log;
  /* Size of the current output file as last reported by file_size()
     (only updated by the disabled dump_data() implementation). */
  int size_current;
  /* Size threshold for switching output files; defaults to 1 MiB, set by -size_max. */
  int size_max;
  /* Name of the current output file, "raw.NNNN". */
  char file_current[32];
  /* Number used to build file_current when switching to a new output file. */
  int file_number;
} crawler_params_t;

/* Settings shared by init(), main() and finish(); zero-initialized as a static object. */
static crawler_params_t params;

/* Set to 1 by the -verbose option; when non-zero init() calls getopt_dump(). */
static int verbose = 0;

/* Forward declarations of the file-local functions defined below. */
static void init(int argc, char** argv);
static void finish();
static void dump_data(char* args, webbase_url_t* webbase_url);

/*
 * Entry point: parse the command line with init(), then walk the URLs of
 * the crawl base with webbase_walk_url(), handing each one to dump_data().
 *
 * When -where was given, its argument is forwarded to webbase_walk_url()
 * prefixed with "where "; otherwise an empty string is forwarded.  A
 * raw.0000 file left over from an earlier run is removed before the walk
 * and the TMP scratch file is removed after it.
 *
 * Returns 1 if the string passed to webbase_walk_url() cannot be
 * allocated.  Otherwise the process ends inside finish(), which exits
 * with status 0.
 */
int main(int argc, char** argv)
{
  crawl_params_t* crawl;
  char* where;

  init(argc, argv);

  crawl = params.crawl;

  if(params.where) {
    /* 32 bytes of slack cover the "where " prefix and the terminating NUL. */
    where = smalloc(strlen(params.where) + 32);
    sprintf(where, "where %s", params.where);
  } else {
    where = strdup("");
  }
  if(!where) {
    fprintf(stderr, "out of memory\n");
    return 1;
  }

  strcpy(params.file_current, "raw.0000");
  if(file_exists(params.file_current))
    rm(params.file_current);
  webbase_walk_url(crawl->base, where, dump_data, 0, WEBBASE_GET_URL_LIGHT);

  /* The walk is over: release the string built above. */
  free(where);
  where = 0;

  if(file_exists("TMP"))
    rm("TMP");
  finish();
  /* Not reached: finish() calls exit(0).  Kept so main() has a return value. */
  return 0;
}

/*
 * Callback handed to webbase_walk_url() by main().  The active definition
 * is an empty stub: it ignores both arguments and does nothing.
 */
static void dump_data(char* args, webbase_url_t* webbase_url) {}
/*
 * Disabled implementation, excluded from compilation by the #if 0 below.
 * For a URL flagged WEBBASE_URL_INFO_CONTENT whose content type starts
 * with "text" and for which robots_p() is false, it calls bodyparse() on
 * the file named by uri_furi_string() with "TMP" as second argument, then
 * appends TMP to the current raw.NNNN file.  It switches to the next
 * numbered file once the size recorded after the previous append exceeds
 * params.size_max.
 * NOTE(review): bodyparse() presumably writes its result to TMP -- confirm.
 */
#if 0
static void dump_data(char* args, webbase_url_t* webbase_url)
{
  if((webbase_url->w_info & WEBBASE_URL_INFO_CONTENT) && !strncmp("text", webbase_url->w_content_type, 4) && !robots_p(webbase_url->w_url)) {
    char* path = uri_furi_string(webbase_url->w_url, strlen(webbase_url->w_url), URI_FURI_REAL_PATH);
    if(path && file_exists(path)) {
      /* "cat TMP >> " plus a file name of at most 31 characters fits in 128 bytes. */
      char cmd[128];
      bodyparse(path, "TMP", webbase_url->w_url, 1000000, 0, BODY_PARSE_TRANSPARENT);
      /* Current file grew past the limit: move on to the next numbered file. */
      if(params.size_current > params.size_max) {
	params.file_number++;
	sprintf(params.file_current, "raw.%04d", params.file_number);
	if(file_exists(params.file_current))
	  rm(params.file_current);
	params.size_current = 0;
      }
      /* Append TMP to the current file through the shell. */
      sprintf(cmd, "cat TMP >> %s", params.file_current);
      system(cmd);
      /* Remember the new size for the next call's limit check. */
      params.size_current = file_size(params.file_current);
    }
  }
}
#endif

/*
 * Release what init() stored in `params' and terminate the process.
 *
 * Frees the crawl context with crawl_free() and the strings duplicated
 * for -log and -where, then calls exit(0); this function does not return.
 * Declared static to match its forward declaration at the top of the file.
 */
static void finish(void)
{
  crawl_free(params.crawl);
  /* free() accepts a null pointer, so options that were never given need no guard. */
  free(params.log);
  free(params.where);
  exit(0);
}

/*
 * Parse this program's own command line options into `params' and build
 * the crawl context.
 *
 * Options handled here (the table is extended with the entries returned
 * by crawl_options() through getopt_merge()):
 *   -verbose        sets the file-level `verbose' flag to 1
 *   -log <file>     copied into params.log, later passed to logfile()
 *   -where <str>    copied into params.where
 *   -size_max <n>   non-negative decimal integer stored in params.size_max
 *                   (default 1 MiB); an invalid value is reported on
 *                   stderr and the current value is kept
 *
 * Every other option is ignored by this loop; the same argc/argv are then
 * handed to crawl_alloc() together with the merged option table.
 */
static void init(int argc, char** argv)
{
  static struct option long_options[MAX_OPTIONS + 1] =
  {
    /* These options set a flag. */
    {"verbose", 0, &verbose, 1},
    {"log", 1, 0, 0},
    {"where", 1, 0, 0},
    {"size_max", 1, 0, 0},
    {0, MAX_OPTIONS, 0, APPLICATION_OPTIONS}
  };

  params.size_max = 1 * 1024 * 1024;

  getopt_merge(long_options, crawl_options(long_options));

  /* Silence getopt's own diagnostics and restart scanning from the beginning. */
  opterr = 0;
  optind = 0;
  while(1) {
    /* `getopt_long' stores the option index here. */
    int option_index = 0;
    const char* name;
    int c;

    c = getopt_long_only(argc, argv, "-", long_options, &option_index);

    /* Detect the end of the options. */
    if(c == -1)
      break;

    /* Only long options reported with a return value of 0 are examined here. */
    if(c != 0)
      continue;

    /* If this option set a flag, do nothing else now. */
    if(long_options[option_index].flag != 0)
      continue;

    name = long_options[option_index].name;
    if(!strcmp(name, "log")) {
      /* Drop the value of an earlier -log so repeating the option does not leak. */
      free(params.log);
      params.log = strdup(optarg);
    } else if(!strcmp(name, "where")) {
      /* Same as above for a repeated -where. */
      free(params.where);
      params.where = strdup(optarg);
    } else if(!strcmp(name, "size_max")) {
      char* end = 0;
      long value;

      /* strtol reports overflow through errno and garbage through `end'. */
      errno = 0;
      value = strtol(optarg, &end, 10);
      if(errno != 0 || end == optarg || *end != '\0' || value < 0 || value > INT_MAX) {
        fprintf(stderr, "size_max: invalid value `%s', keeping %d\n", optarg, params.size_max);
      } else {
        params.size_max = (int)value;
      }
    }
  }

  params.crawl = crawl_alloc(argc, argv, long_options);

  if(params.log) logfile(params.log);

  if(verbose) getopt_dump(long_options);
}
