#!/usr/bin/perl

# This file is part of The New Aspell
# Copyright (C) 2004 by Kevin Atkinson under the GNU LGPL
# license version 2.0 or 2.1.  You should have received a copy of the
# LGPL license along with this library if you did not you can find it
# at http://www.gnu.org/.

use strict;
use warnings;

# Generates one "<base>.dat" character data file for every textual
# reference table named on the command line.
#
# Arguments:
#   --no-ascii          optional; the ASCII letters (65..90, 97..122) get
#                       placeholder entries instead of the default
#                       identity mapping to U+0041.. / U+0061..
#   <unicode data file> tab separated; the first column is a four digit
#                       hex code point, the remaining columns are the
#                       per-character data looked up below
#   <table>.txt ...     lines of the form "0xNN 0xNNNN" or "=NN U+NNNN"
#                       mapping a byte value to a code point
#
# Each output file holds a comment line, the lower-cased base name and
# then 256 tab separated rows, one per byte value.

my $usage =
  "Usage: $0 [--no-ascii] <unicode data file> <textual reference table(s)>\n";

my $no_ascii = 0;
if (@ARGV and $ARGV[0] eq '--no-ascii') {
  $no_ascii = 1;
  shift @ARGV;
}

# Count the arguments only after the option has been consumed so that
# "--no-ascii <data file>" without any table is reported as an error.
die $usage if @ARGV < 2;

my $unidata_file = shift @ARGV;

# %unidata maps a code point (hex string) to a reference to the list of
# the remaining columns of its line.
my %unidata;

open my $uni_fh, '<', $unidata_file
  or die "Can't open \"$unidata_file\": $!\n";

while (my $line = <$uni_fh>) {
  # chomp, unlike chop, leaves a final line without a newline intact.
  chomp $line;
  my ($char, @data) = split /\t/, $line;
  next unless defined $char;            # blank line
  $unidata{$char} = \@data;
}

close $uni_fh;

foreach my $file (@ARGV) {
  # Anchored at the end so that the check agrees with the error message.
  my ($base) = $file =~ /^(.+)\.txt\z/i
    or die "$file does not end in \".txt\"\n";
  $base = lc $base;
  my $out_name = "$base.dat";

  open my $in,  '<', $file     or die "Can't open \"$file\": $!\n";
  open my $out, '>', $out_name or die "Can't create \"$out_name\": $!\n";

  # All per-table state is declared here so that nothing learned from
  # one table can leak into the output generated for the next one.
  my %char_uni;   # byte (2 hex digits)       -> code point (4 hex digits)
  my %uni_char;   # code point (4 hex digits) -> byte (2 hex digits)
  my @chardata;   # byte value (0..255)       -> reference to output row

  my @ascii = (0..127);
  if ($no_ascii) {
    # Letters get placeholder rows at U+E000 + 128 + byte value
    # (Unicode's Private Use Area) and are left out of the default
    # identity mapping set up below.
    foreach my $i (65..90, 97..122) {
      $chardata[$i] = [0xE000 + $i + 128, 'other',
                       $i, $i, $i, 0, $i, $i];
    }
    @ascii = (0..64, 91..96, 123..127);
  }

  # Default mapping: byte NN <-> U+00NN.
  foreach my $i (@ascii) {
    my $char    = sprintf "%02X", $i;
    my $unichar = "00$char";
    $char_uni{$char}    = $unichar;
    $uni_char{$unichar} = $char;
  }

  # Placeholder rows for the upper half; replaced further down for every
  # byte that ends up with a mapping.
  foreach my $i (128..255) {
    $chardata[$i] = [0xE000 + $i - 128, 'other',
                     $i, $i, $i, 0, $i, $i];
  }

  # Lines of the table that do not look like a mapping are ignored.
  while (my $line = <$in>) {
    my ($char, $unichar) =
      $line =~ /^\s*(?:=|0x)([A-F0-9]{2})\s+(?:U\+|0x)([A-F0-9]{4})/
        or next;
    $char_uni{$char}    = $unichar;
    $uni_char{$unichar} = $char;
  }
  close $in;

  foreach my $char (keys %char_uni) {
    my $unichar = $char_uni{$char};
    my $info    = $unidata{$unichar} || [];

    # Row: code point, first data column verbatim, then data columns
    # 1..6 translated from code points back into byte values of this
    # character set.
    my @row = (hex $unichar, defined $info->[0] ? $info->[0] : '');
    foreach my $field (1..6) {
      my $related = $info->[$field];
      my $mapped  = defined $related ? $uni_char{$related} : undef;
      # A related character that is missing from this character set
      # falls back to the character itself.
      push @row, hex($mapped || $char);
    }
    $chardata[hex $char] = \@row;
  }

  print {$out} "# Aspell Character Data File.  Do Not Edit!\n";
  print {$out} "$base\n";
  foreach my $i (0..255) {
    my $row = $chardata[$i] || [];
    print {$out} join("\t", @$row), "\n";
  }

  # Buffered write errors only show up when the handle is closed.
  close $out or die "Can't write \"$out_name\": $!\n";
}

