// **********************************
// from basic code to apps: keep this file unchanged
// **********************************

// DataWarehouse.m                                        
// this is the portion of the code where the data related to
// the neural network structure are stored; each has the address
// of its instance of DataWarehouse

#import "DataWarehouse.h"

@implementation DataWarehouse

// to define the name of the files: verificationFileName and trainingFileName
// are used in the simple bp perspective to train and verify a neural
// network using data contained in two files;
//                                 initValuesFileName is used (but it is
// not strictly mandatory) in the CT case;
//                                 minmaxName contains min and max values
// of all the inputs and outputs, in internal and external metrics; it is
// absolutely necessary both in bp and in CT perspective

- setVerificationFileName: (char *) verificationName
      andTrainingFileName: (char *) trainingName
            andMinmaxName: (char *) minmaxName
    andInitValuesFileName: (char *) initValuesName
{
  // Records the four file names used later by createEnd.
  // Only the pointers are stored here: no copy of the strings is made,
  // so the caller's buffers have to stay valid while they are in use.
  initValuesFileName   = initValuesName;
  minmaxFileName       = minmaxName;
  trainingFileName     = trainingName;
  verificationFileName = verificationName;

  return self;
}

// structure both of the network and of the learning process;
// epochNumberInEachTrainingCycle has to be used carefully in CT scheme,
// in principle avoiding (if you are not sure of what you are doing)
// epochNumberInEachTrainingCycle>1 when
// patternNumberInVerificationSet==-1 and patternNumberInTrainingSet==-1

- setInputNodeNumber: (int) inputCount
  andHiddenNodeNumber: (int) hiddenCount
  andOutputNodeNumber: (int) outputCount
  andPatternNumberInVerificationSet: (int) verificationPatterns
  andPatternNumberInTrainingSet: (int) trainingPatterns
  andEpochNumberInEachTrainingCycle: (int) epochsPerCycle
{
  // size of the learning sets and number of epochs in each training cycle
  epochNumberInEachTrainingCycle = epochsPerCycle;
  patternNumberInTrainingSet     = trainingPatterns;
  patternNumberInVerificationSet = verificationPatterns;

  // size of the three layers
  outputNodeNumber = outputCount;
  hiddenNodeNumber = hiddenCount;
  inputNodeNumber  = inputCount;

  // the '1' variants count one extra node: in createEnd they give the
  // column number of the weight matrixes (which include the bias)
  hiddenNodeNumber1 = hiddenCount + 1;
  inputNodeNumber1  = inputCount + 1;

  return self;
}

// backpropagation parameters; the last three are switches

- setBackPropagationParametersWeightRange: (float) range
                                      eps: (float) epsValue
                                    alpha: (float) alphaValue
                   andWithOrderInLearning: (int) randomOrderSwitch
                  andLongTermLearningInCT: (int) longTermSwitch
               andUseOutputsAsTargetsInCT: (int) outputsAsTargetsSwitch
{
  // the three switches
  useOutputsAsTargetsInCT_RelearningScheme         = outputsAsTargetsSwitch;
  longTermLearningInCT_OnlyWithCompleteTrainingSet = longTermSwitch;
  usingRandomOrderInLearning                       = randomOrderSwitch;

  // numeric parameters; weightRange bounds the random initial weights
  // drawn in createEnd (from -weightRange to +weightRange)
  alpha       = alphaValue;
  eps         = epsValue;
  weightRange = range;

  return self;
}

// parameters and matrixes retrieval

// number of nodes in the input layer (bias excluded)
-(int) getInputNodeNumber
{
  return inputNodeNumber;
}

// number of nodes in the hidden layer (bias excluded)
-(int) getHiddenNodeNumber
{
  return hiddenNodeNumber;
}

// number of nodes in the output layer
-(int) getOutputNodeNumber
{
  return outputNodeNumber;
}

// pattern number of the verification set, exactly as it was set
-(int) getPatternNumberInVerificationSet
{
  return patternNumberInVerificationSet;
}

// pattern number of the training set, exactly as it was set
-(int) getPatternNumberInTrainingSet
{
  return patternNumberInTrainingSet;
}

// number of epochs in each training cycle
-(int) getEpochNumberInEachTrainingCycle
{
  return epochNumberInEachTrainingCycle;
}

// verification set: outputs (one column for each output node)
- getOutputVerificationMatrix
{
  return outputVerificationMatrix;
}

// verification set: targets (one column for each output node)
- getTargetVerificationMatrix
{
  return targetVerificationMatrix;
}

// verification set: data (inputNodeNumber+outputNodeNumber columns)
- getDataVerificationMatrix
{
  return dataVerificationMatrix;
}

// training set: outputs (one column for each output node)
- getOutputTrainingMatrix
{
  return outputTrainingMatrix;
}

// training set: targets (one column for each output node)
- getTargetTrainingMatrix
{
  return targetTrainingMatrix;
}

// training set: data (inputNodeNumber+outputNodeNumber columns)
- getDataTrainingMatrix
{
  return dataTrainingMatrix;
}

// min/max matrix (four columns), loaded in createEnd from the minmax file
- getMinmax
{
  return minmax;
}

// input layer vector (inputNodeNumber+1 elements)
- getInputLayer
{
  return inputLayer;
}

// hidden layer vector (hiddenNodeNumber+1 elements)
- getHiddenLayer
{
  return hiddenLayer;
}

// vector with the same dimension as the hidden layer one
- getHiddenLayerTransFuncDerivatives
{
  return hiddenLayerTransFuncDerivatives;
}

// output layer vector (outputNodeNumber elements)
- getOutputLayer
{
  return outputLayer;
}

// vector with the same dimension as the output layer one
- getOutputLayerTransFuncDerivatives
{
  return outputLayerTransFuncDerivatives;
}

// vector with outputNodeNumber elements
- getDeltaOut
{
  return deltaOut;
}

// weights from the input layer to the hidden one
- getWih
{
  return wih;
}

// matrix with the same dimensions as wih ('d' stays for difference)
- getDwih
{
  return dwih;
}

// weights from the hidden layer to the output one
- getWho
{
  return who;
}

// matrix with the same dimensions as who ('d' stays for difference)
- getDwho
{
  return dwho;
}

// back propagation parameter eps
-(float) getEps
{
  return eps;
}

// back propagation parameter alpha
-(float) getAlpha
{
  return alpha;
}

// switch: value received as 'andWithOrderInLearning'
-(int) getRandomOrderInLearning
{
  return usingRandomOrderInLearning;
}

// switch: value received as 'andLongTermLearningInCT'
-(int) getLongTermLearningInCT_OnlyWithCompleteTrainingSet
{
  return longTermLearningInCT_OnlyWithCompleteTrainingSet;
}

// switch: value received as 'andUseOutputsAsTargetsInCT'
-(int) getUseOutputsAsTargetsInCT_RelearningScheme
{
  return useOutputsAsTargetsInCT_RelearningScheme;
}

// in createEnd we do a lot of things

// ---- private helpers used only by createEnd ------------------------------

// Creates a Matrix with the given rows and cols in 'zone'; 'code' is passed
// unchanged to Matrix, where it is used only for error handling purposes.
static id dwNewMatrix(id zone, int rows, int cols, int code)
{
  id m = [Matrix createBegin: zone];

  [m setDimensionRows: rows Cols: cols Code: code];
  return [m createEnd];
}

// Creates a one-dimension Matrix (a vector) with 'length' elements in 'zone'.
static id dwNewVector(id zone, int length, int code)
{
  id v = [Matrix createBegin: zone];

  [v setDimension: length Code: code];
  return [v createEnd];
}

// Opens a mandatory input file; if InFile returns nil the program reports
// the problem and stops with a failure exit status.
static id dwOpenRequiredInFile(id zone, const char *fileName)
{
  id file = [InFile create: zone
//                withName: fileName]; // 1.3.1 or 1.4.1
                   setName:  fileName]; // 2.0.1

  if (file == nil)
     {
     printf("Data file '%s' does not exist.\n", fileName);
     exit(1);
     }
  return file;
}

// Reads rows*cols floats from 'file', row by row, and stores each of them
// in 'matrix' as an external value, using 'scaling' (the min/max matrix);
// 'setLabel' ("training" or "verification") is used only in the error
// message. The program stops with a failure status if data are lacking.
static void dwLoadExternalValues(id <InFile> file, id matrix, id scaling,
                                 int rows, int cols, const char *setLabel)
{
  int r, c;
  float value;

  for (r=0; r<rows; r++)
    for (c=0; c<cols; c++)
      {
      if (0 == [file getFloat: &value])
        {
        printf("Lacking data in %s set file.\n", setLabel);
        exit(1);
        }
      [matrix R: r C: c setFrom: value
              asExternalValueUsing: scaling withShift: 0 ];
      }
}

// Fills the rows x cols matrix 'weights' with random values drawn, row by
// row, from uniformDblRand between -range and +range.
static void dwRandomizeWeights(id weights, int rows, int cols, float range)
{
  int r, c;

  for (r=0; r<rows; r++)
    for (c=0; c<cols; c++)
      [weights R: r C: c setFrom: (float) [uniformDblRand
                                       getDoubleWithMin: -range
                                                withMax:  range]];
}

// --------------------------------------------------------------------------

// createEnd builds all the matrixes and vectors of the neural network,
// reads the min/max file (mandatory), then either the training and
// verification files (when patternNumberInVerificationSet > 0) or, in the
// CT case, the optional initial values file; finally it creates the weight
// matrixes and sets wih and who to random values.
// Any missing mandatory file or lack of data stops the program with exit
// status 1 (a failure has not to be reported as a success to the caller).
// Returns self.
- createEnd
{
  int i, j, k;
  float V;
  id zone;

  [super createEnd];

  // here it is not possible to use simply 'self' as a zone (see [*] at the
  // end of the file)
  zone = [self getZone];

  // creation of arrays: both in training and in verification files we create
  // three matrixes, a little redundant, but helpful to clarify the code
  // (i)   output matrix contains the outputs of the neural network
  // (ii)  target matrix contains the targets used to train the neural network
  //       (which are extracted from the data matrix)
  // (iii) data matrix contains input and target data, read from the data
  // files or internally generated with or without the CT scheme

  // the Code argument is used only for error handling purposes

  k=inputNodeNumber+outputNodeNumber;

  // in the following matrix definitions, values < 0 of
  // patternNumberInTrainingSet or patternNumberInVerificationSet
  // (in Cross Target context)
  // are corrected multiplying them by -1 internally (see Matrix.m)

  outputTrainingMatrix = dwNewMatrix(zone, patternNumberInTrainingSet,
                                     outputNodeNumber, 1);
  targetTrainingMatrix = dwNewMatrix(zone, patternNumberInTrainingSet,
                                     outputNodeNumber, 2);
  dataTrainingMatrix   = dwNewMatrix(zone, patternNumberInTrainingSet,
                                     k, 3);

  outputVerificationMatrix = dwNewMatrix(zone, patternNumberInVerificationSet,
                                         outputNodeNumber, 4);
  targetVerificationMatrix = dwNewMatrix(zone, patternNumberInVerificationSet,
                                         outputNodeNumber, 5);
  dataVerificationMatrix   = dwNewMatrix(zone, patternNumberInVerificationSet,
                                         k, 6);

  // this is the matrix where min/max values in internal and external
  // metrics are stored; each row contains an input or target position
  // the cols number is always 4, being stored two couples of
  // min/max values
  minmax = dwNewMatrix(zone, k, 4, 7);

  // to open the files

  minmaxSet = dwOpenRequiredInFile(zone, minmaxFileName);

  // if we are using internal input and target values, mainly in the Cross
  // Target (CT) perspective, we do not open trainingFileName and
  // verificationFileName

  if (patternNumberInVerificationSet > 0)
    {
    trainingSet     = dwOpenRequiredInFile(zone, trainingFileName);
    verificationSet = dwOpenRequiredInFile(zone, verificationFileName);
    }

  // in CT perspective, we check whether the file reported in
  // initValuesFileName exists (it is not mandatory to have this file, also
  // in CT scheme); for this reason a nil result is accepted here
  else
    {
    initValues = [InFile create: zone
//                     withName: initValuesFileName]; // 1.3.1 or 1.4.1
                        setName:  initValuesFileName]; // 2.0.1
    }

  // to load min max matrix

  for (i=0;i<k;i++)
    for (j=0;j<4;j++)
      {
      if (0 == [minmaxSet getFloat: &V])
        {
        printf("Lacking data in minmax set file.\n");
        exit(1);
        }
      [minmax R: i C: j setFrom: V];
      }

  [minmaxSet drop];

  // if we are using internal input and target values, mainly in the Cross
  // Target (CT) perspective, we do not need to read the files below

  if (patternNumberInVerificationSet > 0)
    {
    // loading trainingSet
    dwLoadExternalValues(trainingSet, dataTrainingMatrix, minmax,
                         patternNumberInTrainingSet, k, "training");
    [trainingSet drop];

    // loading verificationSet
    dwLoadExternalValues(verificationSet, dataVerificationMatrix, minmax,
                         patternNumberInVerificationSet, k, "verification");
    [verificationSet drop];
    }

  else  // CT case
  if (initValues != nil)
    {
    for (j=0;j<k;j++)
      {
      // one value is read for each column ...
      if (0 == [initValues getFloat: &V])
        {
        printf("Lacking data in 'init.val' file.\n");
        exit(1);
        }
      // ... and it is used to fill the whole column (in CT case
      // patternNumberInTrainingSet is negative, so we change its sign)
      for (i=0;i<-1*patternNumberInTrainingSet;i++)
        [dataTrainingMatrix R: i C: j setFrom: V
                            asExternalValueUsing: minmax withShift: 0 ];
      }
    [initValues drop];
    }


// a check in CT case
/*
 for (i=0;i<-1*patternNumberInTrainingSet;i++)
 {for (j=0;j<k;j++ )printf("%7.3f ",[dataTrainingMatrix R:i C:j]);
 printf("\n");}
 exit(0);
*/

  // to create internal matrixes, with input and hidden bias
  // 'wih' means weights from the input  layer to the hidden one
  // 'who' means weights from the hidden layer to the output one
  // the 'd' in 'dwih' or 'dwho' stays for difference

  // rows=hiddenNodeNumber cols=inputNodeNumber1
  wih  = dwNewMatrix(zone, hiddenNodeNumber, inputNodeNumber1, 8);
  dwih = dwNewMatrix(zone, hiddenNodeNumber, inputNodeNumber1, 9);

  // rows=outputNodeNumber cols=hiddenNodeNumber1
  who  = dwNewMatrix(zone, outputNodeNumber, hiddenNodeNumber1, 10);
  dwho = dwNewMatrix(zone, outputNodeNumber, hiddenNodeNumber1, 11);

  // Initialization of the neural network weights (wih first, then who, to
  // keep unchanged the sequence of the random draws)

  dwRandomizeWeights(wih, hiddenNodeNumber, inputNodeNumber1, weightRange);
  dwRandomizeWeights(who, outputNodeNumber, hiddenNodeNumber1, weightRange);

  // creation of internal vectors
  inputLayer  = dwNewVector(zone, inputNodeNumber1, 12);
  hiddenLayer = dwNewVector(zone, hiddenNodeNumber1, 13);
  hiddenLayerTransFuncDerivatives = dwNewVector(zone, hiddenNodeNumber1, 131);
  outputLayer = dwNewVector(zone, outputNodeNumber, 14);
  outputLayerTransFuncDerivatives = dwNewVector(zone, outputNodeNumber, 141);
  deltaOut    = dwNewVector(zone, outputNodeNumber, 15);

  return self;

}

  // to use again the weights of a previous run

// Loads the weights saved in a previous run from the files 'agent<n>.wih'
// (hiddenNodeNumber rows, inputNodeNumber+1 cols) and 'agent<n>.who'
// (outputNodeNumber rows, hiddenNodeNumber+1 cols), read row by row, into
// the wih and who matrixes.
// If a file cannot be opened, or if it contains less data than required,
// a message is printed and the program stops with exit status 1.
// Returns self.
- readWeightsFromFileOfAgentNumber: (int) n
{
   int i, j;
   float V;
   id <InFile> tempInFile;
   char name [40];

   //wih
   snprintf(name, sizeof name, "agent%d.wih", n);

// tempInFile=[InFile create: [self getZone] withName: name];// 1.3.1 or 1.4.1
   tempInFile=[InFile create: [self getZone] setName:  name];// 2.0.1

   // same check made in createEnd for the data files: without it, a missing
   // file would be reported below as a file with lacking data
   if (tempInFile == nil)
      {
      printf("Data file '%s' does not exist.\n", name);
      exit(1);
      }

   for (i=0;i<hiddenNodeNumber;i++)
     {
     for (j=0;j<inputNodeNumber+1;j++)
       {
       if (0 != [tempInFile getFloat: &V])
         [wih R:i C:j setFrom: V];
       else
         {
         printf("Lacking data in agent%d.wih file.\n",n);
         exit(1);
         }
       }
     }

   [tempInFile drop];

   //who
   snprintf(name, sizeof name, "agent%d.who", n);

// tempInFile=[InFile create: [self getZone] withName: name];// 1.3.1 or 1.4.1
   tempInFile=[InFile create: [self getZone] setName:  name];// 2.0.1

   if (tempInFile == nil)
      {
      printf("Data file '%s' does not exist.\n", name);
      exit(1);
      }

   for (i=0;i<outputNodeNumber;i++)
     {
     for (j=0;j<hiddenNodeNumber+1;j++)
       {
       if (0 != [tempInFile getFloat: &V])
         [who R:i C:j setFrom: V];
       else
         {
         printf("Lacking data in agent%d.who file.\n",n);
         exit(1);
         }
       }
     }

   [tempInFile drop];

  return self;
}


@end

// [*]
//                
// from a mail of Alex Lancaster, on 4/20/98 (owner-swarm-support@santafe.edu)


// 1. You should use the "...: self]" form of creation *only* when the
// object that you are creating these objects *within* is itself a Swarm
// (or a subclass of a Swarm - in other words when creating objects
// within a ModelSwarm or an ObserverSwarm).

// 2. At all other times you should still use the "...: [self getZone]"
// form.  In other words, when creating objects within an ordinary
// SwarmObject (not a Swarm!) you still need to get the Zone of that
// object.

// The reason for this, is that a Swarm is a special kind of object,
// which is intended to form the basis of segregated storage allocation,
// and in fact is also a "Zone".  An ordinary SwarmObject does not have
// this property.  Since all SwarmObjects (or subclasses thereof) must
// have been created at the top level in a Model or ObserverSwarm - then
// the [self getZone] call returns the Zone (in other words - the Swarm)
// in which that object was created.  In a future parallelised version of
// Swarm - the notion of a Swarm as a storage Zone will come into its
// own.
