/*
Copyright (C) 2000-2010  Ministere de la culture et de la communication (France), AJLSM
See LICENCE file
 */
package fr.gouv.culture.oai;

import java.net.URL;
import java.util.HashMap;

import org.apache.avalon.framework.parameters.Parameters;
import org.apache.avalon.framework.service.ServiceException;
import org.apache.avalon.framework.service.ServiceManager;
import org.apache.avalon.framework.service.Serviceable;
import org.apache.cocoon.components.source.SourceUtil;
import org.apache.cocoon.xml.XMLConsumer;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.AttributesImpl;

import fr.gouv.culture.oai.util.OAIUtilities;
import fr.gouv.culture.sdx.documentbase.DocumentBase;
import fr.gouv.culture.sdx.exception.SDXException;
import fr.gouv.culture.sdx.exception.SDXExceptionCode;

public abstract class AbstractOAIHarvester extends SynchronizedOAIObjectImpl implements OAIHarvester, Serviceable {

	/**Parameter name under which {@link #requestUrl} is published by {@link #getHarvestParameters()}*/
	public static final String OAI_REQUEST_URL = "oaiRequestUrl";
	/**Parameter name under which {@link #repoUrl} is published by {@link #getHarvestParameters()}*/
	public static final String OAI_REPOSITORY_URL = "oaiRepositoryUrl";

	/**Service manager for the object, supplied through {@link #service(ServiceManager)}*/
	protected ServiceManager manager = null;

	/**List of email address strings for
	 * administrators of this harvester.
	 * Stays <code>null</code> until {@link #setAdminEmails(String[])} is called.
	 */
	protected String[] adminEmails = null;

	/**User agent value to send with request*/
	protected String userAgent = "";

	/**Variable to hold the url of the request to be sent*/
	protected String requestUrl = null;
	/**The new URL to resolve the next resumptionToken;
	 * read by the request loop after each response to decide whether another request follows
	 */
	protected String newRequestUrl = null;
	/**The parameters of the request for which a response is being received
	 * (filled from the attributes of the OAI <code>request</code> element)
	 */
	protected Parameters requestParams = null;
	/**Buffer for character data collected from the sax stream*/
	protected StringBuffer sBuff = null;
	/**Flag for sax event handling indicating that an element's content should be captured and handled in the endElement method*/
	protected boolean captureElemContent = false;
	/**Flag for sax event handling indicating that a record should be captured*/
	protected boolean captureRecord = false;
	/**Flag for sax event handling indicating that a record should be deleted*/
	protected boolean deleteRecord = false;


	/**Variable to hold the url of the repository from which a response is being received
	 * (text content of the OAI <code>request</code> element)
	 */
	protected String repoUrl = null;
	/**Variable to hold the _datestamp of the response of the repository from which a response is being received*/
	protected String responseDate = null;
	/**Variable to hold the resumptionToken of the response of the repository from which a response is being received*/
	protected String resumptionToken = null;
	/**Variable to hold cursor information from a request using resumptionTokens to return an entire set by multiple parts
	 * (not read or written anywhere else in this class)
	 */
	protected int cursor = -1;
	/**Value of the <code>code</code> attribute of the last OAI <code>error</code> element received, if any*/
	protected String errorCode = null;

	/**The oai identifier for the current record from the stream*/
	protected String currentOaiIdentifier = null;
	/**The _datestamp for the current record from the stream*/
	protected String currentDatestamp = null;
	/**The oai status for the current record from the stream
	 * (only set when the header carries the "deleted" status)
	 */
	protected String currentOaiStatus = null;


	/**If an identifier name is provided we will attempt
	 * to take the value of the element named as such
	 * outside of the OAI2.0 namespace and retrieve an
	 * underlying XML document, assuming the value is
	 * a valid url identifier, and will incorporate the
	 * XML content into the oai-record
	 */
	protected String identifierName = "";//empty by default: the feature is disabled

	/**Variable to hold any value retrieved based on
	 * @see #identifierName
	 */
	protected String currentMetadtaUrlIdentifier = null;


	/**The first externally provided xml consumer.
	 * As serialization of the sax stream must be done
	 * the consumer of this object has to be dynamically
	 * changed and therefore we need a reference to the
	 * first externally provided consumer so that we may
	 * continue to supply it with sax events.
	 */
	protected XMLConsumer firstXmlConsumer = null;


	/**
	 * Receives the service manager for this object and stores it in {@link #manager}.
	 *
	 * @param serviceManager the service manager to keep
	 * @throws ServiceException declared by the overridden contract; this implementation does not throw it
	 */
	public void service(ServiceManager serviceManager) throws ServiceException {
		manager = serviceManager;
	}

	/**
	 * Sets the consumer of this object's events and remembers it as
	 * {@link #firstXmlConsumer}.
	 *
	 * The remembered reference is overwritten on every call, including when
	 * <code>consumer</code> is <code>null</code>.
	 *
	 * @param consumer the consumer that should receive the sax events
	 */
	public void setConsumer(XMLConsumer consumer) {
		// endElement() reinstalls this consumer after each record has been processed
		firstXmlConsumer = consumer;
		super.setConsumer(consumer);
	}


	/**
	 * Retrieves the list of administrator email addresses.
	 *
	 * @return the array held by this harvester (not a copy); <code>null</code>
	 *         if no addresses have been set
	 */
	public String[] getAdminEmails() {
		return this.adminEmails;
	}

	/**
	 * Establishes the list of administrator email addresses.
	 *
	 * @param adminEmails the addresses to use; the array is stored as is, not copied
	 */
	public void setAdminEmails(String[] adminEmails) {
		this.adminEmails = adminEmails;
	}

	/**
	 * Establishes the name of the metadata element whose text content is
	 * treated as the url of an underlying document.
	 *
	 * @param name local name of the element to watch for
	 * @see #identifierName
	 */
	public void setIdentifierName(String name) {
		identifierName = name;
	}

	/**
	 * Sends a start-element event for the harvester configuration node, in the
	 * OAI 2.0 namespace, to the given handler.
	 *
	 * The event carries a single attribute whose uri, names and value are all
	 * empty strings. No matching end-element event is sent by this method.
	 * NOTE(review): the element is left open and the attribute has no name;
	 * confirm whether subclasses complete the element or whether this is an
	 * unfinished implementation.
	 *
	 * @param contentHandler the handler that receives the event
	 * @throws SAXException if the handler rejects the event
	 */
	public void toSAX(ContentHandler contentHandler)
	throws SAXException {
		final String elementName = DocumentBase.ConfigurationNode.OAI_HARVESTER;
		final String qualifiedName = OAIObject.Node.Prefix.OAI + ":" + elementName;
		final AttributesImpl attributes = new AttributesImpl();
		attributes.addAttribute("", "", "", Node.Type.CDATA, "");
		contentHandler.startElement(OAIObject.Node.Xmlns.OAI_2_0, elementName, qualifiedName, attributes);
	}


	/**
	 * Examines every start tag of the repository response, arms the capture
	 * flags accordingly and then forwards the event to the parent implementation.
	 *
	 * Inside the OAI 2.0 namespace:
	 * <ul>
	 * <li>responseDate, resumptionToken, identifier, datestamp: their text will be captured</li>
	 * <li>record: record capture is prepared and a new document is started on the current consumer</li>
	 * <li>request: its text will be captured and its attributes are stored in {@link #requestParams}</li>
	 * <li>header: a "deleted" status marks the current record for deletion</li>
	 * <li>error: its text will be captured and its code is remembered and logged</li>
	 * </ul>
	 * Outside that namespace, only an element whose local name equals
	 * {@link #identifierName} triggers text capture.
	 *
	 * @param s namespace uri of the element
	 * @param s1 local name of the element
	 * @param s2 qualified name of the element
	 * @param attributes attributes of the element, possibly <code>null</code>
	 * @throws SAXException if record preparation or a downstream handler fails
	 */
	public void startElement(String s, String s1, String s2, Attributes attributes)
	throws SAXException {

		final boolean oaiNamespace = OAIObject.Node.Xmlns.OAI_2_0.equals(s);

		if (!oaiNamespace) {
			// metadata element: only the configured identifier element is of interest
			if (OAIUtilities.checkString(this.identifierName) && this.identifierName.equals(s1)) {
				this.captureElemContent = true;
			}
		} else if (OAIObject.Node.Name.RESPONSE_DATE.equals(s1)
				|| OAIObject.Node.Name.RESUMPTION_TOKEN.equals(s1)
				|| OAIObject.Node.Name.IDENTIFIER.equals(s1)
				|| OAIObject.Node.Name.DATESTAMP.equals(s1)) {
			// simple text elements, their value is read in endElement()
			this.captureElemContent = true;
		} else if (OAIObject.Node.Name.RECORD.equals(s1)) {
			this.captureRecord = true;
			prepareRecordCapture();
			// open the document for the metadata serializer, see prepareRecordCapture()
			super.startDocument();
			super.startPrefixMapping(OAIObject.Node.Prefix.XSI, OAIObject.Node.Xmlns.XSI);
		} else if (OAIObject.Node.Name.REQUEST.equals(s1)) {
			this.captureElemContent = true;
			if (attributes != null) {
				final int count = attributes.getLength();
				// the parameter holder is only created when there is at least one attribute
				if (count > 0 && this.requestParams == null) {
					this.requestParams = new Parameters();
				}
				for (int idx = 0; idx < count; idx++) {
					final String name = attributes.getLocalName(idx);
					final String value = attributes.getValue(idx);
					if (OAIUtilities.checkString(name) && OAIUtilities.checkString(value)) {
						this.requestParams.setParameter(name, value);
					}
				}
			}
		} else if (OAIObject.Node.Name.HEADER.equals(s1)) {
			if (attributes != null) {
				final String headerStatus = attributes.getValue(OAIObject.Node.Name.STATUS);
				if (OAIObject.Node.Value.DELETED.equalsIgnoreCase(headerStatus)) {
					// the status is needed later, when the record is indexed
					this.currentOaiStatus = headerStatus;
					// the current record has to be removed
					this.deleteRecord = true;
					// keep the capture flag raised so that the record's meta-information is still handled
					this.captureRecord = true;
				}
			}
		} else if (OAIObject.Node.Name.ERROR.equals(s1)) {
			this.captureElemContent = true;
			if (attributes != null) {
				this.errorCode = attributes.getValue(OAIObject.Node.Name.CODE);
				if (OAIUtilities.checkString(this.errorCode)) {
					OAIUtilities.logError(super.logger, this.errorCode, null);
				}
			}
		}

		super.startElement(s, s1, s2, attributes);


	}

	/**
	 * Accumulates character data while text capture is active, then forwards
	 * the event to the parent implementation.
	 *
	 * @param chars the character array delivered by the parser
	 * @param relation start offset within <code>chars</code>
	 * @param relation1 number of characters to read
	 * @throws SAXException if a downstream handler fails
	 */
	public void characters(char[] chars, int relation, int relation1)
			throws SAXException
	{
		if (this.captureElemContent) {
			StringBuffer collected = this.sBuff;
			if (collected == null) {
				// the buffer is created lazily on the first chunk of text
				collected = new StringBuffer();
				this.sBuff = collected;
			}
			collected.append(chars, relation, relation1);
		}

		super.characters(chars, relation, relation1);
	}

	/**
	 * Forwards the end tag to the parent implementation, then acts on it:
	 * <ol>
	 * <li>if text was buffered, stores it in the field matching the element that just ended
	 *     (or hands an OAI error message to {@link #handleErrors(String)})</li>
	 * <li>at the end of an OAI header, prepares the deletion of the record when it was flagged as deleted</li>
	 * <li>at the end of an OAI record, closes the record document, captures it and restores the first consumer</li>
	 * <li>at the end of the OAI-PMH root, either stores the harvested data or follows the resumption token,
	 *     then resets the harvester fields</li>
	 * </ol>
	 * The text buffer and the capture flag are cleared at the end of every call.
	 *
	 * @param s namespace uri of the element
	 * @param s1 local name of the element
	 * @param s2 qualified name of the element
	 * @throws SAXException if a downstream handler fails or if the end-of-response processing fails
	 */
	public void endElement(String s, String s1, String s2) 
		throws SAXException 
	{

		super.endElement(s, s1, s2);
		
		// step 1: dispatch any buffered text to the field matching this element
		if ( this.sBuff != null && this.sBuff.length() > 0 ) 
		{
			String content = this.sBuff.toString();

			if (OAIObject.Node.Xmlns.OAI_2_0.equals(s)) {
				if (OAIObject.Node.Name.RESPONSE_DATE.equals(s1))
					this.responseDate = content;
				else if (OAIObject.Node.Name.RESUMPTION_TOKEN.equals(s1))
					this.resumptionToken = content;
				else if (OAIObject.Node.Name.IDENTIFIER.equals(s1))
					this.currentOaiIdentifier = content;
				else if (OAIObject.Node.Name.DATESTAMP.equals(s1))
					this.currentDatestamp = content;
				else if (OAIObject.Node.Name.REQUEST.equals(s1))
					this.repoUrl = content;//the text of the request element is the repository url
				else if (OAIObject.Node.Name.ERROR.equals(s1))
					handleErrors(content);
			}


			// outside the OAI namespace only the configured identifier element is handled
			if (!OAIObject.Node.Xmlns.OAI_2_0.equals(s)) {
				if (OAIUtilities.checkString(this.identifierName) && this.identifierName.equals(s1)) {
					this.currentMetadtaUrlIdentifier = content;
					try {
						prepareResourceFromUrlIdentifierCapture();
					} catch (Exception e) {
						//if we can't build the document we don't just fail completely, we continue
						OAIUtilities.logException(logger, e);
					}
				}
			}

		}
		if (OAIObject.Node.Xmlns.OAI_2_0.equals(s)) {

			//disabled: boolean shoulHarvestCurrentDocument = true;

			// step 2: end of a record header
			if (OAIObject.Node.Name.HEADER.equals(s1)) {

				/*disabled check:
				shoulHarvestCurrentDocument = shouldHarvestDocument();//we want to know if it's a good idea to harvest this document vs the docs we have in the document base
            	if (!shoulHarvestCurrentDocument)//we do not need to harvest this doc, see shoulHarvestCurrentDocument() for details
            		abortRecordCapture();*/

				if (this.deleteRecord)//the record must be deleted
					prepareRecordForDeletion();

			}

			// step 3: end of a record
			if (this.captureRecord) {

				if (OAIObject.Node.Name.RECORD.equals(s1)) {
					try {
						this.captureRecord = false;
						captureResourceFromUrlIdentifier();
						super.endPrefixMapping(OAIObject.Node.Prefix.XSI);
						super.endDocument();//sending end document for the metadata serializer, see captureRecord() implementations
						/*disabled:
						if (!shoulHarvestCurrentDocument) abortRecordCapture();
                        else captureRecord();*/
						captureRecord();
					} catch (Exception e) {
						//if we can't build the document we don't just fail completely, we continue
						OAIUtilities.logException(logger, e);
					}
					super.recycle();//nullifying the consumer
					super.setConsumer(firstXmlConsumer);//resetting the consumer to the first external consumer provided if any
					acquireSynchronizedXMLConsumer();
				}
			} 
			// step 4: end of the OAI-PMH root element.
			// NOTE(review): this branch is the else of the captureRecord test above, so it is
			// skipped whenever captureRecord is still true when the root element ends
			// (e.g. a deleted header seen outside of a record element) -- confirm this is intended.
			else if (OAIObject.Node.Name.OAI_PMH.equals(s1)) {
				
				/* End of the harvest of the current batch
				 * 1- The harvested records are indexed
				 * 2- If there is no resumption token: the metadata of the harvest are saved
				 * 2bis- If there is a resumption token: the harvest continues with the next batch
				 */
				
				try {
					
					//before saving critical fields, we have to check that the harvest is finished, i.e. that there is no resumptionToken
					if (this.resumptionToken == null) {
						boolean storedData = storeHarvestedData();
						if(!OAIUtilities.checkString(this.errorCode))
							saveCriticalFields(storedData);//we only do this if we were able to index some documents
						else {
							// the harvest stopped on an error: the information about this harvest is not stored,
							// so that it is not later resumed as if everything had gone well
						}
					}

					else handleResumptionToken();//if we have a resumptionToken we continue to harvest

				} catch (Exception e) {
					OAIUtilities.logException(logger, e);
					throw new SAXException(e.getMessage(), e);
				} finally {
					// NOTE(review): resetAllFields() also clears identifierName, which is configuration
					// rather than per-response state -- verify that it is set again before the next batch
					resetAllFields();
				}
			}

		}

		//resetting the string buffer
		this.sBuff = null;
		//resetting the class field
		this.captureElemContent = false;

	}

	/**
	 * Cancels the record capture in progress, asking
	 * {@link #resetRecordCaptureFields(boolean)} to also delete any document
	 * already saved for the current record.
	 */
	protected void abortRecordCapture() {
		final boolean deleteSavedDocument = true;
		resetRecordCaptureFields(deleteSavedDocument);
	}

	/**
	 * Logs an error reported during harvesting, together with the repository
	 * url, the request url and every request parameter that was sent and that
	 * may have caused the error state.
	 *
	 * @param errorMsg the error text received from the repository
	 */
	protected void handleErrors(String errorMsg) {
		final StringBuffer report = new StringBuffer();
		report.append("An error occured during harvesting, please see below:\n\tRepository: ")
				.append(this.repoUrl)
				.append("\n\tRequest: ")
				.append(this.requestUrl);

		if (this.requestParams != null) {
			final String[] names = this.requestParams.getNames();
			for (int idx = 0; idx < names.length; idx++) {
				final String name = names[idx];
				// a name that fails the check is still listed, with an empty value
				final String value = OAIUtilities.checkString(name)
						? this.requestParams.getParameter(name, "")
						: "";
				report.append("\n\tParameter: ").append(name).append(" ; value:").append(value);
			}
		}

		report.append("\n\t").append(errorMsg);
		OAIUtilities.logError(logger, report.toString(), null);
	}

	/**Prepares resources for capturing an oai record.
	 * Called when an OAI record element starts, before the start-document
	 * event is sent to the current consumer.
	 *
	 * @throws SAXException
	 */
	protected abstract void prepareRecordCapture() throws SAXException;

	/**Queries underlying data structures to
	 * determine whether the current oai record
	 * should be harvested based on the state of the
	 * harvester (ie. past harvests, presence or lack of the record
	 * in harvester data structures).
	 * In this class the only call to this method is inside commented-out code.
	 *
	 * @return boolean
	 */
	protected abstract boolean shouldHarvestDocument();

	/**When a complete record is received, this method
	 * takes the necessary steps to save the record
	 * to any underlying media, or pre-media.
	 * Called at the end of each OAI record element; an exception thrown
	 * here is logged and the harvest continues.
	 *
	 * @throws Exception
	 */
	protected abstract void captureRecord() throws Exception;

	/**After receiving a header@status="deleted" for a record,
	 * this method makes the necessary preparations to delete
	 * the record from the harvester.
	 * Called at the end of the OAI header element when {@link #deleteRecord} is set.
	 */
	protected abstract void prepareRecordForDeletion();


	/**Prepares resources for capturing the underlying document
	 * available via a url described by the oai record.
	 * Called at the end of the element named by {@link #identifierName},
	 * once its text has been stored in {@link #currentMetadtaUrlIdentifier}.
	 *
	 * @see  #currentMetadtaUrlIdentifier
	 * @see #identifierName
	 */
	protected abstract void prepareResourceFromUrlIdentifierCapture();

	/**When a complete "underlying document" is received, this method
	 * takes the necessary steps to save the document
	 * to any underlying media, or pre-media.
	 * Called at the end of each OAI record element, before {@link #captureRecord()}.
	 *
	 * @see  #currentMetadtaUrlIdentifier
	 * @see #identifierName
	 */
	protected abstract void captureResourceFromUrlIdentifier();

	/**This method saves all harvested records to a particular media.
	 * Called at the end of a response that carries no resumption token.
	 *
	 * @return boolean passed on to {@link #saveCriticalFields(boolean)}
	 * @throws Exception
	 */
	protected abstract boolean storeHarvestedData() throws Exception;

	/**This method stores information about a failed
	 * (internal failure, not external error from the OAI repository)
	 * harvest request, so that the valid request may be re-executed
	 * by the proper mechanism.
	 * Not called from within this class.
	 *
	 * @param e the failure to record
	 */
	protected abstract void storeFailedHarvestData(Exception e);

	/**This method handles and reissues a new request using any resumption token received.
	 * Called at the end of a response that carries a resumption token.
	 */
	protected abstract void handleResumptionToken();

	/**If data has been harvested, this method
	 * saves any/all details of the harvest.
	 * Called at the end of a response only when there is no resumption token
	 * and no OAI error code was received.
	 *
	 * @param dataHarvested boolean indicating data was harvested
	 * @throws SAXException
	 */
	protected abstract void saveCriticalFields(boolean dataHarvested) throws SAXException;


	/**Stops any record capture currently
	 * being executed, resets the corresponding
	 * class fields and potentially
	 * deletes any document saved to any media
	 * @param deleteDoc <code>true</code> to also delete any document already saved
	 */
	protected abstract void resetRecordCaptureFields(boolean deleteDoc);

	/**
	 * Resets the fields that describe the response being processed.
	 *
	 * Cleared here: the resumption token (through {@link #resetResumptionToken()}),
	 * the identifier name, the request parameters, the repository url, the
	 * current metadata url identifier, the current oai identifier, the text
	 * buffer, both capture flags and the error code.
	 * Left untouched here: {@link #deleteRecord}, {@link #currentOaiStatus},
	 * {@link #currentDatestamp}, {@link #responseDate} and {@link #cursor}.
	 */
	protected void resetAllFields() {
		this.resetResumptionToken();

		// request/response description
		this.identifierName = null;
		this.requestParams = null;
		this.repoUrl = null;
		this.errorCode = null;

		// per-record state
		this.currentMetadtaUrlIdentifier = null;
		this.currentOaiIdentifier = null;

		// sax capture state
		this.sBuff = null;
		this.captureElemContent = false;
		this.captureRecord = false;
	}

	/**
	 * Forgets the first externally provided consumer, then lets the parent
	 * implementation recycle itself.
	 */
	public void recycle() {
		firstXmlConsumer = null;
		super.recycle();
	}

	/**Forgets the resumption token of the last response*/
	protected void resetResumptionToken() {
		resumptionToken = null;
	}

	/**
	 * Receives an OAI request whose url is also the original request url.
	 *
	 * @param url the url which represents the request
	 */
	public synchronized void receiveSynchronizedRequest(String url) {
		final String originalRequestUrl = url;
		receiveSynchronizedRequest(url, originalRequestUrl);
	}

	/**
	 * Receives an OAI request as a url and keeps sending follow-up requests for
	 * as long as {@link #newRequestUrl} designates a url different from the one
	 * that was just processed.
	 *
	 * The original request may have been changed if we are updating; in that
	 * case, we only want to get the last documents. Each request is wrapped
	 * between {@link #acquire()} and {@link #release()}. Nothing happens when
	 * <code>url</code> fails the string check.
	 *
	 * @param url the url which represents the request
	 * @param originalRequestUrl the original request, stored in {@link #requestUrl} for the duration of the harvest
	 */
	public synchronized void receiveSynchronizedRequest(String url, String originalRequestUrl) {
		if (!OAIUtilities.checkString(url)) {
			return;
		}

		this.requestUrl = originalRequestUrl;

		String previous = null;
		// the loop ends when no next url is available or when the next url is the one just processed
		for (String pending = url; pending != null && !pending.equals(previous); pending = this.newRequestUrl) {
			try {
				this.acquire();
				this.receiveRequest(pending);
			} catch (Exception e) {
				OAIUtilities.logError(logger, pending, e);
				this.newRequestUrl = null;//no follow-up after a failure
			} finally {
				this.release();
			}
			previous = pending;
		}

		// the harvest is over: forget the urls it used
		this.newRequestUrl = null;
		this.requestUrl = null;
	}

	/**
	 * Internal receive request method: fetches the given url and feeds the
	 * response, as sax events, to this object.
	 *
	 * Unlike {@link #receiveSynchronizedRequest(String, String)}, this method
	 * does not call {@link #acquire()} / {@link #release()}; it is useful when
	 * the object has already been acquired elsewhere in the processing, e.g.
	 * when handling resumption tokens. The method itself is still declared
	 * <code>synchronized</code>.
	 *
	 * The request carries the configured user agent and, when administrator
	 * addresses are configured, a "From" header. Any failure is logged and not
	 * propagated; nothing happens when <code>url</code> fails the string check.
	 *
	 * @param url the url of the OAI request to send
	 */
	public synchronized void receiveRequest(String url) {
		if (!OAIUtilities.checkString(url)) {
			return;
		}

		String[] args = {url};
		try {
			URL rUrl = new URL(url);
			fr.gouv.culture.util.apache.avalon.excalibur.source.impl.URLSource source = new fr.gouv.culture.util.apache.avalon.excalibur.source.impl.URLSource();

			// Lets us pass timeout parameters (connect and read). Apparently this does not work,
			// because the harvester sends a second request when the first one gets no response within 3 minutes!
			// NOTE(review): the original comments announce a 15 min timeout; if the unit is
			// milliseconds, 54000000 is 15 hours (15 min would be 900000) -- confirm the unit.
			Parameters par = new Parameters();
			par.setParameter("connect-timeout", "54000000");
			par.setParameter("read-timeout", "54000000");
			source.parameterize(par);

			HashMap sp = new HashMap();
			sp.put(HTTP_HEADER_NAME_USER_AGENT, this.userAgent);
			// adminEmails is null until setAdminEmails() is called: without this guard the loop
			// threw a NullPointerException that was swallowed below and silently aborted the request
			if (this.adminEmails != null) {
				for (int i = 0; i < this.adminEmails.length; i++) {
					String adminEmail = this.adminEmails[i];
					// a single header value is kept: the last usable address wins
					if (OAIUtilities.checkString(adminEmail))
						sp.put(HTTP_HEADER_NAME_FROM, adminEmail);//using adminEmail in following harvester implementation guidelines
				}
			}
			source.init(rUrl, sp);

			OAIUtilities.logInfo(logger, SDXExceptionCode.OAI_HARVESTER_RECEPTION_BEGIN, args);
			SourceUtil.toSAX(this.manager, source, "xml",/*handler*/ this);//consume the sax events
		} catch (Exception e) {
			OAIUtilities.logError(logger, url, e);
		} finally {
			// logged whether or not the reception succeeded
			OAIUtilities.logInfo(logger, SDXExceptionCode.OAI_HARVESTER_RECEPTION_END, args);
		}

	}

	/**
	 * Returns the parameters of the request sent by this harvester, together
	 * with the "repository url", the "request url" and the harvester
	 * administrator email addresses.
	 *
	 * Each address is stored under the admin-email node name followed by
	 * "_" and its index in {@link #adminEmails}. When no addresses have been
	 * configured, no such parameter is added.
	 *
	 * This info will be useful when harvested records are processed.
	 *
	 * @return a new Parameters object; never <code>null</code>
	 */
	protected Parameters getHarvestParameters() {
		Parameters ret = new Parameters();
		if (this.requestParams != null)
			ret.merge(this.requestParams);
		ret.setParameter(OAI_REPOSITORY_URL, this.repoUrl);
		ret.setParameter(OAI_REQUEST_URL, this.requestUrl);
		// adminEmails is null until setAdminEmails() is called; guard against the NullPointerException
		if (this.adminEmails != null) {
			for (int i = 0; i < this.adminEmails.length; i++) {
				ret.setParameter(OAIObject.Node.Name.ADMIN_EMAIL + "_" + Integer.toString(i), this.adminEmails[i]);
			}
		}
		return ret;
	}


	/**
	 * Closes the OAI harvester. This base implementation holds nothing that
	 * needs closing.
	 */
	public void close() {
		// intentionally empty
	}

}
