/*
 * Decompiled with CFR 0.152.
 */
package org.apache.uima.examples;

import java.io.InputStream;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_component.CasAnnotator_ImplBase;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.FSIterator;
import org.apache.uima.cas.FeatureStructure;
import org.apache.uima.cas.Type;
import org.apache.uima.cas.TypeSystem;
import org.apache.uima.internal.util.XMLUtils;
import org.apache.uima.resource.ResourceInitializationException;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

/**
 * Annotator that strips the markup from an XML document and stores the remaining
 * character data as a new plain-text view of the CAS.
 *
 * <p>The XML is read from the view named {@code "xmlDocument"} and the result is written
 * to a newly created view named {@code "plainTextDocument"}. When the
 * {@link #PARAM_XMLTAG} configuration parameter is set, only character data found inside
 * elements with that name (compared case-insensitively) is kept; when it is not set, all
 * character data in the document is kept.
 */
public class XmlDetagger extends CasAnnotator_ImplBase {
    /**
     * Name of the configuration parameter holding the XML element name whose content is
     * the document text. A missing (null) value means "keep all character data".
     */
    public static final String PARAM_XMLTAG = "XmlTagContainingText";

    /** Factory used to create a fresh SAX parser for every processed CAS. */
    private final SAXParserFactory parserFactory = XMLUtils.createSAXParserFactory();

    /** SourceDocumentInformation type; null when the type system does not define it. */
    private Type sourceDocInfoType;

    /** Value of {@link #PARAM_XMLTAG}, or null when the parameter is not configured. */
    private String mXmlTagContainingText = null;

    /**
     * Initializes the annotator and reads the {@link #PARAM_XMLTAG} parameter.
     *
     * @param aContext the UIMA context supplying the configuration parameters
     * @throws ResourceInitializationException if the superclass initialization fails
     */
    public void initialize(UimaContext aContext) throws ResourceInitializationException {
        super.initialize(aContext);
        this.mXmlTagContainingText = (String) this.getContext().getConfigParameterValue(PARAM_XMLTAG);
    }

    /**
     * Looks up the SourceDocumentInformation type in the given type system.
     *
     * @param aTypeSystem the type system of the CASes that will be processed
     * @throws AnalysisEngineProcessException declared by the overridden method; not thrown here
     */
    public void typeSystemInit(TypeSystem aTypeSystem) throws AnalysisEngineProcessException {
        this.sourceDocInfoType = aTypeSystem.getType("org.apache.uima.examples.SourceDocumentInformation");
    }

    /**
     * Parses the XML held in the {@code "xmlDocument"} view, creates the
     * {@code "plainTextDocument"} view containing the detagged text, copies the document
     * language from the {@code "_InitialView"} view, and indexes the first
     * SourceDocumentInformation feature structure of the XML view (if any) in the new view.
     *
     * @param aCAS the CAS to process
     * @throws AnalysisEngineProcessException if the SAX parser cannot be created or the
     *         XML cannot be parsed
     */
    public void process(CAS aCAS) throws AnalysisEngineProcessException {
        CAS xmlCas = aCAS.getView("xmlDocument");
        InputStream xmlStream = xmlCas.getSofa().getSofaDataStream();
        DetagHandler handler = new DetagHandler();
        try {
            SAXParser parser = this.parserFactory.newSAXParser();
            parser.parse(xmlStream, handler);
        } catch (Exception e) {
            throw new AnalysisEngineProcessException(e);
        } finally {
            // Release the sofa data stream on both the success and the failure path.
            closeQuietly(xmlStream);
        }

        CAS plainTextView = aCAS.createView("plainTextDocument");
        plainTextView.setDocumentText(handler.getDetaggedText());
        plainTextView.setDocumentLanguage(aCAS.getView("_InitialView").getDocumentLanguage());

        // The type lookup yields null when SourceDocumentInformation is not part of the
        // type system; in that case there is nothing to copy, so skip the index lookup.
        if (this.sourceDocInfoType != null) {
            FSIterator iter = xmlCas.getAnnotationIndex(this.sourceDocInfoType).iterator();
            if (iter.hasNext()) {
                FeatureStructure sourceDocInfoFs = (FeatureStructure) iter.next();
                plainTextView.getIndexRepository().addFS(sourceDocInfoFs);
            }
        }
    }

    /**
     * Closes the given stream, tolerating a null stream and ignoring close failures.
     *
     * @param stream the stream to close; may be null
     */
    private static void closeQuietly(InputStream stream) {
        if (stream == null) {
            return;
        }
        try {
            stream.close();
        } catch (java.io.IOException ignored) {
            // Best effort: the stream was only read from, and a failure to close it must
            // not turn an otherwise successful parse into an error.
        }
    }

    /**
     * SAX handler that accumulates the character data of the parsed document, limited to
     * the content of the configured text element when one is configured.
     */
    class DetagHandler extends DefaultHandler {
        /** Text collected so far. */
        private final StringBuilder detaggedText = new StringBuilder();

        /** Whether character data reported by the parser is currently being collected. */
        private boolean insideTextTag;

        /**
         * Creates a handler that collects everything when no text element is configured,
         * and otherwise collects nothing until that element is opened.
         */
        public DetagHandler() {
            this.insideTextTag = XmlDetagger.this.mXmlTagContainingText == null;
        }

        /** Starts collecting when the configured text element is opened. */
        @Override
        public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
            // equalsIgnoreCase(null) is false, so this never fires when no element is configured.
            if (qName.equalsIgnoreCase(XmlDetagger.this.mXmlTagContainingText)) {
                this.insideTextTag = true;
            }
        }

        /** Stops collecting when the configured text element is closed. */
        @Override
        public void endElement(String uri, String localName, String qName) throws SAXException {
            if (qName.equalsIgnoreCase(XmlDetagger.this.mXmlTagContainingText)) {
                this.insideTextTag = false;
            }
        }

        /** Appends the reported characters while collecting is active. */
        @Override
        public void characters(char[] ch, int start, int length) throws SAXException {
            if (this.insideTextTag) {
                this.detaggedText.append(ch, start, length);
            }
        }

        /** Appends the reported ignorable whitespace while collecting is active. */
        @Override
        public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException {
            if (this.insideTextTag) {
                this.detaggedText.append(ch, start, length);
            }
        }

        /**
         * Returns the text collected so far.
         *
         * @return the detagged text; empty when nothing was collected
         */
        String getDetaggedText() {
            return this.detaggedText.toString();
        }
    }
}

