/*
 * Decompiled with CFR 0.152.
 */
package org.apache.uima.conceptMapper.support.tokenizer;

import java.util.Locale;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.analysis_engine.annotator.AnnotatorConfigurationException;
import org.apache.uima.conceptMapper.support.stemmer.Stemmer;
import org.apache.uima.conceptMapper.support.tokenizer.TokenAnnotation;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;

/**
 * Annotator that splits a text into delimiter-separated tokens and records, for
 * each token, its begin/end character offsets in that text.
 *
 * <p>Each token's text may optionally be case-folded and/or stemmed before it is
 * stored on the {@link TokenAnnotation}; the offsets always refer to the
 * unmodified input text.
 *
 * <p>The tokenizer keeps its scan position in instance fields, so one instance
 * must not be used to tokenize two texts at the same time.
 */
public class OffsetTokenizer
extends JCasAnnotator_ImplBase {
    /**
     * Name of the configuration parameter that selects the case-folding mode.
     * Recognised values (compared case-insensitively) are {@code "insensitive"},
     * {@code "digitfold"} and {@code "ignoreall"}.
     */
    public static final String PARAM_CASE_MATCH = "caseMatch";
    /** Name of the configuration parameter holding the stemmer class name. */
    public static final String PARAM_STEMMER_CLASS = "Stemmer";
    /** Name of the configuration parameter holding the extra (non-whitespace) delimiter characters. */
    public static final String PARAM_TOKEN_DELIM = "tokenDelimiters";

    /** Matches a token made of one upper-case ASCII letter followed by one or more lower-case ASCII letters. */
    private static final Pattern INIT_CAP_PATTERN = Pattern.compile("^[A-Z][a-z]+$");
    /** Matches any ASCII digit. */
    private static final Pattern DIGIT_PATTERN = Pattern.compile("[0-9]");

    /** Text currently being tokenized; may be {@code null}. */
    private String text;
    /** Stemmer applied to tokens when stemming is enabled. */
    private Stemmer stemmer = null;
    /** Index in {@link #text} at which the next scan starts. */
    private int offset;
    /** Length of {@link #text}, or 0 when the text is {@code null}. */
    private int length;
    /** Whitespace characters that are prepended to the delimiters given to {@link #setDelim(String)}. */
    private String wsDelim = " \t\n\r\f";
    /** Default non-whitespace delimiter characters. */
    private String extraDelim = ",-/();:";
    /** Complete set of delimiter characters currently in effect. */
    private String delim = this.wsDelim + this.extraDelim;
    private boolean caseFoldInitCap = false;
    private boolean caseFoldDigit = false;
    private boolean caseFoldAll = false;
    private boolean stemTokens = false;

    /** Creates a tokenizer with the default delimiters, no case folding and no stemming. */
    public OffsetTokenizer() {
    }

    /**
     * @return the text currently being tokenized, possibly {@code null}
     */
    public String getText() {
        return this.text;
    }

    /**
     * Sets the text to tokenize and rewinds the scan position to its start.
     *
     * @param text the text to tokenize; {@code null} is treated as a text with no tokens
     */
    public void setText(String text) {
        this.text = text;
        this.offset = 0;
        // A null text must not blow up here: it simply yields no tokens.
        String current = this.getText();
        this.length = current == null ? 0 : current.length();
    }

    /**
     * @return the stemmer in use, or {@code null} if none has been set
     */
    public Stemmer getStemmer() {
        return this.stemmer;
    }

    /**
     * @param stemmer the stemmer to use when stemming is enabled
     */
    public void setStemmer(Stemmer stemmer) {
        this.stemmer = stemmer;
    }

    /**
     * Creates the annotation object used to represent one token.
     *
     * @param jcas the CAS the annotation is created in
     * @return a new, not yet indexed token annotation
     */
    public TokenAnnotation newToken(JCas jcas) {
        return new TokenAnnotation(jcas);
    }

    /**
     * Scans forward from the current position and returns the next token.
     *
     * @param jcas the CAS in which the token annotation is created
     * @return an annotation whose begin/end are the token's offsets in the text
     *         and whose text is the folded/stemmed token, or {@code null} when
     *         no further token exists
     */
    public TokenAnnotation nextToken(JCas jcas) {
        String source = this.getText();
        String delimiters = this.getDelim();

        // Skip the run of delimiter characters preceding the token.
        while (this.offset < this.length && delimiters.indexOf(source.charAt(this.offset)) >= 0) {
            ++this.offset;
        }
        if (this.offset >= this.length) {
            return null;
        }

        // Consume characters up to the next delimiter or the end of the text.
        int start = this.offset;
        while (this.offset < this.length && delimiters.indexOf(source.charAt(this.offset)) < 0) {
            ++this.offset;
        }

        TokenAnnotation returnVal = this.newToken(jcas);
        returnVal.setText(this.stem(this.foldCase(source.substring(start, this.offset))));
        returnVal.setBegin(start);
        returnVal.setEnd(this.offset);
        return returnVal;
    }

    /**
     * Case-folds the token if {@link #shouldFoldCase(String)} says so.
     *
     * @param token the raw token
     * @return the folded token, or the token unchanged
     */
    protected String foldCase(String token) {
        if (this.shouldFoldCase(token)) {
            return OffsetTokenizer.doFoldCase(token);
        }
        return token;
    }

    /**
     * Trims the token and converts it to lower case.
     *
     * <p>The conversion uses {@link Locale#ROOT} so that the result does not
     * depend on the JVM's default locale.
     *
     * @param token the token to fold
     * @return the trimmed, lower-cased token
     */
    public static String doFoldCase(String token) {
        return token.trim().toLowerCase(Locale.ROOT);
    }

    /**
     * Decides whether a token is to be case-folded under the configured mode.
     *
     * @param token the raw token
     * @return {@code true} if all tokens are folded, or initial-capital folding
     *         is on and the token is one upper-case letter followed by
     *         lower-case letters, or digit folding is on and the token contains
     *         a digit
     */
    public boolean shouldFoldCase(String token) {
        if (this.caseFoldAll) {
            return true;
        }
        if (this.caseFoldInitCap && INIT_CAP_PATTERN.matcher(token).matches()) {
            return true;
        }
        return this.caseFoldDigit && DIGIT_PATTERN.matcher(token).find();
    }

    /**
     * @return {@code true} if tokens are stemmed
     */
    public boolean shouldStem() {
        return this.stemTokens;
    }

    /**
     * Sets the delimiters to the built-in whitespace characters plus the given ones.
     *
     * @param delim the non-whitespace delimiter characters
     */
    protected void setDelim(String delim) {
        this.delim = this.wsDelim + delim;
    }

    /**
     * Replaces the complete delimiter set; no whitespace characters are added.
     *
     * @param delim the delimiter characters to use
     */
    protected void overrideDelim(String delim) {
        this.delim = delim;
    }

    private void setStemming(boolean flag) {
        this.stemTokens = flag;
    }

    private void setCaseFoldInitCap(boolean flag) {
        this.caseFoldInitCap = flag;
    }

    private void setCaseFoldDigit(boolean flag) {
        this.caseFoldDigit = flag;
    }

    private void setCaseFoldAll(boolean flag) {
        this.caseFoldAll = flag;
    }

    /**
     * @return the delimiter characters currently in effect
     */
    protected String getDelim() {
        return this.delim;
    }

    /**
     * @return {@code true} if tokens are stemmed
     */
    protected boolean getStemming() {
        return this.stemTokens;
    }

    /**
     * @return {@code true} if initial-capital tokens are case-folded
     */
    protected boolean getCaseFoldInitCap() {
        return this.caseFoldInitCap;
    }

    /**
     * @return {@code true} if tokens containing a digit are case-folded
     */
    protected boolean getCaseFoldDigit() {
        return this.caseFoldDigit;
    }

    /**
     * @return {@code true} if every token is case-folded
     */
    protected boolean getCaseFoldAll() {
        return this.caseFoldAll;
    }

    /**
     * Reads every configuration parameter from the context, applies them via
     * {@link #processAllConfigurationParameters(String[], Object[])} and then
     * calls {@link #initTokenizer(String[], Object[])}.
     *
     * @param uimaContext the context supplying the configuration parameters
     * @throws ResourceInitializationException if the superclass initialization
     *         fails or any exception is raised while processing the parameters
     */
    public void initialize(UimaContext uimaContext) throws ResourceInitializationException {
        super.initialize(uimaContext);
        try {
            String[] configParameterNames = uimaContext.getConfigParameterNames();
            Object[] configParameters = new Object[configParameterNames.length];
            for (int i = 0; i < configParameters.length; ++i) {
                configParameters[i] = uimaContext.getConfigParameterValue(configParameterNames[i]);
            }
            this.processAllConfigurationParameters(configParameterNames, configParameters);
            this.initTokenizer(configParameterNames, configParameters);
        }
        catch (Exception e) {
            throw new ResourceInitializationException((Throwable)e);
        }
    }

    /**
     * Applies each name/value pair through
     * {@link #processConfigurationParameter(String, Object)}.
     *
     * @param configParameterNames the parameter names
     * @param configParameters the parameter values, index-aligned with the names
     * @throws AnnotatorConfigurationException declared for overriding implementations
     */
    public void processAllConfigurationParameters(String[] configParameterNames, Object[] configParameters) throws AnnotatorConfigurationException {
        for (int i = 0; i < configParameterNames.length; ++i) {
            this.processConfigurationParameter(configParameterNames[i], configParameters[i]);
        }
    }

    /**
     * Tokenizes the document text of the given CAS with the current delimiters
     * and adds one token annotation per token to the indexes.
     *
     * @param jcas the CAS to process
     * @throws AnalysisEngineProcessException wrapping any exception raised during tokenization
     */
    public void process(JCas jcas) throws AnalysisEngineProcessException {
        try {
            this.doTokenization(jcas, jcas.getDocumentText(), this.getDelim());
        }
        catch (Exception e) {
            throw new AnalysisEngineProcessException((Throwable)e);
        }
    }

    /**
     * Hook called at the end of {@link #initialize(UimaContext)}; does nothing here.
     *
     * @param paramNames the configuration parameter names
     * @param paramValues the configuration parameter values, index-aligned with the names
     * @throws Exception declared for overriding implementations
     */
    public void initTokenizer(String[] paramNames, Object[] paramValues) throws Exception {
    }

    /**
     * Tokenizes the given text and adds every resulting annotation to the indexes.
     *
     * @param jcas the CAS in which annotations are created
     * @param documentText the text to tokenize; {@code null} yields no tokens
     * @param delimiters the complete delimiter set to use (replaces the current one)
     */
    protected void doTokenization(JCas jcas, String documentText, String delimiters) {
        this.overrideDelim(delimiters);
        this.setText(documentText);
        TokenAnnotation annotation = this.nextToken(jcas);
        while (annotation != null) {
            annotation.addToIndexes();
            annotation = this.nextToken(jcas);
        }
    }

    /**
     * Applies a single configuration parameter. Names other than
     * {@link #PARAM_CASE_MATCH}, {@link #PARAM_STEMMER_CLASS} and
     * {@link #PARAM_TOKEN_DELIM} are ignored, as are {@code null} values.
     *
     * <p>If the stemmer class cannot be instantiated the failure is reported on
     * {@code System.err} and stemming stays disabled.
     *
     * @param configParameterName the parameter name
     * @param configParameterValue the parameter value; expected to be a {@code String}
     */
    public void processConfigurationParameter(String configParameterName, Object configParameterValue) {
        if (PARAM_CASE_MATCH.equals(configParameterName)) {
            String caseSense = (String)configParameterValue;
            if (caseSense == null) {
                return;
            }
            if (caseSense.equalsIgnoreCase("insensitive")) {
                this.setCaseFoldInitCap(true);
            } else if (caseSense.equalsIgnoreCase("digitfold")) {
                this.setCaseFoldDigit(true);
            } else if (caseSense.equalsIgnoreCase("ignoreall")) {
                this.setCaseFoldAll(true);
            }
        } else if (PARAM_STEMMER_CLASS.equals(configParameterName) && configParameterValue != null) {
            try {
                Class<?> stemmerClass = Class.forName((String)configParameterValue);
                // Class.newInstance() is deprecated; go through the no-arg constructor instead.
                this.setStemmer((Stemmer)stemmerClass.getDeclaredConstructor().newInstance());
                this.setStemming(true);
            }
            catch (Exception e) {
                System.err.println("Exception trying to instantiate stemmer class: '" + (String)configParameterValue + "', original exception:" + e.getMessage());
                e.printStackTrace();
            }
        } else if (PARAM_TOKEN_DELIM.equals(configParameterName)) {
            String tokenDelimiters = (String)configParameterValue;
            if (tokenDelimiters != null) {
                this.setDelim(tokenDelimiters);
            }
        }
    }

    /**
     * Stems the token if stemming is enabled.
     *
     * @param token the (possibly case-folded) token
     * @return the stemmed token, or the token unchanged
     */
    protected String stem(String token) {
        if (this.shouldStem()) {
            return OffsetTokenizer.doStemming(token, this.getStemmer());
        }
        return token;
    }

    /**
     * Trims the token and passes it to the stemmer.
     *
     * @param token the token to stem
     * @param stemmer the stemmer to apply; must not be {@code null}
     * @return the stemmer's result for the trimmed token
     */
    public static String doStemming(String token, Stemmer stemmer) {
        return stemmer.stem(token.trim());
    }
}

