package edu.stanford.nlp.international.arabic.process;

import edu.stanford.nlp.io.RuntimeIOException;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.objectbank.TokenizerFactory;
import edu.stanford.nlp.process.AbstractTokenizer;
import edu.stanford.nlp.process.CoreLabelTokenFactory;
import edu.stanford.nlp.process.LexedTokenFactory;
import edu.stanford.nlp.process.Tokenizer;
import edu.stanford.nlp.util.StringUtils;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
import java.nio.charset.StandardCharsets;
import java.util.Enumeration;
import java.util.Iterator;
import java.util.Properties;

/* loaded from: input_file:edu/stanford/nlp/international/arabic/process/ArabicTokenizer.class */
/**
 * Tokenizer that reads characters from a {@link Reader} and returns the tokens
 * produced by an {@link ArabicLexer}, skipping any token whose word is empty.
 *
 * <p>Instances are normally obtained through {@link #factory()},
 * {@link #atbFactory()} or {@link #newArabicTokenizer(Reader, Properties)}.
 *
 * @param <T> the token type produced by the underlying lexer
 */
public class ArabicTokenizer<T extends HasWord> extends AbstractTokenizer<T> {
    /**
     * Lexer option string installed by {@link #atbFactory()}.
     * NOTE(review): "ATB" presumably stands for Arabic Treebank -- confirm.
     */
    private static final String ATB_OPTIONS = "normArDigits=true,normArPunc=true,normAlif=true,removeDiacritics=true,removeTatweel=true,removeQuranChars=true";

    /** Lexer that supplies the raw token stream for this tokenizer. */
    private final ArabicLexer lexer;

    /**
     * Factory that builds {@link ArabicTokenizer} instances sharing one token
     * factory and one (mutable) set of lexer properties.
     *
     * @param <T> the token type produced by the created tokenizers
     */
    public static class ArabicTokenizerFactory<T extends HasWord> implements TokenizerFactory<T> {
        /** Token factory handed to every tokenizer this factory creates. */
        protected final LexedTokenFactory<T> factory;

        /** Accumulated lexer options; {@code null} until {@link #setOptions(String)} is first called. */
        protected Properties lexerProperties = null;

        /**
         * Creates a factory whose tokenizers emit {@link CoreLabel} tokens.
         *
         * @return a new factory with no lexer options set
         */
        public static TokenizerFactory<CoreLabel> newTokenizerFactory() {
            return new ArabicTokenizerFactory<>(new CoreLabelTokenFactory());
        }

        /**
         * @param lexedTokenFactory token factory passed on to each created tokenizer
         */
        private ArabicTokenizerFactory(LexedTokenFactory<T> lexedTokenFactory) {
            this.factory = lexedTokenFactory;
        }

        /**
         * Returns a tokenizer over {@code reader}, viewed as a plain iterator.
         *
         * @param reader source of the text to tokenize
         * @return the same object {@link #getTokenizer(Reader)} would return
         */
        @Override // edu.stanford.nlp.objectbank.IteratorFromReaderFactory
        public Iterator<T> getIterator(Reader reader) {
            return getTokenizer(reader);
        }

        /**
         * Creates a tokenizer over {@code reader} using the options set so far.
         *
         * @param reader source of the text to tokenize
         * @return a new tokenizer
         */
        @Override // edu.stanford.nlp.objectbank.TokenizerFactory
        public Tokenizer<T> getTokenizer(Reader reader) {
            return new ArabicTokenizer<>(reader, this.factory, this.lexerProperties);
        }

        /**
         * Parses {@code str} with {@link StringUtils#stringToProperties(String)}
         * and merges the result into this factory's lexer properties. Keys that
         * are already present are overwritten; other existing keys are kept.
         *
         * @param str option string to parse
         */
        @Override // edu.stanford.nlp.objectbank.TokenizerFactory
        public void setOptions(String str) {
            Properties parsed = StringUtils.stringToProperties(str);
            if (this.lexerProperties == null) {
                // First call: adopt the parsed properties as-is.
                this.lexerProperties = parsed;
                return;
            }
            for (String key : parsed.stringPropertyNames()) {
                this.lexerProperties.setProperty(key, parsed.getProperty(key));
            }
        }

        /**
         * Applies {@code str} via {@link #setOptions(String)} and then creates a
         * tokenizer. The options remain set on this factory for later calls.
         *
         * @param reader source of the text to tokenize
         * @param str option string to merge before creating the tokenizer
         * @return a new tokenizer
         */
        @Override // edu.stanford.nlp.objectbank.TokenizerFactory
        public Tokenizer<T> getTokenizer(Reader reader, String str) {
            setOptions(str);
            return getTokenizer(reader);
        }
    }

    /**
     * Creates a tokenizer that emits {@link CoreLabel} tokens.
     *
     * @param reader source of the text to tokenize
     * @param properties lexer options, passed through to {@link ArabicLexer}
     * @return a new tokenizer
     */
    public static ArabicTokenizer<CoreLabel> newArabicTokenizer(Reader reader, Properties properties) {
        return new ArabicTokenizer<>(reader, new CoreLabelTokenFactory(), properties);
    }

    /**
     * Creates a tokenizer backed by a new {@link ArabicLexer}.
     *
     * @param reader source of the text to tokenize
     * @param lexedTokenFactory factory the lexer uses to build tokens
     * @param properties lexer options, passed through to {@link ArabicLexer}
     */
    public ArabicTokenizer(Reader reader, LexedTokenFactory<T> lexedTokenFactory, Properties properties) {
        this.lexer = new ArabicLexer(reader, lexedTokenFactory, properties);
    }

    /**
     * Returns the next token whose word is non-empty, or {@code null} once the
     * lexer returns {@code null}.
     *
     * <p>NOTE(review): the decompiler reports this method was originally
     * {@code protected}; it is left {@code public} so existing callers keep
     * compiling.
     *
     * @return the next non-empty token, or {@code null} when the lexer is exhausted
     * @throws RuntimeIOException if the lexer throws an {@link IOException}
     */
    @Override // edu.stanford.nlp.process.AbstractTokenizer
    @SuppressWarnings("unchecked") // the lexer builds its tokens with the LexedTokenFactory<T> given to the constructor
    public T getNext() {
        try {
            T token;
            // Empty-word tokens are skipped; presumably they arise when the
            // lexer's normalization options strip every character of a token
            // -- confirm against ArabicLexer.
            do {
                token = (T) this.lexer.next();
            } while (token != null && token.word().length() == 0);
            return token;
        } catch (IOException e) {
            throw new RuntimeIOException(e);
        }
    }

    /**
     * @return a {@link CoreLabel} tokenizer factory with no lexer options set
     */
    public static TokenizerFactory<CoreLabel> factory() {
        return ArabicTokenizerFactory.newTokenizerFactory();
    }

    /**
     * @return a {@link CoreLabel} tokenizer factory preconfigured with the
     *     built-in ATB option string
     */
    public static TokenizerFactory<CoreLabel> atbFactory() {
        TokenizerFactory<CoreLabel> atb = ArabicTokenizerFactory.newTokenizerFactory();
        atb.setOptions(ATB_OPTIONS);
        return atb;
    }

    /**
     * Command-line entry point: tokenizes UTF-8 text from standard input and
     * prints the tokens to standard output. Each token is followed by a single
     * space, except a token equal to the platform line separator, which is
     * printed on its own.
     *
     * @param strArr exactly one argument: {@code -atb} to use the ATB options,
     *     or an option string passed to {@link TokenizerFactory#setOptions(String)}
     */
    public static void main(String[] strArr) {
        if (strArr.length != 1) {
            System.err.printf("Usage: java %s [-atb|tokenizer_opts] < lines%n", ArabicTokenizer.class.getName());
            System.exit(-1);
        }
        final String options = strArr[0];
        final TokenizerFactory<CoreLabel> tokenizerFactory;
        if (options.equals("-atb")) {
            tokenizerFactory = atbFactory();
        } else {
            tokenizerFactory = factory();
            tokenizerFactory.setOptions(options);
        }

        final String lineSeparator = System.getProperty("line.separator");
        // StandardCharsets.UTF_8 avoids the checked UnsupportedEncodingException
        // that the charset-name constructor declares.
        Tokenizer<CoreLabel> tokenizer =
                tokenizerFactory.getTokenizer(new InputStreamReader(System.in, StandardCharsets.UTF_8));
        while (tokenizer.hasNext()) {
            String word = tokenizer.next().word();
            System.out.print(word);
            if (!word.equals(lineSeparator)) {
                System.out.print(" ");
            }
        }
    }
}
