package edu.stanford.nlp.international.arabic;

import edu.stanford.nlp.international.arabic.pipeline.DefaultLexicalMapper;
import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.trees.international.arabic.ATBTreeUtils;
import edu.stanford.nlp.util.Function;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/* loaded from: input_file:edu/stanford/nlp/international/arabic/IBMArabicEscaper.class */
/**
 * Strips IBM MT-style annotations from Arabic tokens and, unless configured
 * otherwise, additionally passes them through {@link ATBTreeUtils#escape} and a
 * {@link DefaultLexicalMapper}.
 *
 * <p>Three annotation forms are removed from tokens longer than one character:
 * a whole-token class wrapper matching {@code $name_(content)} (replaced by its
 * content), a leading {@code '+'}, and a trailing {@code '#'}. The first time each
 * kind of removal happens a notice is printed to {@code System.err}; call
 * {@link #disableWarnings()} to suppress the notices.
 *
 * <p>Instances carry mutable "already warned" flags with no synchronization.
 */
public class IBMArabicEscaper implements Function<List<HasWord>, List<HasWord>> {
    /** Whole-token IBM class wrapper, e.g. {@code $num_(123)}; group 1 is the wrapped content. */
    private static final Pattern pEnt = Pattern.compile("\\$[a-z]+_\\((.*?)\\)");

    /** True once the class-wrapper notice has been printed (or warnings were disabled). */
    private boolean warnedEntityEscaping;
    /** True once the proclitic/enclitic notice has been printed (or warnings were disabled). */
    private boolean warnedProcliticEnclitic;
    private final DefaultLexicalMapper lexMapper;
    /** When true, {@link #apply(String)} only strips annotations and skips escaping/mapping. */
    private final boolean annotationsAndClassingOnly;

    /** Creates an escaper that strips annotations and also escapes/maps tokens. */
    public IBMArabicEscaper() {
        this(false);
    }

    /**
     * @param annotationsAndClassingOnly if {@code true}, {@link #apply(String)} only removes
     *        the IBM annotations; if {@code false}, it also escapes and lexically maps the token
     */
    public IBMArabicEscaper(boolean annotationsAndClassingOnly) {
        this.warnedEntityEscaping = false;
        this.warnedProcliticEnclitic = false;
        this.annotationsAndClassingOnly = annotationsAndClassingOnly;
        this.lexMapper = new DefaultLexicalMapper();
    }

    /** Suppresses the one-time notices on {@code System.err} by marking them as already issued. */
    public void disableWarnings() {
        this.warnedEntityEscaping = true;
        this.warnedProcliticEnclitic = true;
    }

    /**
     * Strips annotations from a token, then escapes it and, if escaping left it unchanged,
     * runs it through the lexical mapper.
     *
     * @param str the raw token
     * @return the first non-empty result, in order of preference: the escaped form when
     *         escaping changed the token, the lexically mapped form, the escaped form, or
     *         the stripped token when escaping produced an empty string
     */
    private String escapeString(String str) {
        String stripped = stripAnnotationsAndClassing(str);
        String escaped = ATBTreeUtils.escape(stripped);
        if (escaped.length() == 0) {
            // Escaping consumed the whole token; fall back to the un-escaped form.
            return stripped;
        }
        if (!stripped.equals(escaped)) {
            // Escaping changed the token, so it is returned without lexical mapping.
            return escaped;
        }
        String mapped = this.lexMapper.map(null, escaped);
        return mapped.length() == 0 ? escaped : mapped;
    }

    /**
     * Removes at most one IBM MT-style annotation from a token. Tokens of length 0 or 1
     * are returned unchanged.
     *
     * @param str the raw token
     * @return the token without its class wrapper, leading {@code '+'} or trailing
     *         {@code '#'}; the original token if stripping would leave it empty
     */
    private String stripAnnotationsAndClassing(String str) {
        String result = str;
        int length = str.length();
        if (length > 1) {
            Matcher matcher = pEnt.matcher(str);
            if (matcher.matches()) {
                if (!this.warnedEntityEscaping) {
                    System.err.printf("%s: Removing IBM MT-style classing: %s --> %s\n", getClass().getName(), matcher.group(0), matcher.group(1));
                    this.warnedEntityEscaping = true;
                }
                result = matcher.replaceAll("$1");
            } else if (str.charAt(0) == '+') {
                warnProcliticEnclitic();
                result = str.substring(1);
            } else if (str.charAt(length - 1) == '#') {
                warnProcliticEnclitic();
                result = str.substring(0, length - 1);
            }
        }
        // Never hand back an empty token (e.g. "$x_()" would otherwise become "").
        return result.length() == 0 ? str : result;
    }

    /** Prints the proclitic/enclitic removal notice the first time it is needed. */
    private void warnProcliticEnclitic() {
        if (!this.warnedProcliticEnclitic) {
            this.warnedProcliticEnclitic = true;
            System.err.printf("%s: Removing IBM MT-style proclitic/enclitic indicators\n", getClass().getName());
        }
    }

    /**
     * Applies {@link #apply(String)} to the word of every element.
     *
     * <p>The returned list is a new list, but it holds the same {@link HasWord} objects as
     * the input, and those objects are modified in place via {@code setWord}.
     *
     * @param list the sentence to process
     * @return a new list containing the (mutated) elements of {@code list}
     */
    @Override // edu.stanford.nlp.util.Function
    public List<HasWord> apply(List<HasWord> list) {
        List<HasWord> escaped = new ArrayList<HasWord>(list);
        for (HasWord word : escaped) {
            word.setWord(apply(word.word()));
        }
        return escaped;
    }

    /**
     * Processes a single token according to the mode chosen at construction time.
     *
     * @param str the raw token
     * @return the interned processed token
     * @throws RuntimeException if the processed token is empty
     */
    public String apply(String str) {
        String processed = this.annotationsAndClassingOnly ? stripAnnotationsAndClassing(str) : escapeString(str);
        if (processed.length() == 0) {
            throw new RuntimeException(String.format("Word (%s) mapped to null", str));
        }
        return processed.intern();
    }

    /**
     * Escapes each file named on the command line, reading and writing UTF-8.
     *
     * <p>Each argument is an input file whose whitespace-separated tokens are escaped and
     * written, one line per input line, to {@code <file>.sent}. The flag {@code -f} switches
     * the output of all following files to standard output.
     *
     * @param strArr input file names, optionally interleaved with {@code -f}
     * @throws IOException if a file cannot be opened or read
     */
    public static void main(String[] strArr) throws IOException {
        IBMArabicEscaper escaper = new IBMArabicEscaper();
        boolean toStdout = false;
        for (String arg : strArr) {
            if ("-f".equals(arg)) {
                toStdout = true;
                continue;
            }
            BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(arg), "UTF-8"));
            try {
                PrintWriter writer;
                if (toStdout) {
                    writer = new PrintWriter(new BufferedWriter(new OutputStreamWriter(System.out, "UTF-8")));
                } else {
                    writer = new PrintWriter(new BufferedWriter(new OutputStreamWriter(new FileOutputStream(arg + ".sent"), "UTF-8")));
                }
                try {
                    String line;
                    while ((line = reader.readLine()) != null) {
                        String[] tokens = line.split("\\s+");
                        for (int i = 0; i < tokens.length; i++) {
                            if (i > 0) {
                                writer.print(" ");
                            }
                            writer.print(escaper.escapeString(tokens[i]));
                        }
                        writer.println();
                    }
                } finally {
                    if (toStdout) {
                        // Closing the wrapper would close System.out and silently drop the
                        // output of every subsequent file; flush instead.
                        writer.flush();
                    } else {
                        writer.close();
                    }
                }
            } finally {
                reader.close();
            }
        }
    }
}
