package de.tudarmstadt.ukp.jwktl.parser;

import de.tudarmstadt.ukp.jwktl.api.entry.WiktionaryPage;
import de.tudarmstadt.ukp.jwktl.api.util.ILanguage;
import de.tudarmstadt.ukp.jwktl.parser.util.IBlockHandler;
import de.tudarmstadt.ukp.jwktl.parser.util.ParsingContext;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.StringReader;
import java.util.LinkedList;
import java.util.List;
import java.util.logging.Logger;
import java.util.regex.Pattern;

/* loaded from: input_file:de/tudarmstadt/ukp/jwktl/parser/WiktionaryEntryParser.class */
/**
 * Base class for entry parsers that walk the text of a page line by line
 * and delegate each block of lines to one of the registered
 * {@link IBlockHandler}s.
 *
 * <p>Subclasses decide what counts as the start of a block
 * ({@link #isStartOfBlock(String)}), supply the {@link ParsingContext}
 * ({@link #createParsingContext(WiktionaryPage)}) and add handlers through
 * {@link #register(IBlockHandler)}.
 */
public abstract class WiktionaryEntryParser implements IWiktionaryEntryParser {
    private static final Logger logger = Logger.getLogger(WiktionaryEntryParser.class.getName());

    /**
     * Matches an HTML comment ({@code <!-- ... -->}), possibly spanning
     * several lines.
     *
     * <p>NOTE(review): in the decompiled source the excluded character of the
     * negated class was two U+FFFD replacement characters. That is what a NUL
     * stored in class-file modified UTF-8 (bytes 0xC0 0x80) turns into when
     * decoded as standard UTF-8, so the class is restored as "anything but
     * NUL" using the regex escape {@code \x00}. Confirm against the upstream
     * source if it is available.
     */
    protected static final Pattern COMMENT_PATTERN = Pattern.compile("\\<!--((?!--\\>)[^\\x00])*?--\\>");

    /** Matches an image link of the form {@code [[Image:name|...]]}. */
    protected static final Pattern IMAGE_PATTERN = Pattern.compile("\\[\\[Image:([^\\]]+?)\\|[^\\]]+?\\]\\]");

    /**
     * Matches a {@code <ref ...>...</ref>} pair. The pattern is compiled
     * without DOTALL, so the content between the tags cannot cross a line
     * terminator.
     */
    protected static final Pattern REFERENCES_PATTERN = Pattern.compile("<ref[^>]*>.+?</ref>");

    /** Language passed to the constructor; returned by {@link #getLanguage()}. */
    protected ILanguage language;

    /** Prefix that identifies a redirect page: {@code "#" + keyword + " [["}. */
    protected String redirectTemplate;

    /** Registered handlers, consulted in registration order by {@link #selectHandler(String)}. */
    protected List<IBlockHandler> handlers = new LinkedList<IBlockHandler>();

    /** Not read or written by this class; available to subclasses. */
    protected long entryId = 0;

    /* loaded from: input_file:de/tudarmstadt/ukp/jwktl/parser/WiktionaryEntryParser$ParseStatus.class */
    /** Position of the line loop in {@link #parse(WiktionaryPage, String)} relative to the current block. */
    private enum ParseStatus {
        IN_BODY,
        IN_HEAD
    }

    /**
     * Creates a parser for the given language.
     *
     * @param iLanguage the language stored in {@link #language}
     * @param str the redirect keyword; the redirect prefix is built as
     *            {@code "#" + str + " [["}
     */
    public WiktionaryEntryParser(ILanguage iLanguage, String str) {
        this.language = iLanguage;
        this.redirectTemplate = "#" + str + " [[";
    }

    /**
     * Parses the text of a page.
     *
     * <p>If the text is a redirect (see
     * {@link #checkForRedirect(WiktionaryPage, String)}) nothing else happens.
     * Otherwise comments, image links and references are removed and the
     * remaining text is read line by line. Each line is trimmed and handed to
     * the handlers with a trailing {@code "\n"} appended.
     *
     * @param wiktionaryPage the page being filled
     * @param str the raw page text
     * @throws RuntimeException wrapping any {@link IOException} raised while
     *         reading the text
     */
    @Override // de.tudarmstadt.ukp.jwktl.parser.IWiktionaryEntryParser
    public void parse(WiktionaryPage wiktionaryPage, String str) {
        if (checkForRedirect(wiktionaryPage, str)) {
            return;
        }
        // Strip in the same order as before: comments, then images, then references.
        String cleaned = COMMENT_PATTERN.matcher(str).replaceAll("");
        cleaned = IMAGE_PATTERN.matcher(cleaned).replaceAll("");
        cleaned = REFERENCES_PATTERN.matcher(cleaned).replaceAll("");
        BufferedReader reader = new BufferedReader(new StringReader(cleaned));
        try {
            ParsingContext context = createParsingContext(wiktionaryPage);
            String line = reader.readLine();
            // Handler chosen for the most recent block start (may be null).
            IBlockHandler handler = null;
            // Handler whose head was accepted and whose content is not yet filled.
            IBlockHandler openHandler = null;
            ParseStatus status = ParseStatus.IN_HEAD;
            // Becomes true once processBody has returned false for the open block.
            boolean bodyFinished = false;
            while (line != null) {
                line = line.trim();
                String lineWithBreak = line + "\n";
                if (status == ParseStatus.IN_HEAD) {
                    if (isStartOfBlock(line)) {
                        handler = selectHandler(line);
                        logger.fine("preprocessing " + line + " worker is " + handler);
                    }
                    if (handler != null && handler.processHead(lineWithBreak, context)) {
                        logger.fine("processing " + line);
                        status = ParseStatus.IN_BODY;
                        openHandler = handler;
                    }
                    line = reader.readLine();
                } else {
                    boolean nextBlockStarts;
                    if (!bodyFinished) {
                        // The handler sees the line first; the block-start check
                        // only runs when the handler reports the body as finished.
                        bodyFinished = !handler.processBody(lineWithBreak, context);
                        nextBlockStarts = bodyFinished && isStartOfBlock(line);
                    } else {
                        // Body already reported finished: lines that do not start
                        // a block are still passed on, their result is ignored.
                        nextBlockStarts = isStartOfBlock(line);
                        if (!nextBlockStarts) {
                            handler.processBody(lineWithBreak, context);
                        }
                    }
                    if (nextBlockStarts) {
                        // Close the block and re-examine the same line as a head.
                        handler.fillContent(context);
                        openHandler = null;
                        handler = null;
                        status = ParseStatus.IN_HEAD;
                        bodyFinished = false;
                    } else {
                        line = reader.readLine();
                    }
                }
            }
            // End of input: flush the block that was still open, if any.
            if (openHandler != null) {
                openHandler.fillContent(context);
            }
        } catch (IOException e) {
            throw new RuntimeException("Error while parsing text of article " + wiktionaryPage.getTitle(), e);
        }
    }

    /**
     * Creates the context object handed to every handler call made during
     * one {@link #parse(WiktionaryPage, String)} run.
     *
     * @param wiktionaryPage the page being parsed
     * @return the context for this page
     */
    protected abstract ParsingContext createParsingContext(WiktionaryPage wiktionaryPage);

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Detects a redirect page and records its target.
     *
     * <p>The text is a redirect when it starts with {@link #redirectTemplate}
     * and ends with {@code "]]"}; the text between the two is stored via
     * {@link WiktionaryPage#setRedirectTarget}.
     *
     * @param wiktionaryPage the page that receives the redirect target
     * @param str the raw page text
     * @return {@code true} if the text was a redirect, {@code false} otherwise
     */
    public boolean checkForRedirect(WiktionaryPage wiktionaryPage, String str) {
        boolean isRedirect = str.endsWith("]]") && str.startsWith(this.redirectTemplate);
        if (!isRedirect) {
            return false;
        }
        int targetStart = this.redirectTemplate.length();
        int targetEnd = str.length() - 2; // drop the closing "]]"
        wiktionaryPage.setRedirectTarget(str.substring(targetStart, targetEnd));
        return true;
    }

    /**
     * Tells whether the given (trimmed) line begins a new block.
     *
     * @param str the trimmed line, without the appended line break
     * @return {@code true} if the line starts a block
     */
    protected abstract boolean isStartOfBlock(String str);

    /**
     * Returns the first registered handler whose {@code canHandle} accepts
     * the line.
     *
     * @param str the trimmed line that starts a block
     * @return the matching handler, or {@code null} if none accepts the line
     */
    protected IBlockHandler selectHandler(String str) {
        for (IBlockHandler candidate : this.handlers) {
            if (candidate.canHandle(str)) {
                return candidate;
            }
        }
        return null;
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Appends a handler to the end of the handler list.
     *
     * @param iBlockHandler the handler to add
     */
    public void register(IBlockHandler iBlockHandler) {
        this.handlers.add(iBlockHandler);
    }

    /**
     * @return the language passed to the constructor
     */
    public ILanguage getLanguage() {
        return this.language;
    }
}
