package de.tudarmstadt.ukp.jwktl.parser;

import de.tudarmstadt.ukp.jwktl.api.WiktionaryException;
import de.tudarmstadt.ukp.jwktl.api.util.ILanguage;
import de.tudarmstadt.ukp.jwktl.api.util.Language;
import de.tudarmstadt.ukp.jwktl.parser.XMLDumpParser;
import de.tudarmstadt.ukp.jwktl.parser.util.DumpInfo;
import de.tudarmstadt.ukp.jwktl.parser.util.IDumpInfo;
import java.io.File;
import java.io.IOException;
import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.TimeZone;
import java.util.logging.Logger;

/* loaded from: input_file:de/tudarmstadt/ukp/jwktl/parser/WiktionaryDumpParser.class */
/**
 * XML dump parser that forwards the structural events and page fields of a
 * Wiktionary dump to every registered {@link IWiktionaryPageParser}.
 *
 * <p>The parser tracks whether it is currently inside a {@code <page>}
 * element, collects dump-level information (language derived from the
 * {@code <base>} URL, namespaces) in a {@link DumpInfo}, and broadcasts
 * page id, title, revision id, timestamp, author and text to the registered
 * page parsers as the corresponding elements are closed.
 *
 * <p>Instances hold mutable parsing state (including a {@link DateFormat})
 * and are therefore not meant to be shared between threads.
 */
public class WiktionaryDumpParser extends XMLDumpParser implements IWiktionaryMultistreamDumpParser {
    private static final Logger logger = Logger.getLogger(WiktionaryDumpParser.class.getName());

    /** A progress message is logged each time this many pages have been processed. */
    private static final int PAGES_PER_PROGRESS_LOG = 25000;

    /** Page parsers that receive all events, in registration order. */
    protected List<IWiktionaryPageParser> parserRegistry;

    /** {@code true} between the start and the end of a {@code <page>} element. */
    protected boolean inPage;

    /** Dump-level information; created anew by each parse call on a file. */
    protected DumpInfo dumpInfo;

    /** Format of revision timestamps, e.g. {@code 2011-03-04T05:06:07Z}; interpreted as UTC. */
    protected DateFormat timestampFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'", Locale.US);

    /**
     * Creates a dump parser and registers the given page parsers.
     *
     * @param iWiktionaryPageParserArr the page parsers to notify, in order
     */
    public WiktionaryDumpParser(IWiktionaryPageParser... iWiktionaryPageParserArr) {
        this.timestampFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
        this.parserRegistry = new LinkedList<>();
        for (IWiktionaryPageParser pageParser : iWiktionaryPageParserArr) {
            register(pageParser);
        }
    }

    /**
     * Adds a page parser to the end of the registry.
     *
     * @param iWiktionaryPageParser the page parser to notify from now on
     */
    @Override
    public void register(IWiktionaryPageParser iWiktionaryPageParser) {
        this.parserRegistry.add(iWiktionaryPageParser);
    }

    /**
     * Returns the registered page parsers. The returned iterable is the
     * internal registry itself, not a copy.
     */
    @Override
    public Iterable<IWiktionaryPageParser> getPageParsers() {
        return this.parserRegistry;
    }

    /**
     * Parses the given dump file and afterwards notifies all page parsers
     * that the dump has been closed. The close notification is only sent
     * when parsing completes without an exception.
     *
     * @param file the dump file to parse
     * @throws WiktionaryException if the underlying parser fails
     */
    @Override
    public void parse(File file) throws WiktionaryException {
        this.dumpInfo = new DumpInfo(file, this);
        super.parse(file);
        onClose();
    }

    /**
     * Parses a multistream dump via {@link MultistreamXMLDumpParser} and
     * afterwards notifies all page parsers that the dump has been closed.
     *
     * @param file the first file handed to the multistream parser; also used
     *        to initialize the dump information
     * @param file2 the second file handed to the multistream parser
     * @param multistreamFilter the filter handed to the multistream parser
     * @throws WiktionaryException if an {@link IOException} occurs; the
     *         original exception is preserved as the cause
     */
    @Override
    public void parseMultistream(File file, File file2, MultistreamFilter multistreamFilter) throws WiktionaryException {
        this.dumpInfo = new DumpInfo(file, this);
        try {
            new MultistreamXMLDumpParser(this).parseMultistream(file, file2, multistreamFilter);
            onClose();
        } catch (IOException e) {
            throw new WiktionaryException(e);
        }
    }

    /**
     * Resets the parsing state and the dump information, then notifies all
     * page parsers that parsing starts.
     */
    @Override
    public void onParserStart() {
        super.onParserStart();
        this.inPage = false;
        this.dumpInfo.reset();
        for (IWiktionaryPageParser pageParser : this.parserRegistry) {
            pageParser.onParserStart(this.dumpInfo);
        }
    }

    /** Notifies all page parsers that the {@code <siteinfo>} element has been closed. */
    protected void onSiteInfoComplete() {
        for (IWiktionaryPageParser pageParser : this.parserRegistry) {
            pageParser.onSiteInfoComplete(this.dumpInfo);
        }
    }

    /** Notifies all page parsers that parsing has ended. */
    @Override
    public void onParserEnd() {
        super.onParserEnd();
        for (IWiktionaryPageParser pageParser : this.parserRegistry) {
            pageParser.onParserEnd(this.dumpInfo);
        }
    }

    /** Notifies all page parsers that the dump has been closed. */
    protected void onClose() {
        for (IWiktionaryPageParser pageParser : this.parserRegistry) {
            pageParser.onClose(this.dumpInfo);
        }
    }

    /**
     * Marks the beginning of a page when a {@code <page>} element is opened;
     * all other elements are ignored here.
     */
    @Override
    protected void onElementStart(String str, XMLDumpParser.XMLDumpHandler xMLDumpHandler) {
        if ("page".equals(str)) {
            this.inPage = true;
            onPageStart();
        }
    }

    /**
     * Handles a closed element: dump-level elements ({@code base},
     * {@code namespace}, {@code siteinfo}, {@code page}) are processed first;
     * if the parser is still inside a page afterwards, the element is
     * dispatched as a page field.
     *
     * @throws NumberFormatException if a page or revision id is not a valid
     *         {@code long}
     */
    @Override
    protected void onElementEnd(String str, XMLDumpParser.XMLDumpHandler xMLDumpHandler) {
        if ("base".equals(str)) {
            setBaseURL(xMLDumpHandler.getContents());
        } else if ("namespace".equals(str) && xMLDumpHandler.hasContents()) {
            addNamespace(xMLDumpHandler.getContents());
        } else if ("siteinfo".equals(str)) {
            onSiteInfoComplete();
        } else if ("page".equals(str)) {
            this.inPage = false;
            onPageEnd();
        }

        if (this.inPage) {
            dispatchPageElement(str, xMLDumpHandler);
        }
    }

    /**
     * Forwards a closed element inside a page to the matching setter,
     * depending on the element's parent ({@code page}, {@code revision} or
     * {@code contributor}). Unknown elements are ignored.
     */
    private void dispatchPageElement(String name, XMLDumpParser.XMLDumpHandler handler) {
        String parent = handler.getParent();
        if ("page".equals(parent)) {
            if ("id".equals(name)) {
                setPageId(Long.parseLong(handler.getContents()));
            } else if ("title".equals(name)) {
                setTitle(handler.getContents());
            }
        } else if ("revision".equals(parent)) {
            if ("id".equals(name)) {
                // Parsed as long because setRevision accepts a long; parsing
                // as int would reject ids above Integer.MAX_VALUE.
                setRevision(Long.parseLong(handler.getContents()));
            } else if ("timestamp".equals(name)) {
                String contents = handler.getContents();
                try {
                    setTimestamp(parseTimestamp(contents));
                } catch (ParseException e) {
                    // Best effort: an unparsable timestamp is reported as null
                    // instead of aborting the whole dump.
                    logger.fine("Unparsable revision timestamp '" + contents + "': " + e.getMessage());
                    setTimestamp(null);
                }
            } else if ("text".equals(name)) {
                setText(handler.getContents());
            }
        } else if ("contributor".equals(parent) && "username".equals(name)) {
            setAuthor(handler.getContents());
        }
    }

    /** Notifies all page parsers that a new page begins. */
    protected void onPageStart() {
        this.parserRegistry.forEach(IWiktionaryPageParser::onPageStart);
    }

    /**
     * Notifies all page parsers that the current page has ended, increments
     * the processed-page counter and logs progress at regular intervals.
     */
    protected void onPageEnd() {
        this.parserRegistry.forEach(IWiktionaryPageParser::onPageEnd);
        this.dumpInfo.incrementProcessedPages();
        if (this.dumpInfo.getProcessedPages() % PAGES_PER_PROGRESS_LOG == 0) {
            logger.info("Parsed " + this.dumpInfo.getProcessedPages() + " pages");
        }
    }

    /**
     * Derives the dump language from the base URL and stores it in the dump
     * information.
     *
     * @param str the contents of the {@code <base>} element
     */
    protected void setBaseURL(String str) {
        this.dumpInfo.setDumpLanguage(resolveLanguage(str));
    }

    /**
     * Resolves the language of a dump from its base URL by looking up the
     * first host label (the text between {@code "://"} and the next
     * {@code '.'}, {@code ':'} or {@code '/'}) with
     * {@link Language#findByCode(String)}. If the URL has no {@code "://"},
     * the label is taken from the start of the string.
     *
     * <p>For two-letter subdomains such as {@code http://en.wiktionary.org/}
     * this yields the same code as taking the two characters after the
     * scheme; longer subdomains are looked up in full instead of being
     * truncated to two characters.
     *
     * @param str the base URL; may be {@code null}
     * @return the result of the code lookup, or {@code null} if no host
     *         label could be extracted
     */
    protected static ILanguage resolveLanguage(String str) {
        if (str == null) {
            return null;
        }
        int schemeEnd = str.indexOf("://");
        int labelStart = schemeEnd < 0 ? 0 : schemeEnd + 3;
        int labelEnd = labelStart;
        while (labelEnd < str.length() && ".:/".indexOf(str.charAt(labelEnd)) < 0) {
            labelEnd++;
        }
        if (labelEnd == labelStart) {
            return null;
        }
        return Language.findByCode(str.substring(labelStart, labelEnd));
    }

    /**
     * Registers a namespace name in the dump information.
     *
     * @param str the contents of a {@code <namespace>} element
     */
    protected void addNamespace(String str) {
        this.dumpInfo.addNamespace(str);
    }

    /** Passes the author (contributor user name) of the current revision to all page parsers. */
    protected void setAuthor(String str) {
        for (IWiktionaryPageParser pageParser : this.parserRegistry) {
            pageParser.setAuthor(str);
        }
    }

    /** Passes the revision id of the current page to all page parsers. */
    protected void setRevision(long j) {
        for (IWiktionaryPageParser pageParser : this.parserRegistry) {
            pageParser.setRevision(j);
        }
    }

    /**
     * Passes the revision timestamp to all page parsers.
     *
     * @param date the timestamp, or {@code null} if it could not be parsed
     */
    protected void setTimestamp(Date date) {
        for (IWiktionaryPageParser pageParser : this.parserRegistry) {
            pageParser.setTimestamp(date);
        }
    }

    /** Passes the id of the current page to all page parsers. */
    protected void setPageId(long j) {
        for (IWiktionaryPageParser pageParser : this.parserRegistry) {
            pageParser.setPageId(j);
        }
    }

    /**
     * Splits a raw page title into namespace and title and passes both to
     * all page parsers. The text before the first colon is treated as a
     * namespace only if the dump information knows it; otherwise the title
     * is passed unchanged with a {@code null} namespace.
     *
     * @param str the raw contents of the {@code <title>} element
     */
    protected void setTitle(String str) {
        String title = str;
        String namespace = null;
        int colon = str.indexOf(':');
        if (colon >= 0) {
            String prefix = str.substring(0, colon);
            if (this.dumpInfo.hasNamespace(prefix)) {
                namespace = prefix;
                title = str.substring(colon + 1);
            }
        }
        for (IWiktionaryPageParser pageParser : this.parserRegistry) {
            pageParser.setTitle(title, namespace);
        }
    }

    /** Passes the text of the current revision to all page parsers. */
    protected void setText(String str) {
        for (IWiktionaryPageParser pageParser : this.parserRegistry) {
            pageParser.setText(str);
        }
    }

    /**
     * Parses a revision timestamp using {@link #timestampFormat}.
     *
     * @param str the timestamp text
     * @return the parsed date
     * @throws ParseException if the text does not match the format
     */
    protected Date parseTimestamp(String str) throws ParseException {
        return this.timestampFormat.parse(str);
    }

    /**
     * Returns the dump information of the most recent parse call, or
     * {@code null} if no file has been parsed yet.
     */
    public IDumpInfo getDumpInfo() {
        return this.dumpInfo;
    }
}
