package com.google.refine.importers;

import com.fasterxml.jackson.databind.node.ObjectNode;
import com.google.common.io.CharStreams;
import com.google.refine.ProjectMetadata;
import com.google.refine.browsing.facets.ScatterplotFacet;
import com.google.refine.importers.TabularImportingParserBase;
import com.google.refine.importing.ImportingJob;
import com.google.refine.model.Cell;
import com.google.refine.model.Column;
import com.google.refine.model.Project;
import com.google.refine.model.Recon;
import com.google.refine.model.ReconStats;
import com.google.refine.model.recon.StandardReconConfig;
import com.google.refine.util.JSONUtilities;
import de.fau.cs.osr.ptk.common.AstVisitor;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.sweble.wikitext.parser.WikitextEncodingValidator;
import org.sweble.wikitext.parser.WikitextParser;
import org.sweble.wikitext.parser.WikitextPreprocessor;
import org.sweble.wikitext.parser.nodes.WtBody;
import org.sweble.wikitext.parser.nodes.WtBold;
import org.sweble.wikitext.parser.nodes.WtExternalLink;
import org.sweble.wikitext.parser.nodes.WtImageLink;
import org.sweble.wikitext.parser.nodes.WtInternalLink;
import org.sweble.wikitext.parser.nodes.WtItalics;
import org.sweble.wikitext.parser.nodes.WtLinkTitle;
import org.sweble.wikitext.parser.nodes.WtName;
import org.sweble.wikitext.parser.nodes.WtNewline;
import org.sweble.wikitext.parser.nodes.WtNode;
import org.sweble.wikitext.parser.nodes.WtParsedWikitextPage;
import org.sweble.wikitext.parser.nodes.WtSection;
import org.sweble.wikitext.parser.nodes.WtTable;
import org.sweble.wikitext.parser.nodes.WtTableCaption;
import org.sweble.wikitext.parser.nodes.WtTableCell;
import org.sweble.wikitext.parser.nodes.WtTableHeader;
import org.sweble.wikitext.parser.nodes.WtTableRow;
import org.sweble.wikitext.parser.nodes.WtTagExtension;
import org.sweble.wikitext.parser.nodes.WtTagExtensionBody;
import org.sweble.wikitext.parser.nodes.WtTemplate;
import org.sweble.wikitext.parser.nodes.WtTemplateArgument;
import org.sweble.wikitext.parser.nodes.WtTemplateArguments;
import org.sweble.wikitext.parser.nodes.WtText;
import org.sweble.wikitext.parser.nodes.WtUrl;
import org.sweble.wikitext.parser.nodes.WtValue;
import org.sweble.wikitext.parser.nodes.WtXmlAttribute;
import org.sweble.wikitext.parser.nodes.WtXmlAttributes;
import org.sweble.wikitext.parser.nodes.WtXmlEmptyTag;
import org.sweble.wikitext.parser.nodes.WtXmlEndTag;
import org.sweble.wikitext.parser.nodes.WtXmlStartTag;
import org.sweble.wikitext.parser.parser.PreprocessorToParserTransformer;
import org.sweble.wikitext.parser.utils.SimpleParserConfig;
import xtc.parser.ParseException;

/* loaded from: input_file:com/google/refine/importers/WikitextImporter.class */
public class WikitextImporter extends TabularImportingParserBase {

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:com/google/refine/importers/WikitextImporter$SpanningCell.class */
    public class SpanningCell {
        /** Rendered text of the cell that declared the span. */
        public String value;
        /** Internal link target recorded for the originating cell, or {@code null} when none was recorded. */
        public String reconciled;
        /** Reference recorded for the originating cell, or {@code null}. */
        public String reference;
        /** Number of columns covered by the cell. */
        public int colspan;
        /** Number of rows covered by the cell. */
        public int rowspan;
        /** Row index at which the cell was declared. */
        public int row;
        /** Column index at which the cell was declared. */
        public int col;

        /**
         * Captures a table cell that covers more than one row and/or column.
         *
         * @param text      rendered cell text
         * @param wikilink  internal link target associated with the cell, may be {@code null}
         * @param citation  reference associated with the cell, may be {@code null}
         * @param originRow row index of the declaring cell
         * @param originCol column index of the declaring cell
         * @param rowCount  number of rows spanned
         * @param colCount  number of columns spanned
         */
        SpanningCell(String text, String wikilink, String citation, int originRow, int originCol, int rowCount, int colCount) {
            // Position and extent of the span.
            this.row = originRow;
            this.col = originCol;
            this.rowspan = rowCount;
            this.colspan = colCount;
            // Content replicated into the covered cells.
            this.value = text;
            this.reconciled = wikilink;
            this.reference = citation;
        }
    }

    /* loaded from: input_file:com/google/refine/importers/WikitextImporter$WikiTableDataReader.class */
    /**
     * Feeds the rows collected by a {@link WikitextTableVisitor} to the tabular importer, one row per
     * call, optionally attaching reconciliation results and inserting reference columns.
     */
    public class WikiTableDataReader implements TabularImportingParserBase.TableDataReader {
        /** Maximum number of reconciliation jobs submitted in one batch. */
        private static final int RECON_BATCH_SIZE = 50;

        /** Index of the next visitor row to return. */
        private int currentRow;
        /** Visitor holding the parsed rows, references and linked cells. */
        private final WikitextTableVisitor visitor;
        /** Per-row, per-column recon results; {@code null} until {@link #reconcileToQids} has run. */
        private List<List<Recon>> reconList;
        /** Per-column flag: at least one cell of the column received a recon; {@code null} until reconciliation ran. */
        private List<Boolean> columnReconciled = null;
        /** Per-column flag: at least one cell of the column carries a reference; {@code null} when references are not parsed. */
        private final List<Boolean> columnReferenced;

        /**
         * Creates a reader over the rows collected by the given visitor.
         *
         * @param wikitextTableVisitor visitor that has already traversed the page
         * @param z                    when {@code true}, columns containing at least one non-null
         *                             reference get an extra reference cell in every returned row
         */
        public WikiTableDataReader(WikitextTableVisitor wikitextTableVisitor, boolean z) {
            this.visitor = wikitextTableVisitor;
            this.currentRow = 0;
            this.reconList = null;
            if (z) {
                List<Boolean> referenced = new ArrayList<>();
                for (List<String> rowReferences : wikitextTableVisitor.references) {
                    for (int col = 0; col < rowReferences.size(); col++) {
                        // Grow the flag list lazily so ragged rows are supported.
                        while (col >= referenced.size()) {
                            referenced.add(false);
                        }
                        if (rowReferences.get(col) != null) {
                            referenced.set(col, true);
                        }
                    }
                }
                this.columnReferenced = referenced;
            } else {
                this.columnReferenced = null;
            }
        }

        /**
         * Returns the cells of the next row, or {@code null} once all rows have been returned.
         * A {@code null} entry stands for a cell without a value. For every column flagged in
         * {@code columnReferenced}, an additional cell holding that row's reference (or
         * {@code null}) directly follows the value cell.
         *
         * @return the next row of cells, or {@code null} when the table is exhausted
         * @throws IOException declared by the interface; not thrown by this implementation
         */
        @Override // com.google.refine.importers.TabularImportingParserBase.TableDataReader
        public List<Object> getNextRowOfCells() throws IOException {
            List<Object> cells = null;
            List<String> values = null;
            List<String> rowReferences = null;
            if (this.currentRow < this.visitor.rows.size()) {
                values = this.visitor.rows.get(this.currentRow);
                rowReferences = this.visitor.references.get(this.currentRow);
            }
            if (values != null) {
                cells = new ArrayList<>();
                for (int col = 0; col < values.size(); col++) {
                    // currentRow starts at 0 and only grows, so no negative-index guard is needed here.
                    Recon recon = null;
                    if (this.reconList != null) {
                        recon = this.reconList.get(this.currentRow).get(col);
                    }
                    String value = values.get(col);
                    cells.add(value != null ? new Cell(value, recon) : null);

                    boolean hasReferenceColumn = this.columnReferenced != null
                            && col < this.columnReferenced.size()
                            && this.columnReferenced.get(col);
                    if (hasReferenceColumn) {
                        String reference = rowReferences.get(col);
                        cells.add(reference != null ? new Cell(reference, null) : null);
                    }
                }
            }
            this.currentRow++;
            return cells;
        }

        /**
         * Reconciles every wikilinked cell recorded by the visitor and stores the results so that
         * {@link #getNextRowOfCells()} can attach them to the produced cells. Does nothing when the
         * base URL is the literal string {@code "null"}.
         *
         * @param str                 base URL prepended to each internal link target
         * @param standardReconConfig reconciliation configuration used to create and run the jobs
         */
        private void reconcileToQids(String str, StandardReconConfig standardReconConfig) {
            if ("null".equals(str)) {
                return;
            }
            // Pre-fill one null recon slot per cell and one "not reconciled" flag per column.
            this.reconList = new ArrayList<>();
            this.columnReconciled = new ArrayList<>();
            for (List<String> row : this.visitor.rows) {
                int width = row.size();
                List<Recon> rowRecons = new ArrayList<>(width);
                for (int col = 0; col < width; col++) {
                    rowRecons.add(null);
                    if (col >= this.columnReconciled.size()) {
                        this.columnReconciled.add(false);
                    }
                }
                this.reconList.add(rowRecons);
            }

            List<WikilinkedCell> linkedCells = this.visitor.wikilinkedCells;
            int total = linkedCells.size();
            for (int batchStart = 0; batchStart < total; batchStart += RECON_BATCH_SIZE) {
                int batchEnd = Math.min(batchStart + RECON_BATCH_SIZE, total);
                List<WikilinkedCell> batch = linkedCells.subList(batchStart, batchEnd);
                List<Recon> recons = reconcileBatch(batch, str, standardReconConfig);
                for (int offset = 0; offset < batch.size(); offset++) {
                    WikilinkedCell linkedCell = batch.get(offset);
                    Recon recon = recons.get(offset);
                    if (recon != null) {
                        this.reconList.get(linkedCell.row).set(linkedCell.col, recon);
                        this.columnReconciled.set(linkedCell.col, true);
                    }
                }
            }
        }

        /**
         * Creates one reconciliation job per linked cell and submits them as a single batch.
         *
         * @param batch               linked cells to reconcile together
         * @param str                 base URL prepended to each internal link target
         * @param standardReconConfig reconciliation configuration used to create and run the jobs
         * @return the recon results, indexed like {@code batch}
         */
        @SuppressWarnings({"rawtypes", "unchecked"})
        private List<Recon> reconcileBatch(List<WikilinkedCell> batch, String str, StandardReconConfig standardReconConfig) {
            // The job type returned by createSimpleJob is not imported in this file, so the job list
            // is deliberately left raw; the suppression is confined to this helper.
            List jobs = new ArrayList(batch.size());
            for (WikilinkedCell linkedCell : batch) {
                jobs.add(standardReconConfig.createSimpleJob(linkedCell.toURL(str)));
            }
            return standardReconConfig.batchRecon(jobs, 0L);
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:com/google/refine/importers/WikitextImporter$WikilinkedCell.class */
    public class WikilinkedCell {
        /** Target of the internal link found in the cell. */
        public String internalLink;
        /** Row index of the cell. */
        public int row;
        /** Column index of the cell. */
        public int col;

        /**
         * Records the position of a cell together with the internal link it contains.
         *
         * @param linkTarget  target of the internal link
         * @param rowIndex    row index of the cell
         * @param columnIndex column index of the cell
         */
        WikilinkedCell(String linkTarget, int rowIndex, int columnIndex) {
            this.row = rowIndex;
            this.col = columnIndex;
            this.internalLink = linkTarget;
        }

        /**
         * Builds the URL of the linked page by plain concatenation (no escaping is applied).
         *
         * @param str base URL placed in front of the link target
         * @return {@code str} immediately followed by the link target
         */
        public String toURL(String str) {
            StringBuilder url = new StringBuilder();
            url.append(str);
            url.append(this.internalLink);
            return url.toString();
        }
    }

    /* loaded from: input_file:com/google/refine/importers/WikitextImporter$WikitextTableVisitor.class */
    public class WikitextTableVisitor extends AstVisitor<WtNode> {
        private List<String> currentRow;
        private boolean blankSpanningCells;
        private boolean includeRawTemplates;
        private String currentXmlAttr;
        private final Pattern urlPattern = Pattern.compile("\\b(https?|ftp)://[-a-zA-Z0-9+&@#/%?=~_!:,.;]*[-a-zA-Z0-9+&@#/%=~_]", 2);
        public String caption = null;
        public List<List<String>> rows = new ArrayList();
        public List<List<String>> references = new ArrayList();
        public List<WikilinkedCell> wikilinkedCells = new ArrayList();
        private List<SpanningCell> spanningCells = new ArrayList();
        private StringBuilder cellStringBuilder = null;
        private StringBuilder xmlAttrStringBuilder = null;
        private List<String> currentRowReferences = null;
        private String currentInternalLink = null;
        private String currentExternalLink = null;
        private String lastExternalLink = null;
        private String currentReference = null;
        private String currentReferenceName = null;
        private int colspan = 0;
        private int rowspan = 0;
        private int rowId = 0;
        private int spanningCellIdx = 0;
        private List<String> internalLinksInCell = new ArrayList();
        private Map<String, String> namedReferences = new HashMap();

        public WikitextTableVisitor(boolean z, boolean z2) {
            this.blankSpanningCells = z;
            this.includeRawTemplates = z2;
        }

        /* JADX INFO: Access modifiers changed from: protected */
        public WtNode before(WtNode wtNode) {
            return (WtNode) super.before(wtNode);
        }

        public void visit(WtNode wtNode) {
        }

        public void visit(WtTable wtTable) {
            iterate(wtTable);
        }

        public void visit(WtTableCaption wtTableCaption) {
            this.caption = renderCellAsString(wtTableCaption);
        }

        public void visit(WtTableRow wtTableRow) {
            if (this.currentRow != null) {
                finishRow();
            }
            startRow();
            iterate(wtTableRow);
            finishRow();
        }

        private void startRow() {
            this.currentRow = new ArrayList();
            this.currentRowReferences = new ArrayList();
            this.spanningCellIdx = 0;
            addSpanningCells();
        }

        private void finishRow() {
            if (this.currentRow.size() > 0) {
                this.rows.add(this.currentRow);
                this.references.add(this.currentRowReferences);
                this.rowId++;
            }
            this.currentRow = null;
        }

        public void visit(WtTableCell wtTableCell) {
            addCell(wtTableCell);
        }

        public void visit(WtTableHeader wtTableHeader) {
            addCell(wtTableHeader);
        }

        public void addCell(WtNode wtNode) {
            if (this.currentRow == null) {
                startRow();
            }
            this.rowspan = 1;
            this.colspan = 1;
            this.internalLinksInCell.clear();
            this.currentReference = null;
            this.currentReferenceName = null;
            String renderCellAsString = renderCellAsString(wtNode);
            int size = this.currentRow.size();
            this.currentRow.add(renderCellAsString);
            this.currentRowReferences.add(this.currentReference);
            String str = null;
            if (this.internalLinksInCell.size() == 1) {
                str = this.internalLinksInCell.get(0);
                this.wikilinkedCells.add(new WikilinkedCell(str, this.rowId, size));
            }
            if (this.colspan > 1 || this.rowspan > 1) {
                this.spanningCells.add(this.spanningCellIdx, new SpanningCell(renderCellAsString, str, this.currentReference, this.rowId, size, this.rowspan, this.colspan));
            }
            addSpanningCells();
        }

        public String renderCellAsString(WtNode wtNode) {
            this.cellStringBuilder = new StringBuilder();
            iterate(wtNode);
            String sb = this.cellStringBuilder.toString();
            if (sb == null) {
                sb = "";
            }
            String trim = sb.trim();
            this.cellStringBuilder = null;
            return trim;
        }

        public void visit(WtText wtText) {
            writeText(wtText.getContent());
        }

        public void visit(WtNewline wtNewline) {
            writeText("\n");
        }

        public void visit(WtXmlEmptyTag wtXmlEmptyTag) {
            if ("br".equals(wtXmlEmptyTag.getName())) {
                writeText("\n");
            }
        }

        public void visit(WtXmlStartTag wtXmlStartTag) {
            if ("br".equals(wtXmlStartTag.getName())) {
                writeText("\n");
            }
        }

        public void visit(WtXmlEndTag wtXmlEndTag) {
            if ("br".equals(wtXmlEndTag.getName())) {
                writeText("\n");
            }
        }

        public void visit(WtTagExtension wtTagExtension) {
            if ("ref".equals(wtTagExtension.getName())) {
                this.lastExternalLink = null;
                this.currentReferenceName = null;
                iterate(wtTagExtension);
                if (this.currentReferenceName != null) {
                    this.currentReference = this.namedReferences.get(this.currentReferenceName);
                } else {
                    this.currentReferenceName = "";
                }
                if (this.lastExternalLink != null) {
                    this.currentReference = this.lastExternalLink;
                }
                if (this.currentReference == null || "".equals(this.currentReferenceName)) {
                    return;
                }
                this.namedReferences.put(this.currentReferenceName, this.currentReference);
            }
        }

        public void visit(WtTagExtensionBody wtTagExtensionBody) {
            String content = wtTagExtensionBody.getContent();
            Matcher matcher = this.urlPattern.matcher(content);
            while (matcher.find()) {
                this.lastExternalLink = content.substring(matcher.start(), matcher.end());
            }
        }

        public void writeText(String str) {
            if (this.currentReferenceName == null) {
                if (this.xmlAttrStringBuilder != null) {
                    this.xmlAttrStringBuilder.append(str);
                } else if (this.cellStringBuilder != null) {
                    this.cellStringBuilder.append(str);
                }
            }
        }

        private SpanningCell spanningCell() {
            return this.spanningCells.get(this.spanningCellIdx);
        }

        private void addSpanningCells() {
            while (this.spanningCellIdx < this.spanningCells.size() && this.currentRow.size() >= spanningCell().col) {
                SpanningCell spanningCell = spanningCell();
                if (spanningCell.row + spanningCell.rowspan >= this.rowId + 1) {
                    while (this.currentRow.size() < spanningCell.col + spanningCell.colspan) {
                        if (this.blankSpanningCells) {
                            this.currentRow.add(null);
                            this.currentRowReferences.add(null);
                        } else {
                            this.currentRow.add(spanningCell.value);
                            this.currentRowReferences.add(spanningCell.reference);
                            if (spanningCell.reconciled != null) {
                                this.wikilinkedCells.add(new WikilinkedCell(spanningCell.reconciled, this.rowId, this.currentRow.size() - 1));
                            }
                        }
                    }
                }
                if (spanningCell.row + spanningCell.rowspan <= this.rowId + 1) {
                    this.spanningCells.remove(this.spanningCellIdx);
                } else {
                    this.spanningCellIdx++;
                }
            }
        }

        public void visit(WtXmlAttributes wtXmlAttributes) {
            iterate(wtXmlAttributes);
        }

        public void visit(WtXmlAttribute wtXmlAttribute) {
            this.xmlAttrStringBuilder = new StringBuilder();
            iterate(wtXmlAttribute);
            try {
                if ("colspan".equals(this.currentXmlAttr)) {
                    this.colspan = Integer.parseInt(this.xmlAttrStringBuilder.toString());
                } else if ("rowspan".equals(this.currentXmlAttr)) {
                    this.rowspan = Integer.parseInt(this.xmlAttrStringBuilder.toString());
                } else if (ScatterplotFacet.NAME.equals(this.currentXmlAttr)) {
                    this.currentReferenceName = this.xmlAttrStringBuilder.toString();
                }
            } catch (NumberFormatException e) {
            }
            this.currentXmlAttr = null;
            this.xmlAttrStringBuilder = null;
        }

        public void visit(WtName wtName) {
            try {
                this.currentXmlAttr = wtName.getAsString();
            } catch (UnsupportedOperationException e) {
                this.currentXmlAttr = null;
            }
        }

        public void visit(WtValue wtValue) {
            iterate(wtValue);
        }

        public void visit(WtInternalLink wtInternalLink) {
            this.currentInternalLink = wtInternalLink.getTarget().getAsString();
            this.internalLinksInCell.add(this.currentInternalLink);
            iterate(wtInternalLink);
            this.currentInternalLink = null;
        }

        public void visit(WtExternalLink wtExternalLink) {
            WtUrl target = wtExternalLink.getTarget();
            String str = target.getProtocol() + ":" + target.getPath();
            if (this.cellStringBuilder != null) {
                if (this.rowId >= 0) {
                    this.cellStringBuilder.append(str);
                } else {
                    this.currentExternalLink = str;
                    iterate(wtExternalLink);
                    this.currentExternalLink = null;
                }
            }
            this.lastExternalLink = str;
        }

        public void visit(WtLinkTitle.WtNoLinkTitle wtNoLinkTitle) {
            if (this.cellStringBuilder != null) {
                if (this.currentInternalLink != null) {
                    this.cellStringBuilder.append(this.currentInternalLink);
                } else if (this.currentExternalLink != null) {
                    this.cellStringBuilder.append(this.currentExternalLink);
                }
            }
        }

        public void visit(WtLinkTitle wtLinkTitle) {
            iterate(wtLinkTitle);
        }

        public void visit(WtUrl wtUrl) {
        }

        public void visit(WtTemplate wtTemplate) {
            if (this.includeRawTemplates || this.currentReferenceName != null) {
                writeText("{{" + wtTemplate.getName().getAsString());
                WtTemplateArguments args = wtTemplate.getArgs();
                for (int i = 0; i != args.size(); i++) {
                    writeText("|");
                    iterate((WtNode) args.get(i));
                }
                writeText("}}");
            }
        }

        public void visit(WtTemplateArgument wtTemplateArgument) {
            if (this.currentReferenceName == null) {
                writeText("|");
                if (wtTemplateArgument.hasName()) {
                    writeText(wtTemplateArgument.getName().getAsString());
                    writeText("=");
                }
            }
            iterate(wtTemplateArgument.getValue());
        }

        public void visit(WtImageLink wtImageLink) {
            if (this.includeRawTemplates) {
                writeText("[[");
                writeText(wtImageLink.getTarget().getAsString());
                writeText("]]");
            }
        }

        public void visit(WtParsedWikitextPage wtParsedWikitextPage) {
            iterate(wtParsedWikitextPage);
        }

        public void visit(WtSection wtSection) {
            iterate(wtSection);
        }

        public void visit(WtBody wtBody) {
            iterate(wtBody);
        }

        public void visit(WtItalics wtItalics) {
            iterate(wtItalics);
        }

        public void visit(WtBold wtBold) {
            iterate(wtBold);
        }

        /* JADX INFO: Access modifiers changed from: protected */
        public Object after(WtNode wtNode, Object obj) {
            return this.rows;
        }
    }

    /**
     * Creates the importer, passing {@code false} to the {@link TabularImportingParserBase}
     * constructor.
     * NOTE(review): the flag presumably selects Reader-based rather than InputStream-based parsing,
     * since parseOneFile in this class takes a Reader — confirm against the superclass.
     */
    public WikitextImporter() {
        super(false);
    }

    /**
     * Extends the option set built by the superclass with the defaults specific to wikitext
     * import: no cell type guessing, blank spanning cells, no raw templates, the English Wikipedia
     * base URL and reference parsing enabled.
     *
     * @param importingJob the importing job, forwarded to the superclass
     * @param list         file records, forwarded to the superclass
     * @param str          format identifier, forwarded to the superclass
     * @return the option object produced by the superclass with the wikitext defaults added
     */
    @Override
    public ObjectNode createParserUIInitializationData(ImportingJob importingJob, List<ObjectNode> list, String str) {
        final ObjectNode options = super.createParserUIInitializationData(importingJob, list, str);

        // Parsing behaviour defaults.
        JSONUtilities.safePut(options, "guessCellValueTypes", false);
        JSONUtilities.safePut(options, "blankSpanningCells", true);
        JSONUtilities.safePut(options, "includeRawTemplates", false);

        // Link resolution and reference extraction defaults.
        JSONUtilities.safePut(options, "wikiUrl", "https://en.wikipedia.org/wiki/");
        JSONUtilities.safePut(options, "parseReferences", true);

        return options;
    }

    /**
     * Parses one wikitext document, extracts its table content and loads it into the project.
     *
     * <p>The text is validated, preprocessed and parsed, then traversed by a
     * {@link WikitextTableVisitor}. When a wiki base URL is configured, wikilinked cells are
     * reconciled before the rows are handed to {@code readTable}. A non-empty table caption
     * becomes the project name. Finally, every column that received at least one recon gets its
     * recon statistics and recon configuration set.
     *
     * @param project         project receiving the rows
     * @param projectMetadata metadata whose name is set from the table caption, if any
     * @param importingJob    the importing job, forwarded to {@code readTable}
     * @param str             not used by this method
     * @param reader          source of the wikitext; read fully, not closed here
     * @param i               forwarded to {@code readTable}
     * @param objectNode      import options ({@code blankSpanningCells}, {@code includeRawTemplates},
     *                        {@code parseReferences}, {@code wikiUrl}, {@code reconService})
     * @param list            receives the exceptions raised while reading or parsing the input
     */
    @Override // com.google.refine.importers.ImportingParserBase
    public void parseOneFile(Project project, ProjectMetadata projectMetadata, ImportingJob importingJob, String str, Reader reader, int i, ObjectNode objectNode, List<Exception> list) {
        final String pageTitle = "Page title";
        SimpleParserConfig simpleParserConfig = new SimpleParserConfig();
        try {
            String wikitext = CharStreams.toString(reader);
            // Pipeline: encoding validation -> preprocessing -> conversion for the parser -> parsing.
            WtParsedWikitextPage parsedPage = new WikitextParser(simpleParserConfig).parseArticle(
                    PreprocessorToParserTransformer.transform(
                            new WikitextPreprocessor(simpleParserConfig).parseArticle(
                                    new WikitextEncodingValidator().validate(simpleParserConfig, wikitext, pageTitle),
                                    pageTitle,
                                    false)),
                    pageTitle);

            boolean blankSpanningCells = JSONUtilities.getBoolean(objectNode, "blankSpanningCells", true);
            boolean includeRawTemplates = JSONUtilities.getBoolean(objectNode, "includeRawTemplates", false);
            boolean parseReferences = JSONUtilities.getBoolean(objectNode, "parseReferences", true);

            WikitextTableVisitor wikitextTableVisitor = new WikitextTableVisitor(blankSpanningCells, includeRawTemplates);
            wikitextTableVisitor.go(parsedPage);
            WikiTableDataReader wikiTableDataReader = new WikiTableDataReader(wikitextTableVisitor, parseReferences);

            String wikiUrl = JSONUtilities.getString(objectNode, "wikiUrl", null);
            StandardReconConfig reconConfig = getReconConfig(JSONUtilities.getString(objectNode, "reconService", "https://wikidata.reconci.link/en/api"));
            if (wikiUrl != null) {
                wikiTableDataReader.reconcileToQids(wikiUrl, reconConfig);
            }

            if (wikitextTableVisitor.caption != null && wikitextTableVisitor.caption.length() > 0) {
                projectMetadata.setName(wikitextTableVisitor.caption);
            }

            TabularImportingParserBase.readTable(project, importingJob, wikiTableDataReader, i, objectNode, list);

            if (wikiTableDataReader.columnReconciled != null) {
                // The reader emits an extra reference cell right after every referenced column, so
                // the cell index of a table column is shifted by the number of referenced columns
                // that precede it.
                int cellIndex = 0;
                for (int tableColumn = 0; tableColumn < wikiTableDataReader.columnReconciled.size(); tableColumn++) {
                    // Guard against the project holding fewer columns than the widest table row.
                    if (wikiTableDataReader.columnReconciled.get(tableColumn)
                            && cellIndex < project.columnModel.columns.size()) {
                        Column column = project.columnModel.columns.get(cellIndex);
                        column.setReconStats(ReconStats.create(project, cellIndex));
                        column.setReconConfig(reconConfig);
                    }
                    cellIndex++;
                    if (wikiTableDataReader.columnReferenced != null
                            && tableColumn < wikiTableDataReader.columnReferenced.size()
                            && wikiTableDataReader.columnReferenced.get(tableColumn)) {
                        cellIndex++;
                    }
                }
            }
        } catch (IOException e) {
            // Report read failures to the caller just like parse failures instead of only printing them.
            list.add(e);
            e.printStackTrace();
        } catch (ParseException e2) {
            list.add(e2);
            e2.printStackTrace();
        }
    }

    /**
     * Builds the reconciliation configuration used for wikilinked cells.
     *
     * @param str URL of the reconciliation service
     * @return a configuration for the given service with fixed Wikidata settings
     */
    private StandardReconConfig getReconConfig(String str) {
        // NOTE(review): argument meanings below are inferred from the values, not from the
        // constructor declaration (which is outside this file) — confirm against StandardReconConfig.
        return new StandardReconConfig(
                str,
                "http://www.wikidata.org/entity/",      // presumably the identifier space
                "http://www.wikidata.org/prop/direct/", // presumably the schema space
                "",
                "entity",
                true,
                new ArrayList<>(),
                1);
    }
}
