package net.sourceforge.tess4j.util;

import java.awt.Color;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParserFactory;
import org.apache.commons.io.IOUtils;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageContentStream;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.pdmodel.font.PDFont;
import org.apache.pdfbox.pdmodel.font.PDType1Font;
import org.apache.pdfbox.pdmodel.font.Standard14Fonts;
import org.apache.pdfbox.pdmodel.graphics.state.RenderingMode;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.ErrorHandler;
import org.xml.sax.InputSource;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import org.xml.sax.XMLReader;

/* loaded from: input_file:net/sourceforge/tess4j/util/Hocr2PdfParser.class */
/**
 * SAX handler that reads an hOCR file and writes the recognised words onto the
 * pages of an existing {@link PDDocument}.
 *
 * <p>Each {@code div} with class {@code ocr_page} opens a content stream in append
 * mode on the matching PDF page; each {@code span} with class {@code ocrx_word} or
 * {@code ocr_word} is drawn at the position given by its {@code bbox}, scaled from
 * hOCR coordinates to the PDF page bounding box.
 *
 * <p>Instances hold per-parse mutable state (current page stream, current word and
 * line coordinates), so one instance must not be used for concurrent parses.
 */
public class Hocr2PdfParser implements ContentHandler, ErrorHandler {
    private static final Logger logger = LoggerFactory.getLogger(new LoggHelper().toString());
    public static final String SPACE = " ";
    public static final String BBOX = "bbox";
    public static final String TITLE = "title";
    public static final String SEMICOL = ";";
    public static final String DIV = "div";
    public static final String SPAN = "span";
    public static final String OCRX_WORD = "ocrx_word";
    public static final String OCR_WORD = "ocr_word";
    public static final String OCR_LINE = "ocr_line";
    public static final String CLASS = "class";
    public static final String OCR_PAGE = "ocr_page";

    /** Number of tokens in a parsed bbox: the literal {@code bbox} plus x0, y0, x1, y1. */
    private static final int BBOX_TOKEN_COUNT = 5;

    private final RenderingMode renderingMode;
    private final PDFont font;
    private final boolean useHocrLineToY;
    private final PDDocument pdDocument;
    private final String hocrFilepath;
    /** Character data collected since the most recent {@code startElement}. */
    private final StringBuilder text = new StringBuilder();
    private float xPageScaling;
    private float yPageScaling;
    /** Bbox tokens of the word span currently open, or {@code null} if none. */
    private String[] coordsText;
    /** Bbox tokens of the most recently opened {@code ocr_line} span, or {@code null}. */
    private String[] coordsLine;
    /** Content stream of the page currently being written, or {@code null} between pages. */
    private PDPageContentStream pdfPageCanvas = null;
    private PDRectangle pdfPageBBox = null;

    /**
     * Creates a parser for one hOCR file / PDF document pair.
     *
     * @param str        path of the hOCR file read by {@link #parse()}
     * @param pDDocument PDF document whose pages receive the text
     * @param z          {@code true} to render the text filled ({@link RenderingMode#FILL}),
     *                   {@code false} to render it invisibly ({@link RenderingMode#NEITHER})
     * @param z2         {@code true} to take the vertical coordinates of each word from its
     *                   enclosing {@code ocr_line} instead of from the word itself
     * @param str2       name of a {@link Standard14Fonts.FontName} constant, or {@code null}
     *                   for Helvetica
     * @throws IOException declared for callers; kept for interface compatibility
     */
    public Hocr2PdfParser(String str, PDDocument pDDocument, boolean z, boolean z2, String str2) throws IOException {
        this.hocrFilepath = str;
        this.pdDocument = pDDocument;
        this.useHocrLineToY = z2;
        this.renderingMode = z ? RenderingMode.FILL : RenderingMode.NEITHER;
        Standard14Fonts.FontName fontName =
                str2 != null ? Standard14Fonts.FontName.valueOf(str2) : Standard14Fonts.FontName.HELVETICA;
        this.font = new PDType1Font(fontName);
    }

    @Override // org.xml.sax.ContentHandler
    public void setDocumentLocator(Locator locator) {
    }

    @Override // org.xml.sax.ContentHandler
    public void startDocument() throws SAXException {
    }

    /** Closes the content stream of the last page, if one is still open. */
    @Override // org.xml.sax.ContentHandler
    public void endDocument() throws SAXException {
        finishCurrentPage();
    }

    @Override // org.xml.sax.ContentHandler
    public void startPrefixMapping(String str, String str2) throws SAXException {
    }

    @Override // org.xml.sax.ContentHandler
    public void endPrefixMapping(String str) throws SAXException {
    }

    /**
     * Resets the collected text and word coordinates, then reacts to the element:
     * word spans record their bbox, line spans record the line bbox, and
     * {@code ocr_page} divs close the previous page and open the next one.
     * Every other element is ignored.
     */
    @Override // org.xml.sax.ContentHandler
    public void startElement(String str, String str2, String str3, Attributes attributes) throws SAXException {
        this.text.setLength(0);
        this.coordsText = null;
        String value = attributes.getValue(CLASS);

        if (SPAN.equals(str3) || SPAN.equals(str2)) {
            logger.debug("start of span, there is a new text or line");
            if (OCRX_WORD.equals(value) || OCR_WORD.equals(value)) {
                this.coordsText = readBboxCoordsFromAttributs(attributes);
            } else if (OCR_LINE.equals(value)) {
                this.coordsLine = readBboxCoordsFromAttributs(attributes);
            } else {
                logger.debug("ignore {} : {}", CLASS, value);
            }
            return;
        }

        if (!DIV.equals(str3) && !DIV.equals(str2)) {
            logger.debug("ignore qName : {} or localName :{} ", str3, str2);
            return;
        }
        if (!OCR_PAGE.equals(value)) {
            logger.debug("ignore {} : {}", CLASS, value);
            return;
        }
        logger.debug("start div, start of page and close the previous page");
        finishCurrentPage();
        beginNewPage(convertHocrAttributsToPageNum(attributes), readBboxCoordsFromAttributs(attributes));
    }

    /**
     * Writes the collected text to the PDF when the closing element ends a word span
     * that produced character data; otherwise does nothing besides clearing the
     * current word coordinates.
     */
    @Override // org.xml.sax.ContentHandler
    public void endElement(String str, String str2, String str3) throws SAXException {
        if (this.coordsText == null || this.text.length() <= 0) {
            logger.debug("ignore endElement no text readed");
        } else {
            logger.debug("OCRed word span closed, coords: {} {}", this.coordsText, this.text);
            // Only borrow the line's vertical extent when a line bbox has actually been
            // seen; a word outside any ocr_line keeps its own coordinates.
            if (this.useHocrLineToY && this.coordsLine != null) {
                this.coordsText[2] = this.coordsLine[2];
                this.coordsText[4] = this.coordsLine[4];
            }
            addTextToPDF(this.coordsText, this.text.toString());
        }
        this.coordsText = null;
    }

    @Override // org.xml.sax.ContentHandler
    public void characters(char[] cArr, int i, int i2) throws SAXException {
        this.text.append(cArr, i, i2);
    }

    @Override // org.xml.sax.ContentHandler
    public void ignorableWhitespace(char[] cArr, int i, int i2) throws SAXException {
    }

    @Override // org.xml.sax.ContentHandler
    public void processingInstruction(String str, String str2) throws SAXException {
    }

    @Override // org.xml.sax.ContentHandler
    public void skippedEntity(String str) throws SAXException {
    }

    @Override // org.xml.sax.ErrorHandler
    public void warning(SAXParseException sAXParseException) throws SAXException {
        logger.warn("warning: {}", sAXParseException.getMessage());
    }

    @Override // org.xml.sax.ErrorHandler
    public void error(SAXParseException sAXParseException) throws SAXException {
        logger.error("error: {}", sAXParseException.getMessage());
    }

    @Override // org.xml.sax.ErrorHandler
    public void fatalError(SAXParseException sAXParseException) throws SAXException {
        logger.error("fatalError: {}", sAXParseException.getMessage());
    }

    /**
     * Extracts the {@code bbox} property from the element's {@code title} attribute.
     *
     * @return the space-separated tokens of the property, i.e.
     *         {@code ["bbox", x0, y0, x1, y1]}
     * @throws SAXException if the title is missing, contains no bbox, or the bbox has
     *                      fewer than four coordinates
     */
    private String[] readBboxCoordsFromAttributs(Attributes attributes) throws SAXException {
        String title = attributes.getValue(TITLE);
        int start = title == null ? -1 : title.indexOf(BBOX);
        if (start < 0) {
            throw new SAXException("No " + BBOX + " found in " + TITLE + " attribute: " + title);
        }
        // The bbox may be the last property of the title and then has no trailing ';'.
        int end = title.indexOf(SEMICOL, start);
        if (end < 0) {
            end = title.length();
        }
        String[] tokens = title.substring(start, end).trim().split(SPACE);
        if (tokens.length < BBOX_TOKEN_COUNT) {
            throw new SAXException("Incomplete " + BBOX + " in " + TITLE + " attribute: " + title);
        }
        return tokens;
    }

    /**
     * Derives the zero-based PDF page index from the element's {@code id} attribute,
     * which is expected to look like {@code <prefix>_<oneBasedNumber>}.
     *
     * @throws SAXException if the id is missing or does not have that form
     */
    private Integer convertHocrAttributsToPageNum(Attributes attributes) throws SAXException {
        String id = attributes.getValue("id");
        String[] parts = id == null ? new String[0] : id.split("_");
        if (parts.length < 2) {
            throw new SAXException("Cannot read page number from id attribute: " + id);
        }
        try {
            return Integer.valueOf(Integer.parseInt(parts[1], 10) - 1);
        } catch (NumberFormatException e) {
            throw new SAXException("Cannot read page number from id attribute: " + id, e);
        }
    }

    /**
     * Opens an appending content stream on the given page and computes the factors
     * that map hOCR page coordinates onto the PDF page bounding box.
     *
     * @param num    zero-based page index in the PDF document
     * @param strArr bbox tokens of the hOCR page ({@code ["bbox", x0, y0, x1, y1]})
     */
    private void beginNewPage(Integer num, String[] strArr) throws SAXException {
        try {
            PDPage page = this.pdDocument.getPage(num.intValue());
            this.pdfPageBBox = page.getBBox();
            this.pdfPageCanvas = new PDPageContentStream(this.pdDocument, page, PDPageContentStream.AppendMode.APPEND, true, true);
            float hocrWidth = Float.parseFloat(strArr[3]) - Float.parseFloat(strArr[1]);
            float hocrHeight = Float.parseFloat(strArr[4]) - Float.parseFloat(strArr[2]);
            this.xPageScaling = this.pdfPageBBox.getWidth() / hocrWidth;
            this.yPageScaling = this.pdfPageBBox.getHeight() / hocrHeight;
            logger.debug("Load page {} with scaling {}x{}", num, this.xPageScaling, this.yPageScaling);
        } catch (IOException | NumberFormatException e) {
            throw new SAXException(e.getMessage(), e);
        }
    }

    /**
     * Closes the content stream of the current page, ignoring close failures, and
     * forgets it so that it is not closed a second time.
     */
    private void finishCurrentPage() {
        if (this.pdfPageCanvas != null) {
            IOUtils.closeQuietly(this.pdfPageCanvas);
            this.pdfPageCanvas = null;
        }
    }

    /**
     * Draws one word on the current page. The font size is chosen so that the
     * rendered string is as wide as the scaled bbox, and the text origin is placed at
     * the bbox's left edge and bottom edge (hOCR y grows downwards, PDF y upwards).
     *
     * @param strArr bbox tokens of the word ({@code ["bbox", x0, y0, x1, y1]}) with
     *               integer coordinates
     * @param str    the text to draw
     * @throws SAXException if no page is open, the coordinates are missing or not
     *                      integers, or writing to the PDF fails
     */
    public void addTextToPDF(String[] strArr, String str) throws SAXException {
        if (this.pdfPageCanvas == null || this.pdfPageBBox == null) {
            throw new SAXException("Text found outside of an " + OCR_PAGE + " element: " + str);
        }
        if (strArr == null || strArr.length < BBOX_TOKEN_COUNT) {
            throw new SAXException("Incomplete " + BBOX + " for text: " + str);
        }
        try {
            int left = Integer.parseInt(strArr[1]);
            int top = Integer.parseInt(strArr[2]);
            int right = Integer.parseInt(strArr[3]);
            int bottom = Integer.parseInt(strArr[4]);
            // Lower-left corner is (left, bottom) in hOCR coordinates.
            PDRectangle wordBox = new PDRectangle(left, bottom, right - left, bottom - top);
            float fontSize = ((wordBox.getWidth() * this.xPageScaling) * 1000.0f) / this.font.getStringWidth(str);
            float x = wordBox.getLowerLeftX() * this.xPageScaling;
            float y = this.pdfPageBBox.getHeight() - (wordBox.getLowerLeftY() * this.yPageScaling);
            printTextAtCoordinates(str, fontSize, x, y);
        } catch (IOException | NumberFormatException e) {
            throw new SAXException(e.getMessage(), e);
        }
    }

    /**
     * Emits the text-showing operators for one string on the current page.
     *
     * @param str text to show
     * @param f   font size
     * @param f2  x offset of the text origin
     * @param f3  y offset of the text origin
     */
    private void printTextAtCoordinates(String str, float f, float f2, float f3) throws IOException {
        logger.debug("Text,{},{},{},{},{},{}", f2, f3, f, this.pdfPageBBox.getWidth(), this.pdfPageBBox.getHeight(), str);
        this.pdfPageCanvas.setFont(this.font, f);
        this.pdfPageCanvas.saveGraphicsState();
        this.pdfPageCanvas.beginText();
        this.pdfPageCanvas.setRenderingMode(this.renderingMode);
        this.pdfPageCanvas.setNonStrokingColor(Color.red);
        this.pdfPageCanvas.setLineWidth(0.0f);
        this.pdfPageCanvas.newLineAtOffset(f2, f3);
        this.pdfPageCanvas.showText(str);
        this.pdfPageCanvas.endText();
        this.pdfPageCanvas.restoreGraphicsState();
    }

    /**
     * Tells whether a UTF-16 code unit may appear in an XML 1.0 document: tab, line
     * feed, carriage return, U+0020..U+D7FF and U+E000..U+FFFD. Surrogate code units
     * are accepted as well because they encode the supplementary characters
     * U+10000..U+10FFFF, which XML 1.0 also allows.
     */
    private static boolean isXmlChar(char c) {
        return c == '\t' || c == '\n' || c == '\r'
                || (c >= 0x20 && c <= 0xD7FF)
                || Character.isSurrogate(c)
                || (c >= 0xE000 && c <= 0xFFFD);
    }

    /**
     * Parses the hOCR file given to the constructor with this object as content and
     * error handler, writing the recognised text into the PDF document.
     *
     * <p>The first line of the file is skipped, and characters that are not legal in
     * XML are replaced by spaces before they reach the XML parser.
     *
     * @throws SAXException                 if the XML is not well-formed or a handler
     *                                      callback fails
     * @throws IOException                  if the hOCR file cannot be read
     * @throws ParserConfigurationException if no SAX parser can be created
     */
    public void parse() throws SAXException, IOException, ParserConfigurationException {
        XMLReader xMLReader = SAXParserFactory.newInstance().newSAXParser().getXMLReader();
        xMLReader.setContentHandler(this);
        xMLReader.setErrorHandler(this);
        xMLReader.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
        // NOTE(review): secure processing is switched off here; external DTD loading is
        // disabled above, but confirm this is acceptable for untrusted hOCR input.
        xMLReader.setFeature("http://javax.xml.XMLConstants/feature/secure-processing", false);

        try (FileInputStream fileInputStream = new FileInputStream(this.hocrFilepath);
                BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(fileInputStream, "UTF8"))) {
            // Skip the first line of the file.
            // NOTE(review): presumably the XML declaration / prolog line - confirm.
            bufferedReader.readLine();
            xMLReader.parse(new InputSource(new Reader() {
                @Override // java.io.Reader
                public int read(char[] cArr, int i, int i2) throws IOException {
                    int read = bufferedReader.read(cArr, i, i2);
                    for (int k = 0; k < read; k++) {
                        if (!isXmlChar(cArr[i + k])) {
                            cArr[i + k] = ' ';
                        }
                    }
                    return read;
                }

                @Override // java.io.Reader, java.io.Closeable, java.lang.AutoCloseable
                public void close() throws IOException {
                    bufferedReader.close();
                }
            }));
        }
    }
}
