package org.dspace.app.mediafilter;

import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import org.apache.commons.lang.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.tika.Tika;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.sax.BodyContentHandler;
import org.apache.tika.sax.ContentHandlerDecorator;
import org.dspace.content.Item;
import org.dspace.discovery.FullTextContentStreams;
import org.dspace.services.ConfigurationService;
import org.dspace.services.factory.DSpaceServicesFactory;
import org.xml.sax.SAXException;

/* loaded from: input_file:org/dspace/app/mediafilter/TikaTextExtractionFilter.class */
/**
 * Media filter that uses Apache Tika to extract the text of a bitstream.
 *
 * <p>Two extraction strategies are available, selected by the
 * {@code textextractor.use-temp-file} configuration property:
 * <ul>
 *   <li>in memory (default): the text is parsed into a {@code String}, capped at
 *       {@code textextractor.max-chars} characters (default 100000);</li>
 *   <li>temporary file: the text is streamed to a temporary file on disk as it is
 *       parsed.</li>
 * </ul>
 */
public class TikaTextExtractionFilter extends MediaFilter {
    private static final Logger log = LogManager.getLogger();

    /**
     * Builds the name of the filtered bitstream.
     *
     * @param sourceName name of the source bitstream
     * @return {@code sourceName} with {@code ".txt"} appended
     */
    @Override // org.dspace.app.mediafilter.FormatFilter
    public String getFilteredName(String sourceName) {
        return sourceName + ".txt";
    }

    /**
     * @return the bundle name defined by {@link FullTextContentStreams#FULLTEXT_BUNDLE}
     */
    @Override // org.dspace.app.mediafilter.FormatFilter
    public String getBundleName() {
        return FullTextContentStreams.FULLTEXT_BUNDLE;
    }

    /**
     * @return the format string of the filtered bitstream, {@code "Text"}
     */
    @Override // org.dspace.app.mediafilter.FormatFilter
    public String getFormatString() {
        return "Text";
    }

    /**
     * @return the description of the filtered bitstream, {@code "Extracted text"}
     */
    @Override // org.dspace.app.mediafilter.FormatFilter
    public String getDescription() {
        return "Extracted text";
    }

    /**
     * Extracts the text of {@code inputStream} and returns it as a new stream.
     *
     * @param item        item owning the bitstream; only its ID is used, in error messages
     * @param inputStream content to extract text from
     * @param verbose     when {@code true}, the extracted text (in-memory mode) or the path of
     *                    the temporary file (temp-file mode) is printed to standard output
     * @return a stream over the extracted text encoded as UTF-8, or {@code null} when the
     *         in-memory extraction produced no text
     * @throws Exception  any failure raised while parsing; {@link IOException} and
     *                    {@link OutOfMemoryError} from the in-memory path are reported to
     *                    standard error and the log before being rethrown
     */
    @Override // org.dspace.app.mediafilter.FormatFilter
    public InputStream getDestinationStream(Item item, InputStream inputStream, boolean verbose) throws Exception {
        ConfigurationService configurationService = DSpaceServicesFactory.getInstance().getConfigurationService();
        if (configurationService.getBooleanProperty("textextractor.use-temp-file", false)) {
            return extractUsingTempFile(inputStream, verbose);
        }

        int maxChars = configurationService.getIntProperty("textextractor.max-chars", 100000);
        try {
            Tika tika = new Tika();
            tika.setMaxStringLength(maxChars);
            String extractedText = tika.parseToString(inputStream);
            if (StringUtils.isEmpty(extractedText)) {
                // Nothing was extracted: signal "no output" to the caller.
                return null;
            }
            if (verbose) {
                System.out.println("(Verbose mode) Extracted text:");
                System.out.println(extractedText);
            }
            return new ByteArrayInputStream(extractedText.getBytes(StandardCharsets.UTF_8));
        } catch (IOException e) {
            // Reported on both stderr and the log, then rethrown for the caller to handle.
            System.err.format("Unable to extract text from bitstream in Item %s%n", item.getID().toString());
            e.printStackTrace();
            log.error("Unable to extract text from bitstream in Item {}", item.getID().toString(), e);
            throw e;
        } catch (OutOfMemoryError oom) {
            // Caught only to point the operator at the temp-file mode; the error is still rethrown.
            System.err.format("OutOfMemoryError occurred when extracting text from bitstream in Item %s. You may wish to enable 'textextractor.use-temp-file'.%n", item.getID().toString());
            oom.printStackTrace();
            log.error("OutOfMemoryError occurred when extracting text from bitstream in Item {}. You may wish to enable 'textextractor.use-temp-file'.", item.getID().toString(), oom);
            throw oom;
        }
    }

    /**
     * Extracts text by streaming Tika's character events into a UTF-8 temporary file, so the
     * whole text never has to be held in a single {@code String}.
     *
     * <p>In verbose mode the file path is printed and the file is kept; otherwise the file is
     * registered for deletion when the JVM exits.
     *
     * @param inputStream content to extract text from
     * @param verbose     whether to print the temporary file location (and keep the file)
     * @return a stream reading the temporary file that holds the extracted text
     * @throws IOException   if the temporary file cannot be created, written, closed or reopened
     * @throws TikaException if Tika fails to parse the content
     * @throws SAXException  if a handler callback fails, including failures to write to the file
     */
    private InputStream extractUsingTempFile(InputStream inputStream, boolean verbose)
            throws IOException, TikaException, SAXException {
        final File tempFile = File.createTempFile("dspacetextextract" + inputStream.hashCode(), ".txt");
        if (verbose) {
            System.out.println("(Verbose mode) Extracted text was written to temporary file at " + tempFile.getAbsolutePath());
        } else {
            tempFile.deleteOnExit();
        }

        // try-with-resources closes (and thereby flushes) the writer before the file is reopened.
        try (FileWriter writer = new FileWriter(tempFile, StandardCharsets.UTF_8)) {
            ContentHandlerDecorator fileHandler = new ContentHandlerDecorator() { // from class: org.dspace.app.mediafilter.TikaTextExtractionFilter.1
                @Override
                public void characters(char[] ch, int start, int length) throws SAXException {
                    writeChunk(ch, start, length);
                }

                @Override
                public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException {
                    writeChunk(ch, start, length);
                }

                /** Writes {@code length} characters of {@code ch}, beginning at {@code start}, to the file. */
                private void writeChunk(char[] ch, int start, int length) throws SAXException {
                    try {
                        // SAX supplies (offset, length). Writer.append(CharSequence, start, end)
                        // expects an END index, so write(char[], off, len) is the correct call.
                        writer.write(ch, start, length);
                    } catch (IOException e) {
                        String message = String.format("Could not append to temporary file at %s when performing text extraction", tempFile.getAbsolutePath());
                        TikaTextExtractionFilter.log.error(message, e);
                        throw new SAXException(message, e);
                    }
                }
            };
            new AutoDetectParser().parse(inputStream, new BodyContentHandler(fileHandler), new Metadata());
        }
        return new FileInputStream(tempFile);
    }
}
