package com.google.refine.importers;

import com.fasterxml.jackson.databind.node.ObjectNode;
import com.google.common.base.CharMatcher;
import com.google.refine.ProjectMetadata;
import com.google.refine.importers.TabularImportingParserBase;
import com.google.refine.importing.ImportingJob;
import com.google.refine.importing.ImportingUtilities;
import com.google.refine.model.Project;
import com.google.refine.util.JSONUtilities;
import com.univocity.parsers.common.AbstractParser;
import com.univocity.parsers.common.TextParsingException;
import com.univocity.parsers.common.record.Record;
import com.univocity.parsers.csv.CsvFormat;
import com.univocity.parsers.csv.CsvParser;
import com.univocity.parsers.csv.CsvParserSettings;
import com.univocity.parsers.csv.UnescapedQuoteHandling;
import com.univocity.parsers.tsv.TsvParser;
import com.univocity.parsers.tsv.TsvParserSettings;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.LineNumberReader;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.commons.text.StringEscapeUtils;

/* loaded from: input_file:com/google/refine/importers/SeparatorBasedImporter.class */
public class SeparatorBasedImporter extends TabularImportingParserBase {
    /**
     * Line budget for the separator/format guessers: the guessing code in this class
     * samples at most 100 lines of a file.
     */
    public static final int GUESSER_LINE_COUNT = 100;
    /**
     * Quote character used when the import options do not supply a single-character
     * {@code quoteCharacter}. Assigned in the constructor from the default univocity
     * CSV format; also reported to the parser UI as the initial quote character.
     */
    char DEFAULT_QUOTE_CHAR;

    /* loaded from: input_file:com/google/refine/importers/SeparatorBasedImporter$Separator.class */
    /**
     * Running statistics for one candidate separator character, filled in by the
     * separator-guessing code of the enclosing class.
     */
    public static class Separator {
        /** The candidate separator character these statistics describe. */
        char separator;
        /** Sum of the per-line occurrence counts over all lines folded in so far. */
        int totalCount = 0;
        /** Sum of the squared per-line occurrence counts (used for the standard deviation). */
        int totalOfSquaredCount = 0;
        /** Occurrences seen on the line currently being scanned; reset to 0 once folded into the totals. */
        int currentLineCount = 0;
        /** Mean number of occurrences per sampled line; set after scanning finishes. */
        double averagePerLine;
        /** Standard deviation of the per-line occurrence count; set after scanning finishes. */
        double stddev;
    }

    public SeparatorBasedImporter() {
        super(false);
        this.DEFAULT_QUOTE_CHAR = new CsvParserSettings().getFormat().getQuote();
    }

    /**
     * Builds the initial parser options for the UI: the options produced by the
     * superclass plus a guessed separator and the quote/trim defaults of this importer.
     *
     * @param importingJob job whose files are inspected to guess the separator
     * @param list file records of the job; forwarded to the superclass and to the guesser
     * @param str forwarded unchanged to the superclass implementation
     * @return the options object returned by the superclass, with the extra keys set
     */
    @Override // com.google.refine.importers.TabularImportingParserBase, com.google.refine.importers.ImportingParserBase, com.google.refine.importing.ImportingParser
    public ObjectNode createParserUIInitializationData(ImportingJob importingJob, List<ObjectNode> list, String str) {
        final String escapedTab = "\\t";
        ObjectNode options = super.createParserUIInitializationData(importingJob, list, str);

        // Fall back to an (escaped) tab when no separator could be guessed.
        String separator = guessSeparator(importingJob, list);
        if (separator == null) {
            separator = escapedTab;
        }
        // Quote processing is switched off by default only for tab-separated input.
        boolean tabSeparated = escapedTab.equals(separator);

        JSONUtilities.safePut(options, "separator", separator);
        JSONUtilities.safePut(options, "guessCellValueTypes", false);
        JSONUtilities.safePut(options, "processQuotes", !tabSeparated);
        JSONUtilities.safePut(options, "quoteCharacter", String.valueOf(DEFAULT_QUOTE_CHAR));
        JSONUtilities.safePut(options, "trimStrings", true);
        return options;
    }

    /**
     * Parses one separator-delimited file and feeds its rows to
     * {@link TabularImportingParserBase#readTable}.
     *
     * <p>Options read from {@code objectNode}: {@code separator} (Java-escaped, defaults to
     * tab), {@code processQuotes} (default true), {@code strictQuotes} (default false),
     * {@code columnNames} (optional explicit header row) and {@code quoteCharacter}
     * (used only when it trims to exactly one character). When non-blank column names
     * are supplied, {@code headerLines} is set to 1 in {@code objectNode} and the names
     * are emitted as the first row.
     *
     * <p>A dedicated TSV parser is used only for tab-separated input with both quote
     * options off; every other combination goes through the CSV parser.
     *
     * @param project project passed through to {@code readTable}
     * @param projectMetadata not used by this implementation
     * @param importingJob job passed through to {@code readTable}
     * @param str not used by this implementation
     * @param reader character source of the file; it is closed when parsing ends,
     *        whether normally or with an error
     * @param i passed through to {@code readTable} (presumably the row limit; confirm there)
     * @param objectNode parser options; may be modified as described above
     * @param list collector that receives any parsing or I/O exception instead of it being thrown
     */
    @Override // com.google.refine.importers.ImportingParserBase
    public void parseOneFile(Project project, ProjectMetadata projectMetadata, ImportingJob importingJob, String str, Reader reader, int i, ObjectNode objectNode, List<Exception> list) {
        final int maxCharsPerColumn = 256 * 1024;

        String separatorOption = JSONUtilities.getString(objectNode, "separator", "\\t");
        if (separatorOption == null || separatorOption.isEmpty()) {
            separatorOption = "\\t";
        }
        String separator = StringEscapeUtils.unescapeJava(separatorOption);
        boolean processQuotes = JSONUtilities.getBoolean(objectNode, "processQuotes", true);
        boolean strictQuotes = JSONUtilities.getBoolean(objectNode, "strictQuotes", false);
        boolean plainTsv = "\t".equals(separator) && !processQuotes && !strictQuotes;

        // Collect the non-blank, trimmed column names supplied by the caller, if any.
        List<Object> suppliedNames = new ArrayList<>();
        if (objectNode.has("columnNames")) {
            for (String rawName : JSONUtilities.getStringArray(objectNode, "columnNames")) {
                String name = CharMatcher.whitespace().trimFrom(rawName);
                if (!name.isEmpty()) {
                    suppliedNames.add(name);
                }
            }
        }
        final List<Object> headerRow = suppliedNames.isEmpty() ? null : suppliedNames;
        if (headerRow != null) {
            // The supplied names are emitted as the first row, so it must be treated as the header.
            JSONUtilities.safePut(objectNode, "headerLines", 1L);
        }

        // Only a single-character (after trimming) option overrides the default quote.
        char quote = DEFAULT_QUOTE_CHAR;
        String quoteOption = JSONUtilities.getString(objectNode, "quoteCharacter", null);
        if (quoteOption != null) {
            String trimmedQuote = CharMatcher.whitespace().trimFrom(quoteOption);
            if (trimmedQuote.length() == 1) {
                quote = trimmedQuote.charAt(0);
            }
        }

        // TsvParser and CsvParser only share AbstractParser as a supertype.
        final AbstractParser<?> parser;
        if (plainTsv) {
            TsvParserSettings tsvSettings = new TsvParserSettings();
            tsvSettings.setMaxCharsPerColumn(maxCharsPerColumn);
            parser = new TsvParser(tsvSettings);
        } else {
            CsvParserSettings csvSettings = new CsvParserSettings();
            CsvFormat format = csvSettings.getFormat();
            format.setDelimiter(separator);
            format.setQuote(quote);
            format.setLineSeparator("\n");
            csvSettings.setIgnoreLeadingWhitespaces(false);
            csvSettings.setIgnoreTrailingWhitespaces(false);
            if (strictQuotes) {
                csvSettings.setUnescapedQuoteHandling(UnescapedQuoteHandling.RAISE_ERROR);
            }
            csvSettings.setKeepQuotes(!processQuotes);
            csvSettings.setMaxCharsPerColumn(maxCharsPerColumn);
            parser = new CsvParser(csvSettings);
        }

        // try-with-resources closes the reader on failure as well as on success.
        try (LineNumberReader lineReader = new LineNumberReader(reader)) {
            parser.beginParsing(lineReader);
            TabularImportingParserBase.readTable(project, importingJob, new TabularImportingParserBase.TableDataReader() {
                /** Whether the caller-supplied header row has already been handed out. */
                boolean usedColumnNames = false;

                @Override // com.google.refine.importers.TabularImportingParserBase.TableDataReader
                public List<Object> getNextRowOfCells() throws IOException {
                    if (headerRow != null && !this.usedColumnNames) {
                        this.usedColumnNames = true;
                        return headerRow;
                    }
                    Record record = parser.parseNextRecord();
                    if (record == null) {
                        return null;
                    }
                    return Arrays.asList((Object[]) record.getValues());
                }
            }, i, objectNode, list);
        } catch (TextParsingException | IOException e) {
            list.add(e);
        }
    }

    /**
     * Guesses the column separator for an importing job by inspecting up to the first
     * five file records, returning the guess for the first file that yields one.
     *
     * <p>For each record with a {@code location}, both the internal heuristic (with
     * quote handling on) and univocity's format detection are run. The internal guess
     * wins when it exists; a disagreement between the two is only logged. If only
     * univocity produced a result, its delimiter string is used.
     *
     * @param importingJob job whose raw data directory contains the files
     * @param list file records to inspect
     * @return the Java-escaped separator, or {@code null} if no inspected file gave a guess
     */
    public static String guessSeparator(ImportingJob importingJob, List<ObjectNode> list) {
        int recordsToInspect = Math.min(5, list.size());
        for (int index = 0; index < recordsToInspect; index++) {
            ObjectNode fileRecord = list.get(index);
            String encoding = ImportingUtilities.getEncoding(fileRecord);
            String location = JSONUtilities.getString(fileRecord, "location", null);
            if (location == null) {
                continue;
            }

            File file = new File(importingJob.getRawDataDir(), location);
            Separator internalGuess = guessSeparator(file, encoding, true);
            CsvFormat detectedFormat = guessFormat(file, encoding);

            if (internalGuess == null) {
                if (detectedFormat == null) {
                    // Neither strategy produced a result; try the next file.
                    continue;
                }
                return StringEscapeUtils.escapeJava(detectedFormat.getDelimiterString());
            }

            if (detectedFormat != null && detectedFormat.getDelimiter() != internalGuess.separator) {
                logger.warn("Delimiter guesses disagree - uniVocity: '{}' - internal: '{}'", Character.valueOf(detectedFormat.getDelimiter()), Character.valueOf(internalGuess.separator));
            }
            return StringEscapeUtils.escapeJava(Character.toString(internalGuess.separator));
        }
        return null;
    }

    /**
     * Detects the CSV format of a file using univocity's automatic format detection,
     * sampling up to {@link #GUESSER_LINE_COUNT} rows.
     *
     * <p>All streams opened here are closed before returning, including when detection
     * fails part-way.
     *
     * @param file file to inspect
     * @param str encoding name handed to {@code ImportingUtilities.getInputStreamReader}
     * @return the detected format, or {@code null} if the file could not be read
     */
    public static CsvFormat guessFormat(File file, String str) {
        try (FileInputStream input = new FileInputStream(file);
                InputStreamReader decoded = ImportingUtilities.getInputStreamReader(input, str);
                LineNumberReader lineReader = new LineNumberReader(decoded)) {
            CsvParserSettings settings = new CsvParserSettings();
            settings.detectFormatAutomatically();
            settings.setFormatDetectorRowSampleCount(GUESSER_LINE_COUNT);
            CsvParser parser = new CsvParser(settings);
            // Format detection runs as part of beginParsing.
            parser.beginParsing(lineReader);
            return parser.getDetectedFormat();
        } catch (IOException e) {
            // Guessing is best-effort: report the problem and let the caller fall back.
            logger.warn("Unable to detect the CSV format of '{}'", file, e);
            return null;
        }
    }

    /**
     * Guesses the separator of a file without quote handling; equivalent to calling
     * the three-argument overload with {@code false} as the last argument.
     *
     * @param file file to inspect
     * @param str encoding name, forwarded to the three-argument overload
     * @return whatever the three-argument overload returns for these arguments
     */
    public static Separator guessSeparator(File file, String str) {
        return guessSeparator(file, str, false);
    }

    /**
     * Guesses the separator character of a file from how regularly each candidate
     * character occurs per line.
     *
     * <p>Up to {@link #GUESSER_LINE_COUNT} non-empty lines (and roughly 64K characters)
     * are sampled. Every character that is not a letter, digit, double quote, single
     * quote, space, dot or hyphen is a candidate. A double quote toggles an "inside
     * quotes" state; while inside quotes, physical lines are merged into one logical
     * line, and when {@code z} is true, candidates inside quotes are not counted. The
     * candidate with the smallest ratio of standard deviation to mean occurrences per
     * line is chosen, provided that ratio is below 0.1.
     *
     * <p>The statistics are computed in floating point. A sample with a single logical
     * line therefore yields an undefined (NaN) deviation and no guess, rather than an
     * integer division by zero.
     *
     * @param file file to inspect
     * @param str encoding name handed to {@code ImportingUtilities.getInputStreamReader}
     * @param z whether separators inside double-quoted sections are ignored
     * @return the best candidate with its statistics filled in, or {@code null} if no
     *         candidate is regular enough or the file could not be read
     */
    public static Separator guessSeparator(File file, String str, boolean z) {
        final int maxCharsToScan = 64 * 1024;
        final String neverSeparators = "\"' .-";

        List<Separator> candidates = new ArrayList<>();
        int lineCount = 0;

        try (FileInputStream input = new FileInputStream(file);
                InputStreamReader decoded = ImportingUtilities.getInputStreamReader(input, str);
                LineNumberReader lineReader = new LineNumberReader(decoded)) {
            Map<Character, Separator> candidatesByChar = new HashMap<>();
            int charsScanned = 0;
            boolean inQuote = false;
            String line;
            while (charsScanned < maxCharsToScan
                    && lineCount < GUESSER_LINE_COUNT
                    && (line = lineReader.readLine()) != null) {
                charsScanned += line.length() + 1; // +1 accounts for the line terminator
                if (line.isEmpty()) {
                    continue;
                }
                // A line that starts inside an open quote continues the previous logical line.
                if (!inQuote) {
                    lineCount++;
                }

                for (int pos = 0; pos < line.length(); pos++) {
                    char c = line.charAt(pos);
                    if (c == '"') {
                        inQuote = !inQuote;
                    }
                    boolean isCandidate = !Character.isLetterOrDigit(c)
                            && neverSeparators.indexOf(c) < 0
                            && !(z && inQuote);
                    if (isCandidate) {
                        Separator candidate = candidatesByChar.get(c);
                        if (candidate == null) {
                            candidate = new Separator();
                            candidate.separator = c;
                            candidatesByChar.put(c, candidate);
                            candidates.add(candidate);
                        }
                        candidate.currentLineCount++;
                    }
                }

                // Fold the per-line counts into the totals only once the logical line is complete.
                if (!inQuote) {
                    for (Separator candidate : candidates) {
                        candidate.totalCount += candidate.currentLineCount;
                        candidate.totalOfSquaredCount += candidate.currentLineCount * candidate.currentLineCount;
                        candidate.currentLineCount = 0;
                    }
                }
            }
        } catch (IOException e) {
            // Covers UnsupportedEncodingException too; guessing is best-effort.
            logger.warn("Unable to guess the separator of '{}'", file, e);
            return null;
        }

        if (candidates.isEmpty()) {
            return null;
        }

        // Floating-point arithmetic throughout: avoids truncating the mean, overflowing
        // the squared total, and dividing by an integer zero when lineCount == 1.
        double lines = lineCount;
        for (Separator candidate : candidates) {
            double total = candidate.totalCount;
            candidate.averagePerLine = total / lines;
            candidate.stddev = Math.sqrt(
                    ((lines * candidate.totalOfSquaredCount) - (total * total)) / (lines * (lines - 1)));
        }

        // Most regular candidate first; NaN ratios sort last under Double.compare.
        candidates.sort(Comparator.comparingDouble(candidate -> candidate.stddev / candidate.averagePerLine));
        Separator best = candidates.get(0);
        return best.stddev / best.averagePerLine < 0.1d ? best : null;
    }
}
