package org.languagetool.languagemodel;

import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import org.apache.commons.lang.StringUtils;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.RegexpQuery;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.FSDirectory;
import org.languagetool.Experimental;

/* loaded from: input_file:org/languagetool/languagemodel/LuceneSingleIndexLanguageModel.class */
/**
 * Language model that looks up ngram occurrence counts in Lucene indexes.
 *
 * <p>The top-level index directory is expected to contain one sub directory per
 * ngram size, named {@code 1grams}, {@code 2grams}, {@code 3grams} and optionally
 * {@code 4grams}. Each sub directory is opened as a separate Lucene index.
 *
 * <p>Opened indexes are shared between instances through a static cache keyed by
 * directory. The cache is a plain {@link HashMap} and this class does not
 * synchronize access to it.
 */
public class LuceneSingleIndexLanguageModel extends BaseLanguageModel {
    /** Cache of opened searchers, keyed by ngram index directory; shared by all instances. */
    private static final Map<File, LuceneSearcher> dirToSearcherMap = new HashMap<>();
    /** The ngram sub directories this instance has registered, in registration order. */
    private final List<File> indexes = new ArrayList<>();
    /** Searchers of this instance, keyed by ngram size (1 for {@code 1grams} etc.). */
    private final Map<Integer, LuceneSearcher> luceneSearcherMap = new HashMap<>();
    /** Directory containing the {@code Ngrams} sub directories; {@code null} for the experimental constructor. */
    private final File topIndexDir;
    /** Largest ngram size that {@link #getCount(List)} accepts. */
    private final long maxNgram;

    /**
     * Bundles the Lucene directory, reader and searcher opened for one ngram index.
     */
    public static class LuceneSearcher {
        final FSDirectory directory;
        final IndexReader reader;
        final IndexSearcher searcher;

        /**
         * Opens the Lucene index stored in the given directory.
         *
         * @param file the index directory; if it is a symbolic link, its canonical
         *             path is opened instead
         * @throws IOException if the directory or the index reader cannot be opened
         */
        private LuceneSearcher(File file) throws IOException {
            Path path = file.toPath();
            Path indexPath = Files.isSymbolicLink(path) ? file.getCanonicalFile().toPath() : path;
            FSDirectory openedDirectory = FSDirectory.open(indexPath);
            IndexReader openedReader;
            try {
                openedReader = DirectoryReader.open(openedDirectory);
            } catch (IOException | RuntimeException e) {
                // Do not leak the already opened directory when the reader cannot be created.
                try {
                    openedDirectory.close();
                } catch (IOException closeFailure) {
                    e.addSuppressed(closeFailure);
                }
                throw e;
            }
            this.directory = openedDirectory;
            this.reader = openedReader;
            this.searcher = new IndexSearcher(openedReader);
        }

        /**
         * @return the index reader of this searcher
         */
        public IndexReader getReader() {
            return this.reader;
        }
    }

    /**
     * Checks that the given directory exists and contains the entries
     * {@code 1grams}, {@code 2grams} and {@code 3grams}.
     *
     * @param file the top-level ngram directory to check
     * @throws RuntimeException if {@code file} is not an existing directory, if its
     *         content cannot be listed, or if fewer than three of the expected
     *         entries are present
     */
    public static void validateDirectory(File file) {
        if (!file.isDirectory()) {
            throw new RuntimeException("Not found or is not a directory:\n" + file + "\nAs ngram directory, please select the directory that has a subdirectory like 'en'\n(or whatever language code you're using).");
        }
        String[] entries = file.list();
        if (entries == null) {
            // File.list() returns null if an I/O error occurs while reading the directory.
            throw new RuntimeException("Could not list content of directory: " + file.getAbsolutePath());
        }
        List<String> ngramDirNames = new ArrayList<>();
        for (String entry : entries) {
            if (entry.matches("[123]grams")) {
                ngramDirNames.add(entry);
            }
        }
        if (ngramDirNames.isEmpty()) {
            throw new RuntimeException("Directory must contain at least '1grams', '2grams', and '3grams': " + file.getAbsolutePath());
        }
        if (ngramDirNames.size() < 3) {
            throw new RuntimeException("Expected at least '1grams', '2grams', and '3grams' sub directories but only got " + ngramDirNames + " in " + file.getAbsolutePath());
        }
    }

    /**
     * Empties the static searcher cache. The cached searchers are only removed
     * from the cache, they are not closed by this method.
     */
    @Experimental
    public static void clearCaches() {
        dirToSearcherMap.clear();
    }

    /**
     * Creates a model backed by the {@code 1grams} to {@code 4grams} sub
     * directories of the given directory; sub directories that do not exist are
     * skipped.
     *
     * @param file the top-level ngram directory, validated via
     *             {@link #doValidateDirectory(File)}
     * @throws RuntimeException if validation fails, if no ngram sub directory was
     *         found, or if an index cannot be opened
     */
    public LuceneSingleIndexLanguageModel(File file) {
        doValidateDirectory(file);
        this.topIndexDir = file;
        for (int ngramSize = 1; ngramSize <= 4; ngramSize++) {
            addIndex(file, ngramSize);
        }
        if (this.luceneSearcherMap.isEmpty()) {
            throw new RuntimeException("No directories '1grams' ... '3grams' found in " + file);
        }
        this.maxNgram = Collections.max(this.luceneSearcherMap.keySet());
    }

    /**
     * Creates a model without any index directory; no searchers are registered.
     *
     * @param i the maximum ngram size accepted by {@link #getCount(List)}
     */
    @Experimental
    public LuceneSingleIndexLanguageModel(int i) {
        this.maxNgram = i;
        this.topIndexDir = null;
    }

    /**
     * Validation hook called by {@link #LuceneSingleIndexLanguageModel(File)}
     * before any index is opened. This implementation delegates to
     * {@link #validateDirectory(File)}.
     *
     * @param file the top-level ngram directory
     */
    protected void doValidateDirectory(File file) {
        validateDirectory(file);
    }

    /**
     * Registers the searcher for {@code <ngramSize>grams} below the given
     * directory; does nothing if that sub directory does not exist.
     */
    private void addIndex(File topDir, int ngramSize) {
        File ngramDir = new File(topDir, ngramSize + "grams");
        if (!ngramDir.isDirectory()) {
            return;
        }
        if (this.luceneSearcherMap.containsKey(ngramSize)) {
            throw new RuntimeException("Searcher for ngram size " + ngramSize + " already exists");
        }
        this.luceneSearcherMap.put(ngramSize, getCachedLuceneSearcher(ngramDir));
        this.indexes.add(ngramDir);
    }

    /**
     * Returns the occurrence count of the ngram formed by the given tokens. The
     * tokens are joined with a single space and looked up in the index whose
     * ngram size equals the number of tokens.
     *
     * @param list the tokens of the ngram
     * @return the sum of the {@code count} fields of all matching index documents
     * @throws NullPointerException if {@code list} is {@code null}
     * @throws RuntimeException if {@code list} is longer than the maximum ngram
     *         size, or if no index for that ngram size is available
     */
    @Override // org.languagetool.languagemodel.BaseLanguageModel
    public long getCount(List<String> list) {
        // Check for null before the list is dereferenced.
        Objects.requireNonNull(list);
        int ngramSize = list.size();
        if (ngramSize > this.maxNgram) {
            throw new RuntimeException("Requested " + ngramSize + "gram but index has only up to " + this.maxNgram + "gram: " + list);
        }
        Term term = new Term("ngram", StringUtils.join(list, " "));
        return getCount(term, getLuceneSearcher(ngramSize));
    }

    /**
     * Returns the occurrence count of a single token, looked up as a 1-gram.
     *
     * @param str the token
     * @return the count as returned by {@link #getCount(List)}
     * @throws NullPointerException if {@code str} is {@code null}
     */
    @Override // org.languagetool.languagemodel.BaseLanguageModel
    public long getCount(String str) {
        Objects.requireNonNull(str);
        return getCount(Collections.singletonList(str));
    }

    /**
     * Sums the {@code totalTokenCount} values of the meta documents stored in the
     * 1grams index.
     *
     * @return the summed total token count
     * @throws RuntimeException if there is no 1grams index, if no meta document
     *         or more than 1000 meta documents are found, or if reading the
     *         index fails
     */
    @Override // org.languagetool.languagemodel.BaseLanguageModel
    public long getTotalTokenCount() {
        LuceneSearcher unigramSearcher = getLuceneSearcher(1);
        try {
            RegexpQuery metaDocQuery = new RegexpQuery(new Term("totalTokenCount", ".*"));
            TopDocs docs = unigramSearcher.searcher.search(metaDocQuery, 1000);
            if (docs.totalHits == 0) {
                throw new RuntimeException("Expected 'totalTokenCount' meta documents not found in 1grams index: " + unigramSearcher.directory);
            }
            if (docs.totalHits > 1000) {
                throw new RuntimeException("Did not expect more than 1000 'totalTokenCount' meta documents: " + docs.totalHits + " in " + unigramSearcher.directory);
            }
            long total = 0;
            for (ScoreDoc scoreDoc : docs.scoreDocs) {
                total += Long.parseLong(unigramSearcher.reader.document(scoreDoc.doc).get("totalTokenCount"));
            }
            return total;
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Returns the searcher registered for the given ngram size.
     *
     * @param i the ngram size
     * @return the searcher, never {@code null}
     * @throws RuntimeException if no searcher is registered for that size
     */
    protected LuceneSearcher getLuceneSearcher(int i) {
        LuceneSearcher luceneSearcher = this.luceneSearcherMap.get(i);
        if (luceneSearcher == null) {
            throw new RuntimeException("No " + i + "grams directory found in " + this.topIndexDir);
        }
        return luceneSearcher;
    }

    /**
     * Returns the cached searcher for the given index directory, opening and
     * caching a new one if the cache has no entry for it.
     */
    private LuceneSearcher getCachedLuceneSearcher(File indexDir) {
        LuceneSearcher cached = dirToSearcherMap.get(indexDir);
        if (cached != null) {
            return cached;
        }
        try {
            LuceneSearcher opened = new LuceneSearcher(indexDir);
            dirToSearcherMap.put(indexDir, opened);
            return opened;
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Sums the {@code count} field of all documents matching the given term.
     * Terms with more than 2000 matching documents are rejected.
     */
    private long getCount(Term term, LuceneSearcher luceneSearcher) {
        try {
            TopDocs docs = luceneSearcher.searcher.search(new TermQuery(term), 2000);
            if (docs.totalHits > 2000) {
                throw new RuntimeException("More than 2000 matches for '" + term + "' not supported for performance reasons: " + docs.totalHits + " matches in " + luceneSearcher.directory);
            }
            long total = 0;
            for (ScoreDoc scoreDoc : docs.scoreDocs) {
                total += Long.parseLong(luceneSearcher.reader.document(scoreDoc.doc).get("count"));
            }
            return total;
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Closes the reader and the directory of every searcher registered with this
     * instance and removes those searchers from the static cache, so that models
     * created afterwards open fresh searchers instead of receiving closed ones.
     *
     * <p>All searchers are attempted even if closing one of them fails.
     *
     * @throws RuntimeException wrapping the first {@link IOException} that
     *         occurred; later failures are attached to it as suppressed exceptions
     */
    @Override // org.languagetool.languagemodel.LanguageModel, java.lang.AutoCloseable
    public void close() {
        IOException firstFailure = null;
        for (LuceneSearcher luceneSearcher : this.luceneSearcherMap.values()) {
            // A closed searcher must not be handed out by the cache again.
            dirToSearcherMap.values().remove(luceneSearcher);
            // Resources are closed in reverse declaration order: reader first, then
            // directory; the directory is closed even if closing the reader fails.
            try (FSDirectory directory = luceneSearcher.directory;
                 IndexReader reader = luceneSearcher.reader) {
                // nothing to do, the try-with-resources statement performs the closing
            } catch (IOException e) {
                if (firstFailure == null) {
                    firstFailure = e;
                } else {
                    firstFailure.addSuppressed(e);
                }
            }
        }
        if (firstFailure != null) {
            throw new RuntimeException(firstFailure);
        }
    }

    /**
     * @return the string form of the list of registered ngram index directories
     */
    @Override
    public String toString() {
        return this.indexes.toString();
    }
}
