package com.lucidworks.spark.example.hadoop;

import com.lucidworks.spark.SparkApp;
import com.lucidworks.spark.util.SolrSupport;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.OptionBuilder;
import org.apache.log4j.Logger;
import org.apache.solr.client.solrj.impl.CloudSolrClient;
import org.apache.solr.common.SolrInputDocument;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.PairFunction;
import scala.Tuple2;

/* loaded from: input_file:com/lucidworks/spark/example/hadoop/HdfsToSolrRDDProcessor.class */
/**
 * Example {@link SparkApp.RDDProcessor} that reads tab-delimited text from HDFS, turns every
 * well-formed line into a {@link SolrInputDocument} and indexes the documents into a Solr
 * collection, finishing with an explicit commit.
 *
 * <p>A line is well-formed when it splits into exactly as many tab-separated columns as
 * {@link #pigSchema} has entries; other lines are skipped. Empty columns are not added to the
 * document.
 */
public class HdfsToSolrRDDProcessor implements SparkApp.RDDProcessor {
    public static final Logger log = Logger.getLogger(HdfsToSolrRDDProcessor.class);

    /** Solr field names in the order the columns appear in each input line. */
    private static final String[] pigSchema = "id,integer1_i,integer2_i,long1_l,long2_l,float1_f,float2_f,double1_d,double2_d,timestamp1_tdt,timestamp2_tdt,string1_s,string2_s,string3_s,boolean1_b,boolean2_b,text1_en,text2_en,text3_en,random_bucket".split(",");

    /** Separator between the columns of an input line. */
    private static final String COLUMN_DELIMITER = "\t";

    /** Batch size handed to {@code SolrSupport.indexDocs}. */
    private static final int INDEX_BATCH_SIZE = 100;

    /**
     * Returns the name of this processor.
     *
     * @return the constant {@code "hdfs-to-solr"}
     */
    @Override // com.lucidworks.spark.SparkApp.RDDProcessor
    public String getName() {
        return "hdfs-to-solr";
    }

    /**
     * Declares the command-line options specific to this processor. All of them are optional
     * at the parser level and take exactly one argument.
     *
     * @return the {@code hdfsPath}, {@code queueSize}, {@code numRunners} and
     *         {@code pollQueueTime} options, in that order
     */
    @Override // com.lucidworks.spark.SparkApp.RDDProcessor
    public Option[] getOptions() {
        return new Option[]{
            buildOption("hdfsPath", "PATH",
                    "HDFS path identifying the directories / files to index"),
            buildOption("queueSize", "INT",
                    "Queue size for ConcurrentUpdateSolrClient; default is 1000"),
            buildOption("numRunners", "INT",
                    "Number of runner threads per ConcurrentUpdateSolrClient instance; default is 2"),
            buildOption("pollQueueTime", "INT",
                    "Number of millis to wait until CUSS sees a doc on the queue before it closes the current request and starts another; default is 20 ms")
        };
    }

    /**
     * Builds one optional, single-argument option.
     *
     * <p>{@link OptionBuilder} accumulates its settings in static state that is reset by
     * {@code create}, so every option has to be created right after it has been configured;
     * configuring several options first and creating them afterwards would apply only the last
     * configuration, and only to the first option created.
     */
    private static Option buildOption(String name, String argName, String description) {
        OptionBuilder.withArgName(argName);
        OptionBuilder.hasArg();
        OptionBuilder.isRequired(false);
        OptionBuilder.withDescription(description);
        return OptionBuilder.create(name);
    }

    /**
     * Indexes the files found under the {@code hdfsPath} option into Solr.
     *
     * <p>Reads {@code zkHost} (default {@code localhost:9983}) and {@code collection}
     * (default {@code collection1}) from the command line, indexes all well-formed lines and
     * then sends an explicit commit to the collection.
     *
     * @param sparkConf   configuration used to create the Spark context for this run
     * @param commandLine parsed command-line options
     * @return {@code 0} on success
     * @throws IllegalArgumentException if {@code hdfsPath} is missing or empty
     * @throws NumberFormatException    if {@code queueSize}, {@code numRunners} or
     *                                  {@code pollQueueTime} is not an integer
     * @throws Exception                if reading, indexing or committing fails
     */
    @Override // com.lucidworks.spark.SparkApp.RDDProcessor
    public int run(SparkConf sparkConf, CommandLine commandLine) throws Exception {
        String hdfsPath = commandLine.getOptionValue("hdfsPath");
        if (hdfsPath == null || hdfsPath.isEmpty()) {
            // The option is optional for the parser, but there is nothing to index without it.
            throw new IllegalArgumentException("Missing required option: hdfsPath");
        }
        String zkHost = commandLine.getOptionValue("zkHost", "localhost:9983");
        String collection = commandLine.getOptionValue("collection", "collection1");

        // These values are not passed on to the indexing call; they are still parsed so that a
        // non-numeric value is rejected, and this happens before any Spark resources exist.
        Integer.parseInt(commandLine.getOptionValue("queueSize", "1000"));
        Integer.parseInt(commandLine.getOptionValue("numRunners", "2"));
        Integer.parseInt(commandLine.getOptionValue("pollQueueTime", "20"));

        JavaSparkContext sparkContext = new JavaSparkContext(sparkConf);
        try {
            // Malformed lines are filtered out before mapping: a null tuple in the pair RDD
            // would make values() fail with a NullPointerException.
            JavaPairRDD<String, SolrInputDocument> docsById = sparkContext.textFile(hdfsPath)
                    .filter(new WellFormedLineFilter())
                    .mapToPair(new LineToSolrDocument());
            SolrSupport.indexDocs(zkHost, collection, INDEX_BATCH_SIZE, docsById.values().rdd());

            // Explicit final commit (waitFlush = true, waitSearcher = true).
            CloudSolrClient solrClient = SolrSupport.getCachedCloudClient(zkHost);
            solrClient.setDefaultCollection(collection);
            solrClient.commit(true, true);
        } finally {
            // This method created the context, so it is also responsible for releasing it.
            sparkContext.stop();
        }
        return 0;
    }

    /**
     * Splits an input line into its columns.
     *
     * <p>NOTE(review): {@code String.split} drops trailing empty strings, so a row whose last
     * columns are empty yields fewer columns than the schema and is skipped as malformed.
     */
    private static String[] splitColumns(String line) {
        return line.split(COLUMN_DELIMITER);
    }

    /**
     * Accepts only lines whose column count matches the schema. Declared as a static nested
     * class so that the function does not carry a reference to the enclosing processor.
     */
    private static final class WellFormedLineFilter implements Function<String, Boolean> {
        private static final long serialVersionUID = 1L;

        @Override
        public Boolean call(String line) {
            return splitColumns(line).length == pigSchema.length;
        }
    }

    /**
     * Converts a well-formed line into an {@code (id, document)} pair. Must only be applied to
     * lines accepted by {@link WellFormedLineFilter}, because columns are matched to schema
     * fields by position.
     */
    private static final class LineToSolrDocument
            implements PairFunction<String, String, SolrInputDocument> {
        private static final long serialVersionUID = 1L;

        @Override
        public Tuple2<String, SolrInputDocument> call(String line) {
            String[] columns = splitColumns(line);
            SolrInputDocument doc = new SolrInputDocument();
            for (int i = 0; i < columns.length; i++) {
                // Empty columns are left out of the document instead of being sent as "".
                if (!columns[i].isEmpty()) {
                    doc.setField(pigSchema[i], columns[i]);
                }
            }
            // The key is null when the id column is empty.
            return new Tuple2<>((String) doc.getFieldValue("id"), doc);
        }
    }
}
