package com.lucidworks.spark.example.ml;

import com.lucidworks.spark.SparkApp;
import com.lucidworks.spark.analysis.LuceneTextAnalyzer;
import com.lucidworks.spark.fusion.FusionMLModelSupport;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.Option;
import org.apache.spark.SparkConf;
import org.apache.spark.mllib.classification.SVMModel;
import org.apache.spark.mllib.classification.SVMWithSGD$;
import org.apache.spark.mllib.evaluation.BinaryClassificationMetrics;
import org.apache.spark.mllib.feature.HashingTF;
import org.apache.spark.mllib.feature.Normalizer;
import org.apache.spark.mllib.feature.StandardScaler;
import org.apache.spark.mllib.feature.StandardScalerModel;
import org.apache.spark.mllib.linalg.Vector;
import org.apache.spark.mllib.regression.LabeledPoint;
import org.apache.spark.rdd.RDD;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.SparkSession$;
import org.apache.spark.sql.types.StringType$;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructField$;
import org.apache.spark.sql.types.StructType;
import org.apache.spark.sql.types.StructType$;
import org.apache.spark.storage.StorageLevel$;
import scala.Array$;
import scala.Predef$;
import scala.Predef$ArrowAssoc$;
import scala.StringContext;
import scala.Tuple2;
import scala.collection.JavaConverters$;
import scala.collection.TraversableOnce;
import scala.collection.immutable.HashMap;
import scala.collection.immutable.HashMap$;
import scala.collection.immutable.Nil$;
import scala.collection.immutable.StringOps;
import scala.collection.mutable.StringBuilder;
import scala.reflect.ClassTag$;
import scala.reflect.ScalaSignature;
import scala.runtime.BoxesRunTime;
import scala.runtime.ObjectRef;
import scala.runtime.RichInt$;

/* compiled from: SVMExample.scala */
@ScalaSignature(bytes = "\u0006\u0001A<Q!\u0001\u0002\t\u00025\t!b\u0015,N\u000bb\fW\u000e\u001d7f\u0015\t\u0019A!\u0001\u0002nY*\u0011QAB\u0001\bKb\fW\u000e\u001d7f\u0015\t9\u0001\"A\u0003ta\u0006\u00148N\u0003\u0002\n\u0015\u0005QA.^2jI^|'o[:\u000b\u0003-\t1aY8n\u0007\u0001\u0001\"AD\b\u000e\u0003\t1Q\u0001\u0005\u0002\t\u0002E\u0011!b\u0015,N\u000bb\fW\u000e\u001d7f'\ry!\u0003\u0007\t\u0003'Yi\u0011\u0001\u0006\u0006\u0002+\u0005)1oY1mC&\u0011q\u0003\u0006\u0002\u0007\u0003:L(+\u001a4\u0011\u0005MI\u0012B\u0001\u000e\u0015\u00051\u0019VM]5bY&T\u0018M\u00197f\u0011\u0015ar\u0002\"\u0001\u001e\u0003\u0019a\u0014N\\5u}Q\tQ\u0002C\u0004 \u001f\t\u0007I\u0011\u0001\u0011\u0002)\u0011+e)Q+M)~sU+T0G\u000b\u0006#VKU#T+\u0005\t\u0003C\u0001\u0012(\u001b\u0005\u0019#B\u0001\u0013&\u0003\u0011a\u0017M\\4\u000b\u0003\u0019\nAA[1wC&\u0011\u0001f\t\u0002\u0007'R\u0014\u0018N\\4\t\r)z\u0001\u0015!\u0003\"\u0003U!UIR!V\u0019R{f*V'`\r\u0016\u000bE+\u0016*F'\u0002Bq\u0001L\bC\u0002\u0013\u0005\u0001%\u0001\fE\u000b\u001a\u000bU\u000b\u0014+`\u001dVku,\u0013+F%\u0006#\u0016j\u0014(T\u0011\u0019qs\u0002)A\u0005C\u00059B)\u0012$B+2#vLT+N?&#VIU!U\u0013>s5\u000b\t\u0005\ba=\u0011\r\u0011\"\u0001!\u00035!UMZ1vYRT6\u000eS8ti\"1!g\u0004Q\u0001\n\u0005\na\u0002R3gCVdGOW6I_N$\b\u0005C\u00045\u001f\t\u0007I\u0011\u0001\u0011\u0002#\u0011+g-Y;mi\u000e{G\u000e\\3di&|g\u000e\u0003\u00047\u001f\u0001\u0006I!I\u0001\u0013\t\u00164\u0017-\u001e7u\u0007>dG.Z2uS>t\u0007\u0005C\u00049\u001f\u0005\u0005I\u0011B\u001d\u0002\u0017I,\u0017\r\u001a*fg>dg/\u001a\u000b\u0002uA\u0011!eO\u0005\u0003y\r\u0012aa\u00142kK\u000e$h\u0001\u0002\t\u0003\u0001y\u001a2!\u0010\n@!\t\u0001EI\u0004\u0002B\u00056\ta!\u0003\u0002D\r\u0005A1\u000b]1sW\u0006\u0003\b/\u0003\u0002F\r\na!\u000b\u0012#Qe>\u001cWm]:pe*\u00111I\u0002\u0005\u00069u\"\t\u0001\u0013\u000b\u0002\u0013B\u0011a\"\u0010\u0005\u0006\u0017v\"\t\u0001T\u0001\bO\u0016$h*Y7f)\u0005\t\u0003\"\u0002(>\t\u0003y\u0015AC4fi>\u0003H/[8ogR\t\u0001\u000bE\u0002\u0014
#NK!A\u0015\u000b\u0003\u000b\u0005\u0013(/Y=\u0011\u0005QkV\"A+\u000b\u0005Y;\u0016aA2mS*\u0011\u0001,W\u0001\bG>lWn\u001c8t\u0015\tQ6,\u0001\u0004ba\u0006\u001c\u0007.\u001a\u0006\u00029\u0006\u0019qN]4\n\u0005y+&AB(qi&|g\u000eC\u0003a{\u0011\u0005\u0013-A\u0002sk:$2AY3m!\t\u00192-\u0003\u0002e)\t\u0019\u0011J\u001c;\t\u000b\u0019|\u0006\u0019A4\u0002\t\r|gN\u001a\t\u0003Q*l\u0011!\u001b\u0006\u0003\u000feK!a[5\u0003\u0013M\u0003\u0018M]6D_:4\u0007\"\u0002,`\u0001\u0004i\u0007C\u0001+o\u0013\tyWKA\u0006D_6l\u0017M\u001c3MS:,\u0007")
/* loaded from: input_file:com/lucidworks/spark/example/ml/SVMExample.class */
public class SVMExample implements SparkApp.RDDProcessor {
    /**
     * Static forwarder to {@code SVMExample$.MODULE$.DefaultCollection()}.
     *
     * <p>Within this class the value is used as the fallback for the
     * {@code collection} command-line option in {@link #run}.
     *
     * @return the default collection name held by the {@code SVMExample$} singleton
     */
    public static String DefaultCollection() {
        return SVMExample$.MODULE$.DefaultCollection();
    }

    /**
     * Static forwarder to {@code SVMExample$.MODULE$.DefaultZkHost()}.
     *
     * <p>Within this class the value is used as the fallback for the
     * {@code zkHost} command-line option in {@link #run}.
     *
     * @return the default ZooKeeper host string held by the {@code SVMExample$} singleton
     */
    public static String DefaultZkHost() {
        return SVMExample$.MODULE$.DefaultZkHost();
    }

    /**
     * Static forwarder to {@code SVMExample$.MODULE$.DEFAULT_NUM_ITERATIONS()}.
     *
     * <p>The value is a {@code String}; {@link #run} parses it as an int when the
     * {@code numIterations} option is absent, and {@link #getOptions} embeds it in
     * the option description.
     *
     * @return the default iteration count, as text
     */
    public static String DEFAULT_NUM_ITERATIONS() {
        return SVMExample$.MODULE$.DEFAULT_NUM_ITERATIONS();
    }

    /**
     * Static forwarder to {@code SVMExample$.MODULE$.DEFAULT_NUM_FEATURES()}.
     *
     * <p>The value is a {@code String}; {@link #run} parses it as an int when the
     * {@code numFeatures} option is absent, and {@link #getOptions} embeds it in
     * the option description.
     *
     * @return the default feature count, as text
     */
    public static String DEFAULT_NUM_FEATURES() {
        return SVMExample$.MODULE$.DEFAULT_NUM_FEATURES();
    }

    /**
     * Returns the fixed name of this processor.
     *
     * @return the literal {@code "mllib-svm-scala"}
     */
    @Override // com.lucidworks.spark.SparkApp.RDDProcessor
    public String getName() {
        return "mllib-svm-scala";
    }

    /**
     * Declares the command-line options this processor understands.
     *
     * <p>Every option is a long option that takes one argument and is not
     * required. The descriptions of {@code numFeatures} and {@code numIterations}
     * include the defaults exposed by the {@code SVMExample$} singleton.
     *
     * @return the option definitions, in declaration order
     */
    @Override // com.lucidworks.spark.SparkApp.RDDProcessor
    public Option[] getOptions() {
        return new Option[] {
            optionalArgOption("indexTrainingData", "Path to training data to index"),
            optionalArgOption("indexTestData", "Path to test data to index"),
            optionalArgOption("sample", "Fraction (0 to 1) of full dataset to sample from Solr, default is 1"),
            optionalArgOption("numFeatures",
                    "Number of features; default is " + SVMExample$.MODULE$.DEFAULT_NUM_FEATURES()),
            optionalArgOption("numIterations",
                    "Number of iterations; default is " + SVMExample$.MODULE$.DEFAULT_NUM_ITERATIONS()),
            optionalArgOption("modelOutput", "Model output path; default is mllib-svm-sentiment"),
            optionalArgOption("fusionHostAndPort", "Fusion host and port; Example localhost:8764"),
            optionalArgOption("fusionUser", "Fusion user name"),
            optionalArgOption("fusionPassword", "Fusion password"),
            optionalArgOption("fusionRealm", "Fusion Realm")
        };
    }

    /** Builds a non-required long option that takes a single argument. */
    private static Option optionalArgOption(String longOpt, String description) {
        return Option.builder()
                .longOpt(longOpt)
                .hasArg()
                .required(false)
                .desc(description)
                .build();
    }

    /**
     * Optionally indexes CSV data into Solr, trains an SVM on tweets read back
     * from Solr, prints the area under the ROC curve for the test documents, and
     * saves the model.
     *
     * <p>Steps, in order:
     * <ol>
     *   <li>If {@code indexTrainingData} is set, load that CSV and write it to Solr.</li>
     *   <li>If {@code indexTestData} is set, load that CSV, rename {@code polarity}
     *       to {@code test_polarity}, and write it to Solr.</li>
     *   <li>Read training documents from Solr, sample them by the {@code sample}
     *       fraction (default {@code 1.0}), and map the rows to labeled points.</li>
     *   <li>Fit a {@code StandardScaler} on the training points and train the SVM
     *       with SGD for {@code numIterations} iterations.</li>
     *   <li>Read test documents, push them through the same pipeline, and print
     *       the area under ROC.</li>
     *   <li>Save the model: to the {@code modelOutput} path when
     *       {@code fusionHostAndPort} is absent; otherwise to Fusion, using the
     *       local variant when user or password is missing.</li>
     * </ol>
     *
     * <p>The {@code numFeatures} entry of the Fusion model metadata records the
     * feature count actually used for hashing, so a model trained with a
     * non-default {@code --numFeatures} is described correctly.
     *
     * @param sparkConf   configuration used to obtain the {@code SparkSession}
     * @param commandLine parsed command-line options
     * @return always {@code 0}
     */
    @Override // com.lucidworks.spark.SparkApp.RDDProcessor
    public int run(SparkConf sparkConf, CommandLine commandLine) {
        SparkSession spark = SparkSession$.MODULE$.builder().config(sparkConf).getOrCreate();

        // The list is built by prepending, so the resulting column order is:
        // polarity, id, date, query, username, tweet_txt.
        StructType csvSchema = StructType$.MODULE$.apply(Nil$.MODULE$
                .$colon$colon(new StructField("tweet_txt", StringType$.MODULE$, true, StructField$.MODULE$.apply$default$4()))
                .$colon$colon(new StructField("username", StringType$.MODULE$, true, StructField$.MODULE$.apply$default$4()))
                .$colon$colon(new StructField("query", StringType$.MODULE$, true, StructField$.MODULE$.apply$default$4()))
                .$colon$colon(new StructField("date", StringType$.MODULE$, true, StructField$.MODULE$.apply$default$4()))
                .$colon$colon(new StructField("id", StringType$.MODULE$, true, StructField$.MODULE$.apply$default$4()))
                .$colon$colon(new StructField("polarity", StringType$.MODULE$, true, StructField$.MODULE$.apply$default$4())));

        // Connection settings shared by every Solr read and write below.
        String zkHost = commandLine.getOptionValue("zkHost", SVMExample$.MODULE$.DefaultZkHost());
        String collection = commandLine.getOptionValue("collection", SVMExample$.MODULE$.DefaultCollection());
        String modelOutput = commandLine.getOptionValue("modelOutput", "mllib-svm-sentiment");

        // Analyzer definition handed to the row-mapping closures and stored in the model metadata.
        String analyzerSchema =
                "{ \"analyzers\": [{ \"name\": \"std_tok_lower\", \"tokenizer\": { \"type\": \"standard\" },\n"
                + "                \"filters\": [{ \"type\": \"lowercase\" }]}],\n"
                + "  \"fields\": [{ \"regex\": \".+\", \"analyzer\": \"std_tok_lower\" }]}\n";

        HashMap solrWriteOptions = HashMap$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(new Tuple2[]{
                solrOption("zkhost", zkHost),
                solrOption("collection", collection),
                solrOption("soft_commit_secs", "10")}));

        String trainingCsvPath = commandLine.getOptionValue("indexTrainingData");
        if (trainingCsvPath != null) {
            spark.read().format("com.databricks.spark.csv").schema(csvSchema).option("header", "false")
                    .load(trainingCsvPath)
                    .repartition(4)
                    .write().format("solr").options(solrWriteOptions).mode(SaveMode.Overwrite).save();
        }

        String testCsvPath = commandLine.getOptionValue("indexTestData");
        if (testCsvPath != null) {
            // Test rows keep their label under a different field so the training query does not match them.
            spark.read().format("com.databricks.spark.csv").schema(csvSchema).option("header", "false")
                    .load(testCsvPath)
                    .withColumnRenamed("polarity", "test_polarity")
                    .write().format("solr").options(solrWriteOptions).mode(SaveMode.Overwrite).save();
        }

        Dataset trainingDocs = spark.read().format("solr").options(HashMap$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(new Tuple2[]{
                solrOption("zkhost", zkHost),
                solrOption("collection", collection),
                solrOption("query", "+polarity:(0 OR 4) +tweet_txt:[* TO *]"),
                solrOption("fields", "id,polarity,tweet_txt"),
                solrOption("rows", "10000"),
                solrOption("splits", "true"),
                solrOption("split_field", "_version_"),
                solrOption("splits_per_shard", "8")})))
                .load()
                .sample(false, new StringOps(Predef$.MODULE$.augmentString(commandLine.getOptionValue("sample", "1.0"))).toDouble());

        // Field names to analyze; SVMExample$$anonfun$1 is defined outside this file
        // (presumably it trims each name - confirm).
        String[] featureFields = (String[]) Predef$.MODULE$.refArrayOps("tweet_txt".split(" "))
                .map(new SVMExample$$anonfun$1(this), Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(String.class)));
        int numFeatures = new StringOps(Predef$.MODULE$.augmentString(
                commandLine.getOptionValue("numFeatures", SVMExample$.MODULE$.DEFAULT_NUM_FEATURES()))).toInt();
        int numIterations = new StringOps(Predef$.MODULE$.augmentString(
                commandLine.getOptionValue("numIterations", SVMExample$.MODULE$.DEFAULT_NUM_ITERATIONS()))).toInt();

        RDD trainingPoints = trainingDocs.rdd().map(
                new SVMExample$$anonfun$2(this, featureFields, analyzerSchema, numFeatures),
                ClassTag$.MODULE$.apply(LabeledPoint.class));

        // The fitted scaler is boxed in an ObjectRef because the closure classes take it in that form.
        ObjectRef scalerRef = ObjectRef.create(new StandardScaler().fit(
                trainingPoints.map(new SVMExample$$anonfun$3(this), ClassTag$.MODULE$.apply(Vector.class))));

        SVMModel model = SVMWithSGD$.MODULE$.train(
                trainingPoints.map(new SVMExample$$anonfun$4(this, scalerRef), ClassTag$.MODULE$.apply(LabeledPoint.class))
                        .persist(StorageLevel$.MODULE$.MEMORY_ONLY_SER()),
                numIterations);

        Dataset testDocs = spark.read().format("solr").options(HashMap$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(new Tuple2[]{
                solrOption("zkhost", zkHost),
                solrOption("collection", collection),
                solrOption("query", "+test_polarity:[* TO *] +tweet_txt:[* TO *]"),
                solrOption("fields", "id,test_polarity,tweet_txt")})))
                .load()
                .withColumnRenamed("test_polarity", "polarity");
        testDocs.show();

        RDD scoreAndLabels = testDocs.rdd()
                .map(new SVMExample$$anonfun$5(this, featureFields, analyzerSchema, numFeatures), ClassTag$.MODULE$.apply(LabeledPoint.class))
                .map(new SVMExample$$anonfun$6(this, scalerRef), ClassTag$.MODULE$.apply(LabeledPoint.class))
                .map(new SVMExample$$anonfun$7(this, model), ClassTag$.MODULE$.apply(Tuple2.class));
        double areaUnderRoc = new BinaryClassificationMetrics(scoreAndLabels).areaUnderROC();
        Predef$.MODULE$.println(new StringBuilder().append("Area under ROC = ").append(BoxesRunTime.boxToDouble(areaUnderRoc)).toString());

        if (commandLine.getOptionValue("fusionHostAndPort") == null) {
            // No Fusion target: save the model to the output path.
            model.save(spark.sparkContext(), modelOutput);
            return 0;
        }

        java.util.HashMap metadata = new java.util.HashMap();
        // Record the feature count that was really used for hashing, not a fixed constant.
        metadata.put("numFeatures", String.valueOf(numFeatures));
        metadata.put("featureFields", "tweet_txt");
        metadata.put("analyzerJson", analyzerSchema);
        metadata.put("normalizer", "Y");
        metadata.put("standardscaler", "Y");
        metadata.put("mean", ((StandardScalerModel) scalerRef.elem).mean().toString());
        metadata.put("std", ((StandardScalerModel) scalerRef.elem).std().toString());

        if (commandLine.getOptionValue("fusionUser") == null || commandLine.getOptionValue("fusionPassword") == null) {
            FusionMLModelSupport.saveModelInLocalFusion(spark.sparkContext(), modelOutput, model, metadata);
            return 0;
        }
        FusionMLModelSupport.saveModelInFusion(
                commandLine.getOptionValue("fusionHostAndPort"),
                commandLine.getOptionValue("fusionUser"),
                commandLine.getOptionValue("fusionPassword"),
                commandLine.getOptionValue("fusionRealm", "native"),
                spark.sparkContext(),
                modelOutput,
                model,
                metadata);
        return 0;
    }

    /** Creates one key/value pair for a Solr data-source options map. */
    private static Tuple2<String, String> solrOption(String key, String value) {
        return new Tuple2<String, String>(key, value);
    }

    /**
     * Converts one row into a labeled point for SVM training or scoring.
     *
     * <p>The label is {@code 0} when the row's {@code polarity} string equals
     * {@code "0"} and {@code 1} otherwise. The features are the analyzed tokens,
     * hashed into a term-frequency vector of size {@code i} and then normalized.
     *
     * @param row    the row to convert; must contain a {@code polarity} column
     * @param i      the number of features passed to {@code HashingTF}
     * @param strArr the field names iterated when collecting text to analyze
     * @param str    the analyzer schema passed to {@code LuceneTextAnalyzer}
     * @return the labeled point built from the row
     */
    public final LabeledPoint com$lucidworks$spark$example$ml$SVMExample$$RowtoLab$1(Row row, int i, String[] strArr, String str) {
        LuceneTextAnalyzer analyzer = new LuceneTextAnalyzer(str);
        HashingTF termHasher = new HashingTF(i);
        Normalizer vectorNormalizer = new Normalizer();

        int polarityIndex = row.fieldIndex("polarity");
        String polarity = row.getString(polarityIndex);

        // The closure class is defined outside this file; presumably it copies each
        // listed field's value from the row into the map - confirm.
        ObjectRef fieldTextRef = ObjectRef.create(new java.util.HashMap());
        RichInt$.MODULE$.until$extension0(Predef$.MODULE$.intWrapper(0), strArr.length)
                .foreach(new SVMExample$$anonfun$com$lucidworks$spark$example$ml$SVMExample$$RowtoLab$1$1(this, row, strArr, fieldTextRef));

        Map<String, List<String>> tokensByField = analyzer.analyzeJava((java.util.HashMap) fieldTextRef.elem);

        // Presumably the closure appends every field's tokens to the single list - confirm.
        ObjectRef allTokensRef = ObjectRef.create(new LinkedList());
        ((TraversableOnce) JavaConverters$.MODULE$.collectionAsScalaIterableConverter(tokensByField.values()).asScala())
                .toList()
                .foreach(new SVMExample$$anonfun$com$lucidworks$spark$example$ml$SVMExample$$RowtoLab$1$2(this, allTokensRef));

        double label;
        if ("0".equals(polarity)) {
            label = 0.0;
        } else {
            label = 1.0;
        }
        Vector features = vectorNormalizer.transform(termHasher.transform((LinkedList) allTokensRef.elem));
        return new LabeledPoint(label, features);
    }
}
