/*
 * Decompiled with CFR 0.152.
 */
package com.lucidworks.spark.example.ml;

import com.lucidworks.spark.SparkApp;
import com.lucidworks.spark.example.ml.MLPipelineScala$;
import com.lucidworks.spark.ml.feature.LuceneTextAnalyzerTransformer;
import org.apache.commons.cli.CommandLine;
import org.apache.spark.SparkConf;
import org.apache.spark.ml.Estimator;
import org.apache.spark.ml.Pipeline;
import org.apache.spark.ml.PipelineStage;
import org.apache.spark.ml.classification.NaiveBayes;
import org.apache.spark.ml.evaluation.Evaluator;
import org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator;
import org.apache.spark.ml.feature.HashingTF;
import org.apache.spark.ml.feature.IndexToString;
import org.apache.spark.ml.feature.StringIndexer;
import org.apache.spark.ml.feature.StringIndexerModel;
import org.apache.spark.ml.param.ParamMap;
import org.apache.spark.ml.tuning.CrossValidator;
import org.apache.spark.ml.tuning.CrossValidatorModel;
import org.apache.spark.ml.tuning.CrossValidatorModel$;
import org.apache.spark.ml.tuning.ParamGridBuilder;
import org.apache.spark.mllib.evaluation.MulticlassMetrics;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.SparkSession$;
import scala.Array$;
import scala.Function1;
import scala.MatchError;
import scala.Option;
import scala.Predef;
import scala.Predef$;
import scala.Serializable;
import scala.StringContext;
import scala.Tuple2;
import scala.collection.Iterable;
import scala.collection.Map;
import scala.collection.Seq;
import scala.collection.SeqLike;
import scala.collection.immutable.StringOps;
import scala.reflect.ClassTag$;
import scala.reflect.ScalaSignature;
import scala.runtime.BoxesRunTime;

@ScalaSignature(bytes="\u0006\u0001\u0005]b\u0001B\u0001\u0003\u00015\u0011q\"\u0014'QSB,G.\u001b8f'\u000e\fG.\u0019\u0006\u0003\u0007\u0011\t!!\u001c7\u000b\u0005\u00151\u0011aB3yC6\u0004H.\u001a\u0006\u0003\u000f!\tQa\u001d9be.T!!\u0003\u0006\u0002\u00151,8-\u001b3x_J\\7OC\u0001\f\u0003\r\u0019w.\\\u0002\u0001'\r\u0001a\u0002\u0006\t\u0003\u001fIi\u0011\u0001\u0005\u0006\u0002#\u0005)1oY1mC&\u00111\u0003\u0005\u0002\u0007\u0003:L(+\u001a4\u0011\u0005UIbB\u0001\f\u0018\u001b\u00051\u0011B\u0001\r\u0007\u0003!\u0019\u0006/\u0019:l\u0003B\u0004\u0018B\u0001\u000e\u001c\u00051\u0011F\t\u0012)s_\u000e,7o]8s\u0015\tAb\u0001C\u0003\u001e\u0001\u0011\u0005a$\u0001\u0004=S:LGO\u0010\u000b\u0002?A\u0011\u0001\u0005A\u0007\u0002\u0005!)!\u0005\u0001C\u0001G\u00059q-\u001a;OC6,G#\u0001\u0013\u0011\u0005\u0015RS\"\u0001\u0014\u000b\u0005\u001dB\u0013\u0001\u00027b]\u001eT\u0011!K\u0001\u0005U\u00064\u0018-\u0003\u0002,M\t11\u000b\u001e:j]\u001eDQ!\f\u0001\u0005\u00029\n!bZ3u\u001fB$\u0018n\u001c8t)\u0005y\u0003cA\b1e%\u0011\u0011\u0007\u0005\u0002\u0006\u0003J\u0014\u0018-\u001f\t\u0003gqj\u0011\u0001\u000e\u0006\u0003kY\n1a\u00197j\u0015\t9\u0004(A\u0004d_6lwN\\:\u000b\u0005eR\u0014AB1qC\u000eDWMC\u0001<\u0003\ry'oZ\u0005\u0003{Q\u0012aa\u00149uS>t\u0007\"B \u0001\t\u0003\u0002\u0015a\u0001:v]R\u0019\u0011\tR&\u0011\u0005=\u0011\u0015BA\"\u0011\u0005\rIe\u000e\u001e\u0005\u0006\u000bz\u0002\rAR\u0001\u0005G>tg\r\u0005\u0002H\u00136\t\u0001J\u0003\u0002\bq%\u0011!\n\u0013\u0002\n'B\f'o[\"p]\u001aDQ!\u000e 
A\u00021\u0003\"aM'\n\u00059#$aC\"p[6\fg\u000e\u001a'j]\u0016<Q\u0001\u0015\u0002\t\u0002E\u000bq\"\u0014'QSB,G.\u001b8f'\u000e\fG.\u0019\t\u0003AI3Q!\u0001\u0002\t\u0002M\u001b2A\u0015\bU!\tyQ+\u0003\u0002W!\ta1+\u001a:jC2L'0\u00192mK\")QD\u0015C\u00011R\t\u0011\u000bC\u0004[%\n\u0007I\u0011A.\u0002\u00111\u000b'-\u001a7D_2,\u0012\u0001\n\u0005\u0007;J\u0003\u000b\u0011\u0002\u0013\u0002\u00131\u000b'-\u001a7D_2\u0004\u0003bB0S\u0005\u0004%\taW\u0001\t/>\u0014Hm]\"pY\"1\u0011M\u0015Q\u0001\n\u0011\n\u0011bV8sIN\u001cu\u000e\u001c\u0011\t\u000f\r\u0014&\u0019!C\u00017\u0006i\u0001K]3eS\u000e$\u0018n\u001c8D_2Da!\u001a*!\u0002\u0013!\u0013A\u0004)sK\u0012L7\r^5p]\u000e{G\u000e\t\u0005\bOJ\u0013\r\u0011\"\u0001\\\u0003-1U-\u0019;ve\u0016\u001c8i\u001c7\t\r%\u0014\u0006\u0015!\u0003%\u000311U-\u0019;ve\u0016\u001c8i\u001c7!\u0011\u001dY'K1A\u0005\u0002m\u000b\u0011\u0003\u0015:fI&\u001cG/\u001a3MC\n,GnQ8m\u0011\u0019i'\u000b)A\u0005I\u0005\u0011\u0002K]3eS\u000e$X\r\u001a'bE\u0016d7i\u001c7!\u0011\u001dy'K1A\u0005\u0002m\u000bQ\u0002R3gCVdGOW6I_N$\bBB9SA\u0003%A%\u0001\bEK\u001a\fW\u000f\u001c;[W\"{7\u000f\u001e\u0011\t\u000fM\u0014&\u0019!C\u00017\u0006aA)\u001a4bk2$\u0018+^3ss\"1QO\u0015Q\u0001\n\u0011\nQ\u0002R3gCVdG/U;fef\u0004\u0003bB<S\u0005\u0004%\taW\u0001\u0012\t\u00164\u0017-\u001e7u\u0019\u0006\u0014W\r\u001c$jK2$\u0007BB=SA\u0003%A%\u0001\nEK\u001a\fW\u000f\u001c;MC\n,GNR5fY\u0012\u0004\u0003bB>S\u0005\u0004%\taW\u0001\u0015\t\u00164\u0017-\u001e7u\u0007>tG/\u001a8u\r&,G\u000eZ:\t\ru\u0014\u0006\u0015!\u0003%\u0003U!UMZ1vYR\u001cuN\u001c;f]R4\u0015.\u001a7eg\u0002Bqa 
*C\u0002\u0013\u00051,A\tEK\u001a\fW\u000f\u001c;D_2dWm\u0019;j_:Dq!a\u0001SA\u0003%A%\u0001\nEK\u001a\fW\u000f\u001c;D_2dWm\u0019;j_:\u0004\u0003\u0002CA\u0004%\n\u0007I\u0011A.\u0002\u001b\u0011+g-Y;miN\u000bW\u000e\u001d7f\u0011\u001d\tYA\u0015Q\u0001\n\u0011\na\u0002R3gCVdGoU1na2,\u0007\u0005C\u0005\u0002\u0010I\u0013\r\u0011\"\u0001\u0002\u0012\u0005\u0019r\u000b[5uKN\u0004\u0018mY3U_.\u001c6\r[3nCV\u0011\u00111\u0003\t\u0005\u0003+\tYBD\u0002\u0010\u0003/I1!!\u0007\u0011\u0003\u0019\u0001&/\u001a3fM&\u00191&!\b\u000b\u0007\u0005e\u0001\u0003\u0003\u0005\u0002\"I\u0003\u000b\u0011BA\n\u0003Q9\u0006.\u001b;fgB\f7-\u001a+pWN\u001b\u0007.Z7bA!I\u0011Q\u0005*C\u0002\u0013\u0005\u0011\u0011C\u0001\u0012'R$Gk\\6M_^,'oU2iK6\f\u0007\u0002CA\u0015%\u0002\u0006I!a\u0005\u0002%M#H\rV8l\u0019><XM]*dQ\u0016l\u0017\r\t\u0005\n\u0003[\u0011\u0016\u0011!C\u0005\u0003_\t1B]3bIJ+7o\u001c7wKR\u0011\u0011\u0011\u0007\t\u0004K\u0005M\u0012bAA\u001bM\t1qJ\u00196fGR\u0004")
public class MLPipelineScala
implements SparkApp.RDDProcessor {
    /**
     * Static forwarder to {@code MLPipelineScala$.MODULE$.StdTokLowerSchema()}.
     *
     * <p>In {@link #run} the returned string is one of the two candidate values
     * placed in the parameter grid for the analyzer's {@code analysisSchema} param.
     *
     * @return the value held by the companion singleton
     */
    public static String StdTokLowerSchema() {
        return MLPipelineScala$.MODULE$.StdTokLowerSchema();
    }

    /**
     * Static forwarder to {@code MLPipelineScala$.MODULE$.WhitespaceTokSchema()}.
     *
     * <p>In {@link #run} the returned string is one of the two candidate values
     * placed in the parameter grid for the analyzer's {@code analysisSchema} param.
     *
     * @return the value held by the companion singleton
     */
    public static String WhitespaceTokSchema() {
        return MLPipelineScala$.MODULE$.WhitespaceTokSchema();
    }

    /**
     * Static forwarder to {@code MLPipelineScala$.MODULE$.DefaultSample()}.
     *
     * <p>Used as the fallback for the {@code sample} command-line option; {@link #run}
     * parses the resulting string as a double.
     *
     * @return the value held by the companion singleton
     */
    public static String DefaultSample() {
        return MLPipelineScala$.MODULE$.DefaultSample();
    }

    /**
     * Static forwarder to {@code MLPipelineScala$.MODULE$.DefaultCollection()}.
     *
     * <p>Used as the fallback for the {@code collection} command-line option.
     *
     * @return the value held by the companion singleton
     */
    public static String DefaultCollection() {
        return MLPipelineScala$.MODULE$.DefaultCollection();
    }

    /**
     * Static forwarder to {@code MLPipelineScala$.MODULE$.DefaultContentFields()}.
     *
     * <p>Used as the fallback for the {@code contentFields} command-line option;
     * {@link #run} splits the resulting string on commas.
     *
     * @return the value held by the companion singleton
     */
    public static String DefaultContentFields() {
        return MLPipelineScala$.MODULE$.DefaultContentFields();
    }

    /**
     * Static forwarder to {@code MLPipelineScala$.MODULE$.DefaultLabelField()}.
     *
     * <p>Used as the fallback for the {@code labelField} command-line option.
     *
     * @return the value held by the companion singleton
     */
    public static String DefaultLabelField() {
        return MLPipelineScala$.MODULE$.DefaultLabelField();
    }

    /**
     * Static forwarder to {@code MLPipelineScala$.MODULE$.DefaultQuery()}.
     *
     * <p>Used as the fallback for the {@code query} command-line option.
     *
     * @return the value held by the companion singleton
     */
    public static String DefaultQuery() {
        return MLPipelineScala$.MODULE$.DefaultQuery();
    }

    /**
     * Static forwarder to {@code MLPipelineScala$.MODULE$.DefaultZkHost()}.
     *
     * <p>{@link #run} uses it as the fallback when reading the {@code zkHost}
     * option value from the command line.
     *
     * @return the value held by the companion singleton
     */
    public static String DefaultZkHost() {
        return MLPipelineScala$.MODULE$.DefaultZkHost();
    }

    /**
     * Static forwarder to {@code MLPipelineScala$.MODULE$.PredictedLabelCol()}.
     *
     * <p>In {@link #run} this is the output column of the {@code IndexToString} stage.
     *
     * @return the column name held by the companion singleton
     */
    public static String PredictedLabelCol() {
        return MLPipelineScala$.MODULE$.PredictedLabelCol();
    }

    /**
     * Static forwarder to {@code MLPipelineScala$.MODULE$.FeaturesCol()}.
     *
     * <p>In {@link #run} this is the output column of the {@code HashingTF} stage.
     *
     * @return the column name held by the companion singleton
     */
    public static String FeaturesCol() {
        return MLPipelineScala$.MODULE$.FeaturesCol();
    }

    /**
     * Static forwarder to {@code MLPipelineScala$.MODULE$.PredictionCol()}.
     *
     * <p>In {@link #run} this is the input column of the {@code IndexToString} stage
     * and the prediction column handed to the evaluator.
     *
     * @return the column name held by the companion singleton
     */
    public static String PredictionCol() {
        return MLPipelineScala$.MODULE$.PredictionCol();
    }

    /**
     * Static forwarder to {@code MLPipelineScala$.MODULE$.WordsCol()}.
     *
     * <p>In {@link #run} this is the output column of the text analyzer stage and
     * the input column of the {@code HashingTF} stage.
     *
     * @return the column name held by the companion singleton
     */
    public static String WordsCol() {
        return MLPipelineScala$.MODULE$.WordsCol();
    }

    /**
     * Static forwarder to {@code MLPipelineScala$.MODULE$.LabelCol()}.
     *
     * <p>In {@link #run} this is the output column of the {@code StringIndexer} stage
     * and the label column handed to the evaluator.
     *
     * @return the column name held by the companion singleton
     */
    public static String LabelCol() {
        return MLPipelineScala$.MODULE$.LabelCol();
    }

    /**
     * Returns the fixed name of this processor.
     *
     * <p>NOTE(review): presumably the key under which {@code SparkApp} selects this
     * processor — confirm against {@code SparkApp.RDDProcessor}.
     *
     * @return the literal {@code "ml-pipeline-scala"}
     */
    @Override
    public String getName() {
        return "ml-pipeline-scala";
    }

    /**
     * Declares the optional command-line arguments specific to this example.
     *
     * <p>Five options are returned, in this order: {@code query}, {@code labelField},
     * {@code contentFields}, {@code sample} and {@code collection}. Each takes one
     * argument, is not required, and has a description that ends with the matching
     * default value from {@code MLPipelineScala$}.
     *
     * @return a new array holding the five option definitions
     */
    @Override
    public org.apache.commons.cli.Option[] getOptions() {
        org.apache.commons.cli.Option queryOption = org.apache.commons.cli.Option.builder()
                .longOpt("query")
                .hasArg()
                .argName("QUERY")
                .required(false)
                .desc(new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"Query to identify documents in the training set. Default: ", ""})).s((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{MLPipelineScala$.MODULE$.DefaultQuery()})))
                .build();

        org.apache.commons.cli.Option labelFieldOption = org.apache.commons.cli.Option.builder()
                .longOpt("labelField")
                .hasArg()
                .argName("FIELD")
                .required(false)
                .desc(new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"Field containing the label in Solr training set documents. Default: ", ""})).s((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{MLPipelineScala$.MODULE$.DefaultLabelField()})))
                .build();

        org.apache.commons.cli.Option contentFieldsOption = org.apache.commons.cli.Option.builder()
                .longOpt("contentFields")
                .hasArg()
                .argName("FIELDS")
                .required(false)
                .desc(new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"Comma-separated list of text field(s) in Solr training set documents. Default: ", ""})).s((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{MLPipelineScala$.MODULE$.DefaultContentFields()})))
                .build();

        org.apache.commons.cli.Option sampleOption = org.apache.commons.cli.Option.builder()
                .longOpt("sample")
                .hasArg()
                .argName("FRACTION")
                .required(false)
                .desc(new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"Fraction (0 to 1) of full dataset to sample from Solr. Default: ", ""})).s((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{MLPipelineScala$.MODULE$.DefaultSample()})))
                .build();

        org.apache.commons.cli.Option collectionOption = org.apache.commons.cli.Option.builder()
                .longOpt("collection")
                .hasArg()
                .argName("NAME")
                .required(false)
                .desc(new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"Solr source collection. Default: ", ""})).s((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{MLPipelineScala$.MODULE$.DefaultCollection()})))
                .build();

        return new org.apache.commons.cli.Option[]{queryOption, labelFieldOption, contentFieldsOption, sampleOption, collectionOption};
    }

    /**
     * Trains, cross-validates, persists and evaluates a text-classification pipeline
     * on documents read through the {@code solr} data source.
     *
     * <p>Steps, in order:
     * <ol>
     *   <li>Read {@code labelField}, {@code contentFields}, {@code sample},
     *       {@code zkHost}, {@code collection} and {@code query} from the command
     *       line, falling back to the defaults on {@code MLPipelineScala$}.</li>
     *   <li>Load the {@code id}, label and content fields from Solr and sample the
     *       rows without replacement at the requested fraction.</li>
     *   <li>Build the pipeline: label indexer, Lucene text analyzer, {@code HashingTF},
     *       {@code NaiveBayes}, then {@code IndexToString} to map predictions back to
     *       label strings.</li>
     *   <li>Split the sample 70/30 into training and test data, and run a 3-fold
     *       {@code CrossValidator} over a grid of hashing, analysis and smoothing
     *       parameters.</li>
     *   <li>Save the fitted model to {@code "ml-pipeline-model"} (overwriting), load it
     *       back, score the test data and print the test error, a 10% sample of
     *       predictions, the confusion matrix, the F-measure and the per-label
     *       false-positive rate.</li>
     * </ol>
     *
     * @param conf Spark configuration used to obtain the {@code SparkSession}
     * @param cli  parsed command line supplying the option values
     * @return {@code 0} once all steps have completed
     * @throws MatchError if {@code randomSplit} does not return exactly two datasets
     */
    @Override
    public int run(SparkConf conf, CommandLine cli) {
        SparkSession sparkSession = SparkSession$.MODULE$.builder().config(conf).getOrCreate();

        // --- Command-line arguments ---------------------------------------------------
        String labelField = cli.getOptionValue("labelField", MLPipelineScala$.MODULE$.DefaultLabelField());
        // Comma-separated list; each entry is trimmed.
        String[] contentFields = (String[])Predef$.MODULE$.refArrayOps((Object[])cli.getOptionValue("contentFields", MLPipelineScala$.MODULE$.DefaultContentFields()).split(",")).map((Function1)new Serializable(this){
            public static final long serialVersionUID = 0L;

            public final String apply(String x$1) {
                return x$1.trim();
            }
        }, Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(String.class)));
        double sampleFraction = new StringOps(Predef$.MODULE$.augmentString(cli.getOptionValue("sample", MLPipelineScala$.MODULE$.DefaultSample()))).toDouble();

        // --- Load and sample the Solr documents ---------------------------------------
        // "fields" is "id,<labelField>,<contentFields joined by commas>".
        scala.collection.immutable.Map options = (scala.collection.immutable.Map)Predef$.MODULE$.Map().apply((Seq)Predef$.MODULE$.wrapRefArray((Object[])new Tuple2[]{Predef.ArrowAssoc$.MODULE$.$minus$greater$extension(Predef$.MODULE$.ArrowAssoc((Object)"zkhost"), (Object)cli.getOptionValue("zkHost", MLPipelineScala$.MODULE$.DefaultZkHost())), Predef.ArrowAssoc$.MODULE$.$minus$greater$extension(Predef$.MODULE$.ArrowAssoc((Object)"collection"), (Object)cli.getOptionValue("collection", MLPipelineScala$.MODULE$.DefaultCollection())), Predef.ArrowAssoc$.MODULE$.$minus$greater$extension(Predef$.MODULE$.ArrowAssoc((Object)"query"), (Object)cli.getOptionValue("query", MLPipelineScala$.MODULE$.DefaultQuery())), Predef.ArrowAssoc$.MODULE$.$minus$greater$extension(Predef$.MODULE$.ArrowAssoc((Object)"fields"), (Object)new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"id,", ",", ""})).s((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{labelField, Predef$.MODULE$.refArrayOps((Object[])contentFields).mkString(",")})))}));
        Dataset solrData = sparkSession.read().format("solr").options((Map)options).load();
        Dataset sampledSolrData = solrData.sample(false, sampleFraction);

        // --- Pipeline stages ----------------------------------------------------------
        // The label indexer is fitted up front so its labels can be handed to labelConverter.
        StringIndexerModel labelIndexer = new StringIndexer().setInputCol(labelField).setOutputCol(MLPipelineScala$.MODULE$.LabelCol()).fit(sampledSolrData);
        LuceneTextAnalyzerTransformer analyzer = new LuceneTextAnalyzerTransformer().setInputCols(contentFields).setOutputCol(MLPipelineScala$.MODULE$.WordsCol());
        HashingTF hashingTF = new HashingTF().setInputCol(MLPipelineScala$.MODULE$.WordsCol()).setOutputCol(MLPipelineScala$.MODULE$.FeaturesCol());
        NaiveBayes nb = new NaiveBayes();
        NaiveBayes estimatorStage = nb;
        Predef$.MODULE$.println((Object)new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"Using estimator: ", ""})).s((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{estimatorStage})));
        IndexToString labelConverter = new IndexToString().setInputCol(MLPipelineScala$.MODULE$.PredictionCol()).setOutputCol(MLPipelineScala$.MODULE$.PredictedLabelCol()).setLabels(labelIndexer.labels());
        Pipeline pipeline = new Pipeline().setStages((PipelineStage[])((Object[])new PipelineStage[]{labelIndexer, analyzer, hashingTF, estimatorStage, labelConverter}));

        // --- 70/30 train/test split ---------------------------------------------------
        Dataset[] datasetArray = sampledSolrData.randomSplit(new double[]{0.7, 0.3});
        Option option = Array$.MODULE$.unapplySeq((Object)datasetArray);
        if (!option.isEmpty() && option.get() != null && ((SeqLike)option.get()).lengthCompare(2) == 0) {
            Dataset trainingData = (Dataset)((SeqLike)option.get()).apply(0);
            Dataset testData = (Dataset)((SeqLike)option.get()).apply(1);

            // "accuracy" replaces the Spark 1.x metric name "precision", which the Spark 2.x
            // evaluator rejects (it accepts f1, weightedPrecision, weightedRecall, accuracy).
            // The result is reported below as "1 - accuracy".
            MulticlassClassificationEvaluator evaluator = new MulticlassClassificationEvaluator().setLabelCol(MLPipelineScala$.MODULE$.LabelCol()).setPredictionCol(MLPipelineScala$.MODULE$.PredictionCol()).setMetricName("accuracy");

            // Grid: numFeatures {1000, 5000} x two analysis schemas x prefixTokensWithInputCol
            // x smoothing {1.0, 0.5}.
            ParamMap[] paramGrid = new ParamGridBuilder().addGrid(hashingTF.numFeatures(), new int[]{1000, 5000}).addGrid(analyzer.analysisSchema(), (Iterable)Predef$.MODULE$.wrapRefArray((Object[])new String[]{MLPipelineScala$.MODULE$.WhitespaceTokSchema(), MLPipelineScala$.MODULE$.StdTokLowerSchema()})).addGrid(analyzer.prefixTokensWithInputCol()).addGrid(nb.smoothing(), new double[]{1.0, 0.5}).build();
            CrossValidator cv = new CrossValidator().setEstimator((Estimator)pipeline).setEvaluator((Evaluator)evaluator).setEstimatorParamMaps(paramGrid).setNumFolds(3);
            CrossValidatorModel cvModel = cv.fit(trainingData);

            // Round-trip the fitted model through storage; predictions come from the reloaded copy.
            cvModel.write().overwrite().save("ml-pipeline-model");
            CrossValidatorModel loadedCvModel = CrossValidatorModel$.MODULE$.load("ml-pipeline-model");
            Dataset predictions = loadedCvModel.transform(testData);
            // Cached because predictions is consumed three times below.
            predictions.cache();
            double accuracyCrossFold = evaluator.evaluate(predictions);
            Predef$.MODULE$.println((Object)new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"Cross-Fold Test Error = ", ""})).s((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{BoxesRunTime.boxToDouble((double)(1.0 - accuracyCrossFold))})));

            // Print id / actual label / predicted label for a 10% sample of the predictions.
            Predef$.MODULE$.refArrayOps((Object[])predictions.select("id", (Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{labelField, MLPipelineScala$.MODULE$.PredictedLabelCol()})).sample(false, 0.1).collect()).foreach((Function1)new Serializable(this){
                public static final long serialVersionUID = 0L;

                public final void apply(Row r) {
                    Predef$.MODULE$.println((Object)new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"", ": actual=", ", predicted=", ""})).s((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{r.apply(0), r.apply(1), r.apply(2)})));
                }
            });

            // MulticlassMetrics is fed (prediction, label) pairs.
            MulticlassMetrics metrics = new MulticlassMetrics(predictions.select(MLPipelineScala$.MODULE$.PredictionCol(), (Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{MLPipelineScala$.MODULE$.LabelCol()})).rdd().map((Function1)new Serializable(this){
                public static final long serialVersionUID = 0L;

                public final Tuple2<Object, Object> apply(Row r) {
                    return new Tuple2.mcDD.sp(r.getDouble(0), r.getDouble(1));
                }
            }, ClassTag$.MODULE$.apply(Tuple2.class)));
            Predef$.MODULE$.println((Object)new StringOps(Predef$.MODULE$.augmentString(new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"Confusion Matrix\n                          |", "\\n"})).s((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{metrics.confusionMatrix()})))).stripMargin());
            Predef$.MODULE$.println((Object)new StringOps(Predef$.MODULE$.augmentString(new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"\\nF-Measure: ", "\n                          |label\\tfpr\\n"})).s((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{BoxesRunTime.boxToDouble((double)metrics.fMeasure())})))).stripMargin());

            // One "<label>\t<false positive rate>" line per label; the label's index is
            // passed to falsePositiveRate as the class value.
            String[] labels = labelConverter.getLabels();
            Predef$.MODULE$.refArrayOps((Object[])labels).indices().foreach$mVc$sp((Function1)new Serializable(this, metrics, labels){
                public static final long serialVersionUID = 0L;
                private final MulticlassMetrics metrics$1;
                private final String[] labels$1;

                public final void apply(int i) {
                    this.apply$mcVI$sp(i);
                }

                public void apply$mcVI$sp(int i) {
                    Predef$.MODULE$.println((Object)new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"", "\\t", ""})).s((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{this.labels$1[i], BoxesRunTime.boxToDouble((double)this.metrics$1.falsePositiveRate((double)i))})));
                }
                {
                    this.metrics$1 = metrics$1;
                    this.labels$1 = labels$1;
                }
            });
            return 0;
        }
        // randomSplit did not yield exactly two datasets.
        throw new MatchError((Object)datasetArray);
    }
}

