/*
 * Decompiled with CFR 0.152.
 */
package com.lucidworks.spark.example.ml;

import com.lucidworks.spark.SparkApp;
import com.lucidworks.spark.example.ml.DateConverter$;
import com.lucidworks.spark.example.ml.NewsgroupsIndexer$;
import com.lucidworks.spark.util.SolrSupport$;
import com.typesafe.scalalogging.LazyLogging;
import com.typesafe.scalalogging.Logger;
import java.io.DataInputStream;
import java.io.InputStream;
import java.net.URI;
import org.apache.commons.cli.CommandLine;
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.impl.CloudSolrClient;
import org.apache.solr.common.SolrInputDocument;
import org.apache.spark.SparkConf;
import org.apache.spark.SparkContext;
import org.apache.spark.input.PortableDataStream;
import scala.Array$;
import scala.Function0;
import scala.Function1;
import scala.MatchError;
import scala.None$;
import scala.Option;
import scala.Predef$;
import scala.Serializable;
import scala.Some;
import scala.StringContext;
import scala.Tuple2;
import scala.collection.Iterable;
import scala.collection.Iterator;
import scala.collection.JavaConversions$;
import scala.collection.Seq;
import scala.collection.immutable.StringOps;
import scala.collection.mutable.ListBuffer;
import scala.collection.mutable.ListBuffer$;
import scala.collection.mutable.StringBuilder;
import scala.io.Source$;
import scala.reflect.ClassTag$;
import scala.reflect.ScalaSignature;
import scala.runtime.BooleanRef;
import scala.runtime.BoxedUnit;
import scala.runtime.BoxesRunTime;
import scala.runtime.IntRef;
import scala.util.matching.Regex;

@ScalaSignature(bytes="\u0006\u0001\u0005uc\u0001B\u0001\u0003\u00015\u0011\u0011CT3xg\u001e\u0014x.\u001e9t\u0013:$W\r_3s\u0015\t\u0019A!\u0001\u0002nY*\u0011QAB\u0001\bKb\fW\u000e\u001d7f\u0015\t9\u0001\"A\u0003ta\u0006\u00148N\u0003\u0002\n\u0015\u0005QA.^2jI^|'o[:\u000b\u0003-\t1aY8n\u0007\u0001\u0019B\u0001\u0001\b\u00159A\u0011qBE\u0007\u0002!)\t\u0011#A\u0003tG\u0006d\u0017-\u0003\u0002\u0014!\t1\u0011I\\=SK\u001a\u0004\"!F\r\u000f\u0005Y9R\"\u0001\u0004\n\u0005a1\u0011\u0001C*qCJ\\\u0017\t\u001d9\n\u0005iY\"\u0001\u0004*E\tB\u0013xnY3tg>\u0014(B\u0001\r\u0007!\ti\"%D\u0001\u001f\u0015\ty\u0002%\u0001\u0007tG\u0006d\u0017\r\\8hO&twM\u0003\u0002\"\u0015\u0005AA/\u001f9fg\u00064W-\u0003\u0002$=\tYA*\u0019>z\u0019><w-\u001b8h\u0011\u0015)\u0003\u0001\"\u0001'\u0003\u0019a\u0014N\\5u}Q\tq\u0005\u0005\u0002)\u00015\t!\u0001C\u0003+\u0001\u0011\u00051&A\u0004hKRt\u0015-\\3\u0015\u00031\u0002\"!\f\u001a\u000e\u00039R!a\f\u0019\u0002\t1\fgn\u001a\u0006\u0002c\u0005!!.\u0019<b\u0013\t\u0019dF\u0001\u0004TiJLgn\u001a\u0005\u0006k\u0001!\tAN\u0001\u000bO\u0016$x\n\u001d;j_:\u001cH#A\u001c\u0011\u0007=A$(\u0003\u0002:!\t)\u0011I\u001d:bsB\u00111\bR\u0007\u0002y)\u0011QHP\u0001\u0004G2L'BA 
A\u0003\u001d\u0019w.\\7p]NT!!\u0011\"\u0002\r\u0005\u0004\u0018m\u00195f\u0015\u0005\u0019\u0015aA8sO&\u0011Q\t\u0010\u0002\u0007\u001fB$\u0018n\u001c8\t\u000b\u001d\u0003A\u0011\u0001%\u0002\u0007I,h\u000eF\u0002J\u0019N\u0003\"a\u0004&\n\u0005-\u0003\"aA%oi\")QJ\u0012a\u0001\u001d\u0006!1m\u001c8g!\ty\u0015+D\u0001Q\u0015\t9\u0001)\u0003\u0002S!\nI1\u000b]1sW\u000e{gN\u001a\u0005\u0006{\u0019\u0003\r\u0001\u0016\t\u0003wUK!A\u0016\u001f\u0003\u0017\r{W.\\1oI2Kg.\u001a\u0005\u00061\u0002!\t!W\u0001\u000ea\u0006\u00148/\u001a$jY\u0016\u0004\u0016\r\u001e5\u0015\u0007i+w\r\u0005\u0003\u00107v{\u0016B\u0001/\u0011\u0005\u0019!V\u000f\u001d7feA\u0019qBX0\n\u0005\u0015\u0003\u0002C\u00011d\u001d\ty\u0011-\u0003\u0002c!\u00051\u0001K]3eK\u001aL!a\r3\u000b\u0005\t\u0004\u0002\"\u00024X\u0001\u0004y\u0016\u0001\u00032bg\u0016\u0004\u0016\r\u001e5\t\u000b!<\u0006\u0019A0\u0002\u0011\u0019LG.\u001a)bi\"DQA\u001b\u0001\u0005\u0002-\fA\u0003\\8bI:+wo]4s_V\u0004\u0018I\u001d;jG2,GC\u00017u!\ti'/D\u0001o\u0015\ty\u0007/\u0001\u0004d_6lwN\u001c\u0006\u0003c\u0002\u000bAa]8me&\u00111O\u001c\u0002\u0012'>d'/\u00138qkR$unY;nK:$\b\"B;j\u0001\u00041\u0018AB:ue\u0016\fW\u000e\u0005\u0002xu6\t\u0001P\u0003\u0002z!\u0006)\u0011N\u001c9vi&\u00111\u0010\u001f\u0002\u0013!>\u0014H/\u00192mK\u0012\u000bG/Y*ue\u0016\fWnB\u0003~\u0005!\u0005a0A\tOK^\u001cxM]8vaNLe\u000eZ3yKJ\u0004\"\u0001K@\u0007\r\u0005\u0011\u0001\u0012AA\u0001'\u0011yh\"a\u0001\u0011\u0007=\t)!C\u0002\u0002\bA\u0011AbU3sS\u0006d\u0017N_1cY\u0016Da!J@\u0005\u0002\u0005-A#\u0001@\t\u0013\u0005=qP1A\u0005\u0002\u0005E\u0011!\u0004#fM\u0006,H\u000e\u001e.l\u0011>\u001cH/F\u0001-\u0011\u001d\t)b Q\u0001\n1\na\u0002R3gCVdGOW6I_N$\b\u0005C\u0005\u0002\u001a}\u0014\r\u0011\"\u0001\u0002\u0012\u0005\u0001B)\u001a4bk2$()\u0019;dQNK'0\u001a\u0005\b\u0003;y\b\u0015!\u0003-\u0003E!UMZ1vYR\u0014\u0015\r^2i'&TX\r\t\u0005\n\u0003Cy(\u0019!C\u0001\u0003#\t\u0011\u0003R3gCVdGoQ8mY\u0016\u001cG/[8o\u0011\u001d\t)c 
Q\u0001\n1\n!\u0003R3gCVdGoQ8mY\u0016\u001cG/[8oA!I\u0011\u0011F@C\u0002\u0013\u0005\u00111F\u0001\u0011\u001d>t\u0007,\u001c7DQ\u0006\u00148OU3hKb,\"!!\f\u0011\t\u0005=\u0012\u0011H\u0007\u0003\u0003cQA!a\r\u00026\u0005AQ.\u0019;dQ&twMC\u0002\u00028A\tA!\u001e;jY&!\u00111HA\u0019\u0005\u0015\u0011VmZ3y\u0011!\tyd Q\u0001\n\u00055\u0012!\u0005(p]bkGn\u00115beN\u0014VmZ3yA!I\u00111I@C\u0002\u0013\u0005\u00111F\u0001\u0015\u001d\u0016<8o\u001a:pkBDU-\u00193feJ+w-\u001a=\t\u0011\u0005\u001ds\u0010)A\u0005\u0003[\tQCT3xg\u001e\u0014x.\u001e9IK\u0006$WM\u001d*fO\u0016D\b\u0005C\u0005\u0002L}\u0014\r\u0011\"\u0001\u0002,\u0005)bj\u001c8BYBD\u0017MT;n\u0007\"\f'o\u001d*fO\u0016D\b\u0002CA(\u007f\u0002\u0006I!!\f\u0002-9{g.\u00117qQ\u0006tU/\\\"iCJ\u001c(+Z4fq\u0002B\u0011\"a\u0015\u0000\u0003\u0003%I!!\u0016\u0002\u0017I,\u0017\r\u001a*fg>dg/\u001a\u000b\u0003\u0003/\u00022!LA-\u0013\r\tYF\f\u0002\u0007\u001f\nTWm\u0019;")
public class NewsgroupsIndexer
implements SparkApp.RDDProcessor,
LazyLogging {
    /**
     * Lazily created logger. It is assigned by {@code logger$lzycompute()} rather than by
     * the constructor, so it must not be declared {@code final}.
     */
    private Logger logger;
    /**
     * Becomes {@code true} once {@code logger} has been assigned. Declared {@code volatile}
     * because {@code logger()} reads it without holding the instance monitor.
     */
    private volatile boolean bitmap$0;

    /**
     * Static forwarder to the {@code NonAlphaNumCharsRegex} value held by the
     * {@code NewsgroupsIndexer$} singleton.
     *
     * @return the result of {@code NewsgroupsIndexer$.MODULE$.NonAlphaNumCharsRegex()}
     */
    public static Regex NonAlphaNumCharsRegex() {
        final NewsgroupsIndexer$ module = NewsgroupsIndexer$.MODULE$;
        return module.NonAlphaNumCharsRegex();
    }

    /**
     * Static forwarder to the {@code NewsgroupHeaderRegex} value held by the
     * {@code NewsgroupsIndexer$} singleton.
     *
     * @return the result of {@code NewsgroupsIndexer$.MODULE$.NewsgroupHeaderRegex()}
     */
    public static Regex NewsgroupHeaderRegex() {
        final NewsgroupsIndexer$ module = NewsgroupsIndexer$.MODULE$;
        return module.NewsgroupHeaderRegex();
    }

    /**
     * Static forwarder to the {@code NonXmlCharsRegex} value held by the
     * {@code NewsgroupsIndexer$} singleton.
     *
     * @return the result of {@code NewsgroupsIndexer$.MODULE$.NonXmlCharsRegex()}
     */
    public static Regex NonXmlCharsRegex() {
        final NewsgroupsIndexer$ module = NewsgroupsIndexer$.MODULE$;
        return module.NonXmlCharsRegex();
    }

    /**
     * Static forwarder to the {@code DefaultCollection} value held by the
     * {@code NewsgroupsIndexer$} singleton.
     *
     * @return the result of {@code NewsgroupsIndexer$.MODULE$.DefaultCollection()}
     */
    public static String DefaultCollection() {
        final NewsgroupsIndexer$ module = NewsgroupsIndexer$.MODULE$;
        return module.DefaultCollection();
    }

    /**
     * Static forwarder to the {@code DefaultBatchSize} value held by the
     * {@code NewsgroupsIndexer$} singleton. Note that the value is a string, not a number.
     *
     * @return the result of {@code NewsgroupsIndexer$.MODULE$.DefaultBatchSize()}
     */
    public static String DefaultBatchSize() {
        final NewsgroupsIndexer$ module = NewsgroupsIndexer$.MODULE$;
        return module.DefaultBatchSize();
    }

    /**
     * Static forwarder to the {@code DefaultZkHost} value held by the
     * {@code NewsgroupsIndexer$} singleton.
     *
     * @return the result of {@code NewsgroupsIndexer$.MODULE$.DefaultZkHost()}
     */
    public static String DefaultZkHost() {
        final NewsgroupsIndexer$ module = NewsgroupsIndexer$.MODULE$;
        return module.DefaultZkHost();
    }

    /**
     * Slow path of {@code logger()}: while holding this instance's monitor, assigns the
     * {@code logger} field if {@code bitmap$0} is still {@code false}, then sets the flag.
     *
     * @return the value of the {@code logger} field after any initialization
     */
    private Logger logger$lzycompute() {
        synchronized (this) {
            boolean alreadyInitialized = this.bitmap$0;
            if (alreadyInitialized) {
                // Another caller finished the initialization before we got the monitor.
                return this.logger;
            }
            this.logger = LazyLogging.class.logger((LazyLogging)this);
            this.bitmap$0 = true;
            return this.logger;
        }
    }

    /**
     * Returns this instance's logger, creating it on first access.
     *
     * @return the {@code logger} field when {@code bitmap$0} is already set, otherwise the
     *         result of {@code logger$lzycompute()}
     */
    public Logger logger() {
        if (!this.bitmap$0) {
            // Not initialized yet (as far as this thread can see): take the synchronized path.
            return this.logger$lzycompute();
        }
        return this.logger;
    }

    /**
     * Returns the fixed name of this processor.
     *
     * @return the constant string {@code "newsgroups2solr"}
     */
    @Override
    public String getName() {
        final String processorName = "newsgroups2solr";
        return processorName;
    }

    /**
     * Declares the command-line options specific to this processor.
     *
     * <ul>
     *   <li>{@code --path PATH} (required): path from which articles are loaded.</li>
     *   <li>{@code --collection NAME} (optional): target Solr collection.</li>
     * </ul>
     *
     * The {@code collection} description used to contain the literal text
     * {@code $DefaultCollection}, an uninterpolated placeholder. It now shows the actual
     * default value.
     *
     * @return a new two-element array holding the {@code path} and {@code collection} options
     */
    @Override
    public org.apache.commons.cli.Option[] getOptions() {
        org.apache.commons.cli.Option pathOption = org.apache.commons.cli.Option.builder()
                .longOpt("path")
                .hasArg()
                .argName("PATH")
                .required()
                .desc("Path from which to recursively load newsgroup articles")
                .build();
        org.apache.commons.cli.Option collectionOption = org.apache.commons.cli.Option.builder()
                .longOpt("collection")
                .hasArg()
                .argName("NAME")
                .required(false)
                .desc("Target Solr collection; default: " + NewsgroupsIndexer$.MODULE$.DefaultCollection())
                .build();
        return new org.apache.commons.cli.Option[]{pathOption, collectionOption};
    }

    /**
     * Loads every file found under the {@code path} option, converts each one to a Solr
     * document and sends the documents to Solr in batches, then commits the collection.
     *
     * <p>Options read from {@code cli}: {@code path}, {@code collection}, {@code zkHost} and
     * {@code batchSize}; the last three fall back to the defaults held by
     * {@code NewsgroupsIndexer$}. NOTE(review): {@code zkHost} and {@code batchSize} are not
     * declared by {@code getOptions()} in this class; presumably the surrounding framework
     * declares them. Confirm against the caller.
     *
     * <p>The {@code SparkContext} is stopped in a {@code finally} block, so it is released
     * even when indexing or the commit fails. The commit only happens when indexing succeeded.
     *
     * @param conf Spark configuration used to create the {@code SparkContext}
     * @param cli  parsed command line
     * @return always {@code 0} when the method completes normally
     */
    @Override
    public int run(SparkConf conf, CommandLine cli) {
        String path = cli.getOptionValue("path");
        String collection = cli.getOptionValue("collection", NewsgroupsIndexer$.MODULE$.DefaultCollection());
        String zkHost = cli.getOptionValue("zkHost", NewsgroupsIndexer$.MODULE$.DefaultZkHost());
        // The batch size arrives as a string; toInt() fails on non-numeric input.
        int batchSize = new StringOps(Predef$.MODULE$.augmentString(cli.getOptionValue("batchSize", NewsgroupsIndexer$.MODULE$.DefaultBatchSize()))).toInt();
        SparkContext sc = new SparkContext(conf);
        try {
            sc.hadoopConfiguration().setBoolean("mapreduce.input.fileinputformat.input.dir.recursive", true);
            sc.binaryFiles(path, sc.binaryFiles$default$2()).foreachPartition((Function1)new Serializable(this, path, collection, zkHost, batchSize){
                public static final long serialVersionUID = 0L;
                private final /* synthetic */ NewsgroupsIndexer $outer;
                public final String path$1;
                private final String collection$1;
                private final String zkHost$1;
                public final int batchSize$1;

                // Per-partition body: builds documents row by row and flushes them in batches.
                public final void apply(Iterator<Tuple2<String, PortableDataStream>> rows) {
                    IntRef numDocs = IntRef.create((int)0);
                    CloudSolrClient solrServer = SolrSupport$.MODULE$.getCachedCloudClient(this.zkHost$1);
                    ListBuffer batch = (ListBuffer)ListBuffer$.MODULE$.empty();
                    rows.foreach((Function1)new Serializable(this, numDocs, solrServer, batch){
                        public static final long serialVersionUID = 0L;
                        private final /* synthetic */ $anonfun$run$1 $outer;
                        private final IntRef numDocs$1;
                        private final CloudSolrClient solrServer$1;
                        private final ListBuffer batch$1;

                        // Per-file body: row._1() is the file path, row._2() the file's data stream.
                        public final void apply(Tuple2<String, PortableDataStream> row) {
                            Tuple2<Option<String>, String> tuple2 = this.$outer.com$lucidworks$spark$example$ml$NewsgroupsIndexer$$anonfun$$$outer().parseFilePath(this.$outer.path$1, (String)row._1());
                            if (tuple2 == null) {
                                throw new MatchError(tuple2);
                            }
                            Option group = (Option)tuple2._1();
                            String articleNum = (String)tuple2._2();
                            SolrInputDocument doc = this.$outer.com$lucidworks$spark$example$ml$NewsgroupsIndexer$$anonfun$$$outer().loadNewsgroupArticle((PortableDataStream)row._2());
                            // When the path yields no group, fall back to the first value of the
                            // document's Newsgroups_ss field.
                            Object newsgroup = group.getOrElse((Function0)new Serializable(this, doc){
                                public static final long serialVersionUID = 0L;
                                private final SolrInputDocument doc$1;

                                public final Object apply() {
                                    return JavaConversions$.MODULE$.collectionAsScalaIterable(this.doc$1.getFieldValues("Newsgroups_ss")).head();
                                }
                                {
                                    this.doc$1 = doc$1;
                                }
                            });
                            // id is "<newsgroup>_<articleNum>".
                            doc.addField("id", (Object)new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"", "_", ""})).s((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{newsgroup, articleNum})));
                            doc.addField("newsgroup_s", newsgroup);
                            doc.addField("filepath_s", row._1());
                            this.batch$1.$plus$eq((Object)doc);
                            if (this.batch$1.size() >= this.$outer.batchSize$1) {
                                this.$outer.com$lucidworks$spark$example$ml$NewsgroupsIndexer$$anonfun$$sendBatch$1(this.numDocs$1, this.solrServer$1, this.batch$1);
                            }
                        }
                        {
                            if ($outer == null) {
                                throw null;
                            }
                            this.$outer = $outer;
                            this.numDocs$1 = numDocs$1;
                            this.solrServer$1 = solrServer$1;
                            this.batch$1 = batch$1;
                        }
                    });
                    // Flush whatever is left after the last full batch.
                    if (batch.nonEmpty()) {
                        this.com$lucidworks$spark$example$ml$NewsgroupsIndexer$$anonfun$$sendBatch$1(numDocs, solrServer, batch);
                    }
                }

                public /* synthetic */ NewsgroupsIndexer com$lucidworks$spark$example$ml$NewsgroupsIndexer$$anonfun$$$outer() {
                    return this.$outer;
                }

                // Sends the buffered documents, adds their count to the running total, logs it
                // at info level and empties the buffer.
                public final void com$lucidworks$spark$example$ml$NewsgroupsIndexer$$anonfun$$sendBatch$1(IntRef numDocs$1, CloudSolrClient solrServer$1, ListBuffer batch$1) {
                    SolrSupport$.MODULE$.sendBatchToSolr((SolrClient)solrServer$1, this.collection$1, (Iterable<SolrInputDocument>)batch$1.toList());
                    numDocs$1.elem += batch$1.size();
                    if (this.$outer.logger().underlying().isInfoEnabled()) {
                        this.$outer.logger().underlying().info(new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"Sent ", " docs to Solr from ", ""})).s((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{BoxesRunTime.boxToInteger((int)numDocs$1.elem), this.path$1})));
                    }
                    batch$1.clear();
                }
                {
                    if ($outer == null) {
                        throw null;
                    }
                    this.$outer = $outer;
                    this.path$1 = path$1;
                    this.collection$1 = collection$1;
                    this.zkHost$1 = zkHost$1;
                    this.batchSize$1 = batchSize$1;
                }
            });
            CloudSolrClient solrServer = SolrSupport$.MODULE$.getCachedCloudClient(zkHost);
            solrServer.commit(collection, true, true);
        } finally {
            // Always release the context, including when a partition or the commit fails.
            sc.stop();
        }
        return 0;
    }

    /**
     * Splits a file path, taken relative to {@code basePath}, into an optional newsgroup name
     * and an article number.
     *
     * <p>Both arguments are parsed with the {@code java.net.URI} constructor, so a string
     * that is not a valid URI makes this method fail. The path of the relativized URI is
     * split on {@code '/'}; the last segment is the article number and the one before it,
     * if any, is the newsgroup.
     *
     * @param basePath base location that {@code filePath} is made relative to
     * @param filePath location of one article file
     * @return a pair of (newsgroup, article number); the newsgroup is empty when the relative
     *         path has only one segment
     */
    public Tuple2<Option<String>, String> parseFilePath(String basePath, String filePath) {
        String relativePath = new URI(basePath).relativize(new URI(filePath)).getPath();
        String[] segments = relativePath.split("/");
        int last = segments.length - 1;
        String articleNum = segments[last];
        // Typed as Option<String>: the value is either a Some or the empty option.
        Option<String> newsgroup;
        if (segments.length > 1) {
            newsgroup = new Some<String>(segments[last - 1]);
        } else {
            newsgroup = Option.empty();
        }
        return new Tuple2<Option<String>, String>(newsgroup, articleNum);
    }

    /*
     * WARNING - void declaration
     */
    public SolrInputDocument loadNewsgroupArticle(PortableDataStream stream) {
        void var2_2;
        SolrInputDocument doc = new SolrInputDocument(new String[0]);
        DataInputStream inputStream = stream.open();
        try {
            BooleanRef noMoreHeaders = BooleanRef.create((boolean)false);
            StringBuilder content = new StringBuilder();
            Source$.MODULE$.fromInputStream((InputStream)inputStream, "ISO-8859-1").getLines().foreach((Function1)new Serializable(this, doc, noMoreHeaders, content){
                public static final long serialVersionUID = 0L;
                public final SolrInputDocument doc$2;
                private final BooleanRef noMoreHeaders$1;
                private final StringBuilder content$1;

                public final Object apply(String line) {
                    Option option;
                    block15: {
                        StringBuilder stringBuilder;
                        block12: {
                            StringBuilder stringBuilder2;
                            block14: {
                                block13: {
                                    String cleanedLine;
                                    block11: {
                                        cleanedLine = NewsgroupsIndexer$.MODULE$.NonXmlCharsRegex().replaceAllIn((CharSequence)line, " ");
                                        if (!this.noMoreHeaders$1.elem) break block11;
                                        stringBuilder = this.content$1.append(cleanedLine).append("\n");
                                        break block12;
                                    }
                                    option = NewsgroupsIndexer$.MODULE$.NewsgroupHeaderRegex().findFirstMatchIn((CharSequence)cleanedLine);
                                    if (!None$.MODULE$.equals(option)) break block13;
                                    this.noMoreHeaders$1.elem = true;
                                    stringBuilder2 = this.content$1.append(cleanedLine).append("\n");
                                    break block14;
                                }
                                if (!(option instanceof Some)) break block15;
                                Some some = (Some)option;
                                Regex.Match fieldValue = (Regex.Match)some.x();
                                String field = NewsgroupsIndexer$.MODULE$.NonAlphaNumCharsRegex().replaceAllIn((CharSequence)fieldValue.group(1), "_");
                                String value = fieldValue.group(2);
                                String string = field;
                                if ("Message-ID".equals(string)) {
                                    this.doc$2.addField(new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"", "_s"})).s((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{field})), (Object)value.trim());
                                    BoxedUnit boxedUnit = BoxedUnit.UNIT;
                                } else {
                                    boolean bl = "From".equals(string) ? true : ("Subject".equals(string) ? true : "Sender".equals(string));
                                    if (bl) {
                                        this.doc$2.addField(new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"", "_txt_en"})).s((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{field})), (Object)value.trim());
                                        BoxedUnit boxedUnit = BoxedUnit.UNIT;
                                    } else if ("Newsgroups".equals(string)) {
                                        Predef$.MODULE$.refArrayOps((Object[])Predef$.MODULE$.refArrayOps((Object[])Predef$.MODULE$.refArrayOps((Object[])value.split(",")).map((Function1)new Serializable(this){
                                            public static final long serialVersionUID = 0L;

                                            public final String apply(String x$2) {
                                                return x$2.trim();
                                            }
                                        }, Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(String.class)))).filter((Function1)new Serializable(this){
                                            public static final long serialVersionUID = 0L;

                                            public final boolean apply(String x$3) {
                                                return x$3.length() > 0;
                                            }
                                        })).foreach((Function1)new Serializable(this, field){
                                            public static final long serialVersionUID = 0L;
                                            private final /* synthetic */ $anonfun$loadNewsgroupArticle$1 $outer;
                                            private final String field$1;

                                            public final void apply(String newsgroup) {
                                                this.$outer.doc$2.addField(new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"", "_ss"})).s((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{this.field$1})), (Object)newsgroup);
                                            }
                                            {
                                                if ($outer == null) {
                                                    throw null;
                                                }
                                                this.$outer = $outer;
                                                this.field$1 = field$1;
                                            }
                                        });
                                        BoxedUnit boxedUnit = BoxedUnit.UNIT;
                                    } else if ("Date".equals(string)) {
                                        String trimmedValue = value.trim();
                                        this.doc$2.addField(new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"", "_s"})).s((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{field})), (Object)trimmedValue);
                                        DateConverter$.MODULE$.toISO8601(trimmedValue).foreach((Function1)new Serializable(this, field){
                                            public static final long serialVersionUID = 0L;
                                            private final /* synthetic */ $anonfun$loadNewsgroupArticle$1 $outer;
                                            private final String field$1;

                                            public final void apply(String x$4) {
                                                this.$outer.doc$2.addField(new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"", "_tdt"})).s((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{this.field$1})), (Object)x$4);
                                            }
                                            {
                                                if ($outer == null) {
                                                    throw null;
                                                }
                                                this.$outer = $outer;
                                                this.field$1 = field$1;
                                            }
                                        });
                                        BoxedUnit boxedUnit = BoxedUnit.UNIT;
                                    } else {
                                        this.doc$2.addField(new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"", "_txt"})).s((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{field})), (Object)value);
                                        BoxedUnit boxedUnit = BoxedUnit.UNIT;
                                    }
                                }
                                stringBuilder2 = BoxedUnit.UNIT;
                            }
                            stringBuilder = stringBuilder2;
                        }
                        return stringBuilder;
                    }
                    throw new MatchError((Object)option);
                }
                {
                    this.doc$2 = doc$2;
                    this.noMoreHeaders$1 = noMoreHeaders$1;
                    this.content$1 = content$1;
                }
            });
            doc.addField("content_txt_en", (Object)content.toString());
        }
        catch (Throwable throwable) {
            void var3_3;
            var3_3.close();
            throw throwable;
        }
        inputStream.close();
        return var2_2;
    }

    /**
     * Creates the indexer. The only statement is the {@code LazyLogging} initializer call
     * shown below; no fields are assigned here.
     */
    public NewsgroupsIndexer() {
        // NOTE(review): decompiler rendering of a static $init$ hook, presumably the Scala
        // LazyLogging trait initializer; confirm before hand-editing this line.
        LazyLogging.class.$init$((LazyLogging)this);
    }
}

