/*
 * Decompiled with CFR 0.152.
 */
package org.apache.mahout.clustering.streaming.tools;

import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.io.Writer;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import org.apache.commons.cli2.CommandLine;
import org.apache.commons.cli2.Group;
import org.apache.commons.cli2.Option;
import org.apache.commons.cli2.builder.ArgumentBuilder;
import org.apache.commons.cli2.builder.DefaultOptionBuilder;
import org.apache.commons.cli2.builder.GroupBuilder;
import org.apache.commons.cli2.commandline.Parser;
import org.apache.commons.cli2.option.DefaultOption;
import org.apache.commons.cli2.util.HelpFormatter;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.mahout.clustering.ClusteringUtils;
import org.apache.mahout.clustering.iterator.ClusterWritable;
import org.apache.mahout.clustering.streaming.mapreduce.CentroidWritable;
import org.apache.mahout.clustering.streaming.tools.IOUtils;
import org.apache.mahout.common.distance.DistanceMeasure;
import org.apache.mahout.common.distance.SquaredEuclideanDistanceMeasure;
import org.apache.mahout.common.iterator.sequencefile.PathType;
import org.apache.mahout.common.iterator.sequencefile.SequenceFileDirValueIterable;
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.VectorWritable;
import org.apache.mahout.math.stats.OnlineSummarizer;

/**
 * Command-line tool that summarizes how well one (or two) sets of centroids fit a set of
 * training vectors and, optionally, a set of test vectors.
 *
 * <p>For every cluster it prints the average distance to the console and appends one CSV row
 * (mean, standard deviation, five quartile values, count and a train/test tag) to the output
 * file. It then prints the Dunn index and the Davies-Bouldin index for the first set of
 * centroids and, when a second set was supplied, for that set as well.
 */
public class ClusterQualitySummarizer {
    /** Path of the CSV file the per-cluster rows are written to ({@code --output}). */
    private String outputFile;
    /** Writer on {@link #outputFile}; opened and closed by {@link #run(String[])}. */
    private PrintWriter fileOut;
    /** Location of the sequence files holding the training vectors ({@code --input}). */
    private String trainFile;
    /** Location of the sequence files holding the test vectors, or {@code null} if not given. */
    private String testFile;
    /** Location of the sequence files holding the first set of centroids ({@code --centroids}). */
    private String centroidFile;
    /** Location of the optional second set of centroids, or {@code null} if not given. */
    private String centroidCompareFile;
    /** When set, the first centroid files are read as {@code ClusterWritable} values. */
    private boolean mahoutKMeansFormat;
    /** When set, the second centroid files are read as {@code ClusterWritable} values. */
    private boolean mahoutKMeansFormatCompare;
    /** Distance measure used for every summary and index computed by this tool. */
    private final DistanceMeasure distanceMeasure = new SquaredEuclideanDistanceMeasure();

    /**
     * Prints the summaries to the console and to this instance's output file.
     *
     * @param summarizers one distance summarizer per cluster, indexed by cluster number
     * @param type tag written in the last CSV column (this class passes "train" or "test")
     */
    public void printSummaries(List<OnlineSummarizer> summarizers, String type) {
        printSummaries(summarizers, type, fileOut);
    }

    /**
     * Prints per-cluster distance statistics to the console and, when {@code fileOut} is not
     * {@code null}, writes one CSV row per non-empty cluster to it.
     *
     * <p>Empty clusters are reported on the console only. For clusters with a single point the
     * three inner quartile columns repeat quartile 0 instead of querying quartiles 1 to 3.
     *
     * @param summarizers one distance summarizer per cluster, indexed by cluster number
     * @param type tag written in the last CSV column
     * @param fileOut destination for the CSV rows; may be {@code null} to skip file output
     */
    public static void printSummaries(List<OnlineSummarizer> summarizers, String type, PrintWriter fileOut) {
        double maxDistance = 0.0;
        for (int i = 0; i < summarizers.size(); ++i) {
            OnlineSummarizer summarizer = summarizers.get(i);
            if (summarizer.getCount() == 0) {
                System.out.printf("Cluster %d is empty\n", i);
                continue;
            }
            maxDistance = Math.max(maxDistance, summarizer.getMax());
            System.out.printf("Average distance in cluster %d [%d]: %f\n", i, summarizer.getCount(), summarizer.getMean());
            if (fileOut == null) {
                continue;
            }
            boolean moreThanOne = summarizer.getCount() > 1;
            double q0 = summarizer.getQuartile(0);
            // Locale.ROOT keeps '.' as the decimal separator; a locale that formats decimals
            // with ',' would otherwise corrupt the comma-separated columns.
            fileOut.printf(Locale.ROOT, "%d,%f,%f,%f,%f,%f,%f,%f,%d,%s\n",
                i,
                summarizer.getMean(),
                summarizer.getSD(),
                q0,
                moreThanOne ? summarizer.getQuartile(1) : q0,
                moreThanOne ? summarizer.getQuartile(2) : q0,
                moreThanOne ? summarizer.getQuartile(3) : q0,
                summarizer.getQuartile(4),
                summarizer.getCount(),
                type);
        }
        System.out.printf("Num clusters: %d; maxDistance: %f\n", summarizers.size(), maxDistance);
    }

    /**
     * Parses the command line and, if it is valid, produces the console report and the CSV file.
     *
     * <p>An {@link IOException} (for example when the output file cannot be opened) is reported
     * by printing its message to standard output; it is not rethrown.
     *
     * @param args command-line arguments, see {@link #parseArgs(String[])}
     */
    public void run(String[] args) {
        if (!parseArgs(args)) {
            return;
        }
        Configuration conf = new Configuration();
        try {
            Configuration.dumpConfiguration(conf, new OutputStreamWriter(System.out));
            fileOut = new PrintWriter(new FileOutputStream(outputFile));
            try {
                writeReport(conf);
            } finally {
                // Close (and thereby flush) the CSV even when reading the inputs or computing
                // the summaries fails with a runtime exception.
                fileOut.close();
            }
        } catch (IOException e) {
            System.out.println(e.getMessage());
        }
    }

    /** Writes the CSV header and rows and prints the cluster quality indices to the console. */
    private void writeReport(Configuration conf) {
        fileOut.print("cluster,distance.mean,distance.sd,distance.q0,distance.q1,distance.q2,distance.q3,distance.q4,count,is.train\n");

        List<? extends Vector> centroids = loadCentroids(centroidFile, mahoutKMeansFormat, conf);
        List<? extends Vector> centroidsCompare = null;
        if (centroidCompareFile != null) {
            centroidsCompare = loadCentroids(centroidCompareFile, mahoutKMeansFormatCompare, conf);
        }

        Iterable<Vector> trainDatapoints = loadVectors(trainFile, conf);
        Iterable<Vector> datapoints = trainDatapoints;
        printSummaries(ClusteringUtils.summarizeClusterDistances(trainDatapoints, centroids, distanceMeasure), "train");
        if (testFile != null) {
            Iterable<Vector> testDatapoints = loadVectors(testFile, conf);
            printSummaries(ClusteringUtils.summarizeClusterDistances(testDatapoints, centroids, distanceMeasure), "test");
            // The indices below are computed over the training and test vectors together.
            datapoints = Iterables.concat(trainDatapoints, testDatapoints);
        }

        List<OnlineSummarizer> summaries = ClusteringUtils.summarizeClusterDistances(datapoints, centroids, distanceMeasure);
        List<OnlineSummarizer> compareSummaries = null;
        if (centroidsCompare != null) {
            compareSummaries = ClusteringUtils.summarizeClusterDistances(datapoints, centroidsCompare, distanceMeasure);
        }

        System.out.printf("[Dunn Index] First: %f", ClusteringUtils.dunnIndex(centroids, distanceMeasure, summaries));
        if (compareSummaries != null) {
            System.out.printf(" Second: %f\n", ClusteringUtils.dunnIndex(centroidsCompare, distanceMeasure, compareSummaries));
        } else {
            System.out.print("\n");
        }

        System.out.printf("[Davies-Bouldin Index] First: %f", ClusteringUtils.daviesBouldinIndex(centroids, distanceMeasure, summaries));
        if (compareSummaries != null) {
            System.out.printf(" Second: %f\n", ClusteringUtils.daviesBouldinIndex(centroidsCompare, distanceMeasure, compareSummaries));
        } else {
            System.out.print("\n");
        }
    }

    /** Reads all centroids under {@code path} (a glob) into a list, in the requested value format. */
    private static List<? extends Vector> loadCentroids(String path, boolean clusterWritableFormat, Configuration conf) {
        if (clusterWritableFormat) {
            SequenceFileDirValueIterable<ClusterWritable> clusters =
                new SequenceFileDirValueIterable<ClusterWritable>(new Path(path), PathType.GLOB, conf);
            return Lists.newArrayList(IOUtils.getCentroidsFromClusterWritableIterable(clusters));
        }
        SequenceFileDirValueIterable<CentroidWritable> centroids =
            new SequenceFileDirValueIterable<CentroidWritable>(new Path(path), PathType.GLOB, conf);
        return Lists.newArrayList(IOUtils.getCentroidsFromCentroidWritableIterable(centroids));
    }

    /** Returns an iterable over the vectors stored as {@code VectorWritable} values under {@code path} (a glob). */
    private static Iterable<Vector> loadVectors(String path, Configuration conf) {
        SequenceFileDirValueIterable<VectorWritable> writables =
            new SequenceFileDirValueIterable<VectorWritable>(new Path(path), PathType.GLOB, conf);
        return IOUtils.getVectorsFromVectorWritableIterable(writables);
    }

    /**
     * Parses the command line into this instance's fields.
     *
     * <p>Required options: {@code --input/-i}, {@code --centroids/-c}, {@code --output/-o}.
     * Optional: {@code --testInput/-itest}, {@code --centroidsCompare/-cc},
     * {@code --mahoutkmeansformat/-mkm}, {@code --mahoutkmeansformatCompare/-mkmc} and
     * {@code --help}.
     *
     * @param args raw command-line arguments
     * @return {@code true} if parsing succeeded and the tool should run; {@code false} if the
     *     parser returned no command line
     */
    private boolean parseArgs(String[] args) {
        DefaultOptionBuilder builder = new DefaultOptionBuilder();
        ArgumentBuilder argumentBuilder = new ArgumentBuilder();

        Option help = builder.withLongName("help")
            .withDescription("print this list")
            .create();
        Option inputFileOption = builder.withLongName("input")
            .withShortName("i")
            .withRequired(true)
            .withArgument(argumentBuilder.withName("input").withMaximum(1).create())
            .withDescription("where to get seq files with the vectors (training set)")
            .create();
        Option testInputFileOption = builder.withLongName("testInput")
            .withShortName("itest")
            .withArgument(argumentBuilder.withName("testInput").withMaximum(1).create())
            .withDescription("where to get seq files with the vectors (test set)")
            .create();
        Option centroidsFileOption = builder.withLongName("centroids")
            .withShortName("c")
            .withRequired(true)
            .withArgument(argumentBuilder.withName("centroids").withMaximum(1).create())
            .withDescription("where to get seq files with the centroids (from Mahout KMeans or StreamingKMeansDriver)")
            .create();
        Option centroidsCompareFileOption = builder.withLongName("centroidsCompare")
            .withShortName("cc")
            .withRequired(false)
            .withArgument(argumentBuilder.withName("centroidsCompare").withMaximum(1).create())
            .withDescription("where to get seq files with the second set of centroids (from Mahout KMeans or StreamingKMeansDriver)")
            .create();
        Option outputFileOption = builder.withLongName("output")
            .withShortName("o")
            .withRequired(true)
            .withArgument(argumentBuilder.withName("output").withMaximum(1).create())
            .withDescription("where to dump the CSV file with the results")
            .create();
        Option mahoutKMeansFormatOption = builder.withLongName("mahoutkmeansformat")
            .withShortName("mkm")
            .withDescription("if set, read files as (IntWritable, ClusterWritable) pairs")
            .withArgument(argumentBuilder.withName("numpoints").withMaximum(1).create())
            .create();
        Option mahoutKMeansCompareFormatOption = builder.withLongName("mahoutkmeansformatCompare")
            .withShortName("mkmc")
            .withDescription("if set, read files as (IntWritable, ClusterWritable) pairs")
            .withArgument(argumentBuilder.withName("numpoints").withMaximum(1).create())
            .create();

        Group normalArgs = new GroupBuilder()
            .withOption(help)
            .withOption(inputFileOption)
            .withOption(testInputFileOption)
            .withOption(outputFileOption)
            .withOption(centroidsFileOption)
            .withOption(centroidsCompareFileOption)
            .withOption(mahoutKMeansFormatOption)
            .withOption(mahoutKMeansCompareFormatOption)
            .create();

        Parser parser = new Parser();
        parser.setHelpOption(help);
        parser.setHelpTrigger("--help");
        parser.setGroup(normalArgs);
        parser.setHelpFormatter(new HelpFormatter(" ", "", " ", 150));
        CommandLine cmdLine = parser.parseAndHelp(args);
        if (cmdLine == null) {
            return false;
        }

        trainFile = (String) cmdLine.getValue(inputFileOption);
        if (cmdLine.hasOption(testInputFileOption)) {
            testFile = (String) cmdLine.getValue(testInputFileOption);
        }
        centroidFile = (String) cmdLine.getValue(centroidsFileOption);
        if (cmdLine.hasOption(centroidsCompareFileOption)) {
            centroidCompareFile = (String) cmdLine.getValue(centroidsCompareFileOption);
        }
        outputFile = (String) cmdLine.getValue(outputFileOption);
        if (cmdLine.hasOption(mahoutKMeansFormatOption)) {
            mahoutKMeansFormat = true;
        }
        if (cmdLine.hasOption(mahoutKMeansCompareFormatOption)) {
            mahoutKMeansFormatCompare = true;
        }
        return true;
    }

    /**
     * Entry point: runs the summarizer with the given command-line arguments.
     *
     * @param args command-line arguments
     */
    public static void main(String[] args) {
        new ClusterQualitySummarizer().run(args);
    }
}

