/*
 * Decompiled with CFR 0.152.
 */
package org.apache.mahout.text;

import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.io.Closeables;
import java.io.Closeable;
import java.io.File;
import java.io.FileFilter;
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Pattern;
import org.apache.commons.cli2.Option;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.mahout.common.AbstractJob;
import org.apache.mahout.common.HadoopUtil;
import org.apache.mahout.common.commandline.DefaultOptionCreator;
import org.apache.mahout.text.MultipleTextFileInputFormat;
import org.apache.mahout.text.SequenceFilesFromMailArchivesMapper;
import org.apache.mahout.utils.email.MailOptions;
import org.apache.mahout.utils.email.MailProcessor;
import org.apache.mahout.utils.io.ChunkedWriter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public final class SequenceFilesFromMailArchives
extends AbstractJob {
    /** Logger used by this job and by the nested {@link PrefixAdditionFilter}. */
    private static final Logger log = LoggerFactory.getLogger(SequenceFilesFromMailArchives.class);

    /*
     * Command-line option descriptors. Each array holds two names: run() passes element [0] and
     * element [1] as the first two arguments of addOption/addFlag (presumably the long and the
     * short option name - confirm against AbstractJob), and always queries the parsed value
     * through element [0]. runMapReduce() additionally reuses these names as job configuration
     * keys: element [0] for chunkSize and charset, element [1] for all the others.
     */

    /** Chunk size option; its help text in run() describes the value as megabytes. */
    public static final String[] CHUNK_SIZE_OPTION = new String[]{"chunkSize", "chunk"};
    /** Option carrying the prefix that is prepended to the key. */
    public static final String[] KEY_PREFIX_OPTION = new String[]{"keyPrefix", "prefix"};
    /** Option naming the character encoding of the input files. */
    public static final String[] CHARSET_OPTION = new String[]{"charset", "c"};
    /** Flag: include the mail subject in the text. */
    public static final String[] SUBJECT_OPTION = new String[]{"subject", "s"};
    /** Flag: include the "to" field in the text (both names are "to"). */
    public static final String[] TO_OPTION = new String[]{"to", "to"};
    /** Flag: include the "from" field in the text (both names are "from"). */
    public static final String[] FROM_OPTION = new String[]{"from", "from"};
    /** Flag: include the references field in the text. */
    public static final String[] REFERENCES_OPTION = new String[]{"references", "refs"};
    /** Flag: include the message body in the output. */
    public static final String[] BODY_OPTION = new String[]{"body", "b"};
    /** Flag: strip quoted email text from the body. */
    public static final String[] STRIP_QUOTED_OPTION = new String[]{"stripQuoted", "q"};
    /** Option carrying the regex that identifies quoted text. */
    public static final String[] QUOTED_REGEX_OPTION = new String[]{"quotedRegex", "regex"};
    /** Option carrying the separator placed between metadata items. */
    public static final String[] SEPARATOR_OPTION = new String[]{"separator", "sep"};
    /** Option carrying the separator placed between lines of the body. */
    public static final String[] BODY_SEPARATOR_OPTION = new String[]{"bodySeparator", "bodySep"};
    /** Job configuration key under which runMapReduce() stores the input path as a string. */
    public static final String BASE_INPUT_PATH = "baseinputpath";
    /** Value written to the "mapreduce.job.max.split.locations" job setting by runMapReduce(). */
    private static final int MAX_JOB_SPLIT_LOCATIONS = 1000000;

    /**
     * Converts the mail input described by {@code options} into chunked output in-process.
     *
     * <p>A single file is parsed directly with
     * {@link MailProcessor#parseMboxLineByLine(File)}. A directory is walked with a
     * {@link PrefixAdditionFilter}, which parses every file it visits as a side effect of
     * {@link File#listFiles(java.io.FileFilter)}. The writer is closed in all cases, and an
     * {@link IOException} raised while closing is propagated rather than swallowed.
     *
     * @param options input location, output directory, chunk size and key prefix to use
     * @throws IOException if parsing the input or closing the writer fails
     */
    public void createSequenceFiles(MailOptions options) throws IOException {
        ChunkedWriter chunkWriter = new ChunkedWriter(this.getConf(), options.getChunkSize(), new Path(options.getOutputDir()));
        MailProcessor mailProcessor = new MailProcessor(options, options.getPrefix(), chunkWriter);
        try {
            File source = options.getInput();
            if (!source.isDirectory()) {
                // Single archive file: parse it directly and report the elapsed time.
                long begin = System.currentTimeMillis();
                long parsed = mailProcessor.parseMboxLineByLine(source);
                long end = System.currentTimeMillis();
                log.info("Parsed {} messages from {} in time: {}", new Object[]{parsed, source.getAbsolutePath(), end - begin});
            } else {
                // Directory: the filter does the parsing while listFiles() walks the entries;
                // the returned array is irrelevant.
                PrefixAdditionFilter walker = new PrefixAdditionFilter(mailProcessor, chunkWriter);
                source.listFiles(walker);
                log.info("Parsed {} messages from {}", (Object)walker.getMessageCount(), (Object)source.getAbsolutePath());
            }
        }
        finally {
            Closeables.close((Closeable)chunkWriter, false);
        }
    }

    /**
     * Command-line entry point: runs this job through {@link ToolRunner} with a fresh
     * {@link Configuration}. The status returned by the runner is not inspected.
     *
     * @param args command-line arguments forwarded to {@link #run(String[])}
     * @throws Exception if the job fails with an exception
     */
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Tool job = new SequenceFilesFromMailArchives();
        ToolRunner.run(conf, job, args);
    }

    /**
     * Registers the command-line options, parses {@code args}, builds the {@link MailOptions}
     * for the conversion and dispatches to the sequential or the MapReduce implementation.
     *
     * <p>The "method" option selects the implementation: the value "sequential" runs
     * in-process, anything else (default "mapreduce") submits a MapReduce job.
     *
     * @param args command-line arguments
     * @return -1 if argument parsing yields no result; otherwise the status returned by the
     *         selected implementation (0 on success, -1 if the MapReduce job did not succeed)
     * @throws Exception if option values are invalid or the conversion fails
     */
    public int run(String[] args) throws Exception {
        this.addInputOption();
        this.addOutputOption();
        this.addOption((Option)DefaultOptionCreator.methodOption().create());
        this.addOption(CHUNK_SIZE_OPTION[0], CHUNK_SIZE_OPTION[1], "The chunkSize in MegaBytes. Defaults to 64", "64");
        this.addOption(KEY_PREFIX_OPTION[0], KEY_PREFIX_OPTION[1], "The prefix to be prepended to the key", "");
        this.addOption(CHARSET_OPTION[0], CHARSET_OPTION[1], "The name of the character encoding of the input files. Default to UTF-8", "UTF-8");
        this.addFlag(SUBJECT_OPTION[0], SUBJECT_OPTION[1], "Include the Mail subject as part of the text.  Default is false");
        this.addFlag(TO_OPTION[0], TO_OPTION[1], "Include the to field in the text.  Default is false");
        this.addFlag(FROM_OPTION[0], FROM_OPTION[1], "Include the from field in the text.  Default is false");
        this.addFlag(REFERENCES_OPTION[0], REFERENCES_OPTION[1], "Include the references field in the text.  Default is false");
        this.addFlag(BODY_OPTION[0], BODY_OPTION[1], "Include the body in the output.  Default is false");
        this.addFlag(STRIP_QUOTED_OPTION[0], STRIP_QUOTED_OPTION[1], "Strip (remove) quoted email text in the body.  Default is false");
        this.addOption(QUOTED_REGEX_OPTION[0], QUOTED_REGEX_OPTION[1], "Specify the regex that identifies quoted text.  Default is to look for > or | at the beginning of the line.");
        this.addOption(SEPARATOR_OPTION[0], SEPARATOR_OPTION[1], "The separator to use between metadata items (to, from, etc.).  Default is \\n", "\n");
        this.addOption(BODY_SEPARATOR_OPTION[0], BODY_SEPARATOR_OPTION[1], "The separator to use between lines in the body.  Default is \\n.  Useful to change if you wish to have the message be on one line", "\n");
        this.addOption(DefaultOptionCreator.helpOption());

        // A null result means there is nothing to run; report that to the caller.
        if (this.parseArguments(args) == null) {
            return -1;
        }

        File input = this.getInputFile();
        String outputDir = this.getOutputPath().toString();
        int chunkSize = 64;
        if (this.hasOption(CHUNK_SIZE_OPTION[0])) {
            chunkSize = Integer.parseInt(this.getOption(CHUNK_SIZE_OPTION[0]));
        }
        String prefix = "";
        if (this.hasOption(KEY_PREFIX_OPTION[0])) {
            prefix = this.getOption(KEY_PREFIX_OPTION[0]);
        }
        Charset charset = Charset.forName(this.getOption(CHARSET_OPTION[0]));

        MailOptions options = new MailOptions();
        options.setInput(input);
        options.setOutputDir(outputDir);
        options.setPrefix(prefix);
        options.setChunkSize(chunkSize);
        options.setCharset(charset);

        // Header patterns to extract, in selection order. patternOrder maps each selected header
        // to its zero-based position in 'patterns', so every entry must use the post-increment:
        // the stored value is the index at which the pattern was just added.
        ArrayList<Pattern> patterns = Lists.newArrayListWithCapacity(5);
        Map<String, Integer> patternOrder = Maps.newHashMap();
        int order = 0;
        if (this.hasOption(FROM_OPTION[0])) {
            patterns.add(MailProcessor.FROM_PREFIX);
            patternOrder.put("FROM", order++);
        }
        if (this.hasOption(TO_OPTION[0])) {
            patterns.add(MailProcessor.TO_PREFIX);
            patternOrder.put("TO", order++);
        }
        if (this.hasOption(REFERENCES_OPTION[0])) {
            patterns.add(MailProcessor.REFS_PREFIX);
            patternOrder.put("REFS", order++);
        }
        if (this.hasOption(SUBJECT_OPTION[0])) {
            patterns.add(MailProcessor.SUBJECT_PREFIX);
            patternOrder.put("SUBJECT", order++);
        }
        options.setStripQuotedText(this.hasOption(STRIP_QUOTED_OPTION[0]));
        options.setPatternsToMatch(patterns.toArray(new Pattern[patterns.size()]));
        options.setPatternOrder(patternOrder);
        options.setIncludeBody(this.hasOption(BODY_OPTION[0]));
        if (this.hasOption(SEPARATOR_OPTION[0])) {
            options.setSeparator(this.getOption(SEPARATOR_OPTION[0]));
        } else {
            options.setSeparator("\n");
        }
        // Body separator and quoted-text pattern are only overridden when given explicitly;
        // otherwise whatever MailOptions starts with is kept.
        if (this.hasOption(BODY_SEPARATOR_OPTION[0])) {
            options.setBodySeparator(this.getOption(BODY_SEPARATOR_OPTION[0]));
        }
        if (this.hasOption(QUOTED_REGEX_OPTION[0])) {
            options.setQuotedTextPattern(Pattern.compile(this.getOption(QUOTED_REGEX_OPTION[0])));
        }

        // Propagate the implementation's status so a failed MapReduce job is not reported as 0.
        if ("sequential".equals(this.getOption("method", "mapreduce"))) {
            return this.runSequential(options);
        }
        return this.runMapReduce(this.getInputPath(), this.getOutputPath());
    }

    /**
     * Runs the conversion in-process via {@link #createSequenceFiles(MailOptions)} and logs how
     * long it took.
     *
     * @param options fully populated conversion options
     * @return always 0; failures surface as exceptions
     * @throws IOException if the conversion fails
     */
    private int runSequential(MailOptions options) throws IOException, InterruptedException, NoSuchMethodException {
        long startedAt = System.currentTimeMillis();
        this.createSequenceFiles(options);
        long elapsedMillis = System.currentTimeMillis() - startedAt;
        log.info("Conversion took {}ms", (Object)elapsedMillis);
        return 0;
    }

    /**
     * Configures and runs the MapReduce variant of the conversion.
     *
     * <p>The parsed command-line options are copied into the job configuration for the mapper.
     * Most keys use the option's second name (element [1]); chunk size and charset use the
     * first (element [0]). The input directory list is built with
     * {@link HadoopUtil#buildDirList(FileSystem, FileStatus)} and the maximum split size is
     * set to the chunk size converted to bytes.
     *
     * @param input  input path handed to {@code prepareJob}
     * @param output output path handed to {@code prepareJob}
     * @return 0 if the job completed successfully, -1 otherwise
     * @throws IOException            on file system or job submission errors
     * @throws InterruptedException   if interrupted while waiting for the job
     * @throws ClassNotFoundException if a job class cannot be resolved
     */
    private int runMapReduce(Path input, Path output) throws IOException, InterruptedException, ClassNotFoundException {
        Job job = this.prepareJob(input, output, MultipleTextFileInputFormat.class, SequenceFilesFromMailArchivesMapper.class, Text.class, Text.class, SequenceFileOutputFormat.class, "SequentialFilesFromMailArchives");
        Configuration jobConfig = job.getConfiguration();
        if (this.hasOption(KEY_PREFIX_OPTION[0])) {
            jobConfig.set(KEY_PREFIX_OPTION[1], this.getOption(KEY_PREFIX_OPTION[0]));
        }
        // Stays 0 when no chunk size is available, which results in a max split size of 0 below.
        int chunkSize = 0;
        if (this.hasOption(CHUNK_SIZE_OPTION[0])) {
            chunkSize = Integer.parseInt(this.getOption(CHUNK_SIZE_OPTION[0]));
            jobConfig.set(CHUNK_SIZE_OPTION[0], String.valueOf(chunkSize));
        }
        if (this.hasOption(CHARSET_OPTION[0])) {
            // Charset.forName validates the name up front; the canonical name is published
            // because displayName() may be localized by Charset subclasses.
            Charset charset = Charset.forName(this.getOption(CHARSET_OPTION[0]));
            jobConfig.set(CHARSET_OPTION[0], charset.name());
        }
        if (this.hasOption(FROM_OPTION[0])) {
            jobConfig.set(FROM_OPTION[1], "true");
        }
        if (this.hasOption(TO_OPTION[0])) {
            jobConfig.set(TO_OPTION[1], "true");
        }
        if (this.hasOption(REFERENCES_OPTION[0])) {
            jobConfig.set(REFERENCES_OPTION[1], "true");
        }
        if (this.hasOption(SUBJECT_OPTION[0])) {
            jobConfig.set(SUBJECT_OPTION[1], "true");
        }
        if (this.hasOption(QUOTED_REGEX_OPTION[0])) {
            // Compiling first rejects an invalid regex before the job is submitted.
            jobConfig.set(QUOTED_REGEX_OPTION[1], Pattern.compile(this.getOption(QUOTED_REGEX_OPTION[0])).toString());
        }
        if (this.hasOption(SEPARATOR_OPTION[0])) {
            jobConfig.set(SEPARATOR_OPTION[1], this.getOption(SEPARATOR_OPTION[0]));
        } else {
            jobConfig.set(SEPARATOR_OPTION[1], "\n");
        }
        if (this.hasOption(BODY_OPTION[0])) {
            jobConfig.set(BODY_OPTION[1], "true");
        } else {
            jobConfig.set(BODY_OPTION[1], "false");
        }
        if (this.hasOption(BODY_SEPARATOR_OPTION[0])) {
            jobConfig.set(BODY_SEPARATOR_OPTION[1], this.getOption(BODY_SEPARATOR_OPTION[0]));
        } else {
            jobConfig.set(BODY_SEPARATOR_OPTION[1], "\n");
        }

        // NOTE(review): the inherited inputPath field is used here rather than the 'input'
        // parameter; the only caller passes getInputPath(), presumably the same path - confirm.
        FileSystem fs = FileSystem.get((Configuration)jobConfig);
        FileStatus fsFileStatus = fs.getFileStatus(this.inputPath);
        jobConfig.set(BASE_INPUT_PATH, this.inputPath.toString());
        String inputDirList = HadoopUtil.buildDirList((FileSystem)fs, (FileStatus)fsFileStatus);
        FileInputFormat.setInputPaths((Job)job, (String)inputDirList);

        // Multiply as long: in int arithmetic a chunk size of 2048 MB or more overflows before
        // the result is widened.
        long chunkSizeInBytes = chunkSize * 1024L * 1024L;
        FileInputFormat.setMaxInputSplitSize((Job)job, (long)chunkSizeInBytes);
        jobConfig.set("mapreduce.job.max.split.locations", String.valueOf(MAX_JOB_SPLIT_LOCATIONS));

        return job.waitForCompletion(true) ? 0 : -1;
    }

    /**
     * {@link FileFilter} used purely for its side effects: every file offered to
     * {@link #accept(File)} is parsed as a mail archive, and every directory is walked
     * recursively with a nested filter whose key prefix is extended by the directory name.
     * {@code accept} always returns {@code false}, so the array returned by
     * {@link File#listFiles(FileFilter)} never contains any entries.
     */
    public class PrefixAdditionFilter
    implements FileFilter {
        private final MailProcessor processor;
        private final ChunkedWriter writer;
        /** Messages parsed so far by this filter, including those found in sub-directories. */
        private long messageCount;

        /**
         * @param processor parser applied to the plain files this filter visits
         * @param writer    writer shared with the processors created for sub-directories
         */
        public PrefixAdditionFilter(MailProcessor processor, ChunkedWriter writer) {
            this.processor = processor;
            this.writer = writer;
        }

        /** @return the number of messages parsed through this filter so far */
        public long getMessageCount() {
            return this.messageCount;
        }

        /**
         * Processes {@code current} and adds the number of parsed messages to the running total.
         *
         * @param current file or directory being visited
         * @return always {@code false}
         * @throws IllegalStateException wrapping any {@link IOException} raised while parsing
         */
        @Override
        public boolean accept(File current) {
            if (!current.isDirectory()) {
                long parsed;
                try {
                    parsed = this.processor.parseMboxLineByLine(current);
                }
                catch (IOException e) {
                    throw new IllegalStateException("Error processing " + current, e);
                }
                this.messageCount += parsed;
                return false;
            }

            log.info("At {}", (Object)current.getAbsolutePath());
            // Descend with a processor whose prefix carries the sub-directory name.
            String childPrefix = this.processor.getPrefix() + File.separator + current.getName();
            MailProcessor childProcessor = new MailProcessor(this.processor.getOptions(), childPrefix, this.writer);
            PrefixAdditionFilter childFilter = new PrefixAdditionFilter(childProcessor, this.writer);
            current.listFiles(childFilter);
            long parsedInDirectory = childFilter.getMessageCount();
            log.info("Parsed {} messages from directory {}", (Object)parsedInDirectory, (Object)current.getAbsolutePath());
            this.messageCount += parsedInDirectory;
            return false;
        }
    }
}

