/*
Copyright 2013 McDowell

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
 */

package net.sf.jsonunicode;

import java.io.*;
import java.nio.charset.*;

import static java.nio.charset.CodingErrorAction.REPORT;
import static net.sf.jsonunicode.JsonCharsets.*;

/**
 * Methods for detecting JSON document encodings.
 * <p/>
 * To open a stream to write JSON, use:
 * <p/>
 * <pre>
 * OutputStream out = new FileOutputStream(file);
 * Closeable res = out;
 * try {
 *   Writer writer = JsonIo.newJsonWriter(out);
 *   res = writer;
 *   // write JSON
 * } finally {
 *   res.close();
 * }
 * </pre>
 * <p/>
 * To open a stream to read JSON, use:
 * <p/>
 * <pre>
 * InputStream in = new FileInputStream(file);
 * Closeable res = in;
 * try {
 *   Reader reader = JsonIo.newJsonReader(in);
 *   res = reader;
 *   // read JSON
 * } finally {
 *   res.close();
 * }
 * </pre>
 * <p/>
 * This type does a best-guess detection of the encoding of a JSON data source
 * by inspecting the first four bytes as described in <A
 * href="http://tools.ietf.org/html/rfc4627#section-3">RFC 4627</A>.
 * <p/>
 * Note: byte-order-marks are not supported; UTF-32 is not a standard Java
 * encoding and may not be supported by the runtime.
 * <p/>
 * Streams are intolerant of malformed Unicode data by default.
 *
 * @see JsonCharsets
 */
public final class JsonIo {
  /**
   * Number of leading bytes inspected when detecting the encoding.
   */
  private static final int DETECTION_BYTES = 4;

  /**
   * Buffer size used by {@link #newCompactJsonWriter(OutputStream)}.
   */
  private static final int DEFAULT_BUFFER_SIZE = 1024;

  private JsonIo() {
  }

  /**
   * Detects the JSON encoding and decorates the input with the appropriate
   * {@link Reader}.
   * <p/>
   * If the given stream does not support {@link InputStream#mark(int)} it is
   * wrapped in a {@link BufferedInputStream} so the leading bytes can be
   * inspected and then re-read by the returned reader.
   *
   * @param in            a JSON source
   * @param errorHandling how to handle malformed character data
   * @return the character decoder
   * @throws IOException                 on error
   * @throws UnsupportedCharsetException UTF-32 is unsupported on Sun Java 5
   */
  public static Reader newJsonReader(InputStream in, CodingErrorAction errorHandling) throws IOException, UnsupportedCharsetException {
    Asserts.assertNotNull(in, "InputStream");
    Asserts.assertNotNull(errorHandling, "CodingErrorAction");

    InputStream markable = in.markSupported() ? in : new BufferedInputStream(in);
    CharsetDecoder decoder = detectJsonEncoding(markable).newDecoder()
        .onMalformedInput(errorHandling)
        .onUnmappableCharacter(errorHandling);
    return new InputStreamReader(markable, decoder);
  }

  /**
   * As {@link #newJsonReader(InputStream, CodingErrorAction)} with a default of
   * {@link CodingErrorAction#REPORT}.
   *
   * @param in a JSON source
   * @return the character decoder
   * @throws IOException                 on error
   * @throws UnsupportedCharsetException UTF-32 is unsupported on Sun Java 5
   */
  public static Reader newJsonReader(InputStream in) throws IOException, UnsupportedCharsetException {
    return newJsonReader(in, REPORT);
  }

  /**
   * Detects the character encoding of the given JSON data stream.
   * <p/>
   * The stream must return true when {@link InputStream#markSupported()} is
   * called. Up to four bytes are read and the stream is then reset, so the
   * caller sees the stream positioned where it was on entry.
   * <p/>
   * When four bytes are available the pattern of zero bytes is matched as
   * described in RFC 4627 section 3:
   * <pre>
   * 00 00 00 xx  UTF-32BE
   * 00 xx 00 xx  UTF-16BE
   * xx 00 00 00  UTF-32LE
   * xx 00 xx 00  UTF-16LE
   * otherwise    UTF-8
   * </pre>
   * When the stream holds fewer than four bytes the four-byte table cannot be
   * applied. A two-byte stream containing exactly one zero byte is treated as
   * a single UTF-16 code unit; every other short (or empty) stream is treated
   * as UTF-8.
   *
   * @param in a stream that supports mark
   * @return the detected encoding
   * @throws IOException                 on error
   * @throws UnsupportedCharsetException UTF-32 is unsupported on Sun Java 5
   * @see JsonCharsets
   */
  public static Charset detectJsonEncoding(InputStream in) throws IOException, UnsupportedCharsetException {
    Asserts.assertNotNull(in, "InputStream");
    Asserts.assertTrue(in.markSupported(), "InputStream.markSupported()");

    in.mark(DETECTION_BYTES);
    int mask = 0;
    int count = 0;
    while (count < DETECTION_BYTES) {
      int r = in.read();
      if (r == -1) {
        break;
      }
      // One bit per byte, first byte in the most significant position:
      // 0 for a zero byte, 1 for anything else.
      mask = (mask << 1) | ((r == 0) ? 0 : 1);
      count++;
    }
    in.reset();
    return match(mask, count);
  }

  /**
   * Maps the zero/non-zero byte pattern to a charset.
   *
   * @param mask  one bit per byte read; the first byte is the highest bit
   * @param count how many bytes were actually read (0 to 4)
   * @return the matching charset, UTF-8 when nothing else fits
   */
  private static Charset match(int mask, int count) {
    if (count == 2) {
      // A raw NUL is never valid in UTF-8 JSON, so a two-byte document with
      // one zero byte can only be a single UTF-16 code unit.
      switch (mask) {
        case 1: // 00 xx
          return UTF_16BE;
        case 2: // xx 00
          return UTF_16LE;
        default:
          return UTF_8;
      }
    }
    if (count < DETECTION_BYTES) {
      // A truncated mask must not be compared against the four-byte table:
      // e.g. a lone non-zero byte would otherwise look like UTF-32BE.
      return UTF_8;
    }
    switch (mask) {
      case 1: // 00 00 00 xx
        return UTF_32BE();
      case 5: // 00 xx 00 xx
        return UTF_16BE;
      case 8: // xx 00 00 00
        return UTF_32LE();
      case 10: // xx 00 xx 00
        return UTF_16LE;
      default:
        return UTF_8;
    }
  }

  /**
   * Creates a UTF-8 writer.
   * Convenience method; UTF-8 is the default encoding for JSON.
   *
   * @param out           the JSON target
   * @param errorHandling the action on malformed data
   * @return the encoder
   * @throws IOException on IO error
   */
  public static Writer newJsonWriter(OutputStream out, CodingErrorAction errorHandling) throws IOException {
    Asserts.assertNotNull(out, "OutputStream");
    Asserts.assertNotNull(errorHandling, "CodingErrorAction");

    CharsetEncoder encoder = UTF_8.newEncoder()
        .onMalformedInput(errorHandling)
        .onUnmappableCharacter(errorHandling);
    return new OutputStreamWriter(out, encoder);
  }

  /**
   * As {@link #newJsonWriter(OutputStream, CodingErrorAction)} with
   * {@link CodingErrorAction#REPORT}.
   *
   * @param out the JSON target
   * @return the encoder
   * @throws IOException on IO error
   */
  public static Writer newJsonWriter(OutputStream out) throws IOException {
    // Delegate so both overloads configure the encoder identically.
    return newJsonWriter(out, REPORT);
  }

  /**
   * As {@link #newCompactJsonWriter(OutputStream, int, CodingErrorAction)} with
   * {@link CodingErrorAction#REPORT} and a default buffer size.
   *
   * @param out the data target
   * @return the encoder
   */
  public static Writer newCompactJsonWriter(OutputStream out) {
    return newCompactJsonWriter(out, DEFAULT_BUFFER_SIZE, REPORT);
  }

  /**
   * As {@link #newCompactJsonWriter(OutputStream, int, CodingErrorAction)} with
   * {@link CodingErrorAction#REPORT}.
   *
   * @param out        the data target
   * @param bufferSize the maximum size of the in-memory buffer
   * @return the encoder
   */
  public static Writer newCompactJsonWriter(OutputStream out, int bufferSize) {
    return newCompactJsonWriter(out, bufferSize, REPORT);
  }

  /**
   * A writer that chooses between UTF-8 and UTF-16 by buffering a certain
   * amount of output and measuring how compact it will be when encoded using
   * either of those encodings.
   * <p/>
   * Note: this method is only useful for certain ranges of Unicode data (e.g. a
   * high proportion of Japanese natural language data.) Clients that assume
   * JSON is UTF-8 may fail when faced with the results. The compression may be
   * redundant when combined with over-the-wire compression methods (e.g. GZIP.)
   * <p/>
   * If in doubt, use {@link #newJsonWriter(OutputStream)} instead.
   * <p/>
   * Calling {@link Writer#flush()} will cause the measuring phase to finish and
   * an encoding to be chosen if this has not happened already.
   *
   * @param out           the data target
   * @param bufferSize    the maximum size of the in-memory buffer; must be
   *                      greater than zero
   * @param errorHandling how to react to malformed data
   * @return the encoder
   */
  public static Writer newCompactJsonWriter(OutputStream out, int bufferSize, CodingErrorAction errorHandling) {
    Asserts.assertNotNull(out, "OutputStream");
    Asserts.assertTrue(bufferSize > 0, "bufferSize > 0");
    Asserts.assertNotNull(errorHandling, "CodingErrorAction");

    return new JsonWriter(out, bufferSize, errorHandling);
  }
}
