/*
 * Copyright 2010 The Apache Software Foundation
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.replication;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.SortedMap;
import java.util.SortedSet;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.concurrent.atomic.AtomicBoolean;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Abortable;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HServerAddress;
import org.apache.hadoop.hbase.Server;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.zookeeper.ZKUtil;
import org.apache.hadoop.hbase.zookeeper.ZooKeeperListener;
import org.apache.hadoop.hbase.zookeeper.ZooKeeperNodeTracker;
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
import org.apache.hadoop.util.StringUtils;
import org.apache.zookeeper.KeeperException;

/**
 * This class serves as a helper for all things related to zookeeper
 * in replication.
 * <p/>
 * The layout looks something like this under zookeeper.znode.parent
 * for the master cluster:
 * <p/>
 * <pre>
 * replication/
 *  state      {contains true or false}
 *  clusterId  {contains a byte}
 *  peers/
 *    1/   {contains a full cluster address}
 *    2/
 *    ...
 *  rs/ {lists all RS that replicate}
 *    startcode1/ {lists all peer clusters}
 *      1/ {lists hlogs to process}
 *        10.10.1.76%3A53488.123456789 {contains nothing or a position}
 *        10.10.1.76%3A53488.123456790
 *        ...
 *      2/
 *      ...
 *    startcode2/
 *    ...
 * </pre>
 */
public class ReplicationZookeeper {
  private static final Log LOG =
    LogFactory.getLog(ReplicationZookeeper.class);
  // Name of the child znode created under another region server's znode
  // to lock it during failover
  private final static String RS_LOCK_ZNODE = "lock";
  // Our handle on zookeeper
  private final ZooKeeperWatcher zookeeper;
  // Map of peer clusters keyed by their id; only created by the
  // region server constructor
  private Map<String, ReplicationPeer> peerClusters;
  // Path to the root replication znode
  private String replicationZNode;
  // Path to the peer clusters znode
  private String peersZNode;
  // Path to the znode that lists all the region servers that replicate
  private String rsZNode;
  // Path to this region server's own znode under rsZNode
  private String rsServerNameZnode;
  // Name (not the full path) of the replication state znode
  private String replicationStateNodeName;
  private final Configuration conf;
  // Is this cluster replicating at the moment?
  private AtomicBoolean replicating;
  // Byte (stored as string here) that identifies this cluster
  private String clusterId;
  // The key to our own cluster
  private String ourClusterKey;
  // Called to abort when a zookeeper operation fails
  private Abortable abortable;
  // Tracks the data of the replication state znode
  private ReplicationStatusTracker statusTracker;

  /**
   * Constructor used by clients of replication (like master and HBase clients).
   * It sets up the replication znodes but does not connect to any peer
   * cluster and leaves the map of peer clusters unset.
   *
   * @param abortable called to abort when a zookeeper operation fails
   * @param conf  conf to use
   * @param zk    zk connection to use
   * @throws KeeperException if the replication znodes cannot be set up
   */
  public ReplicationZookeeper(final Abortable abortable, final Configuration conf,
                              final ZooKeeperWatcher zk)
    throws KeeperException {
    // Keep the abortable: the error paths of this class call
    // this.abortable.abort(...) and would otherwise dereference null.
    this.abortable = abortable;
    this.conf = conf;
    this.zookeeper = zk;
    this.replicating = new AtomicBoolean();
    setZNodes(abortable);
  }

  /**
   * Constructor used by region servers, connects to the peer cluster right away.
   *
   * @param server
   * @param replicating    atomic boolean to start/stop replication
   * @throws IOException
   * @throws KeeperException 
   */
  public ReplicationZookeeper(final Server server, final AtomicBoolean replicating)
  throws IOException, KeeperException {
    this.abortable = server;
    this.zookeeper = server.getZooKeeper();
    this.conf = server.getConfiguration();
    this.replicating = replicating;
    setZNodes(server);

    this.peerClusters = new HashMap<String, ReplicationPeer>();
    ZKUtil.createWithParents(this.zookeeper,
        ZKUtil.joinZNode(this.replicationZNode, this.replicationStateNodeName));
    this.rsServerNameZnode = ZKUtil.joinZNode(rsZNode, server.getServerName());
    ZKUtil.createWithParents(this.zookeeper, this.rsServerNameZnode);
    connectExistingPeers();
  }

  /**
   * Computes the paths of the replication znodes from the configuration,
   * creates the peers and rs znodes (with their parents), reads this
   * cluster's id and starts tracking the replication state znode.
   *
   * @param abortable handed to the replication state tracker
   * @throws KeeperException if a zookeeper operation fails
   */
  private void setZNodes(Abortable abortable) throws KeeperException {
    String replicationZNodeName =
        conf.get("zookeeper.znode.replication", "replication");
    String peersZNodeName =
        conf.get("zookeeper.znode.replication.peers", "peers");
    this.replicationStateNodeName =
        conf.get("zookeeper.znode.replication.state", "state");
    String clusterIdZNodeName =
        conf.get("zookeeper.znode.replication.clusterId", "clusterId");
    String rsZNodeName =
        conf.get("zookeeper.znode.replication.rs", "rs");
    this.ourClusterKey = ZKUtil.getZooKeeperClusterKey(this.conf);
    this.replicationZNode =
      ZKUtil.joinZNode(this.zookeeper.baseZNode, replicationZNodeName);
    this.peersZNode = ZKUtil.joinZNode(replicationZNode, peersZNodeName);
    ZKUtil.createWithParents(this.zookeeper, this.peersZNode);
    this.rsZNode = ZKUtil.joinZNode(replicationZNode, rsZNodeName);
    ZKUtil.createWithParents(this.zookeeper, this.rsZNode);

    // Fall back to the default cluster id when the clusterId znode has no data
    String znode = ZKUtil.joinZNode(this.replicationZNode, clusterIdZNodeName);
    byte [] data = ZKUtil.getData(this.zookeeper, znode);
    String idResult = Bytes.toString(data);
    this.clusterId = idResult == null?
      Byte.toString(HConstants.DEFAULT_CLUSTER_ID): idResult;
    // Set a tracker on the replication state znode; the paths it relies on
    // were assigned above
    this.statusTracker =
        new ReplicationStatusTracker(this.zookeeper, abortable);
    statusTracker.start();
    readReplicationStateZnode();
  }

  /**
   * Connects to every peer cluster currently listed under the peers znode.
   * Nothing is done when the listing comes back null.
   */
  private void connectExistingPeers() throws IOException, KeeperException {
    final List<String> peerIds =
        ZKUtil.listChildrenNoWatch(this.zookeeper, this.peersZNode);
    if (peerIds == null) {
      return;
    }
    for (String peerId : peerIds) {
      connectToPeer(peerId);
    }
  }

  /**
   * Lists the ids of this cluster's peers and sets a watch on them.
   * On a zookeeper failure the abortable is invoked.
   * @return list of all peers' identifiers, null if the listing failed
   */
  public List<String> listPeersIdsAndWatch() {
    try {
      return ZKUtil.listChildrenAndWatchThem(this.zookeeper, this.peersZNode);
    } catch (KeeperException e) {
      this.abortable.abort("Cannot get the list of peers ", e);
      return null;
    }
  }

  /**
   * Returns all region servers from given peer. The peer's list of region
   * servers is refreshed from the peer's zookeeper before being returned.
   *
   * @param peerClusterId (byte) the cluster to interrogate
   * @return addresses of all region servers as stored on the peer after the
   *         refresh; an empty list when the peer is unknown or when this
   *         instance holds no peers
   */
  public List<HServerAddress> getSlavesAddresses(String peerClusterId)
      throws KeeperException {
    // The map of peers is only created by the region server constructor;
    // treat a missing map like an empty one, as connectToPeer does.
    if (this.peerClusters == null) {
      return new ArrayList<HServerAddress>(0);
    }
    // Also covers the empty map, no separate size check needed
    ReplicationPeer peer = this.peerClusters.get(peerClusterId);
    if (peer == null) {
      return new ArrayList<HServerAddress>(0);
    }
    peer.setRegionServers(fetchSlavesAddresses(peer.getZkw()));
    return peer.getRegionServers();
  }

  /**
   * Reads the addresses of the region servers registered under the rs znode
   * of the given zookeeper connection. A failure is only logged.
   * @param zkw zk connection to use
   * @return list of region server addresses, null if the read failed
   */
  private List<HServerAddress> fetchSlavesAddresses(ZooKeeperWatcher zkw) {
    try {
      return ZKUtil.listChildrenAndGetAsAddresses(zkw, zkw.rsZNode);
    } catch (KeeperException e) {
      LOG.warn("Cannot get peer's region server addresses", e);
      return null;
    }
  }

  /**
   * Connects this cluster to another one and registers the peer under this
   * region server's replication znode.
   * @param peerId id of the peer cluster
   * @return true if the peer was newly added; false when this instance holds
   *         no peer map, the peer is already connected, another peer is
   *         already connected, or the peer could not be built
   * @throws IOException
   * @throws KeeperException
   */
  public boolean connectToPeer(String peerId)
      throws IOException, KeeperException {
    if (peerClusters == null) {
      return false;
    }
    if (this.peerClusters.containsKey(peerId)) {
      return false;
    }
    // TODO remove when we support it
    if (this.peerClusters.size() > 0) {
      LOG.warn("Multiple slaves feature not supported");
      return false;
    }

    final ReplicationPeer newPeer = getPeer(peerId);
    if (newPeer == null) {
      return false;
    }

    this.peerClusters.put(peerId, newPeer);
    final String peerQueueZNode =
        ZKUtil.joinZNode(this.rsServerNameZnode, peerId);
    ZKUtil.createWithParents(this.zookeeper, peerQueueZNode);
    LOG.info("Added new peer cluster " + newPeer.getClusterKey());
    return true;
  }

  /**
   * Helper method to connect to a peer. Reads the peer's cluster key from its
   * znode under the peers znode and opens a zookeeper connection to it.
   * @param peerId peer's identifier
   * @return object representing the peer, or null when the peer's znode
   *         holds no cluster key, when the key is our own cluster's, or
   *         when the key cannot be applied to the configuration
   * @throws IOException
   * @throws KeeperException
   */
  public ReplicationPeer getPeer(String peerId) throws IOException, KeeperException{
    String znode = ZKUtil.joinZNode(this.peersZNode, peerId);
    byte [] data = ZKUtil.getData(this.zookeeper, znode);
    String otherClusterKey = Bytes.toString(data);
    // No data could be read for this peer: there is no cluster key to
    // connect with, so don't hand a null key to the code below.
    if (otherClusterKey == null) {
      LOG.warn("Not connecting to " + peerId + " because " + znode +
          " doesn't contain a cluster key");
      return null;
    }
    if (this.ourClusterKey.equals(otherClusterKey)) {
      LOG.debug("Not connecting to " + peerId + " because it's us");
      return null;
    }
    // Construct the connection to the new peer
    Configuration otherConf = new Configuration(this.conf);
    try {
      ZKUtil.applyClusterKeyToConf(otherConf, otherClusterKey);
    } catch (IOException e) {
      LOG.error("Can't get peer because:", e);
      return null;
    }

    ZooKeeperWatcher zkw = new ZooKeeperWatcher(otherConf,
        "connection to cluster: " + peerId, this.abortable);
    return new ReplicationPeer(otherConf, peerId,
        otherClusterKey, zkw);
  }

  /**
   * Writes the new replication state of this cluster into the state znode,
   * creating that znode (and its parents) first.
   * @param newState true to enable replication, false to disable it
   * @throws KeeperException
   */
  public void setReplicating(boolean newState) throws KeeperException {
    final String stateZNode =
        ZKUtil.joinZNode(this.replicationZNode, this.replicationStateNodeName);
    ZKUtil.createWithParents(this.zookeeper, stateZNode);
    final byte [] serialized = Bytes.toBytes(Boolean.toString(newState));
    ZKUtil.setData(this.zookeeper, stateZNode, serialized);
  }

  /**
   * Removes the peer's znode from zookeeper, which will trigger the watchers
   * on every region server and close their sources.
   * @param id peer's identifier
   * @throws IllegalArgumentException Thrown when the peer doesn't exist
   * @throws IOException Thrown when a zookeeper operation fails
   */
  public void removePeer(String id) throws IOException {
    try {
      final boolean known = peerExists(id);
      if (!known) {
        throw new IllegalArgumentException("Cannot remove inexisting peer");
      }
      final String peerZNode = ZKUtil.joinZNode(this.peersZNode, id);
      ZKUtil.deleteNode(this.zookeeper, peerZNode);
    } catch (KeeperException e) {
      throw new IOException("Unable to remove a peer", e);
    }
  }

  /**
   * Adds a new peer to this cluster by creating its znode, holding the
   * cluster key, under the peers znode.
   * @param id peer's identifier
   * @param clusterKey ZK ensemble's addresses, client port and root znode
   * @throws IllegalArgumentException Thrown when a peer with that id
   *         already exists
   * @throws IllegalStateException Thrown when another peer already exists,
   *         since multi-slave isn't supported yet.
   * @throws IOException Thrown when a zookeeper operation fails
   */
  public void addPeer(String id, String clusterKey) throws IOException {
    try {
      if (peerExists(id)) {
        throw new IllegalArgumentException("Cannot add existing peer");
      }
      if (countPeers() > 0) {
        throw new IllegalStateException("Multi-slave isn't supported yet");
      }
      ZKUtil.createWithParents(this.zookeeper, this.peersZNode);
      final String newPeerZNode = ZKUtil.joinZNode(this.peersZNode, id);
      ZKUtil.createAndWatch(this.zookeeper, newPeerZNode,
          Bytes.toBytes(clusterKey));
    } catch (KeeperException e) {
      throw new IOException("Unable to add peer", e);
    }
  }

  /**
   * Tells whether a znode exists for the given peer id under the peers znode.
   */
  private boolean peerExists(String id) throws KeeperException {
    final String peerZNode = ZKUtil.joinZNode(this.peersZNode, id);
    final int checkResult = ZKUtil.checkExists(this.zookeeper, peerZNode);
    return checkResult >= 0;
  }

  /**
   * Counts the children of the peers znode; a null listing counts as zero.
   */
  private int countPeers() throws KeeperException {
    final List<String> peerIds =
        ZKUtil.listChildrenNoWatch(this.zookeeper, this.peersZNode);
    if (peerIds == null) {
      return 0;
    }
    return peerIds.size();
  }

  /**
   * Refreshes the atomic boolean from the replication state znode and logs
   * the resulting state. Aborts if the state cannot be read.
   */
  private void readReplicationStateZnode() {
    try {
      final boolean enabled = getReplication();
      this.replicating.set(enabled);
      final String status = this.replicating.get() ? "started" : "stopped";
      LOG.info("Replication is now " + status);
    } catch (KeeperException e) {
      this.abortable.abort("Failed getting data on from " + getRepStateNode(), e);
    }
  }

  /**
   * Get the replication status of this cluster from the state tracker. When
   * the tracker has no data (null or empty), the state is written as true
   * through {@link #setReplicating(boolean)} and true is returned.
   * @return returns true when it's enabled, else false
   * @throws KeeperException
   */
  public boolean getReplication() throws KeeperException {
    final byte [] state = this.statusTracker.getData();
    final boolean hasState = state != null && state.length != 0;
    if (hasState) {
      return Boolean.parseBoolean(Bytes.toString(state));
    }
    setReplicating(true);
    return true;
  }

  /**
   * Builds the full path of the replication state znode.
   */
  private String getRepStateNode() {
    final String stateZNode =
        ZKUtil.joinZNode(this.replicationZNode, this.replicationStateNodeName);
    return stateZNode;
  }

  /**
   * Registers a new hlog in zookeeper by creating its znode (with parents)
   * under this region server's queue for the given cluster. Aborts on a
   * zookeeper failure.
   * @param filename name of the hlog's znode
   * @param clusterId name of the cluster's znode
   */
  public void addLogToList(String filename, String clusterId) {
    try {
      final String hlogZNode = ZKUtil.joinZNode(
          ZKUtil.joinZNode(this.rsServerNameZnode, clusterId), filename);
      ZKUtil.createWithParents(this.zookeeper, hlogZNode);
    } catch (KeeperException e) {
      this.abortable.abort("Failed add log to list", e);
    }
  }

  /**
   * Deletes the znode of an hlog from this region server's queue for the
   * given cluster. Aborts on a zookeeper failure.
   * @param filename name of the hlog's znode
   * @param clusterId name of the cluster's znode
   */
  public void removeLogFromList(String filename, String clusterId) {
    try {
      final String hlogZNode = ZKUtil.joinZNode(
          ZKUtil.joinZNode(rsServerNameZnode, clusterId), filename);
      ZKUtil.deleteNode(this.zookeeper, hlogZNode);
    } catch (KeeperException e) {
      this.abortable.abort("Failed remove from list", e);
    }
  }

  /**
   * Stores the current position of the specified cluster in the given hlog's
   * znode. The position is written as the bytes of its decimal string form.
   * Aborts on a zookeeper failure.
   * @param filename name of the hlog's znode
   * @param clusterId name of the cluster's znode
   * @param position the position in the file
   */
  public void writeReplicationStatus(String filename, String clusterId,
      long position) {
    try {
      final String hlogZNode = ZKUtil.joinZNode(
          ZKUtil.joinZNode(this.rsServerNameZnode, clusterId), filename);
      // Why serialize String of Long and not Long as bytes?
      final byte [] serialized = Bytes.toBytes(Long.toString(position));
      ZKUtil.setData(this.zookeeper, hlogZNode, serialized);
    } catch (KeeperException e) {
      this.abortable.abort("Writing replication status", e);
    }
  }

  /**
   * Lists the children of the zookeeper watcher's rs znode and sets a watch
   * on them. Aborts on a zookeeper failure.
   * @return a list of server names, null if the listing failed
   */
  public List<String> getRegisteredRegionServers() {
    try {
      return ZKUtil.listChildrenAndWatchThem(
          this.zookeeper, this.zookeeper.rsZNode);
    } catch (KeeperException e) {
      this.abortable.abort("Get list of registered region servers", e);
      return null;
    }
  }

  /**
   * Lists, without setting a watch, the replicators that have queues; they
   * can be alive, dead or simply from a previous run. Aborts on a zookeeper
   * failure.
   * @return a list of server names, null if the listing failed
   */
  public List<String> getListOfReplicators() {
    try {
      return ZKUtil.listChildrenNoWatch(this.zookeeper, rsZNode);
    } catch (KeeperException e) {
      this.abortable.abort("Get list of replicators", e);
      return null;
    }
  }

  /**
   * Lists, without setting a watch, the peer clusters found under the given
   * region server's znode. Aborts on a zookeeper failure.
   * @param rs server name of the rs
   * @return a list of peer clusters, null if the listing failed
   */
  public List<String> getListPeersForRS(String rs) {
    final String serverZNode = ZKUtil.joinZNode(rsZNode, rs);
    try {
      return ZKUtil.listChildrenNoWatch(this.zookeeper, serverZNode);
    } catch (KeeperException e) {
      this.abortable.abort("Get list of peers for rs", e);
      return null;
    }
  }

  /**
   * Lists, without setting a watch, the hlogs queued for the specified peer
   * cluster under the specified region server. Aborts on a zookeeper failure.
   * @param rs server name of the rs
   * @param id peer cluster
   * @return a list of hlogs, null if the listing failed
   */
  public List<String> getListHLogsForPeerForRS(String rs, String id) {
    final String queueZNode =
        ZKUtil.joinZNode(ZKUtil.joinZNode(rsZNode, rs), id);
    try {
      return ZKUtil.listChildrenNoWatch(this.zookeeper, queueZNode);
    } catch (KeeperException e) {
      this.abortable.abort("Get list of hlogs for peer", e);
      return null;
    }
  }

  /**
   * Tries to set a lock in another server's znode by creating a lock child
   * that holds the path of this region server's znode.
   * @param znode the server name of the other server
   * @return true if the lock was acquired, false in every other case
   *         (including when the target is this very server)
   */
  public boolean lockOtherRS(String znode) {
    final String otherRsZNode = ZKUtil.joinZNode(this.rsZNode, znode);
    if (otherRsZNode.equals(rsServerNameZnode)) {
      LOG.warn("Won't lock because this is us, we're dead!");
      return false;
    }
    final String lockZNode = ZKUtil.joinZNode(otherRsZNode, RS_LOCK_ZNODE);
    try {
      ZKUtil.createAndWatch(this.zookeeper, lockZNode,
          Bytes.toBytes(rsServerNameZnode));
      return true;
    } catch (KeeperException e) {
      LOG.info("Failed lock other rs", e);
      return false;
    }
  }

  /**
   * This method copies all the hlogs queues from another region server
   * into this region server's znode and returns them all sorted per peer
   * cluster (the peer cluster's name being appended with the dead server's
   * znode).
   * <p>
   * The copy is not atomic: if a zookeeper operation fails midway, the
   * abortable is invoked and the queues copied so far are returned.
   * @param znode server name to copy
   * @return all hlogs for all peers of that server, keyed by the new queue
   *         name; the map is empty when there is nothing to copy and may be
   *         incomplete if an error occurred
   */
  public SortedMap<String, SortedSet<String>> copyQueuesFromRS(String znode) {
    // TODO this method isn't atomic enough, we could start copying and then
    // TODO fail for some reason and we would end up with znodes we don't want.
    SortedMap<String,SortedSet<String>> queues =
        new TreeMap<String,SortedSet<String>>();
    try {
      String nodePath = ZKUtil.joinZNode(rsZNode, znode);
      List<String> clusters =
        ZKUtil.listChildrenNoWatch(this.zookeeper, nodePath);
      // We have a lock znode in there, it will count as one.
      if (clusters == null || clusters.size() <= 1) {
        return queues;
      }
      // The lock isn't a peer cluster, remove it
      clusters.remove(RS_LOCK_ZNODE);
      for (String cluster : clusters) {
        // We add the name of the recovered RS to the new znode, we can even
        // do that for queues that were recovered 10 times giving a znode like
        // number-startcode-number-otherstartcode-number-anotherstartcode-etc
        String newCluster = cluster+"-"+znode;
        String newClusterZnode = ZKUtil.joinZNode(rsServerNameZnode, newCluster);
        // The new queue znode is created even when there turn out to be no
        // hlogs to copy into it
        ZKUtil.createNodeIfNotExistsAndWatch(this.zookeeper, newClusterZnode,
          HConstants.EMPTY_BYTE_ARRAY);
        String clusterPath = ZKUtil.joinZNode(nodePath, cluster);
        List<String> hlogs = ZKUtil.listChildrenNoWatch(this.zookeeper, clusterPath);
        // That region server didn't have anything to replicate for this cluster
        if (hlogs == null || hlogs.size() == 0) {
          continue;
        }
        SortedSet<String> logQueue = new TreeSet<String>();
        queues.put(newCluster, logQueue);
        for (String hlog : hlogs) {
          // Copy each hlog znode along with the data (position) it holds
          String z = ZKUtil.joinZNode(clusterPath, hlog);
          byte [] position = ZKUtil.getData(this.zookeeper, z);
          LOG.debug("Creating " + hlog + " with data " + Bytes.toString(position));
          String child = ZKUtil.joinZNode(newClusterZnode, hlog);
          ZKUtil.createAndWatch(this.zookeeper, child, position);
          logQueue.add(hlog);
        }
      }
    } catch (KeeperException e) {
      this.abortable.abort("Copy queues from rs", e);
    }
    return queues;
  }

  /**
   * Delete a complete queue of hlogs from this region server's znode and,
   * if asked, close and forget the connection to the matching peer cluster.
   * Aborts on a zookeeper failure.
   * @param peerZnode znode of the peer cluster queue of hlogs to delete
   * @param closeConnection true to also close the zk connection to the peer
   *        registered under that name and remove it from the map of peers;
   *        nothing is closed when no such peer is registered
   */
  public void deleteSource(String peerZnode, boolean closeConnection) {
    try {
      ZKUtil.deleteNodeRecursively(this.zookeeper,
          ZKUtil.joinZNode(rsServerNameZnode, peerZnode));
      // The map of peers may be unset, and the queue being deleted may not
      // match a connected peer; only close what is actually there.
      if (closeConnection && this.peerClusters != null) {
        ReplicationPeer peer = this.peerClusters.get(peerZnode);
        if (peer != null) {
          peer.getZkw().close();
          this.peerClusters.remove(peerZnode);
        }
      }
    } catch (KeeperException e) {
      this.abortable.abort("Failed delete of " + peerZnode, e);
    }
  }

  /**
   * Recursively deletes the specified region server's znode, along with
   * everything under it, from the rs znode. Aborts on a zookeeper failure.
   * @param znode name of the region server's znode
   */
  public void deleteRsQueues(String znode) {
    try {
      final String serverZNode = ZKUtil.joinZNode(rsZNode, znode);
      ZKUtil.deleteNodeRecursively(this.zookeeper, serverZNode);
    } catch (KeeperException e) {
      this.abortable.abort("Failed delete of " + znode, e);
    }
  }

  /**
   * Recursively deletes this region server's own znode, and therefore its
   * queues. An expired session is silently ignored; any other zookeeper
   * failure aborts.
   */
  public void deleteOwnRSZNode() {
    try {
      ZKUtil.deleteNodeRecursively(this.zookeeper,
          this.rsServerNameZnode);
    } catch (KeeperException.SessionExpiredException ignored) {
      // the session is already expired, don't bother going further
    } catch (KeeperException e) {
      this.abortable.abort("Failed delete of " + this.rsServerNameZnode, e);
    }
  }

  /**
   * Reads the position stored in the specified hlog's znode under the
   * specified peer of this region server.
   * @param peerId znode of the peer cluster
   * @param hlog name of the hlog
   * @return the position in that hlog, 0 when the znode holds no data
   * @throws KeeperException
   */
  public long getHLogRepPosition(String peerId, String hlog)
  throws KeeperException {
    final String hlogZNode = ZKUtil.joinZNode(
        ZKUtil.joinZNode(rsServerNameZnode, peerId), hlog);
    final String stored =
        Bytes.toString(ZKUtil.getData(this.zookeeper, hlogZNode));
    if (stored == null || stored.length() == 0) {
      return 0L;
    }
    return Long.parseLong(stored);
  }

  /**
   * Registers the given listener on this cluster's zookeeper connection.
   * @param listener the listener to register
   */
  public void registerRegionServerListener(ZooKeeperListener listener) {
    final ZooKeeperWatcher watcher = this.zookeeper;
    watcher.registerListener(listener);
  }

  /**
   * Returns the identifier of this cluster, as read from the clusterId znode
   * or defaulted when that znode held no data.
   *
   * @return the id for the cluster
   */
  public String getClusterId() {
    return clusterId;
  }

  /**
   * Returns the map of peer clusters held by this instance. This is the
   * internal map itself, not a copy.
   * @return map of peer cluster keyed by id
   */
  public Map<String, ReplicationPeer> getPeerClusters() {
    return peerClusters;
  }

  /**
   * Extracts the znode name of a peer cluster from a ZK path, that is the
   * last non-empty element of the path once split on "/".
   * @param fullPath Path to extract the id from, may be null
   * @return the id, or an empty string if path is invalid (null, empty or
   *         made only of separators)
   */
  public static String getZNodeName(String fullPath) {
    // A null path is invalid too; answer with the documented empty string
    // rather than failing on the split below.
    if (fullPath == null) {
      return "";
    }
    // split() drops trailing empty strings, so "/a/b/" yields "b" and "/"
    // yields no parts at all
    String[] parts = fullPath.split("/");
    return parts.length > 0 ? parts[parts.length-1] : "";
  }

  /**
   * Returns the zookeeper connection this helper works with.
   * @return zk connection
   */
  public ZooKeeperWatcher getZookeeperWatcher() {
    return zookeeper;
  }


  /**
   * Returns the full path of the znode under which the peers are listed.
   * @return path to peers in zk
   */
  public String getPeersZNode() {
    return this.peersZNode;
  }

  /**
   * Tracker for the status of the replication. It follows the replication
   * state znode and, whenever that znode's data changes, re-reads the state
   * into the enclosing instance.
   */
  public class ReplicationStatusTracker extends ZooKeeperNodeTracker {
    /**
     * @param watcher zk connection to track the state znode with
     * @param abortable passed on to the node tracker
     */
    public ReplicationStatusTracker(ZooKeeperWatcher watcher,
        Abortable abortable) {
      super(watcher, getRepStateNode(), abortable);
    }

    @Override
    public synchronized void nodeDataChanged(String path) {
      // Only react to changes of the tracked znode
      if (!path.equals(node)) {
        return;
      }
      super.nodeDataChanged(path);
      readReplicationStateZnode();
    }
  }
}