package com.github.chen0040.rl.learning.rlearn;

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.serializer.SerializerFeature;
import com.github.chen0040.rl.actionselection.AbstractActionSelectionStrategy;
import com.github.chen0040.rl.actionselection.ActionSelectionStrategy;
import com.github.chen0040.rl.actionselection.ActionSelectionStrategyFactory;
import com.github.chen0040.rl.actionselection.EpsilonGreedyActionSelectionStrategy;
import com.github.chen0040.rl.models.QModel;
import com.github.chen0040.rl.utils.IndexValue;
import java.io.Serializable;
import java.util.Objects;
import java.util.Set;

/* loaded from: input_file:com/github/chen0040/rl/learning/rlearn/RLearner.class */
/**
 * Learner that keeps a {@link QModel} of state/action values together with a
 * running scalar {@code rho} that is subtracted from every reward and adapted
 * with step size {@code beta} (see {@link #update}).
 *
 * <p>Instances can be round-tripped through JSON ({@link #toJson()} /
 * {@link #fromJson(String)}) and deep-copied ({@link #makeCopy()}).
 *
 * <p>An instance created with the no-arg constructor has no model and no
 * action-selection strategy until the setters are called; {@link #equals},
 * {@link #hashCode}, {@link #copy} and {@link #makeCopy} tolerate that state,
 * the learning methods do not.
 */
public class RLearner implements Serializable, Cloneable {
    // Pinned explicitly so that future member additions do not silently change
    // the compiler-derived default.
    private static final long serialVersionUID = 1L;

    private QModel model;
    private ActionSelectionStrategy actionSelectionStrategy;
    private double rho;
    private double beta;

    /**
     * Creates an empty learner (no model, no strategy, {@code rho = beta = 0}).
     * Intended for JSON deserialization and for {@link #makeCopy()}.
     */
    public RLearner() {
    }

    /**
     * Creates a learner using the defaults {@code alpha = 0.1}, {@code beta = 0.1},
     * {@code rho = 0.7} and {@code 0.1} as the third {@link QModel} constructor argument.
     *
     * @param stateCount  first argument forwarded to the {@link QModel} constructor
     * @param actionCount second argument forwarded to the {@link QModel} constructor
     */
    public RLearner(int stateCount, int actionCount) {
        this(stateCount, actionCount, 0.1d, 0.1d, 0.7d, 0.1d);
    }

    /**
     * Creates a learner with an epsilon-greedy action-selection strategy.
     *
     * @param stateCount  first argument forwarded to the {@link QModel} constructor
     * @param actionCount second argument forwarded to the {@link QModel} constructor
     * @param alpha       value passed to {@link QModel#setAlpha}
     * @param beta        step size used when adapting {@code rho}
     * @param rho         initial value of {@code rho}
     * @param initialQ    third argument forwarded to the {@link QModel} constructor
     *                    (presumably the initial Q value - confirm against QModel)
     */
    public RLearner(int stateCount, int actionCount, double alpha, double beta, double rho, double initialQ) {
        this.model = new QModel(stateCount, actionCount, initialQ);
        this.model.setAlpha(alpha);
        this.rho = rho;
        this.beta = beta;
        this.actionSelectionStrategy = new EpsilonGreedyActionSelectionStrategy();
    }

    /**
     * Serializes this learner to a JSON string using fastjson with the
     * {@code BrowserCompatible} feature enabled.
     *
     * @return the JSON representation of this learner
     */
    public String toJson() {
        return JSON.toJSONString(this, new SerializerFeature[]{SerializerFeature.BrowserCompatible});
    }

    /**
     * Rebuilds a learner from JSON text.
     *
     * @param str JSON text, typically produced by {@link #toJson()}
     * @return the parsed learner, as returned by fastjson
     */
    public static RLearner fromJson(String str) {
        return JSON.parseObject(str, RLearner.class);
    }

    /**
     * Returns a new learner whose state is copied from this one via {@link #copy}.
     *
     * @return an independent copy of this learner
     */
    public RLearner makeCopy() {
        RLearner duplicate = new RLearner();
        duplicate.copy(this);
        return duplicate;
    }

    /**
     * Overwrites this learner's state with a copy of {@code rLearner}'s state.
     * The model is copied with {@code QModel.makeCopy()} and the strategy with
     * {@code clone()}; a missing model or strategy on the source is copied as
     * {@code null} instead of failing.
     *
     * @param rLearner the learner to copy from
     * @throws NullPointerException if {@code rLearner} is {@code null}
     * @throws ClassCastException   if the source strategy is not an
     *                              {@link AbstractActionSelectionStrategy}
     */
    public void copy(RLearner rLearner) {
        Objects.requireNonNull(rLearner, "rLearner");

        QModel sourceModel = rLearner.model;
        this.model = (sourceModel == null) ? null : sourceModel.makeCopy();

        ActionSelectionStrategy sourceStrategy = rLearner.actionSelectionStrategy;
        this.actionSelectionStrategy = (sourceStrategy == null)
                ? null
                : (ActionSelectionStrategy) ((AbstractActionSelectionStrategy) sourceStrategy).clone();

        this.rho = rLearner.rho;
        this.beta = rLearner.beta;
    }

    /**
     * Two learners are equal when their models, strategies, {@code rho} and
     * {@code beta} are equal. Null models/strategies compare equal to each other,
     * and the doubles are compared with {@link Double#compare} so the relation
     * stays reflexive even for NaN.
     */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (!(obj instanceof RLearner)) {
            return false;
        }
        RLearner other = (RLearner) obj;
        return Double.compare(this.rho, other.rho) == 0
                && Double.compare(this.beta, other.beta) == 0
                && Objects.equals(this.model, other.model)
                && Objects.equals(this.actionSelectionStrategy, other.actionSelectionStrategy);
    }

    /**
     * Hash code consistent with {@link #equals}. Only the primitive fields take
     * part: whether the model and strategy classes override {@code hashCode}
     * consistently with their {@code equals} is not visible from this class, so
     * including them could violate the equals/hashCode contract.
     */
    @Override
    public int hashCode() {
        return 31 * Double.hashCode(this.rho) + Double.hashCode(this.beta);
    }

    /** @return the current value of {@code rho} */
    public double getRho() {
        return this.rho;
    }

    /** @param rho the new value of {@code rho} */
    public void setRho(double rho) {
        this.rho = rho;
    }

    /** @return the step size used when adapting {@code rho} */
    public double getBeta() {
        return this.beta;
    }

    /** @param beta the new step size used when adapting {@code rho} */
    public void setBeta(double beta) {
        this.beta = beta;
    }

    /** @return the underlying Q model (the live instance, not a copy) */
    public QModel getModel() {
        return this.model;
    }

    /** @param qModel the Q model to use; stored by reference */
    public void setModel(QModel qModel) {
        this.model = qModel;
    }

    /**
     * Returns the action-selection strategy in the string form produced by
     * {@link ActionSelectionStrategyFactory#serialize}.
     *
     * @return the serialized strategy
     */
    public String getActionSelection() {
        return ActionSelectionStrategyFactory.serialize(this.actionSelectionStrategy);
    }

    /**
     * Replaces the action-selection strategy with the one described by {@code str}.
     *
     * @param str strategy description understood by
     *            {@link ActionSelectionStrategyFactory#deserialize}
     */
    public void setActionSelection(String str) {
        this.actionSelectionStrategy = ActionSelectionStrategyFactory.deserialize(str);
    }

    /**
     * Returns the value of the entry the model reports as having the maximum Q
     * at {@code stateId}, restricted to {@code actions} as interpreted by
     * {@code QModel.actionWithMaxQAtState}.
     */
    private double maxQAtState(int stateId, Set<Integer> actions) {
        return this.model.actionWithMaxQAtState(stateId, actions).getValue();
    }

    /**
     * Applies one learning step for the transition
     * {@code (currentState, actionTaken) -> newState} with the given reward.
     *
     * <p>The new Q value is
     * {@code oldQ + alpha * (reward - rho + maxQ(newState) - oldQ)}. When that new
     * value equals the maximum Q looked up at {@code currentState}, {@code rho} is
     * moved by {@code beta * (reward - rho + maxQ(newState) - maxQ(currentState))}.
     *
     * @param currentState         state the action was taken in
     * @param actionTaken          action whose Q value is updated
     * @param newState             state reached after the action
     * @param actionsAtNextStateId action set passed to the max-Q lookup at
     *                             {@code newState}
     * @param reward               reward observed for the transition
     */
    public void update(int currentState, int actionTaken, int newState, Set<Integer> actionsAtNextStateId, double reward) {
        double oldQ = this.model.getQ(currentState, actionTaken);
        double alpha = this.model.getAlpha(currentState, actionTaken);
        double maxQAtNewState = maxQAtState(newState, actionsAtNextStateId);

        // Shared by both the Q update and the rho update.
        double adjustedTarget = (reward - this.rho) + maxQAtNewState;
        double newQ = oldQ + (alpha * (adjustedTarget - oldQ));

        // NOTE(review): this maximum is read BEFORE newQ is written back, so it
        // reflects the pre-update table; the exact floating-point comparison below
        // therefore tests newQ against the old maximum. Kept as-is to preserve
        // learning behaviour - confirm whether this ordering is intended.
        double maxQAtCurrentState = maxQAtState(currentState, null);
        if (newQ == maxQAtCurrentState) {
            this.rho += this.beta * (adjustedTarget - maxQAtCurrentState);
        }

        this.model.setQ(currentState, actionTaken, newQ);
    }

    /**
     * Delegates action selection to the configured strategy.
     *
     * @param stateId          state to select an action for
     * @param actionsAtStateId action set forwarded to the strategy
     * @return whatever the strategy returns for this state, model and action set
     */
    public IndexValue selectAction(int stateId, Set<Integer> actionsAtStateId) {
        return this.actionSelectionStrategy.selectAction(stateId, this.model, actionsAtStateId);
    }
}
