package aima.core.probability.mdp.search;

import aima.core.agent.Action;
import aima.core.probability.mdp.MarkovDecisionProcess;
import aima.core.probability.mdp.Policy;
import aima.core.probability.mdp.PolicyEvaluation;
import aima.core.probability.mdp.impl.LookupPolicy;
import aima.core.util.Util;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.Map;

/* loaded from: input_file:aima/core/probability/mdp/search/PolicyIteration.class */
/**
 * Policy iteration for a {@link MarkovDecisionProcess}: repeatedly evaluates the
 * current policy with the supplied {@link PolicyEvaluation} and then greedily
 * improves it, stopping once a full sweep over the states changes no action.
 *
 * @param <S> the state type of the decision process.
 * @param <A> the action type of the decision process.
 */
public class PolicyIteration<S, A extends Action> {
    /** Strategy used to compute the utilities of the current policy on every iteration. */
    private final PolicyEvaluation<S, A> policyEvaluation;

    /**
     * Creates a policy-iteration solver.
     *
     * @param policyEvaluation
     *            the evaluation step invoked once per iteration; it is stored
     *            as-is and is not validated here.
     */
    public PolicyIteration(PolicyEvaluation<S, A> policyEvaluation) {
        this.policyEvaluation = policyEvaluation;
    }

    /**
     * Runs policy iteration on the given decision process.
     *
     * <p>Utilities start at zero for every state and the policy starts from
     * {@link #initialPolicyVector(MarkovDecisionProcess)}. Each iteration
     * re-evaluates the utilities under the current policy and then, for every
     * state, switches to the action with the highest summed
     * probability-weighted utility if that value is strictly greater than the
     * value of the action currently assigned.
     *
     * @param markovDecisionProcess
     *            the decision process to solve.
     * @return a {@link LookupPolicy} wrapping the final state-to-action map.
     */
    public Policy<S, A> policyIteration(MarkovDecisionProcess<S, A> markovDecisionProcess) {
        Map<S, Double> utilities = Util.create(markovDecisionProcess.states(), Double.valueOf(0.0d));
        Map<S, A> policy = initialPolicyVector(markovDecisionProcess);
        boolean unchanged;
        do {
            utilities = this.policyEvaluation.evaluate(policy, utilities, markovDecisionProcess);
            unchanged = true;
            for (S state : markovDecisionProcess.states()) {
                // The policy entry for this state is only rewritten after the
                // action loop, so it can be read once up front.
                A currentAction = policy.get(state);
                A bestAction = currentAction;
                double bestValue = Double.NEGATIVE_INFINITY;
                // Stays 0 when the state has no actions or its current action is
                // not among them; in the no-action case bestValue stays at
                // negative infinity, so the policy is left untouched.
                double currentValue = 0.0d;
                for (A action : markovDecisionProcess.actions(state)) {
                    double value = expectedUtility(markovDecisionProcess, utilities, state, action);
                    if (value > bestValue) {
                        bestValue = value;
                        bestAction = action;
                    }
                    if (action.equals(currentAction)) {
                        currentValue = value;
                    }
                }
                // Strict comparison: ties keep the current action, so equal-valued
                // actions cannot make the loop flip back and forth.
                if (bestValue > currentValue) {
                    policy.put(state, bestAction);
                    unchanged = false;
                }
            }
        } while (!unchanged);
        return new LookupPolicy<S, A>(policy);
    }

    /**
     * Builds a starting policy by picking, for each state that has at least one
     * action, one of its actions via {@code Util.selectRandomlyFromList}.
     * States without actions get no entry in the returned map.
     *
     * @param markovDecisionProcess
     *            the decision process supplying the states and their actions.
     * @param <S> the state type.
     * @param <A> the action type.
     * @return a new, mutable map from state to the selected action, in the
     *         iteration order of {@code markovDecisionProcess.states()}.
     */
    public static <S, A extends Action> Map<S, A> initialPolicyVector(MarkovDecisionProcess<S, A> markovDecisionProcess) {
        Map<S, A> policy = new LinkedHashMap<S, A>();
        // Reused scratch list; the selection helper is called with a list.
        ArrayList<A> candidates = new ArrayList<A>();
        for (S state : markovDecisionProcess.states()) {
            candidates.clear();
            candidates.addAll(markovDecisionProcess.actions(state));
            if (!candidates.isEmpty()) {
                policy.put(state, Util.selectRandomlyFromList(candidates));
            }
        }
        return policy;
    }

    /**
     * Sums, over every state {@code next} of the process,
     * {@code transitionProbability(next, state, action) * utilities.get(next)}.
     */
    private double expectedUtility(MarkovDecisionProcess<S, A> markovDecisionProcess, Map<S, Double> utilities,
            S state, A action) {
        double sum = 0.0d;
        for (S next : markovDecisionProcess.states()) {
            sum += markovDecisionProcess.transitionProbability(next, state, action)
                    * utilities.get(next).doubleValue();
        }
        return sum;
    }
}
