JuliaRL_Minimax_OpenSpiel(tic_tac_toe)

Source code Author Update time

using ReinforcementLearning
using OpenSpiel

"""
    RL.Experiment(::Val{:JuliaRL}, ::Val{:Minimax}, ::Val{:OpenSpiel}, game)

Assemble an experiment in which players `0` and `1` each follow their own
`MinimaxPolicy` on the OpenSpiel environment named by `string(game)`.

The experiment is configured to stop after one episode, and a separate
`TotalRewardPerEpisode` hook is attached to each player. The `is_show_progress`
flag of the stop condition is `false` whenever the key `"CI"` is present in `ENV`.
"""
function RL.Experiment(::Val{:JuliaRL}, ::Val{:Minimax}, ::Val{:OpenSpiel}, game;)
    # Player identifiers shared by the policy manager and the hook collection.
    players = (0, 1)

    env = OpenSpielEnv(string(game))

    # One independent Minimax policy and one reward hook per player.
    policy = MultiAgentManager((NamedPolicy(p => MinimaxPolicy()) for p in players)...)
    hook = MultiAgentHook((p => TotalRewardPerEpisode() for p in players)...)

    description = "# Play `$game` in OpenSpiel with Minimax"

    show_progress = !haskey(ENV, "CI")
    stop_condition = StopAfterEpisode(1; is_show_progress=show_progress)

    return Experiment(policy, env, stop_condition, hook, description)
end

using Plots
# Build the experiment through the `E` command-string macro. Presumably this
# resolves to the `RL.Experiment` method defined above with `game = tic_tac_toe`
# (the description printed after the run mentions `tic_tac_toe`) — confirm
# against the `E` macro's documentation.
ex = E`JuliaRL_Minimax_OpenSpiel(tic_tac_toe)`
# Execute the experiment; its hooks are inspected afterwards via `ex.hook`.
run(ex)
typename(Experiment)
├─ policy => typename(MultiAgentManager)
│  └─ agents => typename(Dict)
├─ env => typename(OpenSpielEnv)
├─ stop_condition => typename(StopAfterEpisode)
│  ├─ episode => 1
│  ├─ cur => 1
│  └─ progress => typename(Nothing)
├─ hook => typename(MultiAgentHook)
│  └─ hooks => typename(Dict)
└─ description => "# Play `tic_tac_toe` in OpenSpiel with Minimax"

The total reward recorded for each player over the episode is:

ex.hook
MultiAgentHook(Dict{Any, Any}(0 => TotalRewardPerEpisode([0.0], 0.0, true), 1 => TotalRewardPerEpisode([0.0], 0.0, true)))

This page was generated using DemoCards.jl and Literate.jl.