JuliaRL_TD3_Pendulum
using ReinforcementLearning
using StableRNGs
using Flux
using Flux.Losses
using IntervalSets
"""
    RL.Experiment(::Val{:JuliaRL}, ::Val{:TD3}, ::Val{:Pendulum}, ::Nothing; seed = 123)

Assemble the "Play Pendulum with TD3" experiment.

A single `StableRNG` seeded with `seed` is handed to the environment, the weight
initializer, the start policy and the TD3 policy. The returned `Experiment` bundles the
agent, the action-rescaled environment, a 10_000-step stop condition and a
`TotalRewardPerEpisode` hook.

# Keywords
- `seed = 123`: seed for the shared `StableRNG`.
"""
function RL.Experiment(
    ::Val{:JuliaRL},
    ::Val{:TD3},
    ::Val{:Pendulum},
    ::Nothing;
    seed = 123,
)
    rng = StableRNG(seed)

    # Raw pendulum environment; its action interval supplies the rescaling bounds.
    pendulum = PendulumEnv(T = Float32, rng = rng)
    bounds = action_space(pendulum)
    lo = bounds.left
    hi = bounds.right
    n_state = length(state(pendulum))

    # Affine map sending -1 to `lo` and +1 to `hi`, so the agent can act in [-1, 1].
    rescale(x) = lo + (x + 1) * 0.5 * (hi - lo)
    env = ActionTransformedEnv(pendulum; action_mapping = rescale)

    weight_init = glorot_uniform(rng)

    # Actor network: state in, one tanh-squashed output.
    function make_actor()
        layers = Chain(
            Dense(n_state, 30, relu; init = weight_init),
            Dense(30, 30, relu; init = weight_init),
            Dense(30, 1, tanh; init = weight_init),
        )
        return gpu(layers)
    end

    # Single critic network with `n_state + 1` inputs
    # (presumably the state joined with the scalar action -- confirm against TD3Critic).
    function make_q_network()
        layers = Chain(
            Dense(n_state + 1, 30, relu; init = weight_init),
            Dense(30, 30, relu; init = weight_init),
            Dense(30, 1; init = weight_init),
        )
        return gpu(layers)
    end

    # A TD3 critic wraps two independently initialized critic networks.
    make_critic() = TD3Critic(make_q_network(), make_q_network())

    # Pair a model with its own fresh ADAM optimizer.
    approximate(model) = NeuralNetworkApproximator(model = model, optimizer = ADAM())

    # Networks are built in this exact order because every initialization draws
    # from the shared `rng`.
    behavior_actor = approximate(make_actor())
    behavior_critic = approximate(make_critic())
    target_actor = approximate(make_actor())
    target_critic = approximate(make_critic())

    policy = TD3Policy(
        behavior_actor = behavior_actor,
        behavior_critic = behavior_critic,
        target_actor = target_actor,
        target_critic = target_critic,
        γ = 0.99f0,
        ρ = 0.99f0,
        batch_size = 64,
        start_steps = 1000,
        start_policy = RandomPolicy(-1.0..1.0; rng = rng),
        update_after = 1000,
        update_freq = 1,
        policy_freq = 2,
        target_act_limit = 1.0,
        target_act_noise = 0.1,
        act_limit = 1.0,
        act_noise = 0.1,
        rng = rng,
    )

    trajectory = CircularArraySARTTrajectory(
        capacity = 10_000,
        state = Vector{Float32} => (n_state,),
        action = Float32 => (),
    )

    agent = Agent(policy = policy, trajectory = trajectory)

    # The progress display is switched off whenever a `CI` environment variable exists.
    show_progress = !haskey(ENV, "CI")

    return Experiment(
        agent,
        env,
        StopAfterStep(10_000, is_show_progress = show_progress),
        TotalRewardPerEpisode(),
        "# Play Pendulum with TD3",
    )
end
using Plots
# Build the experiment named `JuliaRL_TD3_Pendulum` through the `E` command-string macro
# (presumably this resolves to the `RL.Experiment` method for :JuliaRL/:TD3/:Pendulum
# with the default seed -- confirm against the macro's definition).
ex = E`JuliaRL_TD3_Pendulum`
# Execute the experiment; the hook's `rewards` field is read afterwards for plotting.
run(ex)
# Plot the rewards collected by the experiment's hook.
plot(ex.hook.rewards)
Total reward per episode
┌────────────────────────────────────────┐
0 │⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⢸⠀⠀⠀⠀⠀⠀⡀⢀⠀⡀⠀⠀⠀⠀⡀⠀⢀⡀⠀⠀⠀⣀⡀│
│⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⡇⠀⢸⡄⡏⢣⡀⠀⡰⠉⠉⠳⠙⣄⡎⣆⣰⠉⢣⠎⢸⢸⡇⢠⠃⠱│
│⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⣷⠀⢸⡇⡇⠀⢣⠀⡇⠀⠀⠀⠀⠀⠀⠀⠁⠀⠀⠀⠈⠇⠸⠋⠀⠀│
│⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⣿⠀⡸⣧⠃⠀⢸⠀⡇⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀│
│⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⣿⠀⡇⣿⠀⠀⠀⣷⠁⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀│
│⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⢰⡇⢰⢹⠀⡇⢹⠀⠀⠀⣿⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀│
│⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⡜⢣⢸⢸⠀⡇⠀⠀⠀⠀⠘⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀│
Score │⠀⡠⢄⠀⠀⠀⠀⠀⠀⠀⢀⠇⠸⣸⢸⠀⡇⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀│
│⠀⡇⢸⠀⠀⠀⠀⠀⡰⠒⠊⠀⠀⢿⠀⡇⡇⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀│
│⠘⠀⠀⡇⠀⠀⠀⢠⠇⠀⠀⠀⠀⠈⠀⡇⡇⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀│
│⠀⠀⠀⢸⠀⣴⠀⡜⠀⠀⠀⠀⠀⠀⠀⠋⠁⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀│
│⠀⠀⠀⠀⠉⠈⡶⠁⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀│
│⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀│
│⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀│
-2000 │⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀│
└────────────────────────────────────────┘
0 50
Episode
This page was generated using DemoCards.jl and Literate.jl.