TensorFlow version
%matplotlib inline
import sys
import logging
import itertools
import numpy as np
np.random.seed(0)
import pandas as pd
import gym
import matplotlib.pyplot as plt
import tensorflow.compat.v2 as tf
tf.random.set_seed(0)
from tensorflow import nn
from tensorflow import losses
from tensorflow import optimizers
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import models
# Send INFO-and-above records to stdout, prefixed with an HH:MM:SS timestamp.
logging.basicConfig(
    stream=sys.stdout,
    level=logging.INFO,
    datefmt='%H:%M:%S',
    format='%(asctime)s [%(levelname)s] %(message)s',
)

# Create the environment, then log every attribute of the environment object
# and of its spec so the configuration is recorded alongside the results.
env = gym.make('MountainCar-v0')
for key, value in vars(env).items():
    logging.info('%s: %s', key, value)
for key, value in vars(env.spec).items():
    logging.info('%s: %s', key, value)
22:21:31 [INFO] env: <MountainCarEnv<MountainCar-v0>> 22:21:31 [INFO] action_space: Discrete(3) 22:21:31 [INFO] observation_space: Box(-1.2000000476837158, 0.6000000238418579, (2,), float32) 22:21:31 [INFO] reward_range: (-inf, inf) 22:21:31 [INFO] metadata: {'render.modes': ['human', 'rgb_array'], 'video.frames_per_second': 30} 22:21:31 [INFO] _max_episode_steps: 200 22:21:31 [INFO] _elapsed_steps: None 22:21:31 [INFO] id: MountainCar-v0 22:21:31 [INFO] entry_point: gym.envs.classic_control:MountainCarEnv 22:21:31 [INFO] reward_threshold: -110.0 22:21:31 [INFO] nondeterministic: False 22:21:31 [INFO] max_episode_steps: 200 22:21:31 [INFO] _kwargs: {} 22:21:31 [INFO] _env_name: MountainCar
class DQNReplayer:
    """Fixed-capacity experience replay buffer stored in a pandas DataFrame.

    Each row holds one transition
    ``(state, action, reward, next_state, terminated)``. Once ``capacity``
    rows have been written, new transitions overwrite the oldest ones.
    """

    def __init__(self, capacity):
        """Allocate an empty buffer with room for ``capacity`` transitions."""
        fields = ['state', 'action', 'reward', 'next_state', 'terminated']
        self.capacity = capacity
        self.count = 0  # number of rows currently holding a transition
        self.i = 0  # row index the next transition will be written to
        self.memory = pd.DataFrame(index=range(capacity), columns=fields)

    def store(self, *args):
        """Write one transition (one value per column) at the write cursor."""
        # dtype=object keeps array-valued fields as single cells of the row.
        row = np.asarray(args, dtype=object)
        self.memory.loc[self.i] = row
        # Advance the cursor, wrapping around at the end of the buffer.
        self.i = (self.i + 1) % self.capacity
        self.count = min(self.capacity, self.count + 1)

    def sample(self, size):
        """Draw ``size`` stored transitions uniformly, with replacement.

        Returns a generator yielding one stacked array per column, in column
        order: states, actions, rewards, next_states, terminateds.
        """
        picked = np.random.choice(self.count, size=size)
        return (np.stack(self.memory.loc[picked, column])
                for column in self.memory.columns)
class DoubleDQNAgent:
    """Double DQN agent with an online ("evaluate") and a target Q-network.

    The online network selects the greedy next action and the target network
    evaluates it when building the TD targets (see ``learn``). The target
    network is synchronised with the online network at every ``reset`` in
    train mode.

    Args:
        env: environment exposing ``action_space.n`` and
            ``observation_space.shape``.
        gamma: discount factor used in the TD target.
        epsilon: probability of taking a uniformly random action in train
            mode.
        batch_size: number of transitions sampled from the replay buffer for
            each call to ``learn``.
        replayer_capacity: maximum number of transitions kept for replay.
        warmup_fraction: learning starts once the replay buffer holds at
            least this fraction of its capacity.
        learning_rate: step size of the Adam optimizer of the online network.

    All keyword arguments default to the values that were previously
    hard-coded, so ``DoubleDQNAgent(env)`` behaves as before.
    """

    def __init__(self, env, gamma=0.99, epsilon=0.001, batch_size=1024,
                 replayer_capacity=10000, warmup_fraction=0.95,
                 learning_rate=0.001):
        self.action_n = env.action_space.n
        self.gamma = gamma
        self.epsilon = epsilon
        self.batch_size = batch_size
        self.warmup_fraction = warmup_fraction

        # Defined up front so that step() can be called before reset()
        # (it then acts greedily and records nothing) instead of raising
        # AttributeError.
        self.mode = None
        self.trajectory = []

        self.replayer = DQNReplayer(replayer_capacity)

        self.evaluate_net = self.build_net(
            input_size=env.observation_space.shape[0],
            hidden_sizes=[64, 64], output_size=self.action_n,
            learning_rate=learning_rate)
        # Same architecture as the online network; its weights are copied
        # from the online network in reset().
        self.target_net = models.clone_model(self.evaluate_net)

    def build_net(self, input_size, hidden_sizes, output_size,
                  learning_rate=0.001):
        """Build and compile a fully connected Q-network.

        Args:
            input_size: length of the observation vector.
            hidden_sizes: number of units of each hidden ReLU layer.
            output_size: number of outputs (one Q-value per action).
            learning_rate: step size of the Adam optimizer.

        Returns:
            A compiled ``keras.Sequential`` model with a linear output layer
            and mean-squared-error loss.
        """
        model = keras.Sequential()
        for layer, hidden_size in enumerate(hidden_sizes):
            # Only the first layer needs to be told the input shape.
            kwargs = dict(input_shape=(input_size,)) if not layer else {}
            model.add(layers.Dense(units=hidden_size,
                                   activation=nn.relu, **kwargs))
        model.add(layers.Dense(units=output_size))
        optimizer = optimizers.Adam(learning_rate)
        model.compile(loss=losses.mse, optimizer=optimizer)
        return model

    def reset(self, mode=None):
        """Prepare for a new episode.

        In ``'train'`` mode the per-episode trajectory is cleared and the
        target network is synchronised with the online network.
        """
        self.mode = mode
        if self.mode == 'train':
            self.trajectory = []
            self.target_net.set_weights(self.evaluate_net.get_weights())

    def step(self, observation, reward, terminated):
        """Choose an action for ``observation``.

        In train mode this also records the transition that led to
        ``observation`` (``reward`` and ``terminated`` belong to that
        transition) and, once the replay buffer is sufficiently full,
        performs one learning update.

        Returns:
            The index of the chosen action.
        """
        if self.mode == 'train' and np.random.rand() < self.epsilon:
            # epsilon-greedy policy in train mode
            action = np.random.randint(self.action_n)
        else:
            qs = self.evaluate_net.predict(observation[np.newaxis], verbose=0)
            action = np.argmax(qs)
        if self.mode == 'train':
            self.trajectory += [observation, reward, terminated, action]
            if len(self.trajectory) >= 8:
                # The last two (observation, reward, terminated, action)
                # records form one transition: previous observation and
                # action, then the current reward / observation / flag.
                state, _, _, act, next_state, reward, terminated, _ = \
                    self.trajectory[-8:]
                self.replayer.store(state, act, reward, next_state,
                                    terminated)
            if self.replayer.count >= \
                    self.replayer.capacity * self.warmup_fraction:
                # skip first few episodes for speed
                self.learn()
        return action

    def close(self):
        """Nothing to release; present so callers can always call close()."""
        pass

    def learn(self):
        """Run one Double DQN update on a batch sampled from the replayer."""
        # replay
        states, actions, rewards, next_states, terminateds = \
            self.replayer.sample(self.batch_size)

        # update value net
        # Double DQN: the online network picks the next action ...
        next_eval_qs = self.evaluate_net.predict(next_states, verbose=0)
        next_actions = next_eval_qs.argmax(axis=-1)
        # ... and the target network supplies the value of that action.
        next_qs = self.target_net.predict(next_states, verbose=0)
        next_max_qs = next_qs[np.arange(next_qs.shape[0]), next_actions]
        # No bootstrapping from next states flagged as terminated.
        us = rewards + self.gamma * next_max_qs * (1. - terminateds)
        # Start from the current predictions so that only the entries of the
        # actions actually taken contribute to the loss.
        targets = self.evaluate_net.predict(states, verbose=0)
        targets[np.arange(us.shape[0]), actions] = us
        self.evaluate_net.fit(states, targets, verbose=0)
# Instantiate the Double DQN agent for the environment created above.
agent = DoubleDQNAgent(env)
def play_episode(env, agent, seed=None, mode=None, render=False):
    """Run one episode of ``agent`` in ``env`` and report its outcome.

    The agent is consulted once per observation, including the final one, so
    that it also sees the last reward and termination flag; the action it
    returns for the final observation is not executed.

    Args:
        env: environment whose ``reset(seed=...)`` returns
            ``(observation, info)`` and whose ``step(action)`` returns
            ``(observation, reward, terminated, truncated, info)``.
        agent: object providing ``reset(mode=...)``,
            ``step(observation, reward, terminated)`` and ``close()``.
        seed: forwarded to ``env.reset``.
        mode: forwarded to ``agent.reset``.
        render: when true, ``env.render()`` is called after every agent step.

    Returns:
        Tuple ``(episode_reward, elapsed_steps)``: the sum of the rewards and
        the number of environment steps taken.
    """
    observation, _ = env.reset(seed=seed)
    agent.reset(mode=mode)

    reward = 0.
    terminated = truncated = False
    total_reward = 0.
    step_count = 0

    # Decision for the initial observation.
    action = agent.step(observation, reward, terminated)
    if render:
        env.render()

    while not (terminated or truncated):
        observation, reward, terminated, truncated, _ = env.step(action)
        total_reward += reward
        step_count += 1
        # Also invoked on the final observation so the agent can record it.
        action = agent.step(observation, reward, terminated)
        if render:
            env.render()

    agent.close()
    return total_reward, step_count
# ---- training: one seeded episode per iteration until the stop rule fires ----
logging.info('==== train ====')
episode_rewards = []
for episode in itertools.count():
    episode_reward, elapsed_steps = play_episode(
        env, agent, seed=episode, mode='train')
    episode_rewards.append(episode_reward)
    logging.info('train episode %d: reward = %.2f, steps = %d',
                 episode, episode_reward, elapsed_steps)
    # Stop once the mean reward of the most recent (up to 10) episodes
    # exceeds -110.
    recent_rewards = episode_rewards[-10:]
    if np.mean(recent_rewards) > -110:
        break
plt.plot(episode_rewards)

# ---- evaluation: 100 episodes with the agent's default (non-train) mode ----
logging.info('==== test ====')
episode_rewards = []
for episode in range(100):
    episode_reward, elapsed_steps = play_episode(env, agent)
    episode_rewards.append(episode_reward)
    logging.info('test episode %d: reward = %.2f, steps = %d',
                 episode, episode_reward, elapsed_steps)
average_reward = np.mean(episode_rewards)
reward_spread = np.std(episode_rewards)
logging.info('average episode reward = %.2f ± %.2f',
             average_reward, reward_spread)
22:21:32 [INFO] ==== train ==== 22:21:40 [INFO] train episode 0: reward = -200.00, steps = 200 22:21:51 [INFO] train episode 1: reward = -200.00, steps = 200 22:22:00 [INFO] train episode 2: reward = -200.00, steps = 200 22:22:12 [INFO] train episode 3: reward = -200.00, steps = 200 22:22:24 [INFO] train episode 4: reward = -200.00, steps = 200 22:22:36 [INFO] train episode 5: reward = -200.00, steps = 200 22:22:50 [INFO] train episode 6: reward = -200.00, steps = 200 22:23:04 [INFO] train episode 7: reward = -200.00, steps = 200 22:23:18 [INFO] train episode 8: reward = -200.00, steps = 200 22:23:33 [INFO] train episode 9: reward = -200.00, steps = 200 22:23:54 [INFO] train episode 10: reward = -200.00, steps = 200 22:24:16 [INFO] train episode 11: reward = -200.00, steps = 200 22:24:30 [INFO] train episode 12: reward = -200.00, steps = 200 22:24:52 [INFO] train episode 13: reward = -200.00, steps = 200 22:25:16 [INFO] train episode 14: reward = -200.00, steps = 200 22:25:32 [INFO] train episode 15: reward = -200.00, steps = 200 22:25:45 [INFO] train episode 16: reward = -200.00, steps = 200 22:25:59 [INFO] train episode 17: reward = -200.00, steps = 200 22:26:12 [INFO] train episode 18: reward = -200.00, steps = 200 22:26:25 [INFO] train episode 19: reward = -200.00, steps = 200 22:26:39 [INFO] train episode 20: reward = -200.00, steps = 200 22:26:54 [INFO] train episode 21: reward = -200.00, steps = 200 22:27:08 [INFO] train episode 22: reward = -200.00, steps = 200 22:27:21 [INFO] train episode 23: reward = -200.00, steps = 200 22:27:36 [INFO] train episode 24: reward = -200.00, steps = 200 22:27:51 [INFO] train episode 25: reward = -200.00, steps = 200 22:28:06 [INFO] train episode 26: reward = -200.00, steps = 200 22:28:20 [INFO] train episode 27: reward = -200.00, steps = 200 22:28:34 [INFO] train episode 28: reward = -200.00, steps = 200 22:28:49 [INFO] train episode 29: reward = -200.00, steps = 200 22:29:03 [INFO] train episode 30: reward = -200.00, steps 
= 200 22:29:18 [INFO] train episode 31: reward = -200.00, steps = 200 22:29:32 [INFO] train episode 32: reward = -200.00, steps = 200 22:29:44 [INFO] train episode 33: reward = -200.00, steps = 200 22:29:57 [INFO] train episode 34: reward = -200.00, steps = 200 22:30:10 [INFO] train episode 35: reward = -200.00, steps = 200 22:30:24 [INFO] train episode 36: reward = -200.00, steps = 200 22:30:44 [INFO] train episode 37: reward = -200.00, steps = 200 22:31:03 [INFO] train episode 38: reward = -200.00, steps = 200 22:31:16 [INFO] train episode 39: reward = -200.00, steps = 200 22:31:31 [INFO] train episode 40: reward = -200.00, steps = 200 22:31:46 [INFO] train episode 41: reward = -200.00, steps = 200 22:32:00 [INFO] train episode 42: reward = -200.00, steps = 200 22:32:16 [INFO] train episode 43: reward = -200.00, steps = 200 22:32:30 [INFO] train episode 44: reward = -200.00, steps = 200 22:32:44 [INFO] train episode 45: reward = -200.00, steps = 200 22:33:01 [INFO] train episode 46: reward = -200.00, steps = 200 22:33:36 [INFO] train episode 47: reward = -200.00, steps = 200 22:35:04 [INFO] train episode 48: reward = -200.00, steps = 200 22:36:48 [INFO] train episode 49: reward = -200.00, steps = 200 22:38:43 [INFO] train episode 50: reward = -200.00, steps = 200 22:40:51 [INFO] train episode 51: reward = -200.00, steps = 200 22:43:12 [INFO] train episode 52: reward = -200.00, steps = 200 22:45:32 [INFO] train episode 53: reward = -200.00, steps = 200 22:47:46 [INFO] train episode 54: reward = -200.00, steps = 200 22:49:48 [INFO] train episode 55: reward = -200.00, steps = 200 22:52:03 [INFO] train episode 56: reward = -200.00, steps = 200 22:54:16 [INFO] train episode 57: reward = -200.00, steps = 200 22:56:27 [INFO] train episode 58: reward = -200.00, steps = 200 22:58:36 [INFO] train episode 59: reward = -200.00, steps = 200 23:00:55 [INFO] train episode 60: reward = -200.00, steps = 200 23:03:22 [INFO] train episode 61: reward = -200.00, steps = 200 23:05:41 
[INFO] train episode 62: reward = -200.00, steps = 200 23:07:57 [INFO] train episode 63: reward = -200.00, steps = 200 23:10:15 [INFO] train episode 64: reward = -200.00, steps = 200 23:12:59 [INFO] train episode 65: reward = -200.00, steps = 200 23:15:58 [INFO] train episode 66: reward = -200.00, steps = 200 23:18:54 [INFO] train episode 67: reward = -200.00, steps = 200 23:21:51 [INFO] train episode 68: reward = -200.00, steps = 200 23:24:52 [INFO] train episode 69: reward = -200.00, steps = 200 23:27:53 [INFO] train episode 70: reward = -200.00, steps = 200 23:30:54 [INFO] train episode 71: reward = -200.00, steps = 200 23:33:49 [INFO] train episode 72: reward = -200.00, steps = 200 23:36:48 [INFO] train episode 73: reward = -200.00, steps = 200 23:39:48 [INFO] train episode 74: reward = -200.00, steps = 200 23:42:58 [INFO] train episode 75: reward = -200.00, steps = 200 23:46:10 [INFO] train episode 76: reward = -200.00, steps = 200 23:49:28 [INFO] train episode 77: reward = -200.00, steps = 200 23:52:42 [INFO] train episode 78: reward = -200.00, steps = 200 23:55:46 [INFO] train episode 79: reward = -200.00, steps = 200 23:58:41 [INFO] train episode 80: reward = -200.00, steps = 200 00:01:36 [INFO] train episode 81: reward = -200.00, steps = 200 00:04:32 [INFO] train episode 82: reward = -200.00, steps = 200 00:07:24 [INFO] train episode 83: reward = -200.00, steps = 200 00:10:21 [INFO] train episode 84: reward = -200.00, steps = 200 00:13:15 [INFO] train episode 85: reward = -200.00, steps = 200 00:16:10 [INFO] train episode 86: reward = -200.00, steps = 200 00:19:04 [INFO] train episode 87: reward = -200.00, steps = 200 00:21:58 [INFO] train episode 88: reward = -200.00, steps = 200 00:24:52 [INFO] train episode 89: reward = -200.00, steps = 200 00:27:49 [INFO] train episode 90: reward = -200.00, steps = 200 00:30:42 [INFO] train episode 91: reward = -200.00, steps = 200 00:33:43 [INFO] train episode 92: reward = -200.00, steps = 200 00:36:36 [INFO] train 
episode 93: reward = -200.00, steps = 200 00:39:30 [INFO] train episode 94: reward = -200.00, steps = 200 00:42:25 [INFO] train episode 95: reward = -200.00, steps = 200 00:45:19 [INFO] train episode 96: reward = -200.00, steps = 200 00:48:11 [INFO] train episode 97: reward = -200.00, steps = 200 00:51:04 [INFO] train episode 98: reward = -200.00, steps = 200 00:53:57 [INFO] train episode 99: reward = -200.00, steps = 200 00:56:51 [INFO] train episode 100: reward = -200.00, steps = 200 00:59:47 [INFO] train episode 101: reward = -200.00, steps = 200 01:02:39 [INFO] train episode 102: reward = -200.00, steps = 200 01:05:33 [INFO] train episode 103: reward = -200.00, steps = 200 01:08:27 [INFO] train episode 104: reward = -200.00, steps = 200 01:11:20 [INFO] train episode 105: reward = -200.00, steps = 200 01:14:14 [INFO] train episode 106: reward = -200.00, steps = 200 01:17:06 [INFO] train episode 107: reward = -200.00, steps = 200 01:20:00 [INFO] train episode 108: reward = -200.00, steps = 200 01:22:54 [INFO] train episode 109: reward = -200.00, steps = 200 01:25:48 [INFO] train episode 110: reward = -200.00, steps = 200 01:28:43 [INFO] train episode 111: reward = -200.00, steps = 200 01:31:37 [INFO] train episode 112: reward = -200.00, steps = 200 01:34:31 [INFO] train episode 113: reward = -200.00, steps = 200 01:37:24 [INFO] train episode 114: reward = -200.00, steps = 200 01:40:18 [INFO] train episode 115: reward = -200.00, steps = 200 01:43:13 [INFO] train episode 116: reward = -200.00, steps = 200 01:46:08 [INFO] train episode 117: reward = -200.00, steps = 200 01:49:01 [INFO] train episode 118: reward = -200.00, steps = 200 01:51:53 [INFO] train episode 119: reward = -200.00, steps = 200 01:54:48 [INFO] train episode 120: reward = -200.00, steps = 200 01:57:41 [INFO] train episode 121: reward = -200.00, steps = 200 02:00:34 [INFO] train episode 122: reward = -200.00, steps = 200 02:03:26 [INFO] train episode 123: reward = -200.00, steps = 200 02:06:18 
[INFO] train episode 124: reward = -200.00, steps = 200 02:09:10 [INFO] train episode 125: reward = -200.00, steps = 200 02:12:03 [INFO] train episode 126: reward = -200.00, steps = 200 02:14:57 [INFO] train episode 127: reward = -200.00, steps = 200 02:17:50 [INFO] train episode 128: reward = -200.00, steps = 200 02:20:42 [INFO] train episode 129: reward = -200.00, steps = 200 02:23:36 [INFO] train episode 130: reward = -200.00, steps = 200 02:26:31 [INFO] train episode 131: reward = -200.00, steps = 200 02:29:25 [INFO] train episode 132: reward = -200.00, steps = 200 02:32:18 [INFO] train episode 133: reward = -200.00, steps = 200 02:35:10 [INFO] train episode 134: reward = -200.00, steps = 200 02:38:03 [INFO] train episode 135: reward = -200.00, steps = 200 02:40:56 [INFO] train episode 136: reward = -200.00, steps = 200 02:43:48 [INFO] train episode 137: reward = -200.00, steps = 200 02:46:30 [INFO] train episode 138: reward = -186.00, steps = 186 02:49:23 [INFO] train episode 139: reward = -200.00, steps = 200 02:51:56 [INFO] train episode 140: reward = -174.00, steps = 174 02:54:51 [INFO] train episode 141: reward = -200.00, steps = 200 02:57:34 [INFO] train episode 142: reward = -200.00, steps = 200 03:00:08 [INFO] train episode 143: reward = -200.00, steps = 200 03:02:42 [INFO] train episode 144: reward = -200.00, steps = 200 03:05:17 [INFO] train episode 145: reward = -200.00, steps = 200 03:07:51 [INFO] train episode 146: reward = -200.00, steps = 200 03:10:25 [INFO] train episode 147: reward = -200.00, steps = 200 03:12:59 [INFO] train episode 148: reward = -200.00, steps = 200 03:15:33 [INFO] train episode 149: reward = -200.00, steps = 200 03:18:07 [INFO] train episode 150: reward = -200.00, steps = 200 03:20:38 [INFO] train episode 151: reward = -200.00, steps = 200 03:23:08 [INFO] train episode 152: reward = -200.00, steps = 200 03:25:37 [INFO] train episode 153: reward = -200.00, steps = 200 03:28:08 [INFO] train episode 154: reward = -200.00, steps 
= 200 03:30:39 [INFO] train episode 155: reward = -200.00, steps = 200 03:33:10 [INFO] train episode 156: reward = -200.00, steps = 200 03:35:39 [INFO] train episode 157: reward = -200.00, steps = 200 03:38:09 [INFO] train episode 158: reward = -200.00, steps = 200 03:40:40 [INFO] train episode 159: reward = -200.00, steps = 200 03:43:10 [INFO] train episode 160: reward = -200.00, steps = 200 03:45:38 [INFO] train episode 161: reward = -200.00, steps = 200 03:48:07 [INFO] train episode 162: reward = -200.00, steps = 200 03:50:38 [INFO] train episode 163: reward = -200.00, steps = 200 03:53:09 [INFO] train episode 164: reward = -200.00, steps = 200 03:55:40 [INFO] train episode 165: reward = -200.00, steps = 200 03:58:10 [INFO] train episode 166: reward = -200.00, steps = 200 03:59:26 [INFO] train episode 167: reward = -101.00, steps = 101 04:01:31 [INFO] train episode 168: reward = -165.00, steps = 165 04:04:00 [INFO] train episode 169: reward = -200.00, steps = 200 04:06:02 [INFO] train episode 170: reward = -165.00, steps = 165 04:07:38 [INFO] train episode 171: reward = -129.00, steps = 129 04:09:05 [INFO] train episode 172: reward = -113.00, steps = 113 04:11:11 [INFO] train episode 173: reward = -156.00, steps = 156 04:13:19 [INFO] train episode 174: reward = -172.00, steps = 172 04:14:54 [INFO] train episode 175: reward = -126.00, steps = 126 04:17:22 [INFO] train episode 176: reward = -200.00, steps = 200 04:19:44 [INFO] train episode 177: reward = -200.00, steps = 200 04:21:57 [INFO] train episode 178: reward = -200.00, steps = 200 04:23:35 [INFO] train episode 179: reward = -145.00, steps = 145 04:25:47 [INFO] train episode 180: reward = -200.00, steps = 200 04:27:23 [INFO] train episode 181: reward = -141.00, steps = 141 04:29:35 [INFO] train episode 182: reward = -200.00, steps = 200 04:31:46 [INFO] train episode 183: reward = -200.00, steps = 200 04:33:46 [INFO] train episode 184: reward = -200.00, steps = 200 04:35:45 [INFO] train episode 185: reward = 
-200.00, steps = 200 04:37:41 [INFO] train episode 186: reward = -200.00, steps = 200 04:39:35 [INFO] train episode 187: reward = -200.00, steps = 200 04:41:29 [INFO] train episode 188: reward = -200.00, steps = 200 04:43:23 [INFO] train episode 189: reward = -200.00, steps = 200 04:45:09 [INFO] train episode 190: reward = -185.00, steps = 185 04:47:03 [INFO] train episode 191: reward = -200.00, steps = 200 04:48:58 [INFO] train episode 192: reward = -200.00, steps = 200 04:50:49 [INFO] train episode 193: reward = -200.00, steps = 200 04:52:37 [INFO] train episode 194: reward = -200.00, steps = 200 04:54:24 [INFO] train episode 195: reward = -200.00, steps = 200 04:56:07 [INFO] train episode 196: reward = -200.00, steps = 200 04:57:50 [INFO] train episode 197: reward = -200.00, steps = 200 04:59:29 [INFO] train episode 198: reward = -192.00, steps = 192 05:01:06 [INFO] train episode 199: reward = -162.00, steps = 162 05:02:49 [INFO] train episode 200: reward = -200.00, steps = 200 05:03:41 [INFO] train episode 201: reward = -93.00, steps = 93 05:04:34 [INFO] train episode 202: reward = -102.00, steps = 102 05:06:19 [INFO] train episode 203: reward = -200.00, steps = 200 05:08:09 [INFO] train episode 204: reward = -200.00, steps = 200 05:09:54 [INFO] train episode 205: reward = -189.00, steps = 189 05:11:17 [INFO] train episode 206: reward = -149.00, steps = 149 05:13:03 [INFO] train episode 207: reward = -200.00, steps = 200 05:14:46 [INFO] train episode 208: reward = -200.00, steps = 200 05:16:30 [INFO] train episode 209: reward = -200.00, steps = 200 05:18:19 [INFO] train episode 210: reward = -200.00, steps = 200 05:19:05 [INFO] train episode 211: reward = -84.00, steps = 84 05:20:48 [INFO] train episode 212: reward = -200.00, steps = 200 05:21:33 [INFO] train episode 213: reward = -86.00, steps = 86 05:22:18 [INFO] train episode 214: reward = -86.00, steps = 86 05:23:27 [INFO] train episode 215: reward = -133.00, steps = 133 05:24:16 [INFO] train episode 216: 
reward = -94.00, steps = 94 05:25:02 [INFO] train episode 217: reward = -89.00, steps = 89 05:25:46 [INFO] train episode 218: reward = -84.00, steps = 84 05:26:41 [INFO] train episode 219: reward = -105.00, steps = 105 05:28:27 [INFO] train episode 220: reward = -200.00, steps = 200 05:29:30 [INFO] train episode 221: reward = -120.00, steps = 120 05:30:24 [INFO] train episode 222: reward = -106.00, steps = 106 05:31:13 [INFO] train episode 223: reward = -94.00, steps = 94 05:31:59 [INFO] train episode 224: reward = -86.00, steps = 86 05:32:52 [INFO] train episode 225: reward = -100.00, steps = 100 05:32:52 [INFO] ==== test ==== 05:33:02 [INFO] test episode 0: reward = -109.00, steps = 109 05:33:11 [INFO] test episode 1: reward = -104.00, steps = 104 05:33:20 [INFO] test episode 2: reward = -109.00, steps = 109 05:33:30 [INFO] test episode 3: reward = -109.00, steps = 109 05:33:39 [INFO] test episode 4: reward = -110.00, steps = 110 05:33:52 [INFO] test episode 5: reward = -150.00, steps = 150 05:34:00 [INFO] test episode 6: reward = -86.00, steps = 86 05:34:09 [INFO] test episode 7: reward = -107.00, steps = 107 05:34:18 [INFO] test episode 8: reward = -104.00, steps = 104 05:34:27 [INFO] test episode 9: reward = -104.00, steps = 104 05:34:35 [INFO] test episode 10: reward = -88.00, steps = 88 05:34:44 [INFO] test episode 11: reward = -104.00, steps = 104 05:34:53 [INFO] test episode 12: reward = -105.00, steps = 105 05:35:01 [INFO] test episode 13: reward = -104.00, steps = 104 05:35:09 [INFO] test episode 14: reward = -87.00, steps = 87 05:35:18 [INFO] test episode 15: reward = -106.00, steps = 106 05:35:28 [INFO] test episode 16: reward = -109.00, steps = 109 05:35:37 [INFO] test episode 17: reward = -104.00, steps = 104 05:35:46 [INFO] test episode 18: reward = -106.00, steps = 106 05:35:55 [INFO] test episode 19: reward = -102.00, steps = 102 05:36:02 [INFO] test episode 20: reward = -87.00, steps = 87 05:36:12 [INFO] test episode 21: reward = -107.00, steps = 
107 05:36:21 [INFO] test episode 22: reward = -103.00, steps = 103 05:36:30 [INFO] test episode 23: reward = -104.00, steps = 104 05:36:39 [INFO] test episode 24: reward = -107.00, steps = 107 05:36:48 [INFO] test episode 25: reward = -103.00, steps = 103 05:36:57 [INFO] test episode 26: reward = -105.00, steps = 105 05:37:05 [INFO] test episode 27: reward = -99.00, steps = 99 05:37:15 [INFO] test episode 28: reward = -105.00, steps = 105 05:37:24 [INFO] test episode 29: reward = -104.00, steps = 104 05:37:32 [INFO] test episode 30: reward = -102.00, steps = 102 05:37:41 [INFO] test episode 31: reward = -103.00, steps = 103 05:37:51 [INFO] test episode 32: reward = -107.00, steps = 107 05:38:00 [INFO] test episode 33: reward = -110.00, steps = 110 05:38:09 [INFO] test episode 34: reward = -104.00, steps = 104 05:38:18 [INFO] test episode 35: reward = -108.00, steps = 108 05:38:26 [INFO] test episode 36: reward = -87.00, steps = 87 05:38:35 [INFO] test episode 37: reward = -105.00, steps = 105 05:38:44 [INFO] test episode 38: reward = -104.00, steps = 104 05:38:52 [INFO] test episode 39: reward = -91.00, steps = 91 05:39:00 [INFO] test episode 40: reward = -101.00, steps = 101 05:39:08 [INFO] test episode 41: reward = -87.00, steps = 87 05:39:15 [INFO] test episode 42: reward = -86.00, steps = 86 05:39:24 [INFO] test episode 43: reward = -105.00, steps = 105 05:39:32 [INFO] test episode 44: reward = -86.00, steps = 86 05:39:41 [INFO] test episode 45: reward = -106.00, steps = 106 05:39:50 [INFO] test episode 46: reward = -105.00, steps = 105 05:39:59 [INFO] test episode 47: reward = -106.00, steps = 106 05:40:07 [INFO] test episode 48: reward = -86.00, steps = 86 05:40:15 [INFO] test episode 49: reward = -99.00, steps = 99 05:40:24 [INFO] test episode 50: reward = -107.00, steps = 107 05:40:32 [INFO] test episode 51: reward = -88.00, steps = 88 05:40:39 [INFO] test episode 52: reward = -87.00, steps = 87 05:40:48 [INFO] test episode 53: reward = -104.00, steps = 104 
05:40:58 [INFO] test episode 54: reward = -106.00, steps = 106 05:41:07 [INFO] test episode 55: reward = -104.00, steps = 104 05:41:16 [INFO] test episode 56: reward = -106.00, steps = 106 05:41:25 [INFO] test episode 57: reward = -109.00, steps = 109 05:41:32 [INFO] test episode 58: reward = -86.00, steps = 86 05:41:42 [INFO] test episode 59: reward = -106.00, steps = 106 05:41:51 [INFO] test episode 60: reward = -105.00, steps = 105 05:42:00 [INFO] test episode 61: reward = -104.00, steps = 104 05:42:09 [INFO] test episode 62: reward = -105.00, steps = 105 05:42:18 [INFO] test episode 63: reward = -108.00, steps = 108 05:42:27 [INFO] test episode 64: reward = -103.00, steps = 103 05:42:34 [INFO] test episode 65: reward = -87.00, steps = 87 05:42:43 [INFO] test episode 66: reward = -104.00, steps = 104 05:42:52 [INFO] test episode 67: reward = -106.00, steps = 106 05:43:00 [INFO] test episode 68: reward = -89.00, steps = 89 05:43:09 [INFO] test episode 69: reward = -107.00, steps = 107 05:43:18 [INFO] test episode 70: reward = -104.00, steps = 104 05:43:26 [INFO] test episode 71: reward = -87.00, steps = 87 05:43:33 [INFO] test episode 72: reward = -85.00, steps = 85 05:43:41 [INFO] test episode 73: reward = -87.00, steps = 87 05:43:48 [INFO] test episode 74: reward = -85.00, steps = 85 05:43:57 [INFO] test episode 75: reward = -104.00, steps = 104 05:44:06 [INFO] test episode 76: reward = -106.00, steps = 106 05:44:15 [INFO] test episode 77: reward = -102.00, steps = 102 05:44:24 [INFO] test episode 78: reward = -104.00, steps = 104 05:44:31 [INFO] test episode 79: reward = -85.00, steps = 85 05:44:40 [INFO] test episode 80: reward = -105.00, steps = 105 05:44:53 [INFO] test episode 81: reward = -150.00, steps = 150 05:45:02 [INFO] test episode 82: reward = -106.00, steps = 106 05:45:10 [INFO] test episode 83: reward = -85.00, steps = 85 05:45:19 [INFO] test episode 84: reward = -107.00, steps = 107 05:45:26 [INFO] test episode 85: reward = -87.00, steps = 87 
05:45:34 [INFO] test episode 86: reward = -90.00, steps = 90 05:45:41 [INFO] test episode 87: reward = -86.00, steps = 86 05:45:50 [INFO] test episode 88: reward = -103.00, steps = 103 05:45:59 [INFO] test episode 89: reward = -107.00, steps = 107 05:46:08 [INFO] test episode 90: reward = -107.00, steps = 107 05:46:18 [INFO] test episode 91: reward = -105.00, steps = 105 05:46:27 [INFO] test episode 92: reward = -105.00, steps = 105 05:46:36 [INFO] test episode 93: reward = -108.00, steps = 108 05:46:45 [INFO] test episode 94: reward = -106.00, steps = 106 05:46:54 [INFO] test episode 95: reward = -108.00, steps = 108 05:47:03 [INFO] test episode 96: reward = -106.00, steps = 106 05:47:11 [INFO] test episode 97: reward = -87.00, steps = 87 05:47:24 [INFO] test episode 98: reward = -150.00, steps = 150 05:47:33 [INFO] test episode 99: reward = -102.00, steps = 102 05:47:33 [INFO] average episode reward = -101.99 ± 11.75
# Close the environment now that training and testing are finished.
env.close()