TensorFlow version
%matplotlib inline
import sys
import logging
import itertools
import numpy as np
np.random.seed(0)
import pandas as pd
import gym
import matplotlib.pyplot as plt
import tensorflow.compat.v2 as tf
from tensorflow import nn
from tensorflow import losses
from tensorflow import optimizers
from tensorflow import keras
from tensorflow.keras import layers
# Route INFO-level log records to stdout, each prefixed with a wall-clock time.
logging.basicConfig(
    stream=sys.stdout,
    level=logging.INFO,
    datefmt='%H:%M:%S',
    format='%(asctime)s [%(levelname)s] %(message)s',
)

env = gym.make('MountainCar-v0')

# Dump every instance attribute of the environment, then of its spec,
# so the run log records the task configuration.
for key, value in vars(env).items():
    logging.info('%s: %s', key, value)
for key, value in vars(env.spec).items():
    logging.info('%s: %s', key, value)
22:22:06 [INFO] env: <MountainCarEnv<MountainCar-v0>> 22:22:06 [INFO] action_space: Discrete(3) 22:22:06 [INFO] observation_space: Box(-1.2000000476837158, 0.6000000238418579, (2,), float32) 22:22:06 [INFO] reward_range: (-inf, inf) 22:22:06 [INFO] metadata: {'render.modes': ['human', 'rgb_array'], 'video.frames_per_second': 30} 22:22:06 [INFO] _max_episode_steps: 200 22:22:06 [INFO] _elapsed_steps: None 22:22:06 [INFO] id: MountainCar-v0 22:22:06 [INFO] entry_point: gym.envs.classic_control:MountainCarEnv 22:22:06 [INFO] reward_threshold: -110.0 22:22:06 [INFO] nondeterministic: False 22:22:06 [INFO] max_episode_steps: 200 22:22:06 [INFO] _kwargs: {} 22:22:06 [INFO] _env_name: MountainCar
class DQNReplayer:
    """Fixed-capacity experience replay buffer backed by a pandas DataFrame.

    Transitions are written into a ring of ``capacity`` rows; once every row
    has been used, the next write overwrites row 0 again, and so on.
    """

    def __init__(self, capacity):
        """Allocate an empty buffer with ``capacity`` rows."""
        self.memory = pd.DataFrame(
            index=range(capacity),
            columns=['state', 'action', 'reward', 'next_state', 'terminated'])
        self.i = 0  # row that the next store() call writes to
        self.count = 0  # number of rows that currently hold a transition
        self.capacity = capacity

    def store(self, *args):
        """Write one transition into the buffer.

        Args:
            *args: one value per column, in column order
                (state, action, reward, next_state, terminated).
        """
        # dtype=object keeps each field (array or scalar) as a single cell.
        self.memory.loc[self.i] = np.asarray(args, dtype=object)
        self.i = (self.i + 1) % self.capacity
        self.count = min(self.count + 1, self.capacity)

    def sample(self, size):
        """Draw ``size`` stored transitions uniformly at random.

        Rows are drawn with ``np.random.choice`` using its default
        (with-replacement) behaviour, so a row may appear more than once.

        Args:
            size: number of transitions to draw.

        Returns:
            A tuple with one stacked array per column, in column order.

        Raises:
            ValueError: if nothing has been stored yet.
        """
        if self.count == 0:
            raise ValueError('cannot sample from an empty replayer')
        indices = np.random.choice(self.count, size=size)
        # Build the batch eagerly: a tuple is reusable and indexable, and is
        # not affected by transitions stored after this call returns.
        return tuple(np.stack(self.memory.loc[indices, field])
                     for field in self.memory.columns)
class DuelNet(keras.Model):
    """Dueling Q-network: a shared trunk feeding an advantage head and a
    state-value head, whose outputs are combined into action values."""

    def __init__(self, input_size, output_size):
        """Create the trunk and both heads.

        Args:
            input_size: number of features in one observation.
            output_size: number of actions (width of the advantage head).
        """
        super().__init__()
        # Sub-networks are created in trunk / advantage / value order.
        trunk_layers = [
            layers.Dense(64, input_shape=(input_size,), activation=nn.relu),
        ]
        self.common_net = keras.Sequential(trunk_layers)
        advantage_layers = [
            layers.Dense(32, input_shape=(64,), activation=nn.relu),
            layers.Dense(output_size),
        ]
        self.advantage_net = keras.Sequential(advantage_layers)
        value_layers = [
            layers.Dense(32, input_shape=(64,), activation=nn.relu),
            layers.Dense(1),
        ]
        self.v_net = keras.Sequential(value_layers)

    def call(self, s):
        """Return action values for the batch of observations ``s``."""
        features = self.common_net(s)
        raw_advantage = self.advantage_net(features)
        # Centre the advantages across axis 1 so that their mean is zero.
        mean_advantage = tf.math.reduce_mean(
            raw_advantage, axis=1, keepdims=True)
        state_value = self.v_net(features)
        return state_value + (raw_advantage - mean_advantage)
class DuelDQNAgent:
    """Epsilon-greedy Q-learning agent built on two ``DuelNet`` networks.

    ``evaluate_net`` picks actions and is the network that gets fitted;
    ``target_net`` supplies the bootstrap values in ``learn`` and is
    overwritten with ``evaluate_net``'s weights on every ``reset`` in
    ``'train'`` mode.
    """

    def __init__(self, env, *, gamma=0.99, epsilon=0.001,
                 replayer_capacity=10000, batch_size=1024,
                 learning_rate=0.001, warmup_fraction=0.95):
        """Build the replay buffer and both networks.

        Args:
            env: environment exposing ``action_space.n`` and
                ``observation_space.shape``.
            gamma: discount factor used in the learning target.
            epsilon: probability of a uniformly random action in train mode.
            replayer_capacity: number of transitions the replay buffer holds.
            batch_size: number of transitions sampled per ``learn`` call.
            learning_rate: step size passed to the Adam optimizer.
            warmup_fraction: learning starts once the buffer holds at least
                this fraction of its capacity.
        """
        self.action_n = env.action_space.n
        self.gamma = gamma
        self.epsilon = epsilon
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.warmup_fraction = warmup_fraction

        # Defined up front so step() works even if reset() was never called.
        self.mode = None
        self.trajectory = []

        self.replayer = DQNReplayer(replayer_capacity)

        self.evaluate_net = self.build_net(
            input_size=env.observation_space.shape[0],
            output_size=self.action_n)
        self.target_net = self.build_net(
            input_size=env.observation_space.shape[0],
            output_size=self.action_n)

    def build_net(self, input_size, output_size):
        """Return a ``DuelNet`` compiled with MSE loss and an Adam optimizer."""
        net = DuelNet(input_size=input_size, output_size=output_size)
        optimizer = optimizers.Adam(self.learning_rate)
        net.compile(loss=losses.mse, optimizer=optimizer)
        return net

    def reset(self, mode=None):
        """Start a new episode.

        In ``'train'`` mode the recorded trajectory is cleared and the target
        network is synchronised with the evaluation network; any other mode
        only records ``mode``.
        """
        self.mode = mode
        if self.mode == 'train':
            self.trajectory = []
            self.target_net.set_weights(self.evaluate_net.get_weights())

    def step(self, observation, reward, terminated):
        """Choose an action and, in train mode, record and learn.

        Args:
            observation: current observation (indexed with ``np.newaxis`` to
                form a batch of one).
            reward: reward received for the previous action.
            terminated: whether ``observation`` is a terminal state.

        Returns:
            The chosen action index.
        """
        training = self.mode == 'train'
        if training and np.random.rand() < self.epsilon:
            # epsilon-greedy policy in train mode
            action = np.random.randint(self.action_n)
        else:
            qs = self.evaluate_net.predict(observation[np.newaxis], verbose=0)
            action = np.argmax(qs)
        if training:
            # The trajectory is a flat list of
            # (observation, reward, terminated, action) quadruples, so the
            # last eight entries span one full transition.
            self.trajectory += [observation, reward, terminated, action]
            if len(self.trajectory) >= 8:
                state, _, _, act, next_state, reward, terminated, _ = \
                        self.trajectory[-8:]
                self.replayer.store(state, act, reward, next_state, terminated)
            if self.replayer.count >= (self.replayer.capacity
                                       * self.warmup_fraction):
                # skip first few episodes for speed
                self.learn()
        return action

    def close(self):
        """Nothing to release; kept so callers can always call close()."""
        pass

    def learn(self):
        """Fit ``evaluate_net`` on one batch sampled from the replay buffer.

        The next action is selected with ``evaluate_net`` and its value is
        read from ``target_net``. Only the entry of the taken action is
        replaced in the regression targets, so the other outputs contribute
        zero error.
        """
        # replay
        states, actions, rewards, next_states, terminateds = \
                self.replayer.sample(self.batch_size)

        # update value net
        next_eval_qs = self.evaluate_net.predict(next_states, verbose=0)
        next_actions = next_eval_qs.argmax(axis=-1)
        next_qs = self.target_net.predict(next_states, verbose=0)
        next_max_qs = next_qs[np.arange(next_qs.shape[0]), next_actions]
        # No bootstrapping from terminal states.
        us = rewards + self.gamma * next_max_qs * (1. - terminateds)
        targets = self.evaluate_net.predict(states, verbose=0)
        targets[np.arange(us.shape[0]), actions] = us
        self.evaluate_net.fit(states, targets, verbose=0)
# Build the agent from the environment's action and observation spaces.
agent = DuelDQNAgent(env)
def play_episode(env, agent, seed=None, mode=None, render=False):
    """Run one episode of ``agent`` in ``env`` and report its outcome.

    Args:
        env: environment whose ``reset(seed=...)`` returns
            ``(observation, info)`` and whose ``step(action)`` returns
            ``(observation, reward, terminated, truncated, info)``.
        agent: object providing ``reset(mode=...)``,
            ``step(observation, reward, terminated)`` and ``close()``.
        seed: seed forwarded to ``env.reset``.
        mode: mode forwarded to ``agent.reset``.
        render: when true, ``env.render()`` is called once per agent step.

    Returns:
        ``(episode_reward, elapsed_steps)``: the summed reward and the number
        of environment steps taken.
    """
    observation, _ = env.reset(seed=seed)
    reward, terminated, truncated = 0., False, False
    agent.reset(mode=mode)
    episode_reward, elapsed_steps = 0., 0
    try:
        while True:
            # The agent is also stepped on the final observation so it can
            # see the last reward and the terminated flag before the loop ends.
            action = agent.step(observation, reward, terminated)
            if render:
                env.render()
            if terminated or truncated:
                break
            observation, reward, terminated, truncated, _ = env.step(action)
            episode_reward += reward
            elapsed_steps += 1
    finally:
        # Close the agent even if the environment or the agent raised.
        agent.close()
    return episode_reward, elapsed_steps
# Train until the agent is good enough; the episode index doubles as the
# environment seed.
logging.info('==== train ====')
episode_rewards = []
episode = 0
while True:
    episode_reward, elapsed_steps = play_episode(
        env, agent, seed=episode, mode='train')
    episode_rewards.append(episode_reward)
    logging.info('train episode %d: reward = %.2f, steps = %d',
                 episode, episode_reward, elapsed_steps)
    # Stop once the mean reward of the latest (at most 10) episodes
    # exceeds -110.
    if np.mean(episode_rewards[-10:]) > -110:
        break
    episode += 1
plt.plot(episode_rewards)
# Evaluate over 100 episodes; play_episode is called with its default
# seed, mode and render arguments.
logging.info('==== test ====')
episode_rewards = []
for episode in range(100):
    episode_reward, elapsed_steps = play_episode(env, agent)
    episode_rewards += [episode_reward]
    logging.info(
        'test episode %d: reward = %.2f, steps = %d',
        episode, episode_reward, elapsed_steps,
    )
# Summarise the evaluation as mean ± standard deviation.
logging.info(
    'average episode reward = %.2f ± %.2f',
    np.mean(episode_rewards), np.std(episode_rewards),
)
22:22:07 [INFO] ==== train ==== 22:22:18 [INFO] train episode 0: reward = -200.00, steps = 200 22:22:32 [INFO] train episode 1: reward = -200.00, steps = 200 22:22:44 [INFO] train episode 2: reward = -200.00, steps = 200 22:22:59 [INFO] train episode 3: reward = -200.00, steps = 200 22:23:13 [INFO] train episode 4: reward = -200.00, steps = 200 22:23:27 [INFO] train episode 5: reward = -200.00, steps = 200 22:23:46 [INFO] train episode 6: reward = -200.00, steps = 200 22:24:08 [INFO] train episode 7: reward = -200.00, steps = 200 22:24:25 [INFO] train episode 8: reward = -200.00, steps = 200 22:24:42 [INFO] train episode 9: reward = -200.00, steps = 200 22:25:09 [INFO] train episode 10: reward = -200.00, steps = 200 22:25:26 [INFO] train episode 11: reward = -200.00, steps = 200 22:25:40 [INFO] train episode 12: reward = -200.00, steps = 200 22:25:54 [INFO] train episode 13: reward = -200.00, steps = 200 22:26:07 [INFO] train episode 14: reward = -200.00, steps = 200 22:26:20 [INFO] train episode 15: reward = -200.00, steps = 200 22:26:34 [INFO] train episode 16: reward = -200.00, steps = 200 22:26:49 [INFO] train episode 17: reward = -200.00, steps = 200 22:27:03 [INFO] train episode 18: reward = -200.00, steps = 200 22:27:16 [INFO] train episode 19: reward = -200.00, steps = 200 22:27:31 [INFO] train episode 20: reward = -200.00, steps = 200 22:27:46 [INFO] train episode 21: reward = -200.00, steps = 200 22:28:00 [INFO] train episode 22: reward = -200.00, steps = 200 22:28:15 [INFO] train episode 23: reward = -200.00, steps = 200 22:28:30 [INFO] train episode 24: reward = -200.00, steps = 200 22:28:44 [INFO] train episode 25: reward = -200.00, steps = 200 22:28:59 [INFO] train episode 26: reward = -200.00, steps = 200 22:29:13 [INFO] train episode 27: reward = -200.00, steps = 200 22:29:27 [INFO] train episode 28: reward = -200.00, steps = 200 22:29:40 [INFO] train episode 29: reward = -200.00, steps = 200 22:29:53 [INFO] train episode 30: reward = -200.00, steps 
= 200 22:30:06 [INFO] train episode 31: reward = -200.00, steps = 200 22:30:19 [INFO] train episode 32: reward = -200.00, steps = 200 22:30:37 [INFO] train episode 33: reward = -200.00, steps = 200 22:30:58 [INFO] train episode 34: reward = -200.00, steps = 200 22:31:12 [INFO] train episode 35: reward = -200.00, steps = 200 22:31:26 [INFO] train episode 36: reward = -200.00, steps = 200 22:31:41 [INFO] train episode 37: reward = -200.00, steps = 200 22:31:55 [INFO] train episode 38: reward = -200.00, steps = 200 22:32:12 [INFO] train episode 39: reward = -200.00, steps = 200 22:32:26 [INFO] train episode 40: reward = -200.00, steps = 200 22:32:40 [INFO] train episode 41: reward = -200.00, steps = 200 22:32:55 [INFO] train episode 42: reward = -200.00, steps = 200 22:33:12 [INFO] train episode 43: reward = -200.00, steps = 200 22:33:26 [INFO] train episode 44: reward = -200.00, steps = 200 22:33:40 [INFO] train episode 45: reward = -200.00, steps = 200 22:33:53 [INFO] train episode 46: reward = -200.00, steps = 200 22:34:29 [INFO] train episode 47: reward = -200.00, steps = 200 22:36:05 [INFO] train episode 48: reward = -200.00, steps = 200 22:38:02 [INFO] train episode 49: reward = -200.00, steps = 200 22:40:03 [INFO] train episode 50: reward = -200.00, steps = 200 22:42:20 [INFO] train episode 51: reward = -200.00, steps = 200 22:44:42 [INFO] train episode 52: reward = -200.00, steps = 200 22:47:03 [INFO] train episode 53: reward = -200.00, steps = 200 22:49:07 [INFO] train episode 54: reward = -200.00, steps = 200 22:51:20 [INFO] train episode 55: reward = -200.00, steps = 200 22:53:29 [INFO] train episode 56: reward = -200.00, steps = 200 22:55:46 [INFO] train episode 57: reward = -200.00, steps = 200 22:56:43 [INFO] train episode 58: reward = -87.00, steps = 87 22:58:53 [INFO] train episode 59: reward = -200.00, steps = 200 23:01:17 [INFO] train episode 60: reward = -200.00, steps = 200 23:03:43 [INFO] train episode 61: reward = -200.00, steps = 200 23:06:02 
[INFO] train episode 62: reward = -200.00, steps = 200 23:08:17 [INFO] train episode 63: reward = -200.00, steps = 200 23:10:37 [INFO] train episode 64: reward = -200.00, steps = 200 23:13:29 [INFO] train episode 65: reward = -200.00, steps = 200 23:15:04 [INFO] train episode 66: reward = -104.00, steps = 104 23:18:01 [INFO] train episode 67: reward = -200.00, steps = 200 23:19:31 [INFO] train episode 68: reward = -101.00, steps = 101 23:22:29 [INFO] train episode 69: reward = -200.00, steps = 200 23:25:31 [INFO] train episode 70: reward = -200.00, steps = 200 23:28:31 [INFO] train episode 71: reward = -200.00, steps = 200 23:31:33 [INFO] train episode 72: reward = -200.00, steps = 200 23:34:29 [INFO] train episode 73: reward = -200.00, steps = 200 23:37:26 [INFO] train episode 74: reward = -200.00, steps = 200 23:40:30 [INFO] train episode 75: reward = -200.00, steps = 200 23:43:46 [INFO] train episode 76: reward = -200.00, steps = 200 23:45:09 [INFO] train episode 77: reward = -89.00, steps = 89 23:48:31 [INFO] train episode 78: reward = -200.00, steps = 200 23:51:46 [INFO] train episode 79: reward = -200.00, steps = 200 23:54:56 [INFO] train episode 80: reward = -200.00, steps = 200 23:57:55 [INFO] train episode 81: reward = -200.00, steps = 200 00:00:52 [INFO] train episode 82: reward = -200.00, steps = 200 00:03:49 [INFO] train episode 83: reward = -200.00, steps = 200 00:06:43 [INFO] train episode 84: reward = -200.00, steps = 200 00:09:39 [INFO] train episode 85: reward = -200.00, steps = 200 00:12:35 [INFO] train episode 86: reward = -200.00, steps = 200 00:15:31 [INFO] train episode 87: reward = -200.00, steps = 200 00:18:26 [INFO] train episode 88: reward = -200.00, steps = 200 00:21:22 [INFO] train episode 89: reward = -200.00, steps = 200 00:24:18 [INFO] train episode 90: reward = -200.00, steps = 200 00:27:18 [INFO] train episode 91: reward = -200.00, steps = 200 00:30:13 [INFO] train episode 92: reward = -200.00, steps = 200 00:33:14 [INFO] train 
episode 93: reward = -200.00, steps = 200 00:36:09 [INFO] train episode 94: reward = -200.00, steps = 200 00:39:06 [INFO] train episode 95: reward = -200.00, steps = 200 00:42:05 [INFO] train episode 96: reward = -200.00, steps = 200 00:45:00 [INFO] train episode 97: reward = -200.00, steps = 200 00:47:54 [INFO] train episode 98: reward = -200.00, steps = 200 00:50:48 [INFO] train episode 99: reward = -200.00, steps = 200 00:53:43 [INFO] train episode 100: reward = -200.00, steps = 200 00:56:39 [INFO] train episode 101: reward = -200.00, steps = 200 00:59:34 [INFO] train episode 102: reward = -200.00, steps = 200 01:02:29 [INFO] train episode 103: reward = -200.00, steps = 200 01:05:23 [INFO] train episode 104: reward = -200.00, steps = 200 01:08:18 [INFO] train episode 105: reward = -200.00, steps = 200 01:11:13 [INFO] train episode 106: reward = -200.00, steps = 200 01:14:08 [INFO] train episode 107: reward = -200.00, steps = 200 01:17:03 [INFO] train episode 108: reward = -200.00, steps = 200 01:19:56 [INFO] train episode 109: reward = -200.00, steps = 200 01:22:51 [INFO] train episode 110: reward = -200.00, steps = 200 01:25:44 [INFO] train episode 111: reward = -200.00, steps = 200 01:28:42 [INFO] train episode 112: reward = -200.00, steps = 200 01:31:37 [INFO] train episode 113: reward = -200.00, steps = 200 01:34:33 [INFO] train episode 114: reward = -200.00, steps = 200 01:37:02 [INFO] train episode 115: reward = -171.00, steps = 171 01:38:50 [INFO] train episode 116: reward = -122.00, steps = 122 01:41:46 [INFO] train episode 117: reward = -200.00, steps = 200 01:44:34 [INFO] train episode 118: reward = -191.00, steps = 191 01:47:05 [INFO] train episode 119: reward = -173.00, steps = 173 01:49:59 [INFO] train episode 120: reward = -200.00, steps = 200 01:52:54 [INFO] train episode 121: reward = -200.00, steps = 200 01:55:48 [INFO] train episode 122: reward = -200.00, steps = 200 01:58:35 [INFO] train episode 123: reward = -190.00, steps = 190 02:01:23 
[INFO] train episode 124: reward = -192.00, steps = 192 02:04:16 [INFO] train episode 125: reward = -200.00, steps = 200 02:07:10 [INFO] train episode 126: reward = -200.00, steps = 200 02:10:05 [INFO] train episode 127: reward = -200.00, steps = 200 02:13:00 [INFO] train episode 128: reward = -200.00, steps = 200 02:15:54 [INFO] train episode 129: reward = -200.00, steps = 200 02:18:47 [INFO] train episode 130: reward = -200.00, steps = 200 02:21:41 [INFO] train episode 131: reward = -200.00, steps = 200 02:24:34 [INFO] train episode 132: reward = -200.00, steps = 200 02:27:30 [INFO] train episode 133: reward = -200.00, steps = 200 02:29:13 [INFO] train episode 134: reward = -117.00, steps = 117 02:30:45 [INFO] train episode 135: reward = -105.00, steps = 105 02:33:38 [INFO] train episode 136: reward = -200.00, steps = 200 02:35:18 [INFO] train episode 137: reward = -114.00, steps = 114 02:38:13 [INFO] train episode 138: reward = -200.00, steps = 200 02:39:58 [INFO] train episode 139: reward = -118.00, steps = 118 02:42:56 [INFO] train episode 140: reward = -200.00, steps = 200 02:44:47 [INFO] train episode 141: reward = -127.00, steps = 127 02:47:04 [INFO] train episode 142: reward = -157.00, steps = 157 02:48:44 [INFO] train episode 143: reward = -114.00, steps = 114 02:51:27 [INFO] train episode 144: reward = -185.00, steps = 185 02:53:04 [INFO] train episode 145: reward = -110.00, steps = 110 02:54:37 [INFO] train episode 146: reward = -106.00, steps = 106 02:56:13 [INFO] train episode 147: reward = -108.00, steps = 108 02:57:28 [INFO] train episode 148: reward = -96.00, steps = 96 02:58:37 [INFO] train episode 149: reward = -87.00, steps = 87 03:00:33 [INFO] train episode 150: reward = -150.00, steps = 150 03:02:10 [INFO] train episode 151: reward = -122.00, steps = 122 03:04:41 [INFO] train episode 152: reward = -194.00, steps = 194 03:06:02 [INFO] train episode 153: reward = -104.00, steps = 104 03:07:34 [INFO] train episode 154: reward = -116.00, steps = 
116 03:10:09 [INFO] train episode 155: reward = -200.00, steps = 200 03:11:22 [INFO] train episode 156: reward = -93.00, steps = 93 03:13:18 [INFO] train episode 157: reward = -149.00, steps = 149 03:15:12 [INFO] train episode 158: reward = -147.00, steps = 147 03:17:47 [INFO] train episode 159: reward = -200.00, steps = 200 03:20:21 [INFO] train episode 160: reward = -200.00, steps = 200 03:21:27 [INFO] train episode 161: reward = -86.00, steps = 86 03:23:28 [INFO] train episode 162: reward = -161.00, steps = 161 03:24:35 [INFO] train episode 163: reward = -87.00, steps = 87 03:27:08 [INFO] train episode 164: reward = -200.00, steps = 200 03:29:14 [INFO] train episode 165: reward = -166.00, steps = 166 03:31:46 [INFO] train episode 166: reward = -200.00, steps = 200 03:32:53 [INFO] train episode 167: reward = -88.00, steps = 88 03:34:09 [INFO] train episode 168: reward = -100.00, steps = 100 03:36:39 [INFO] train episode 169: reward = -200.00, steps = 200 03:39:11 [INFO] train episode 170: reward = -200.00, steps = 200 03:41:41 [INFO] train episode 171: reward = -200.00, steps = 200 03:42:44 [INFO] train episode 172: reward = -83.00, steps = 83 03:44:30 [INFO] train episode 173: reward = -140.00, steps = 140 03:45:38 [INFO] train episode 174: reward = -90.00, steps = 90 03:47:33 [INFO] train episode 175: reward = -151.00, steps = 151 03:49:37 [INFO] train episode 176: reward = -163.00, steps = 163 03:50:42 [INFO] train episode 177: reward = -85.00, steps = 85 03:52:21 [INFO] train episode 178: reward = -131.00, steps = 131 03:53:45 [INFO] train episode 179: reward = -111.00, steps = 111 03:55:13 [INFO] train episode 180: reward = -115.00, steps = 115 03:56:49 [INFO] train episode 181: reward = -127.00, steps = 127 03:58:33 [INFO] train episode 182: reward = -136.00, steps = 136 04:00:03 [INFO] train episode 183: reward = -120.00, steps = 120 04:01:36 [INFO] train episode 184: reward = -123.00, steps = 123 04:03:08 [INFO] train episode 185: reward = -121.00, steps 
= 121 04:04:33 [INFO] train episode 186: reward = -113.00, steps = 113 04:06:02 [INFO] train episode 187: reward = -119.00, steps = 119 04:07:31 [INFO] train episode 188: reward = -118.00, steps = 118 04:08:58 [INFO] train episode 189: reward = -115.00, steps = 115 04:10:42 [INFO] train episode 190: reward = -124.00, steps = 124 04:12:09 [INFO] train episode 191: reward = -115.00, steps = 115 04:13:37 [INFO] train episode 192: reward = -116.00, steps = 116 04:15:06 [INFO] train episode 193: reward = -118.00, steps = 118 04:16:33 [INFO] train episode 194: reward = -115.00, steps = 115 04:17:55 [INFO] train episode 195: reward = -109.00, steps = 109 04:19:17 [INFO] train episode 196: reward = -111.00, steps = 111 04:20:35 [INFO] train episode 197: reward = -116.00, steps = 116 04:21:32 [INFO] train episode 198: reward = -84.00, steps = 84 04:23:29 [INFO] train episode 199: reward = -174.00, steps = 174 04:24:48 [INFO] train episode 200: reward = -119.00, steps = 119 04:26:01 [INFO] train episode 201: reward = -108.00, steps = 108 04:27:04 [INFO] train episode 202: reward = -92.00, steps = 92 04:28:14 [INFO] train episode 203: reward = -105.00, steps = 105 04:29:27 [INFO] train episode 204: reward = -109.00, steps = 109 04:30:36 [INFO] train episode 205: reward = -102.00, steps = 102 04:31:33 [INFO] train episode 206: reward = -85.00, steps = 85 04:31:33 [INFO] ==== test ==== 04:31:44 [INFO] test episode 0: reward = -106.00, steps = 106 04:31:56 [INFO] test episode 1: reward = -106.00, steps = 106 04:32:07 [INFO] test episode 2: reward = -106.00, steps = 106 04:32:18 [INFO] test episode 3: reward = -106.00, steps = 106 04:32:28 [INFO] test episode 4: reward = -88.00, steps = 88 04:32:40 [INFO] test episode 5: reward = -105.00, steps = 105 04:32:53 [INFO] test episode 6: reward = -128.00, steps = 128 04:33:03 [INFO] test episode 7: reward = -90.00, steps = 90 04:33:14 [INFO] test episode 8: reward = -106.00, steps = 106 04:33:26 [INFO] test episode 9: reward = -104.00, 
steps = 104 04:33:40 [INFO] test episode 10: reward = -138.00, steps = 138 04:33:50 [INFO] test episode 11: reward = -87.00, steps = 87 04:34:02 [INFO] test episode 12: reward = -106.00, steps = 106 04:34:13 [INFO] test episode 13: reward = -106.00, steps = 106 04:34:24 [INFO] test episode 14: reward = -106.00, steps = 106 04:34:36 [INFO] test episode 15: reward = -105.00, steps = 105 04:34:47 [INFO] test episode 16: reward = -104.00, steps = 104 04:34:56 [INFO] test episode 17: reward = -91.00, steps = 91 04:35:08 [INFO] test episode 18: reward = -103.00, steps = 103 04:35:19 [INFO] test episode 19: reward = -107.00, steps = 107 04:35:30 [INFO] test episode 20: reward = -106.00, steps = 106 04:35:42 [INFO] test episode 21: reward = -106.00, steps = 106 04:35:53 [INFO] test episode 22: reward = -106.00, steps = 106 04:36:04 [INFO] test episode 23: reward = -106.00, steps = 106 04:36:16 [INFO] test episode 24: reward = -106.00, steps = 106 04:36:26 [INFO] test episode 25: reward = -91.00, steps = 91 04:36:38 [INFO] test episode 26: reward = -106.00, steps = 106 04:36:49 [INFO] test episode 27: reward = -106.00, steps = 106 04:36:59 [INFO] test episode 28: reward = -105.00, steps = 105 04:37:13 [INFO] test episode 29: reward = -135.00, steps = 135 04:37:24 [INFO] test episode 30: reward = -107.00, steps = 107 04:37:35 [INFO] test episode 31: reward = -105.00, steps = 105 04:37:46 [INFO] test episode 32: reward = -106.00, steps = 106 04:37:59 [INFO] test episode 33: reward = -128.00, steps = 128 04:38:10 [INFO] test episode 34: reward = -105.00, steps = 105 04:38:21 [INFO] test episode 35: reward = -107.00, steps = 107 04:38:32 [INFO] test episode 36: reward = -107.00, steps = 107 04:38:43 [INFO] test episode 37: reward = -105.00, steps = 105 04:38:53 [INFO] test episode 38: reward = -104.00, steps = 104 04:39:02 [INFO] test episode 39: reward = -88.00, steps = 88 04:39:13 [INFO] test episode 40: reward = -106.00, steps = 106 04:39:24 [INFO] test episode 41: reward = 
-104.00, steps = 104 04:39:35 [INFO] test episode 42: reward = -106.00, steps = 106 04:39:46 [INFO] test episode 43: reward = -105.00, steps = 105 04:39:57 [INFO] test episode 44: reward = -105.00, steps = 105 04:40:08 [INFO] test episode 45: reward = -105.00, steps = 105 04:40:18 [INFO] test episode 46: reward = -104.00, steps = 104 04:40:29 [INFO] test episode 47: reward = -105.00, steps = 105 04:40:40 [INFO] test episode 48: reward = -106.00, steps = 106 04:40:51 [INFO] test episode 49: reward = -104.00, steps = 104 04:41:01 [INFO] test episode 50: reward = -105.00, steps = 105 04:41:12 [INFO] test episode 51: reward = -106.00, steps = 106 04:41:23 [INFO] test episode 52: reward = -106.00, steps = 106 04:41:34 [INFO] test episode 53: reward = -106.00, steps = 106 04:41:45 [INFO] test episode 54: reward = -106.00, steps = 106 04:41:58 [INFO] test episode 55: reward = -128.00, steps = 128 04:42:09 [INFO] test episode 56: reward = -105.00, steps = 105 04:42:20 [INFO] test episode 57: reward = -105.00, steps = 105 04:42:31 [INFO] test episode 58: reward = -104.00, steps = 104 04:42:42 [INFO] test episode 59: reward = -104.00, steps = 104 04:42:55 [INFO] test episode 60: reward = -128.00, steps = 128 04:43:04 [INFO] test episode 61: reward = -88.00, steps = 88 04:43:15 [INFO] test episode 62: reward = -106.00, steps = 106 04:43:25 [INFO] test episode 63: reward = -87.00, steps = 87 04:43:36 [INFO] test episode 64: reward = -107.00, steps = 107 04:43:46 [INFO] test episode 65: reward = -105.00, steps = 105 04:43:57 [INFO] test episode 66: reward = -106.00, steps = 106 04:44:06 [INFO] test episode 67: reward = -88.00, steps = 88 04:44:17 [INFO] test episode 68: reward = -104.00, steps = 104 04:44:28 [INFO] test episode 69: reward = -105.00, steps = 105 04:44:42 [INFO] test episode 70: reward = -133.00, steps = 133 04:44:51 [INFO] test episode 71: reward = -92.00, steps = 92 04:45:02 [INFO] test episode 72: reward = -107.00, steps = 107 04:45:13 [INFO] test episode 73: 
reward = -107.00, steps = 107 04:45:24 [INFO] test episode 74: reward = -105.00, steps = 105 04:45:35 [INFO] test episode 75: reward = -105.00, steps = 105 04:45:46 [INFO] test episode 76: reward = -107.00, steps = 107 04:45:55 [INFO] test episode 77: reward = -87.00, steps = 87 04:46:06 [INFO] test episode 78: reward = -105.00, steps = 105 04:46:17 [INFO] test episode 79: reward = -106.00, steps = 106 04:46:28 [INFO] test episode 80: reward = -106.00, steps = 106 04:46:39 [INFO] test episode 81: reward = -105.00, steps = 105 04:46:50 [INFO] test episode 82: reward = -106.00, steps = 106 04:47:01 [INFO] test episode 83: reward = -104.00, steps = 104 04:47:10 [INFO] test episode 84: reward = -90.00, steps = 90 04:47:21 [INFO] test episode 85: reward = -107.00, steps = 107 04:47:32 [INFO] test episode 86: reward = -107.00, steps = 107 04:47:43 [INFO] test episode 87: reward = -103.00, steps = 103 04:47:54 [INFO] test episode 88: reward = -106.00, steps = 106 04:48:05 [INFO] test episode 89: reward = -106.00, steps = 106 04:48:18 [INFO] test episode 90: reward = -127.00, steps = 127 04:48:27 [INFO] test episode 91: reward = -88.00, steps = 88 04:48:40 [INFO] test episode 92: reward = -127.00, steps = 127 04:48:50 [INFO] test episode 93: reward = -88.00, steps = 88 04:49:00 [INFO] test episode 94: reward = -106.00, steps = 106 04:49:11 [INFO] test episode 95: reward = -105.00, steps = 105 04:49:22 [INFO] test episode 96: reward = -104.00, steps = 104 04:49:33 [INFO] test episode 97: reward = -107.00, steps = 107 04:49:42 [INFO] test episode 98: reward = -88.00, steps = 88 04:49:53 [INFO] test episode 99: reward = -105.00, steps = 105 04:49:53 [INFO] average episode reward = -105.21 ± 9.98
# Close the environment now that training and testing have finished.
env.close()