Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 94c4328

Browse files
committed Aug 4, 2018
tune monte carlo es loop penalty:
1 parent 3a07285 commit 94c4328

File tree

1 file changed

+9
-2
lines changed

1 file changed

+9
-2
lines changed
 

‎rl/monte_carlo_es.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,15 +36,22 @@ def play_game(grid, policy):
3636
# but r(t) results from taking action a(t-1) from s(t-1) and landing in s(t)
3737
states_actions_rewards = [(s, a, 0)]
3838
seen_states = set()
39+
seen_states.add(grid.current_state())
40+
num_steps = 0
3941
while True:
40-
old_s = grid.current_state()
4142
r = grid.move(a)
43+
num_steps += 1
4244
s = grid.current_state()
4345

4446
if s in seen_states:
4547
# hack so that we don't end up in an infinitely long episode
4648
# bumping into the wall repeatedly
47-
states_actions_rewards.append((s, None, -100))
49+
# if num_steps == 1 -> bumped into a wall and haven't moved anywhere
50+
# reward = -10
51+
# else:
52+
# reward = falls off by 1 / num_steps
53+
reward = -10. / num_steps
54+
states_actions_rewards.append((s, None, reward))
4855
break
4956
elif grid.game_over():
5057
states_actions_rewards.append((s, None, r))

0 commit comments

Comments
 (0)
Please sign in to comment.