File tree Expand file tree Collapse file tree 1 file changed +9
-2
lines changed Expand file tree Collapse file tree 1 file changed +9
-2
lines changed Original file line number Diff line number Diff line change @@ -36,15 +36,22 @@ def play_game(grid, policy):
36
36
# but r(t) results from taking action a(t-1) from s(t-1) and landing in s(t)
37
37
states_actions_rewards = [(s , a , 0 )]
38
38
seen_states = set ()
39
+ seen_states .add (grid .current_state ())
40
+ num_steps = 0
39
41
while True :
40
- old_s = grid .current_state ()
41
42
r = grid .move (a )
43
+ num_steps += 1
42
44
s = grid .current_state ()
43
45
44
46
if s in seen_states :
45
47
# hack so that we don't end up in an infinitely long episode
46
48
# bumping into the wall repeatedly
47
- states_actions_rewards .append ((s , None , - 100 ))
49
+ # if num_steps == 1 -> bumped into a wall and haven't moved anywhere
50
+ # reward = -10
51
+ # else:
52
+ # reward = falls off by 1 / num_steps
53
+ reward = - 10. / num_steps
54
+ states_actions_rewards .append ((s , None , reward ))
48
55
break
49
56
elif grid .game_over ():
50
57
states_actions_rewards .append ((s , None , r ))
You can’t perform that action at this time.
0 commit comments