 from sklearn.kernel_approximation import RBFSampler
 from sklearn.linear_model import SGDRegressor
 
-gym_minor_version = int(gym.__version__.split('.')[1])
-if gym_minor_version >= 19:
-  exit("Please install OpenAI Gym 0.19.0 or earlier")
-
 
 # SGDRegressor defaults:
 # loss='squared_loss', penalty='l2', alpha=0.0001,
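Note: the guard removed above refused to run on Gym 0.19+. The hunks below adapt the script to the newer Gym/Gymnasium API instead, whose `reset()` and `step()` signatures differ from the classic ones. A minimal sketch of those signatures (assuming `gym >= 0.26` and an arbitrary `CartPole-v1` environment, not anything from this script):

```python
import gym  # gym >= 0.26 (or gymnasium) assumed

env = gym.make("CartPole-v1")

# reset() now returns an (observation, info) pair instead of just the observation
observation, info = env.reset()

# step() now returns a 5-tuple instead of the old (obs, reward, done, info)
observation, reward, terminated, truncated, info = env.step(env.action_space.sample())
done = terminated or truncated  # common way to recover the old single "done" flag
```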
@@ -74,7 +70,7 @@ def __init__(self, env, feature_transformer, learning_rate):
     self.feature_transformer = feature_transformer
     for i in range(env.action_space.n):
       model = SGDRegressor(learning_rate=learning_rate)
-      model.partial_fit(feature_transformer.transform( [env.reset()] ), [0])
+      model.partial_fit(feature_transformer.transform( [env.reset()[0]] ), [0])
       self.models.append(model)
 
   def predict(self, s):
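Under the newer API `env.reset()` returns an `(observation, info)` pair, so indexing `[0]` keeps only the observation that gets featurized for the `partial_fit` warm-up. A minimal standalone sketch of that warm-up, using a bare `RBFSampler` in place of the script's `feature_transformer` (the names below are illustrative, not this file's objects):

```python
import gym
from sklearn.kernel_approximation import RBFSampler
from sklearn.linear_model import SGDRegressor

env = gym.make("CartPole-v1")        # stand-in environment, gym >= 0.26 assumed
obs, info = env.reset()              # reset() now returns (observation, info)

featurizer = RBFSampler(gamma=1.0, n_components=100)
featurizer.fit([obs])                # fit on a single state just to fix the output dimension

model = SGDRegressor(learning_rate="constant")
model.partial_fit(featurizer.transform([obs]), [0])  # warm-up so predict() works before any real update
```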
@@ -103,14 +99,14 @@ def sample_action(self, s, eps):
 
 # returns a list of states_and_rewards, and the total reward
 def play_one(model, env, eps, gamma):
-  observation = env.reset()
+  observation = env.reset()[0]
   done = False
   totalreward = 0
   iters = 0
   while not done and iters < 10000:
     action = model.sample_action(observation, eps)
     prev_observation = observation
-    observation, reward, done, info = env.step(action)
+    observation, reward, done, truncated, info = env.step(action)
 
     # update the model
     if done:
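Under the newer API `env.step()` returns `(obs, reward, terminated, truncated, info)`; the diff binds the third element to `done` and leaves `truncated` unused, relying on the `iters < 10000` cap to end long episodes. A minimal standalone loop that folds both flags into one `done` (the random policy and `CartPole-v1` are stand-ins, not this script's model or environment):

```python
import gym  # gym >= 0.26 (or gymnasium) assumed

env = gym.make("CartPole-v1")
observation, info = env.reset()
done = False
totalreward = 0
while not done:
    action = env.action_space.sample()  # random action in place of model.sample_action(...)
    observation, reward, terminated, truncated, info = env.step(action)
    done = terminated or truncated      # end on a terminal state or a time-limit truncation
    totalreward += reward
```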