Skip to content

Commit 963f7b9

Browse files
re-add unkept changes
1 parent 3009035 commit 963f7b9

File tree

3 files changed

+30
-15
lines changed

3 files changed

+30
-15
lines changed

rl/comparing_epsilons.py

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,10 @@
11
# https://deeplearningcourses.com/c/artificial-intelligence-reinforcement-learning-in-python
22
# https://www.udemy.com/artificial-intelligence-reinforcement-learning-in-python
3+
from __future__ import print_function, division
4+
from builtins import range
5+
# Note: you may need to update your version of future
6+
# sudo pip install -U future
7+
38
import numpy as np
49
import matplotlib.pyplot as plt
510

@@ -23,7 +28,7 @@ def run_experiment(m1, m2, m3, eps, N):
2328

2429
data = np.empty(N)
2530

26-
for i in xrange(N):
31+
for i in range(N):
2732
# epsilon greedy
2833
p = np.random.random()
2934
if p < eps:
@@ -46,7 +51,7 @@ def run_experiment(m1, m2, m3, eps, N):
4651
plt.show()
4752

4853
for b in bandits:
49-
print b.mean
54+
print(b.mean)
5055

5156
return cumulative_average
5257

rl/comparing_explore_exploit_methods.py

Lines changed: 16 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -1,39 +1,44 @@
11
# https://deeplearningcourses.com/c/artificial-intelligence-reinforcement-learning-in-python
22
# https://www.udemy.com/artificial-intelligence-reinforcement-learning-in-python
3+
from __future__ import print_function, division
4+
from builtins import range
5+
# Note: you may need to update your version of future
6+
# sudo pip install -U future
7+
38
import numpy as np
49
import matplotlib.pyplot as plt
510
from comparing_epsilons import Bandit
611
from optimistic_initial_values import run_experiment as run_experiment_oiv
712
from ucb1 import run_experiment as run_experiment_ucb
813

914
class BayesianBandit:
10-
def __init__(self, m):
11-
self.m = m
15+
def __init__(self, true_mean):
16+
self.true_mean = true_mean
1217
# parameters for mu - prior is N(0,1)
13-
self.m0 = 0
14-
self.lambda0 = 1
18+
self.predicted_mean = 0
19+
self.lambda_ = 1
1520
self.sum_x = 0 # for convenience
1621
self.tau = 1
1722

1823
def pull(self):
19-
return np.random.randn() + self.m
24+
return np.random.randn() + self.true_mean
2025

2126
def sample(self):
22-
return np.random.randn() / np.sqrt(self.lambda0) + self.m0
27+
return np.random.randn() / np.sqrt(self.lambda_) + self.predicted_mean
2328

2429
def update(self, x):
2530
# assume tau is 1
26-
self.lambda0 += 1
31+
self.lambda_ += 1
2732
self.sum_x += x
28-
self.m0 = self.tau*self.sum_x / self.lambda0
33+
self.predicted_mean = self.tau*self.sum_x / self.lambda_
2934

3035

3136
def run_experiment_decaying_epsilon(m1, m2, m3, N):
3237
bandits = [Bandit(m1), Bandit(m2), Bandit(m3)]
3338

3439
data = np.empty(N)
3540

36-
for i in xrange(N):
41+
for i in range(N):
3742
# epsilon greedy
3843
p = np.random.random()
3944
if p < 1.0/(i+1):
@@ -56,7 +61,7 @@ def run_experiment_decaying_epsilon(m1, m2, m3, N):
5661
plt.show()
5762

5863
for b in bandits:
59-
print b.mean
64+
print(b.mean)
6065

6166
return cumulative_average
6267

@@ -66,7 +71,7 @@ def run_experiment(m1, m2, m3, N):
6671

6772
data = np.empty(N)
6873

69-
for i in xrange(N):
74+
for i in range(N):
7075
# optimistic initial values
7176
j = np.argmax([b.sample() for b in bandits])
7277
x = bandits[j].pull()

rl/optimistic_initial_values.py

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,10 @@
11
# https://deeplearningcourses.com/c/artificial-intelligence-reinforcement-learning-in-python
22
# https://www.udemy.com/artificial-intelligence-reinforcement-learning-in-python
3+
from __future__ import print_function, division
4+
from builtins import range
5+
# Note: you may need to update your version of future
6+
# sudo pip install -U future
7+
38
import numpy as np
49
import matplotlib.pyplot as plt
510
from comparing_epsilons import run_experiment as run_experiment_eps
@@ -24,7 +29,7 @@ def run_experiment(m1, m2, m3, N, upper_limit=10):
2429

2530
data = np.empty(N)
2631

27-
for i in xrange(N):
32+
for i in range(N):
2833
# optimistic initial values
2934
j = np.argmax([b.mean for b in bandits])
3035
x = bandits[j].pull()
@@ -43,7 +48,7 @@ def run_experiment(m1, m2, m3, N, upper_limit=10):
4348
plt.show()
4449

4550
for b in bandits:
46-
print b.mean
51+
print(b.mean)
4752

4853
return cumulative_average
4954

0 commit comments

Comments
 (0)