Skip to content

Commit 963f7b9

Browse files
re-add unkept changes
1 parent 3009035 commit 963f7b9

File tree

3 files changed

+30
-15
lines changed

3 files changed

+30
-15
lines changed

rl/comparing_epsilons.py

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,10 @@
11
# https://deeplearningcourses.com/c/artificial-intelligence-reinforcement-learning-in-python
22
# https://www.udemy.com/artificial-intelligence-reinforcement-learning-in-python
3+
from __future__ import print_function, division
4+
from builtins import range
5+
# Note: you may need to update your version of future
6+
# sudo pip install -U future
7+
38
import numpy as np
49
import matplotlib.pyplot as plt
510

@@ -23,7 +28,7 @@ def run_experiment(m1, m2, m3, eps, N):
2328

2429
data = np.empty(N)
2530

26-
for i in xrange(N):
31+
for i in range(N):
2732
# epsilon greedy
2833
p = np.random.random()
2934
if p < eps:
@@ -46,7 +51,7 @@ def run_experiment(m1, m2, m3, eps, N):
4651
plt.show()
4752

4853
for b in bandits:
49-
print b.mean
54+
print(b.mean)
5055

5156
return cumulative_average
5257

rl/comparing_explore_exploit_methods.py

Lines changed: 16 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -1,39 +1,44 @@
11
# https://deeplearningcourses.com/c/artificial-intelligence-reinforcement-learning-in-python
22
# https://www.udemy.com/artificial-intelligence-reinforcement-learning-in-python
3+
from __future__ import print_function, division
4+
from builtins import range
5+
# Note: you may need to update your version of future
6+
# sudo pip install -U future
7+
38
import numpy as np
49
import matplotlib.pyplot as plt
510
from comparing_epsilons import Bandit
611
from optimistic_initial_values import run_experiment as run_experiment_oiv
712
from ucb1 import run_experiment as run_experiment_ucb
813

914
class BayesianBandit:
10-
def __init__(self, m):
11-
self.m = m
15+
def __init__(self, true_mean):
16+
self.true_mean = true_mean
1217
# parameters for mu - prior is N(0,1)
13-
self.m0 = 0
14-
self.lambda0 = 1
18+
self.predicted_mean = 0
19+
self.lambda_ = 1
1520
self.sum_x = 0 # for convenience
1621
self.tau = 1
1722

1823
def pull(self):
19-
return np.random.randn() + self.m
24+
return np.random.randn() + self.true_mean
2025

2126
def sample(self):
22-
return np.random.randn() / np.sqrt(self.lambda0) + self.m0
27+
return np.random.randn() / np.sqrt(self.lambda_) + self.predicted_mean
2328

2429
def update(self, x):
2530
# assume tau is 1
26-
self.lambda0 += 1
31+
self.lambda_ += 1
2732
self.sum_x += x
28-
self.m0 = self.tau*self.sum_x / self.lambda0
33+
self.predicted_mean = self.tau*self.sum_x / self.lambda_
2934

3035

3136
def run_experiment_decaying_epsilon(m1, m2, m3, N):
3237
bandits = [Bandit(m1), Bandit(m2), Bandit(m3)]
3338

3439
data = np.empty(N)
3540

36-
for i in xrange(N):
41+
for i in range(N):
3742
# epsilon greedy
3843
p = np.random.random()
3944
if p < 1.0/(i+1):
@@ -56,7 +61,7 @@ def run_experiment_decaying_epsilon(m1, m2, m3, N):
5661
plt.show()
5762

5863
for b in bandits:
59-
print b.mean
64+
print(b.mean)
6065

6166
return cumulative_average
6267

@@ -66,7 +71,7 @@ def run_experiment(m1, m2, m3, N):
6671

6772
data = np.empty(N)
6873

69-
for i in xrange(N):
74+
for i in range(N):
7075
# optimistic initial values
7176
j = np.argmax([b.sample() for b in bandits])
7277
x = bandits[j].pull()

rl/optimistic_initial_values.py

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,10 @@
11
# https://deeplearningcourses.com/c/artificial-intelligence-reinforcement-learning-in-python
22
# https://www.udemy.com/artificial-intelligence-reinforcement-learning-in-python
3+
from __future__ import print_function, division
4+
from builtins import range
5+
# Note: you may need to update your version of future
6+
# sudo pip install -U future
7+
38
import numpy as np
49
import matplotlib.pyplot as plt
510
from comparing_epsilons import run_experiment as run_experiment_eps
@@ -24,7 +29,7 @@ def run_experiment(m1, m2, m3, N, upper_limit=10):
2429

2530
data = np.empty(N)
2631

27-
for i in xrange(N):
32+
for i in range(N):
2833
# optimistic initial values
2934
j = np.argmax([b.mean for b in bandits])
3035
x = bandits[j].pull()
@@ -43,7 +48,7 @@ def run_experiment(m1, m2, m3, N, upper_limit=10):
4348
plt.show()
4449

4550
for b in bandits:
46-
print b.mean
51+
print(b.mean)
4752

4853
return cumulative_average
4954

0 commit comments

Comments
 (0)