Merge pull request #2 from jorgelamb/agentView

karpathy · karpathy · commit 0b9315a69c55 · 2015-09-20T12:07:39.000-07:00
add "agent view", "human player" and "stats"
diff --git a/waterworld.html b/waterworld.html
@@ -68,9 +68,11 @@
   
   <script type="application/javascript">
     var canvas, ctx;
+    var agentView = false;
+    var humanControls = false;
     
     // Draw everything
-    function draw() {  
+    function draw() {
       ctx.clearRect(0, 0, canvas.width, canvas.height);
       ctx.lineWidth = 1;
       var agents = w.agents;
@@ -84,7 +86,7 @@
         ctx.lineTo(q.p2.x, q.p2.y);
       }
       ctx.stroke();
-  
+
       // draw agents
       // color agent based on reward it is experiencing at the moment
       var r = 0;
@@ -95,15 +97,15 @@
         
         // draw agents body
         ctx.beginPath();
-        ctx.arc(a.op.x, a.op.y, a.rad, 0, Math.PI*2, true); 
+        ctx.arc(a.op.x, a.op.y, a.rad, 0, Math.PI*2, true);
         ctx.fill();
         ctx.stroke();
         
         // draw agents sight
         for(var ei=0,ne=a.eyes.length;ei<ne;ei++) {
           var e = a.eyes[ei];
           var sr = e.sensed_proximity;
-          if(e.sensed_type === -1 || e.sensed_type === 0) { 
+          if(e.sensed_type === -1 || e.sensed_type === 0) {
             ctx.strokeStyle = "rgb(200,200,200)"; // wall or nothing
           }
           if(e.sensed_type === 1) { ctx.strokeStyle = "rgb(255,150,150)"; } // apples
@@ -118,20 +120,22 @@
       
       // draw items
       ctx.strokeStyle = "rgb(0,0,0)";
-      for(var i=0,n=w.items.length;i<n;i++) {
-        var it = w.items[i];
-        if(it.type === 1) ctx.fillStyle = "rgb(255, 150, 150)";
-        if(it.type === 2) ctx.fillStyle = "rgb(150, 255, 150)";
-        ctx.beginPath();
-        ctx.arc(it.p.x, it.p.y, it.rad, 0, Math.PI*2, true); 
-        ctx.fill();
-        ctx.stroke();
+      if(!agentView) {
+        for(var i=0,n=w.items.length;i<n;i++) {
+          var it = w.items[i];
+          if(it.type === 1) ctx.fillStyle = "rgb(255, 150, 150)";
+          if(it.type === 2) ctx.fillStyle = "rgb(150, 255, 150)";
+          ctx.beginPath();
+          ctx.arc(it.p.x, it.p.y, it.rad, 0, Math.PI*2, true);
+          ctx.fill();
+          ctx.stroke();
+        }
       }
     }
 
     // Tick the world
-    var smooth_reward_history = [];
-    var smooth_reward = null;
+    var smooth_reward_history = []; // one array of recorded smooth_reward samples per agent
+    var smooth_reward = []; // exponentially smoothed last_reward, one entry per agent
     var flott = 0;
     function tick() {
 
@@ -143,17 +147,22 @@
         w.tick();
       }
       draw();
+      updateStats();
       
-      var rew = w.agents[0].last_reward;
-      if(smooth_reward == null) { smooth_reward = rew; }
-      smooth_reward = smooth_reward * 0.999 + rew * 0.001;
       flott += 1;
-      if(flott === 50) {
-        // record smooth reward
-        if(smooth_reward_history.length >= nflot) {
-          smooth_reward_history = smooth_reward_history.slice(1);
+      for(i=0; i<w.agents.length; i++) {
+        var rew = w.agents[i].last_reward;
+        if(!smooth_reward[i]) { smooth_reward[i] = 0; }
+        smooth_reward[i] = smooth_reward[i] * 0.999 + rew * 0.001;
+        if(flott === 50) {
+          // record smooth reward
+          if(smooth_reward_history[i].length >= nflot) {
+            smooth_reward_history[i] = smooth_reward_history[i].slice(1);
+          }
+          smooth_reward_history[i].push(smooth_reward[i]);
         }
-        smooth_reward_history.push(smooth_reward);
+      }
+      if(flott === 50) {
         flott = 0;
       }
 
@@ -170,10 +179,14 @@
     var nflot = 1000;
     function initFlot() {
       var container = $("#flotreward");
-      var res = getFlotRewards();
+      var res = getFlotRewards(0);
+      var res1 = getFlotRewards(1);
       series = [{
         data: res,
         lines: {fill: true}
+      }, {
+        data: res1,
+        lines: {fill: true}
       }];
       var plot = $.plot(container, series, {
         grid: {
@@ -199,16 +212,21 @@
         }
       });
       setInterval(function(){
-        series[0].data = getFlotRewards();
+        for(var i=0; i<w.agents.length; i++) {
+          series[i].data = getFlotRewards(i);
+        }
         plot.setData(series);
         plot.draw();
       }, 100);
     }
-    function getFlotRewards() {
+    function getFlotRewards(agentId) {
       // zip rewards into flot data
       var res = [];
-      for(var i=0,n=smooth_reward_history.length;i<n;i++) {
-        res.push([i, smooth_reward_history[i]]);
+      if(agentId >= w.agents.length || !smooth_reward_history[agentId]) {
+        return res; // no such agent, or no history array for it: return an empty data set
+      }
+      for(var i=0,n=smooth_reward_history[agentId].length;i<n;i++) {
+        res.push([i, smooth_reward_history[agentId][i]]);
       }
       return res;
     }
@@ -264,6 +282,51 @@
       });
     }
 
+    // Flip the "agent view" flag; while it is set, draw() skips the item-rendering loop.
+    // Wired to the "Toggle Agent View" button.
+    function toggleAgentView() { agentView = !agentView; }
+
+    // Keyboard control: pressing an arrow key enables the human-controlled agent and
+    // stores the action index that agent's forward() picks up next.
+    var lastKey = null; // keyCode of the most recent keydown
+    document.onkeydown = function(e) {
+      // Some old browsers pass no argument and expose the event as window.event instead.
+      var event = window.event ? window.event : e;
+      lastKey = event.keyCode;
+      if(lastKey === 37 || lastKey === 38 || lastKey === 39 || lastKey === 40) {
+        // enableHuman() only adds the agent on its first call.
+        enableHuman();
+        // Suppress the browser's default handling of the key. Use the resolved event
+        // object (`e` can be undefined on the window.event path) and fall back to
+        // returnValue where preventDefault does not exist.
+        if(event.preventDefault) { event.preventDefault(); } else { event.returnValue = false; }
+        // keyCodes 37, 39, 38, 40 are the left, right, up and down arrows.
+        if(lastKey === 37) { humanAction = 0; }
+        if(lastKey === 39) { humanAction = 1; }
+        if(lastKey === 38) { humanAction = 2; }
+        if(lastKey === 40) { humanAction = 3; }
+      }
+    };
+
+    var humanAction = -1; // action index queued by the keyboard handler; -1 when nothing is queued
+    // Adds one keyboard-driven agent to the world. Only the first call has any effect;
+    // later calls return immediately because humanControls is already set.
+    function enableHuman() {
+      if(humanControls) { return; }
+      humanControls = true;
+      var human = new Agent();
+      // Override forward(): take the queued action, then clear the queue.
+      human.forward = function() {
+        this.action = humanAction;
+        humanAction = -1;
+      };
+      // Stand-in brain whose learn() ignores the reward.
+      human.brain = { learn: function(reward) {} };
+      w.agents.push(human);
+      // One reward-history array per agent, kept in the same order as w.agents.
+      smooth_reward_history.push([]);
+    }
+
     var w; // global world object
     var current_interval_id;
     var skipdraw = false;
@@ -281,6 +344,7 @@
         a.brain = new RL.DQNAgent(env, spec); // give agent a TD brain
         //a.brain = new RL.RecurrentReinforceAgent(env, {});
         w.agents.push(a);
+        smooth_reward_history.push([]);
       }
 
       $( "#slider" ).slider({
@@ -318,6 +382,14 @@
       })();
     }
     
+    // Rewrites #apples_and_poison with one list item per agent showing its apples and poison counters.
+    function updateStats() {
+      var rows = [];
+      for(var idx=0; idx<w.agents.length; idx++) {
+        rows.push("<li>Player " + (idx+1) + ": " + w.agents[idx].apples + " apples, " + w.agents[idx].poison + " poison</li>");
+      }
+      $("#apples_and_poison").html("<ul>" + rows.join("") + "</ul>");
+    }
   </script>
   <style type="text/css">
       canvas { border: 1px solid white; }
@@ -365,10 +437,14 @@ <h1 style="font-size:50px;">REINFORCE<span style="color:#058;">js</span></h1>
 <button class="btn btn-success" onclick="gofast()" style="width:150px;height:50px;margin-bottom:5px;">Go fast</button>
 <button class="btn btn-success" onclick="gonormal()" style="width:150px;height:50px;margin-bottom:5px;">Go normal</button>
 <button class="btn btn-success" onclick="goslow()" style="width:150px;height:50px;margin-bottom:5px;">Go slow</button>
+<button class="btn btn-danger" onclick="toggleAgentView()" style="width:150px;height:50px;margin-bottom:5px;">Toggle Agent View</button>
+<button class="btn btn-danger" onclick="enableHuman()" style="width:150px;height:50px;margin-bottom:5px;">Start playing (use arrow keys)</button>
 
 <canvas id="canvas" width="700" height="500"></canvas>
 </div>
 
+   <div id="apples_and_poison"></div>
+
    <div id="brain_info_div"></div>
 
 <button class="btn btn-primary" onclick="loadAgent()" style="width:200px;height:35px;margin-bottom:5px;margin-right:20px;">Load a Pretrained Agent</button>
@@ -404,4 +480,4 @@ <h1 style="font-size:50px;">REINFORCE<span style="color:#058;">js</span></h1>
 <br><br><br><br>
    </div>
  </body>
-</html>
+</html>
diff --git a/waterworld.js b/waterworld.js
@@ -192,7 +192,7 @@ World.prototype = {
     for(var i=0,n=this.agents.length;i<n;i++) {
       this.agents[i].forward();
     }
-                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      
+
     // apply outputs of agents on evironment
     for(var i=0,n=this.agents.length;i<n;i++) {
       var a = this.agents[i];
@@ -239,6 +239,7 @@ World.prototype = {
     // tick all items
     var update_items = false;
     for(var j=0,m=this.agents.length;j<m;j++) {
+      var a = this.agents[j];
       a.digestion_signal = 0; // important - reset this!
     }
     for(var i=0,n=this.items.length;i<n;i++) {
@@ -256,8 +257,14 @@ World.prototype = {
           var rescheck = false;
           if(!rescheck) { 
             // ding! nom nom nom
-            if(it.type === 1) a.digestion_signal += 1.0; // mmm delicious apple
-            if(it.type === 2) a.digestion_signal += -1.0; // ewww poison
+            if(it.type === 1) {
+              a.digestion_signal += 1.0; // mmm delicious apple
+              a.apples++;
+            }
+            if(it.type === 2) {
+              a.digestion_signal += -1.0; // ewww poison
+              a.poison++;
+            }
             it.cleanup_ = true;
             update_items = true;
             break; // break out of loop, item was consumed
@@ -338,6 +345,9 @@ var Agent = function() {
   this.reward_bonus = 0.0;
   this.digestion_signal = 0.0;
   
+  this.apples = 0;
+  this.poison = 0;
+
   // outputs on world
   this.action = 0;