Skip to content

Commit 0b9315a

Browse files
committed
Merge pull request #2 from jorgelamb/agentView
add "agent view", "human player" and "stats"
2 parents 6d97536 + b2de4f5 commit 0b9315a

File tree

2 files changed

+117
-31
lines changed

2 files changed

+117
-31
lines changed

waterworld.html

Lines changed: 104 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -68,9 +68,11 @@
6868

6969
<script type="application/javascript">
7070
var canvas, ctx;
71+
var agentView = false;
72+
var humanControls = false;
7173

7274
// Draw everything
73-
function draw() {
75+
function draw() {
7476
ctx.clearRect(0, 0, canvas.width, canvas.height);
7577
ctx.lineWidth = 1;
7678
var agents = w.agents;
@@ -84,7 +86,7 @@
8486
ctx.lineTo(q.p2.x, q.p2.y);
8587
}
8688
ctx.stroke();
87-
89+
8890
// draw agents
8991
// color agent based on reward it is experiencing at the moment
9092
var r = 0;
@@ -95,15 +97,15 @@
9597

9698
// draw agents body
9799
ctx.beginPath();
98-
ctx.arc(a.op.x, a.op.y, a.rad, 0, Math.PI*2, true);
100+
ctx.arc(a.op.x, a.op.y, a.rad, 0, Math.PI*2, true);
99101
ctx.fill();
100102
ctx.stroke();
101103

102104
// draw agents sight
103105
for(var ei=0,ne=a.eyes.length;ei<ne;ei++) {
104106
var e = a.eyes[ei];
105107
var sr = e.sensed_proximity;
106-
if(e.sensed_type === -1 || e.sensed_type === 0) {
108+
if(e.sensed_type === -1 || e.sensed_type === 0) {
107109
ctx.strokeStyle = "rgb(200,200,200)"; // wall or nothing
108110
}
109111
if(e.sensed_type === 1) { ctx.strokeStyle = "rgb(255,150,150)"; } // apples
@@ -118,20 +120,22 @@
118120

119121
// draw items
120122
ctx.strokeStyle = "rgb(0,0,0)";
121-
for(var i=0,n=w.items.length;i<n;i++) {
122-
var it = w.items[i];
123-
if(it.type === 1) ctx.fillStyle = "rgb(255, 150, 150)";
124-
if(it.type === 2) ctx.fillStyle = "rgb(150, 255, 150)";
125-
ctx.beginPath();
126-
ctx.arc(it.p.x, it.p.y, it.rad, 0, Math.PI*2, true);
127-
ctx.fill();
128-
ctx.stroke();
123+
if(!agentView) {
124+
for(var i=0,n=w.items.length;i<n;i++) {
125+
var it = w.items[i];
126+
if(it.type === 1) ctx.fillStyle = "rgb(255, 150, 150)";
127+
if(it.type === 2) ctx.fillStyle = "rgb(150, 255, 150)";
128+
ctx.beginPath();
129+
ctx.arc(it.p.x, it.p.y, it.rad, 0, Math.PI*2, true);
130+
ctx.fill();
131+
ctx.stroke();
132+
}
129133
}
130134
}
131135

132136
// Tick the world
133-
var smooth_reward_history = [];
134-
var smooth_reward = null;
137+
var smooth_reward_history = []; // [][];
138+
var smooth_reward = [];
135139
var flott = 0;
136140
function tick() {
137141

@@ -143,17 +147,22 @@
143147
w.tick();
144148
}
145149
draw();
150+
updateStats();
146151

147-
var rew = w.agents[0].last_reward;
148-
if(smooth_reward == null) { smooth_reward = rew; }
149-
smooth_reward = smooth_reward * 0.999 + rew * 0.001;
150152
flott += 1;
151-
if(flott === 50) {
152-
// record smooth reward
153-
if(smooth_reward_history.length >= nflot) {
154-
smooth_reward_history = smooth_reward_history.slice(1);
153+
for(i=0; i<w.agents.length; i++) {
154+
var rew = w.agents[i].last_reward;
155+
if(!smooth_reward[i]) { smooth_reward[i] = 0; }
156+
smooth_reward[i] = smooth_reward[i] * 0.999 + rew * 0.001;
157+
if(flott === 50) {
158+
// record smooth reward
159+
if(smooth_reward_history[i].length >= nflot) {
160+
smooth_reward_history[i] = smooth_reward_history[i].slice(1);
161+
}
162+
smooth_reward_history[i].push(smooth_reward[i]);
155163
}
156-
smooth_reward_history.push(smooth_reward);
164+
}
165+
if(flott === 50) {
157166
flott = 0;
158167
}
159168

@@ -170,10 +179,14 @@
170179
var nflot = 1000;
171180
function initFlot() {
172181
var container = $("#flotreward");
173-
var res = getFlotRewards();
182+
var res = getFlotRewards(0);
183+
var res1 = getFlotRewards(1);
174184
series = [{
175185
data: res,
176186
lines: {fill: true}
187+
}, {
188+
data: res1,
189+
lines: {fill: true}
177190
}];
178191
var plot = $.plot(container, series, {
179192
grid: {
@@ -199,16 +212,21 @@
199212
}
200213
});
201214
setInterval(function(){
202-
series[0].data = getFlotRewards();
215+
for(var i=0; i<w.agents.length; i++) {
216+
series[i].data = getFlotRewards(i);
217+
}
203218
plot.setData(series);
204219
plot.draw();
205220
}, 100);
206221
}
207-
// Build the flot data series for one agent's smoothed-reward history.
//
// agentId: index into w.agents / smooth_reward_history.
// Returns an array of [sampleIndex, smoothedReward] pairs; the array is empty
// when agentId is not a current agent or no history array exists for it yet.
function getFlotRewards(agentId) {
  var res = [];
  var history = smooth_reward_history[agentId];
  if (agentId >= w.agents.length || !history) {
    return res;
  }
  // zip rewards into flot data
  for (var i = 0, n = history.length; i < n; i++) {
    res.push([i, history[i]]);
  }
  return res;
}
@@ -264,6 +282,51 @@
264282
});
265283
}
266284

285+
// Flip the global agentView flag. While the flag is true, draw() skips the
// "draw items" loop, so the canvas shows only what the agents' eyes sense.
function toggleAgentView() {
  agentView = !agentView;
}
288+
289+
// Key code of the most recent keydown event (null until a key is pressed).
var lastKey = null;

// Arrow keys drive the human-controlled agent. The first arrow press also
// creates that agent via enableHuman(). Other keys are only recorded in
// lastKey and keep their default browser behaviour.
document.onkeydown = function(e) {
  // Older IE does not pass the event in; it exposes it as window.event.
  var event = window.event ? window.event : e;
  lastKey = event.keyCode;

  // keyCode -> action index: left (37), right (39), up (38), down (40).
  var arrowActions = { 37: 0, 39: 1, 38: 2, 40: 3 };
  if (!arrowActions.hasOwnProperty(lastKey)) {
    return;
  }

  enableHuman();

  // Stop the page from scrolling. Use the normalised event object: `e` is
  // undefined on the window.event path, where calling it would throw before
  // humanAction is set.
  if (typeof event.preventDefault === "function") {
    event.preventDefault();
  } else {
    event.returnValue = false;
  }

  humanAction = arrowActions[lastKey];
};
310+
311+
// Action requested by the keyboard for the human agent's next step.
// -1 is the value left behind once the pending action has been consumed.
var humanAction = -1;

// Add a keyboard-controlled agent to the world. Guarded by humanControls, so
// calls after the first one do nothing.
function enableHuman() {
  if (humanControls) {
    return;
  }
  humanControls = true;

  var a = new Agent();

  // Replace the forward step: take the pending keyboard action, then reset it
  // so a single key press is applied only once.
  a.forward = function() {
    this.action = humanAction;
    humanAction = -1;
  };

  // Stub brain: provides learn(reward) and deliberately does nothing with it.
  a.brain = {
    learn: function(reward) {
      // Do nothing;
    }
  };

  w.agents.push(a);
  // Keep one reward-history array per agent, index-aligned with w.agents.
  smooth_reward_history.push([]);
}
329+
267330
var w; // global world object
268331
var current_interval_id;
269332
var skipdraw = false;
@@ -281,6 +344,7 @@
281344
a.brain = new RL.DQNAgent(env, spec); // give agent a TD brain
282345
//a.brain = new RL.RecurrentReinforceAgent(env, {});
283346
w.agents.push(a);
347+
smooth_reward_history.push([]);
284348
}
285349

286350
$( "#slider" ).slider({
@@ -318,6 +382,14 @@
318382
})();
319383
}
320384

385+
// Render the per-agent counters into the #apples_and_poison element as a
// <ul> with one <li> per entry of w.agents ("Player N" is 1-based).
function updateStats() {
  var rows = [];
  for (var i = 0; i < w.agents.length; i++) {
    var agent = w.agents[i];
    rows.push("<li>Player " + (i + 1) + ": " + agent.apples + " apples, " + agent.poison + " poison</li>");
  }
  $("#apples_and_poison").html("<ul>" + rows.join("") + "</ul>");
}
321393
</script>
322394
<style type="text/css">
323395
canvas { border: 1px solid white; }
@@ -365,10 +437,14 @@ <h1 style="font-size:50px;">REINFORCE<span style="color:#058;">js</span></h1>
365437
<button class="btn btn-success" onclick="gofast()" style="width:150px;height:50px;margin-bottom:5px;">Go fast</button>
366438
<button class="btn btn-success" onclick="gonormal()" style="width:150px;height:50px;margin-bottom:5px;">Go normal</button>
367439
<button class="btn btn-success" onclick="goslow()" style="width:150px;height:50px;margin-bottom:5px;">Go slow</button>
440+
<button class="btn btn-danger" onclick="toggleAgentView()" style="width:150px;height:50px;margin-bottom:5px;">Toggle Agent View</button>
441+
<button class="btn btn-danger" onclick="enableHuman()" style="width:150px;height:50px;margin-bottom:5px;">Start playing (use arrow keys)</button>
368442

369443
<canvas id="canvas" width="700" height="500"></canvas>
370444
</div>
371445

446+
<div id="apples_and_poison"></div>
447+
372448
<div id="brain_info_div"></div>
373449

374450
<button class="btn btn-primary" onclick="loadAgent()" style="width:200px;height:35px;margin-bottom:5px;margin-right:20px;">Load a Pretrained Agent</button>
@@ -404,4 +480,4 @@ <h1 style="font-size:50px;">REINFORCE<span style="color:#058;">js</span></h1>
404480
<br><br><br><br>
405481
</div>
406482
</body>
407-
</html>
483+
</html>

waterworld.js

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -192,7 +192,7 @@ World.prototype = {
192192
for(var i=0,n=this.agents.length;i<n;i++) {
193193
this.agents[i].forward();
194194
}
195-
195+
196196
// apply outputs of agents on evironment
197197
for(var i=0,n=this.agents.length;i<n;i++) {
198198
var a = this.agents[i];
@@ -239,6 +239,7 @@ World.prototype = {
239239
// tick all items
240240
var update_items = false;
241241
for(var j=0,m=this.agents.length;j<m;j++) {
242+
var a = this.agents[j];
242243
a.digestion_signal = 0; // important - reset this!
243244
}
244245
for(var i=0,n=this.items.length;i<n;i++) {
@@ -256,8 +257,14 @@ World.prototype = {
256257
var rescheck = false;
257258
if(!rescheck) {
258259
// ding! nom nom nom
259-
if(it.type === 1) a.digestion_signal += 1.0; // mmm delicious apple
260-
if(it.type === 2) a.digestion_signal += -1.0; // ewww poison
260+
if(it.type === 1) {
261+
a.digestion_signal += 1.0; // mmm delicious apple
262+
a.apples++;
263+
}
264+
if(it.type === 2) {
265+
a.digestion_signal += -1.0; // ewww poison
266+
a.poison++;
267+
}
261268
it.cleanup_ = true;
262269
update_items = true;
263270
break; // break out of loop, item was consumed
@@ -338,6 +345,9 @@ var Agent = function() {
338345
this.reward_bonus = 0.0;
339346
this.digestion_signal = 0.0;
340347

348+
this.apples = 0;
349+
this.poison = 0;
350+
341351
// outputs on world
342352
this.action = 0;
343353

0 commit comments

Comments
 (0)