6868
6969 < script type ="application/javascript ">
7070 var canvas , ctx ;
71+ var agentView = false ;
72+ var humanControls = false ;
7173
7274 // Draw everything
73- function draw ( ) {
75+ function draw ( ) {
7476 ctx . clearRect ( 0 , 0 , canvas . width , canvas . height ) ;
7577 ctx . lineWidth = 1 ;
7678 var agents = w . agents ;
8486 ctx . lineTo ( q . p2 . x , q . p2 . y ) ;
8587 }
8688 ctx . stroke ( ) ;
87-
89+
8890 // draw agents
8991 // color agent based on reward it is experiencing at the moment
9092 var r = 0 ;
9597
9698 // draw agents body
9799 ctx . beginPath ( ) ;
98- ctx . arc ( a . op . x , a . op . y , a . rad , 0 , Math . PI * 2 , true ) ;
100+ ctx . arc ( a . op . x , a . op . y , a . rad , 0 , Math . PI * 2 , true ) ;
99101 ctx . fill ( ) ;
100102 ctx . stroke ( ) ;
101103
102104 // draw agents sight
103105 for ( var ei = 0 , ne = a . eyes . length ; ei < ne ; ei ++ ) {
104106 var e = a . eyes [ ei ] ;
105107 var sr = e . sensed_proximity ;
106- if ( e . sensed_type === - 1 || e . sensed_type === 0 ) {
108+ if ( e . sensed_type === - 1 || e . sensed_type === 0 ) {
107109 ctx . strokeStyle = "rgb(200,200,200)" ; // wall or nothing
108110 }
109111 if ( e . sensed_type === 1 ) { ctx . strokeStyle = "rgb(255,150,150)" ; } // apples
118120
119121 // draw items
120122 ctx . strokeStyle = "rgb(0,0,0)" ;
121- for ( var i = 0 , n = w . items . length ; i < n ; i ++ ) {
122- var it = w . items [ i ] ;
123- if ( it . type === 1 ) ctx . fillStyle = "rgb(255, 150, 150)" ;
124- if ( it . type === 2 ) ctx . fillStyle = "rgb(150, 255, 150)" ;
125- ctx . beginPath ( ) ;
126- ctx . arc ( it . p . x , it . p . y , it . rad , 0 , Math . PI * 2 , true ) ;
127- ctx . fill ( ) ;
128- ctx . stroke ( ) ;
123+ if ( ! agentView ) {
124+ for ( var i = 0 , n = w . items . length ; i < n ; i ++ ) {
125+ var it = w . items [ i ] ;
126+ if ( it . type === 1 ) ctx . fillStyle = "rgb(255, 150, 150)" ;
127+ if ( it . type === 2 ) ctx . fillStyle = "rgb(150, 255, 150)" ;
128+ ctx . beginPath ( ) ;
129+ ctx . arc ( it . p . x , it . p . y , it . rad , 0 , Math . PI * 2 , true ) ;
130+ ctx . fill ( ) ;
131+ ctx . stroke ( ) ;
132+ }
129133 }
130134 }
131135
132136 // Tick the world
133- var smooth_reward_history = [ ] ;
134- var smooth_reward = null ;
137+ var smooth_reward_history = [ ] ; // [][];
138+ var smooth_reward = [ ] ;
135139 var flott = 0 ;
136140 function tick ( ) {
137141
143147 w . tick ( ) ;
144148 }
145149 draw ( ) ;
150+ updateStats ( ) ;
146151
147- var rew = w . agents [ 0 ] . last_reward ;
148- if ( smooth_reward == null ) { smooth_reward = rew ; }
149- smooth_reward = smooth_reward * 0.999 + rew * 0.001 ;
150152 flott += 1 ;
151- if ( flott === 50 ) {
152- // record smooth reward
153- if ( smooth_reward_history . length >= nflot ) {
154- smooth_reward_history = smooth_reward_history . slice ( 1 ) ;
153+ for ( i = 0 ; i < w . agents . length ; i ++ ) {
154+ var rew = w . agents [ i ] . last_reward ;
155+ if ( ! smooth_reward [ i ] ) { smooth_reward [ i ] = 0 ; }
156+ smooth_reward [ i ] = smooth_reward [ i ] * 0.999 + rew * 0.001 ;
157+ if ( flott === 50 ) {
158+ // record smooth reward
159+ if ( smooth_reward_history [ i ] . length >= nflot ) {
160+ smooth_reward_history [ i ] = smooth_reward_history [ i ] . slice ( 1 ) ;
161+ }
162+ smooth_reward_history [ i ] . push ( smooth_reward [ i ] ) ;
155163 }
156- smooth_reward_history . push ( smooth_reward ) ;
164+ }
165+ if ( flott === 50 ) {
157166 flott = 0 ;
158167 }
159168
170179 var nflot = 1000 ;
171180 function initFlot ( ) {
172181 var container = $ ( "#flotreward" ) ;
173- var res = getFlotRewards ( ) ;
182+ var res = getFlotRewards ( 0 ) ;
183+ var res1 = getFlotRewards ( 1 ) ;
174184 series = [ {
175185 data : res ,
176186 lines : { fill : true }
187+ } , {
188+ data : res1 ,
189+ lines : { fill : true }
177190 } ] ;
178191 var plot = $ . plot ( container , series , {
179192 grid : {
199212 }
200213 } ) ;
201214 setInterval ( function ( ) {
202- series [ 0 ] . data = getFlotRewards ( ) ;
215+ for ( var i = 0 ; i < w . agents . length ; i ++ ) {
216+ series [ i ] . data = getFlotRewards ( i ) ;
217+ }
203218 plot . setData ( series ) ;
204219 plot . draw ( ) ;
205220 } , 100 ) ;
206221 }
/**
 * Convert one agent's smoothed-reward history into flot data points.
 *
 * @param {number} agentId Index of the agent in `w.agents` (and of its
 *     history list in `smooth_reward_history`).
 * @returns {Array} List of `[index, reward]` pairs; empty when the index is
 *     past the end of `w.agents` or no history list exists for that agent.
 */
function getFlotRewards(agentId) {
  // Unknown agent: nothing to plot.
  if (agentId >= w.agents.length) {
    return [];
  }
  var history = smooth_reward_history[agentId];
  if (!history) {
    return [];
  }
  // Pair every recorded value with its position so flot can use it as x.
  var points = [];
  var total = history.length;
  for (var step = 0; step < total; step += 1) {
    points.push([step, history[step]]);
  }
  return points;
}
264282 } ) ;
265283 }
266284
/**
 * Flip the global `agentView` flag (bound to the "Toggle Agent View" button).
 * While the flag is true, draw() does not render the world items.
 */
function toggleAgentView() {
  agentView = agentView ? false : true;
}
288+
// Key code of the most recent keydown event seen by the handler below.
var lastKey = null;

/**
 * Page-wide keydown handler that lets the arrow keys drive the human agent.
 *
 * On an arrow key it makes sure the human-controlled agent exists, cancels
 * the key's default action (presumably to stop the page from scrolling) and
 * stores the matching action index in `humanAction`. Other keys only update
 * `lastKey`.
 *
 * @param {KeyboardEvent} e The keydown event; may be missing in legacy
 *     browsers that only expose `window.event`.
 */
document.onkeydown = function (e) {
  // Normalise once and use `event` everywhere below; calling methods on `e`
  // directly would throw when only window.event is available.
  var event = window.event ? window.event : e;
  // keyCode -> action index: 37 (left), 39 (right), 38 (up), 40 (down).
  var arrowActions = { 37: 0, 39: 1, 38: 2, 40: 3 };

  lastKey = event.keyCode;
  if (!Object.prototype.hasOwnProperty.call(arrowActions, lastKey)) {
    return;
  }

  enableHuman();
  if (event.preventDefault) {
    event.preventDefault();
  } else {
    // Legacy IE event objects have no preventDefault().
    event.returnValue = false;
  }
  humanAction = arrowActions[lastKey];
};
310+
// Action index queued by the keyboard handler for the human agent.
// It is reset to -1 each time the human agent's forward() consumes it.
var humanAction = -1;

/**
 * Add a keyboard-controlled agent to the world, at most once.
 *
 * The new agent's forward() takes its action from `humanAction` instead of a
 * learned policy, and its brain is a stub whose learn() ignores the reward.
 * An empty reward-history list is appended alongside it so the per-agent
 * history stays index-aligned with `w.agents`.
 *
 * Does nothing when human control is already enabled or when the world `w`
 * has not been created yet; in the latter case it can simply be called again
 * later.
 */
function enableHuman() {
  if (humanControls || !w) {
    return;
  }

  var a = new Agent();
  a.forward = function () {
    // Consume the pending key press so it is applied only once.
    this.action = humanAction;
    humanAction = -1;
  };
  a.brain = {
    learn: function (reward) {
      // Do nothing: a human player has no model to update.
    }
  };
  w.agents.push(a);
  smooth_reward_history.push([]);

  // Mark as enabled only after the agent is registered, so a failure above
  // does not leave human control permanently unavailable.
  humanControls = true;
}
329+
267330 var w ; // global world object
268331 var current_interval_id ;
269332 var skipdraw = false ;
281344 a . brain = new RL . DQNAgent ( env , spec ) ; // give agent a TD brain
282345 //a.brain = new RL.RecurrentReinforceAgent(env, {});
283346 w . agents . push ( a ) ;
347+ smooth_reward_history . push ( [ ] ) ;
284348 }
285349
286350 $ ( "#slider" ) . slider ( {
318382 } ) ( ) ;
319383 }
320384
/**
 * Refresh the per-player scoreboard.
 *
 * Builds one list entry per agent in `w.agents`, showing its `apples` and
 * `poison` values, and writes the resulting list into the
 * `#apples_and_poison` element.
 */
function updateStats() {
  var players = w.agents;
  var entries = [];
  for (var idx = 0; idx < players.length; idx += 1) {
    var player = players[idx];
    // Players are numbered from 1 in the UI.
    entries.push(
      "<li>Player " + (idx + 1) + ": " + player.apples + " apples, " + player.poison + " poison</li>"
    );
  }
  var markup = "<ul>" + entries.join("") + "</ul>";
  $("#apples_and_poison").html(markup);
}
321393 </ script >
322394 < style type ="text/css ">
323395 canvas { border : 1px solid white; }
@@ -365,10 +437,14 @@ <h1 style="font-size:50px;">REINFORCE<span style="color:#058;">js</span></h1>
365437< button class ="btn btn-success " onclick ="gofast() " style ="width:150px;height:50px;margin-bottom:5px; "> Go fast</ button >
366438< button class ="btn btn-success " onclick ="gonormal() " style ="width:150px;height:50px;margin-bottom:5px; "> Go normal</ button >
367439< button class ="btn btn-success " onclick ="goslow() " style ="width:150px;height:50px;margin-bottom:5px; "> Go slow</ button >
440+ < button class ="btn btn-danger " onclick ="toggleAgentView() " style ="width:150px;height:50px;margin-bottom:5px; "> Toggle Agent View</ button >
441+ < button class ="btn btn-danger " onclick ="enableHuman() " style ="width:150px;height:50px;margin-bottom:5px; "> Start playing (use arrow keys)</ button >
368442
369443< canvas id ="canvas " width ="700 " height ="500 "> </ canvas >
370444</ div >
371445
446+ < div id ="apples_and_poison "> </ div >
447+
372448 < div id ="brain_info_div "> </ div >
373449
374450< button class ="btn btn-primary " onclick ="loadAgent() " style ="width:200px;height:35px;margin-bottom:5px;margin-right:20px; "> Load a Pretrained Agent</ button >
@@ -404,4 +480,4 @@ <h1 style="font-size:50px;">REINFORCE<span style="color:#058;">js</span></h1>
404480< br > < br > < br > < br >
405481 </ div >
406482 </ body >
407- </ html >
483+ </ html >
0 commit comments