Hello, the 4 s termination makes me guess that you haven't recorded a reward function for your custom map? This is done by executing python -m tmrl --record-reward and driving to the finish line. You can then run python -m tmrl --check-environment and drive manually to check that the printed rewards make sense.
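For what it's worth, the 4 s figure also matches the REWARD_CONFIG further down: MIN_STEPS (70) plus FAILURE_COUNTDOWN (10) is 80 time steps of 0.05 s, i.e. about 4 seconds before an episode that never registers reward progress gets cut (assuming that is how the failure countdown is applied). The workflow for a new map is roughly:

# 1. record a reward function for the custom map (drive it once, start to finish)
python -m tmrl --record-reward

# 2. drive the map manually and verify that the printed rewards increase along the track
python -m tmrl --check-environment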
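Once the reward is recorded, a minimal sketch like the one below can confirm that episodes no longer end after ~4 s while the car keeps moving forward. It assumes the same Gymnasium-style get_environment API and [gas, brake, steer] actions used in your script; the 400-step budget is an arbitrary choice.

import time
import numpy as np
from tmrl import get_environment

env = get_environment()
time.sleep(1.0)  # give the TrackMania window time to gain focus

obs, info = env.reset()
for t in range(400):  # 400 steps * 0.05 s = 20 s of driving
    act = np.array([1.0, 0.0, 0.0], dtype=np.float32)  # full gas, no brake, straight
    obs, rew, terminated, truncated, info = env.step(act)
    print(f"step {t:3d}  reward {rew:.3f}")
    if terminated or truncated:
        print(f"episode ended at step {t} (terminated={terminated}, truncated={truncated})")
        break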
-
I am trying to use tmrl as a Gym environment and to train my model on a custom map, but after about 4 seconds the environment terminates itself. This is the code I have been using:
""import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from tmrl import get_environment
from collections import deque
import random
import time
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")
class DQN(nn.Module):
def init(self, input_dim, output_dim):
super(DQN, self).init()
self.fc1 = nn.Linear(input_dim, 64)
self.fc2 = nn.Linear(64, 64)
self.fc3 = nn.Linear(64, output_dim)
class DQNAgent:
def init(self, state_shape, action_space):
self.state_shape = state_shape
self.action_space = action_space
self.memory = deque(maxlen=10000)
self.gamma = 0.95
self.epsilon = 1.0
self.epsilon_min = 0.01
self.epsilon_decay = 0.995
self.model = DQN(state_shape[0], action_space).to(device)
self.target_model = DQN(state_shape[0], action_space).to(device)
self.optimizer = optim.Adam(self.model.parameters(), lr=0.001)
self.update_target_model()
def preprocess_obs(obs):
speed = obs[0]
lidar = obs[1].flatten()
prev_actions = obs[2]
return np.concatenate([speed, lidar, prev_actions])
def action_to_env_action(action):
if action == 0:
return np.array([1.0, 0.0, -1.0]) # Full throttle, turn left
elif action == 1:
return np.array([1.0, 0.0, 0.0]) # Full throttle, go straight
else:
return np.array([1.0, 0.0, 1.0]) # Full throttle, turn right
def calculate_speed_reward(speed):
return speed * 0.1 # Scale the reward to be smaller than the main rewards
env = get_environment()
state_shape = (1 + 4*19 + 3,) # speed + flattened LIDAR + previous actions
action_space = 3 # Left, Straight, Right
agent = DQNAgent(state_shape, action_space)
batch_size = 32
n_episodes = 1000
time_limit = 300 # 30 seconds time limit
for e in range(n_episodes):
obs, info = env.reset()
state = preprocess_obs(obs)
total_reward = 0
done = False
start_time = time.time()
step_count = 0
episode_speeds = []
torch.save(agent.model.state_dict(), 'tmrl_dqn_model.pth')
print("Training finished.")""
And here is the config file I have used. I cannot figure out why the environment terminates itself after a fixed time.
""{
"RUN_NAME": "test",
"RESET_TRAINING": true,
"BUFFERS_MAXLEN": 5000000,
"RW_MAX_SAMPLES_PER_EPISODE": 10000,
"CUDA_TRAINING": true,
"CUDA_INFERENCE": false,
"VIRTUAL_GAMEPAD": true,
"DCAC": false,
"LOCALHOST_WORKER": true,
"LOCALHOST_TRAINER": true,
"PUBLIC_IP_SERVER": "0.0.0.0",
"PASSWORD": "==>TMRL@UseASecurePasswordHere!<==",
"TLS": false,
"TLS_HOSTNAME": "default",
"TLS_CREDENTIALS_DIRECTORY": "",
"NB_WORKERS": -1,
"WANDB_PROJECT": "tmrl",
"WANDB_ENTITY": "tmrl",
"WANDB_KEY": "df28d4daa98d2df2557d74caf78e40c68adaf288",
"PORT": 55555,
"LOCAL_PORT_SERVER": 55556,
"LOCAL_PORT_TRAINER": 55557,
"LOCAL_PORT_WORKER": 55558,
"BUFFER_SIZE": 536870912,
"HEADER_SIZE": 12,
"SOCKET_TIMEOUT_CONNECT_TRAINER": 300.0,
"SOCKET_TIMEOUT_ACCEPT_TRAINER": 300.0,
"SOCKET_TIMEOUT_CONNECT_ROLLOUT": 300.0,
"SOCKET_TIMEOUT_ACCEPT_ROLLOUT": 300.0,
"SOCKET_TIMEOUT_COMMUNICATE": 30.0,
"SELECT_TIMEOUT_OUTBOUND": 30.0,
"ACK_TIMEOUT_WORKER_TO_SERVER": 300.0,
"ACK_TIMEOUT_TRAINER_TO_SERVER": 300.0,
"ACK_TIMEOUT_SERVER_TO_WORKER": 300.0,
"ACK_TIMEOUT_SERVER_TO_TRAINER": 7200.0,
"RECV_TIMEOUT_TRAINER_FROM_SERVER": 7200.0,
"RECV_TIMEOUT_WORKER_FROM_SERVER": 600.0,
"WAIT_BEFORE_RECONNECTION": 10.0,
"LOOP_SLEEP_TIME": 1.0,
"MAX_EPOCHS": 10000,
"ROUNDS_PER_EPOCH": 100,
"TRAINING_STEPS_PER_ROUND": 200,
"MAX_TRAINING_STEPS_PER_ENVIRONMENT_STEP": 4.0,
"ENVIRONMENT_STEPS_BEFORE_TRAINING": 1000,
"UPDATE_MODEL_INTERVAL": 200,
"UPDATE_BUFFER_INTERVAL": 200,
"SAVE_MODEL_EVERY": 0,
"MEMORY_SIZE": 1000000,
"BATCH_SIZE": 256,
"ALG": {
"ALGORITHM": "SAC",
"LEARN_ENTROPY_COEF":false,
"LR_ACTOR":0.00001,
"LR_CRITIC":0.00005,
"LR_ENTROPY":0.0003,
"GAMMA":0.995,
"POLYAK":0.995,
"TARGET_ENTROPY":-0.5,
"ALPHA":0.01,
"REDQ_N":10,
"REDQ_M":2,
"REDQ_Q_UPDATES_PER_POLICY_UPDATE":20,
"OPTIMIZER_ACTOR": "adam",
"OPTIMIZER_CRITIC": "adam",
"BETAS_ACTOR": [0.997, 0.997],
"BETAS_CRITIC": [0.997, 0.997],
"L2_ACTOR": 0.0,
"L2_CRITIC": 0.0
},
"ENV": {
"RTGYM_INTERFACE": "TM20LIDAR",
"WINDOW_WIDTH": 958,
"WINDOW_HEIGHT": 488,
"SLEEP_TIME_AT_RESET": 1.5,
"IMG_HIST_LEN": 4,
"RTGYM_CONFIG": {
"time_step_duration": 0.05,
"start_obs_capture": 0.04,
"time_step_timeout_factor": 10.0,
"act_buf_len": 2,
"benchmark": false,
"wait_on_done": true,
"ep_max_length": 1000
},
"REWARD_CONFIG": {
"END_OF_TRACK": 100.0,
"CONSTANT_PENALTY": 0.0,
"CHECK_FORWARD": 500,
"CHECK_BACKWARD": 10,
"FAILURE_COUNTDOWN": 10,
"MIN_STEPS": 70,
"MAX_STRAY": 100.0
}
},
"VERSION": "0.6.0"
}""