import os
#os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"
from typing import Dict
from tempfile import gettempdir
import matplotlib.pyplot as plt
import numpy as np
import torch
from torch import nn, optim
from torch.utils.data import DataLoader
from torchvision.models.resnet import resnet152
from tqdm import tqdm
from l5kit.configs import load_config_data
from l5kit.data import LocalDataManager, ChunkedDataset
from l5kit.dataset import AgentDataset, EgoDataset
from l5kit.rasterization import build_rasterizer
from l5kit.evaluation import write_pred_csv, compute_metrics_csv, read_gt_csv, create_chopped_dataset
from l5kit.evaluation.chop_dataset import MIN_FUTURE_STEPS
from l5kit.evaluation.metrics import neg_multi_log_likelihood, time_displace
from l5kit.geometry import transform_points
from l5kit.visualization import PREDICTED_POINTS_COLOR, TARGET_POINTS_COLOR, draw_trajectory
from prettytable import PrettyTable
from pathlib import Path
c:\Users\peter\anaconda3\envs\l5kitenv\lib\site-packages\l5kit\dataset\select_agents.py:31: UserWarning: Windows detected. BLOSC_NOLOCK has not been set as it causes memory leaks on Windows. However, writing the mask with this config may be inconsistent.
  warnings.warn(
torch.cuda.is_available()
True
# set env variable for data
os.environ["L5KIT_DATA_FOLDER"] = "H:\prediction-dataset"
dm = LocalDataManager(None)
# get config
cfg = load_config_data("./agent_motion_config.yaml")
print(cfg)
{'format_version': 4,
 'model_params': {'model_architecture': 'resnet50', 'history_num_frames': 50, 'future_num_frames': 50, 'step_time': 0.1, 'render_ego_history': True},
 'raster_params': {'raster_size': [224, 224], 'pixel_size': [0.5, 0.5], 'ego_center': [0.25, 0.5], 'map_type': 'py_semantic', 'satellite_map_key': 'aerial_map/aerial_map.png', 'semantic_map_key': 'semantic_map/semantic_map.pb', 'dataset_meta_key': 'meta.json', 'filter_agents_threshold': 0.5, 'disable_traffic_light_faces': False, 'set_origin_to_bottom': True},
 'train_data_loader': {'key': 'scenes/sample.zarr', 'batch_size': 4, 'shuffle': True, 'num_workers': 0},
 'val_data_loader': {'key': 'scenes/validate.zarr', 'batch_size': 2, 'shuffle': False, 'num_workers': 0},
 'train_params': {'checkpoint_every_n_steps': 1000000, 'max_num_steps': 100000, 'eval_every_n_steps': 10000}}
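# Quick sanity arithmetic on the horizons implied by the config above (an
# added check, not part of the original run): 50 frames at a 0.1 s step is
# 5 s of history and 5 s of prediction.
mp = cfg["model_params"]
print(mp["history_num_frames"] * mp["step_time"], "s of history")    # 5.0
print(mp["future_num_frames"] * mp["step_time"], "s of prediction")  # 5.0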
def build_model(cfg: Dict) -> torch.nn.Module:
    # load pre-trained Conv2D model
    model = resnet152(pretrained=True)
    # change input channels number to match the rasterizer's output
    num_history_channels = (cfg["model_params"]["history_num_frames"] + 1) * 2
    num_in_channels = 3 + num_history_channels
    model.conv1 = nn.Conv2d(
        num_in_channels,
        model.conv1.out_channels,
        kernel_size=model.conv1.kernel_size,
        stride=model.conv1.stride,
        padding=model.conv1.padding,
        bias=False,
    )
    # change output size to (X, Y) * number of future states
    num_targets = 2 * cfg["model_params"]["future_num_frames"]
    model.fc = nn.Linear(in_features=model.fc.in_features, out_features=num_targets)
    return model
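# Shape sanity check for build_model (a minimal sketch of my own; the input
# channel count assumes the py_semantic rasterizer configured above:
# (history + 1) * 2 box channels for agents and ego, plus 3 semantic RGB):
_check_model = build_model(cfg).eval()
_n_in = 3 + (cfg["model_params"]["history_num_frames"] + 1) * 2  # 105 for this config
with torch.no_grad():
    _out = _check_model(torch.zeros(1, _n_in, 224, 224))
assert _out.shape == (1, 2 * cfg["model_params"]["future_num_frames"])  # (1, 100)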
def forward(data, model, device, criterion):
    inputs = data["image"].to(device)
    target_availabilities = data["target_availabilities"].unsqueeze(-1).to(device)
    targets = data["target_positions"].to(device)
    # Forward pass
    outputs = model(inputs).reshape(targets.shape)
    loss = criterion(outputs, targets)
    # not all the output steps are valid; filter them out of the loss using the availabilities
    loss = loss * target_availabilities
    loss = loss.mean()
    return loss, outputs
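# Why the criterion below must use reduction="none": padded future steps would
# otherwise leak into the loss. A tiny made-up illustration (values arbitrary):
_p = torch.tensor([[[1., 1.], [2., 2.], [9., 9.]]])  # predictions, last step bogus
_t = torch.tensor([[[1., 1.], [2., 2.], [0., 0.]]])  # targets, last step is padding
_avail = torch.tensor([[1., 1., 0.]]).unsqueeze(-1)  # third step unavailable
_masked = nn.MSELoss(reduction="none")(_p, _t) * _avail
assert _masked.mean().item() == 0.0  # the padded step contributes nothing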
# ===== INIT DATASET
train_cfg = cfg["train_data_loader"]
rasterizer = build_rasterizer(cfg, dm)
train_zarr = ChunkedDataset(dm.require(train_cfg["key"])).open()
train_dataset = AgentDataset(cfg, train_zarr, rasterizer)
train_dataloader = DataLoader(train_dataset, shuffle=train_cfg["shuffle"], batch_size=train_cfg["batch_size"],
                              num_workers=train_cfg["num_workers"])
print(train_dataset)
+------------+------------+------------+---------------+-----------------+----------------------+----------------------+----------------------+---------------------+
| Num Scenes | Num Frames | Num Agents | Num TR lights | Total Time (hr) | Avg Frames per Scene | Avg Agents per Frame | Avg Scene Time (sec) | Avg Frame frequency |
+------------+------------+------------+---------------+-----------------+----------------------+----------------------+----------------------+---------------------+
|    100     |   24838    |  1893736   |    316008     |      0.69       |        248.38        |        76.24         |        24.83         |        10.00        |
+------------+------------+------------+---------------+-----------------+----------------------+----------------------+----------------------+---------------------+
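# Peek at a single batch to confirm what the dataset feeds the model (an added
# inspection cell; the shapes are what the config above should produce):
_batch = next(iter(train_dataloader))
print(_batch["image"].shape)                  # torch.Size([4, 105, 224, 224])
print(_batch["target_positions"].shape)       # torch.Size([4, 50, 2])
print(_batch["target_availabilities"].shape)  # torch.Size([4, 50])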
# ==== INIT MODEL
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)
model = build_model(cfg).to(device)
optimizer = optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.MSELoss(reduction="none")
cuda:0
c:\Users\peter\anaconda3\envs\l5kitenv\lib\site-packages\torchvision\models\_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead.
  warnings.warn(
c:\Users\peter\anaconda3\envs\l5kitenv\lib\site-packages\torchvision\models\_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=ResNet152_Weights.IMAGENET1K_V1`. You can also use `weights=ResNet152_Weights.DEFAULT` to get the most up-to-date weights.
  warnings.warn(msg)
# ==== TRAIN LOOP
tr_it = iter(train_dataloader)
progress_bar = tqdm(range(cfg["train_params"]["max_num_steps"]))
losses_train = []
losses_avg = []
for _ in progress_bar:
    try:
        data = next(tr_it)
    except StopIteration:
        tr_it = iter(train_dataloader)
        data = next(tr_it)
    model.train()
    torch.set_grad_enabled(True)
    loss, _ = forward(data, model, device, criterion)
    # Backward pass
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    losses_train.append(loss.item())
    losses_avg.append(np.mean(losses_train))
    progress_bar.set_description(f"loss: {loss.item()} loss(avg): {np.mean(losses_train)}")
loss: 2.2113845348358154 loss(avg): 5.946497581898172: 25%|██▍ | 24607/100000 [3:05:37<9:28:43, 2.21it/s]
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
Cell In[9], line 21
     18 loss.backward()
     19 optimizer.step()
---> 21 losses_train.append(loss.item())
     22 losses_avg.append(np.mean(losses_train))
     23 progress_bar.set_description(f"loss: {loss.item()} loss(avg): {np.mean(losses_train)}")
KeyboardInterrupt:
# num_epochs = 1
# training_losses = []
# for epoch in range(1, num_epochs+1):
#     epoch_loss = 0
#     for batch in train_dataloader:
#         model.train()
#         torch.set_grad_enabled(True)
#         loss, _ = forward(batch, model, device, criterion)
#         optimizer.zero_grad()
#         loss.backward()
#         optimizer.step()
#         epoch_loss += loss.item()
#     avg_loss = epoch_loss / len(train_dataloader)
#     training_losses.append(avg_loss)
#     if epoch % 10 == 0 or epoch == 1:
#         print(f"Epoch {epoch}, Average Loss: {avg_loss}")
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
Cell In[10], line 5
      3 for epoch in range(1, num_epochs+1):
      4     epoch_loss = 0
----> 5     for batch in train_dataloader:
(interrupted deep inside the DataLoader: AgentDataset.__getitem__ -> generate_agent_sample -> SemBoxRasterizer.rasterize -> BoxRasterizer.rasterize)
KeyboardInterrupt:
#save model
torch.save(model.state_dict(), "./agent_motion_model1.pth")
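# (Optional, my own extension, not part of the original run) a fuller
# checkpoint that also stores the optimizer state makes it possible to resume
# an interrupted run; the filename and dict keys here are my own choices:
torch.save({
    "model_state_dict": model.state_dict(),
    "optimizer_state_dict": optimizer.state_dict(),
    "losses_train": losses_train,
}, "./agent_motion_checkpoint1.pth")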
#load model
#model.load_state_dict(torch.load("./agent_motion_model.pth"))
<All keys matched successfully>
plt.plot(np.arange(len(losses_train)), losses_train, label="train loss")
plt.legend()
plt.show()
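# losses_avg was tracked during training but never plotted; overlaying it
# gives a smoother view of the trend (an added cell, not part of the original run):
plt.plot(np.arange(len(losses_train)), losses_train, label="train loss")
plt.plot(np.arange(len(losses_avg)), losses_avg, label="running avg loss")
plt.xlabel("step")
plt.ylabel("MSE loss")
plt.legend()
plt.show()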
# ===== GENERATE AND LOAD CHOPPED DATASET
num_frames_to_chop = 100
eval_cfg = cfg["val_data_loader"]
eval_base_path = create_chopped_dataset(dm.require(eval_cfg["key"]), cfg["raster_params"]["filter_agents_threshold"],
                                        num_frames_to_chop, cfg["model_params"]["future_num_frames"], MIN_FUTURE_STEPS)
copying: 100%|██████████| 16220/16220 [10:52<00:00, 24.87it/s]
c:\Users\peter\anaconda3\envs\l5kitenv\lib\site-packages\l5kit\evaluation\extract_ground_truth.py:52: RuntimeWarning: you're running with a custom agents_mask
  dataset = AgentDataset(cfg=cfg, zarr_dataset=zarr_dataset, rasterizer=rasterizer, agents_mask=agents_mask)
extracting GT: 100%|██████████| 94694/94694 [07:48<00:00, 201.92it/s]
eval_zarr_path = str(Path(eval_base_path) / Path(dm.require(eval_cfg["key"])).name)
eval_mask_path = str(Path(eval_base_path) / "mask.npz")
eval_gt_path = str(Path(eval_base_path) / "gt.csv")
eval_zarr = ChunkedDataset(eval_zarr_path).open()
eval_mask = np.load(eval_mask_path)["arr_0"]
# ===== INIT DATASET AND LOAD MASK
eval_dataset = AgentDataset(cfg, eval_zarr, rasterizer, agents_mask=eval_mask)
eval_dataloader = DataLoader(eval_dataset, shuffle=eval_cfg["shuffle"], batch_size=4,
                             num_workers=eval_cfg["num_workers"])
print(eval_dataset)
C:\Users\peter\AppData\Local\Temp\ipykernel_32160\1453756949.py:8: RuntimeWarning: you're running with a custom agents_mask
  eval_dataset = AgentDataset(cfg, eval_zarr, rasterizer, agents_mask=eval_mask)
+------------+------------+------------+---------------+-----------------+----------------------+----------------------+----------------------+---------------------+
| Num Scenes | Num Frames | Num Agents | Num TR lights | Total Time (hr) | Avg Frames per Scene | Avg Agents per Frame | Avg Scene Time (sec) | Avg Frame frequency |
+------------+------------+------------+---------------+-----------------+----------------------+----------------------+----------------------+---------------------+
|   16220    |  1622000   | 125423254  |   11733321    |      45.06      |        100.00        |        77.33         |        10.00         |        10.00        |
+------------+------------+------------+---------------+-----------------+----------------------+----------------------+----------------------+---------------------+
# ==== EVAL LOOP
model.eval()
torch.set_grad_enabled(False)
# store information for evaluation
future_coords_offsets_pd = []
timestamps = []
agent_ids = []
progress_bar = tqdm(eval_dataloader)
for data in progress_bar:
    _, outputs = forward(data, model, device, criterion)
    # convert agent coordinates into world offsets: map the agent-frame
    # predictions to world coordinates, then subtract the agent centroid,
    # since write_pred_csv expects centroid-relative offsets
    agents_coords = outputs.cpu().numpy()
    world_from_agents = data["world_from_agent"].numpy()
    centroids = data["centroid"].numpy()
    coords_offset = transform_points(agents_coords, world_from_agents) - centroids[:, None, :2]
    future_coords_offsets_pd.append(np.stack(coords_offset))
    timestamps.append(data["timestamp"].numpy().copy())
    agent_ids.append(data["track_id"].numpy().copy())
100%|██████████| 23674/23674 [2:25:50<00:00, 2.71it/s]
pred_path = f"{gettempdir()}/pred.csv"
write_pred_csv(pred_path,
               timestamps=np.concatenate(timestamps),
               track_ids=np.concatenate(agent_ids),
               coords=np.concatenate(future_coords_offsets_pd),
               )
metrics = compute_metrics_csv(eval_gt_path, pred_path, [neg_multi_log_likelihood, time_displace])
for metric_name, metric_mean in metrics.items():
    print(metric_name, metric_mean)
neg_multi_log_likelihood 175.45676550093557
time_displace [0.06773729 0.11469305 0.17395884 0.19997748 0.20411925 0.22879742
 0.26271531 0.29943946 0.33569181 0.37513012 0.42009098 0.46598532
 0.50938015 0.55164097 0.593019   0.63808795 0.67606026 0.72140343
 0.76299364 0.80569364 0.84873197 0.88992806 0.93171807 0.97412277
 1.01663126 1.05784709 1.1016372  1.1421511  1.18651499 1.22979849
 1.2737417  1.32533333 1.37266788 1.41942745 1.46950437 1.5177337
 1.56511116 1.61363376 1.66083676 1.71677312 1.76480592 1.81612993
 1.86786494 1.92937666 1.9854049  2.03929472 2.08680542 2.14062817
 2.19937759 2.25933752]
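# time_displace above is the mean displacement error per future step, in
# metres; averaging it gives an ADE-style summary and the last entry an
# FDE-style one (my reading of the printed array, added for interpretation):
td = metrics["time_displace"]
print(f"mean displacement over 5 s: {td.mean():.3f} m, displacement at 5 s: {td[-1]:.3f} m")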
Results log (neg_multi_log_likelihood):
1 epoch, 0 history frames = 7634
100 steps, 0 history frames = 4917
100 steps, 10 history frames = 3819
100 steps, 50 history frames = 933
100 steps, 50 history frames, resnet152 = 1292
100 steps, 50 history frames, resnet152 = 1575
1/14th epoch on validate, 50 history frames, resnet152 = 175
import matplotlib.lines as mlines
import matplotlib.patches as patches
model.eval()
torch.set_grad_enabled(False)
# build a dict to retrieve future trajectories from GT
gt_rows = {}
for row in read_gt_csv(eval_gt_path):
    gt_rows[row["track_id"] + row["timestamp"]] = row["coord"]
eval_ego_dataset = EgoDataset(cfg, eval_dataset.dataset, rasterizer)
# for frame_number in range(99, len(eval_zarr.frames), 100):  # start from last frame of scene_0 and increase by 100
for frame_number in range(99, 10000, 100):  # start from last frame of scene_0 and increase by 100
    agent_indices = eval_dataset.get_frame_indices(frame_number)
    if not len(agent_indices):
        continue

    # get AV point-of-view frame
    data_ego = eval_ego_dataset[frame_number]
    im_ego = rasterizer.to_rgb(data_ego["image"].transpose(1, 2, 0))
    center = np.asarray(cfg["raster_params"]["ego_center"]) * cfg["raster_params"]["raster_size"]

    predicted_positions = []
    target_positions = []

    for v_index in agent_indices:
        data_agent = eval_dataset[v_index]

        out_net = model(torch.from_numpy(data_agent["image"]).unsqueeze(0).to(device))
        out_pos = out_net[0].reshape(-1, 2).detach().cpu().numpy()
        # store absolute world coordinates
        predicted_positions.append(transform_points(out_pos, data_agent["world_from_agent"]))

        # retrieve target positions from the GT and store as absolute coordinates
        track_id, timestamp = data_agent["track_id"], data_agent["timestamp"]
        target_positions.append(gt_rows[str(track_id) + str(timestamp)] + data_agent["centroid"][:2])

    # convert coordinates to AV point-of-view so we can draw them
    predicted_positions = transform_points(np.concatenate(predicted_positions), data_ego["raster_from_world"])
    target_positions = transform_points(np.concatenate(target_positions), data_ego["raster_from_world"])

    # draw_trajectory expects 0-255 RGB tuples (these shadow the l5kit defaults imported above)
    PREDICTED_POINTS_COLOR = (0, 255, 255)
    TARGET_POINTS_COLOR = (255, 0, 255)
    draw_trajectory(im_ego, target_positions, TARGET_POINTS_COLOR)
    draw_trajectory(im_ego, predicted_positions, PREDICTED_POINTS_COLOR)

    # matplotlib legend handles want 0-1 RGB instead
    TARGET_POINTS_COLOR = (255/255, 0/255, 255/255)
    PREDICTED_POINTS_COLOR = (255/255, 165/255, 0/255)
    target_line = mlines.Line2D([], [], color=TARGET_POINTS_COLOR, markersize=15, label='Target Trajectory')
    predicted_line = mlines.Line2D([], [], color="aqua", markersize=15, label='Predicted Trajectory')
    agent_vehicle_patch = patches.Rectangle((0, 0), 1, 0.5, facecolor='blue', edgecolor='blue', label='Agent Vehicles')

    plt.legend(handles=[target_line, predicted_line, agent_vehicle_patch], loc='upper right')
    plt.imshow(im_ego)
    plt.show()