In [1]:
import os
#os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"
from typing import Dict

from tempfile import gettempdir
import matplotlib.pyplot as plt
import numpy as np
import torch
from torch import nn, optim
from torch.utils.data import DataLoader
from torchvision.models.resnet import resnet152
from tqdm import tqdm

from l5kit.configs import load_config_data
from l5kit.data import LocalDataManager, ChunkedDataset
from l5kit.dataset import AgentDataset, EgoDataset
from l5kit.rasterization import build_rasterizer
from l5kit.evaluation import write_pred_csv, compute_metrics_csv, read_gt_csv, create_chopped_dataset
from l5kit.evaluation.chop_dataset import MIN_FUTURE_STEPS
from l5kit.evaluation.metrics import neg_multi_log_likelihood, time_displace
from l5kit.geometry import transform_points
from l5kit.visualization import PREDICTED_POINTS_COLOR, TARGET_POINTS_COLOR, draw_trajectory
from prettytable import PrettyTable
from pathlib import Path
c:\Users\peter\anaconda3\envs\l5kitenv\lib\site-packages\l5kit\dataset\select_agents.py:31: UserWarning: Windows detected. BLOSC_NOLOCK has not been set as it causes memory leaks on Windows.However, writing the mask with this config may be inconsistent.
  warnings.warn(
In [2]:
torch.cuda.is_available()
Out[2]:
True
In [3]:
# set env variable for data
os.environ["L5KIT_DATA_FOLDER"] = "H:\prediction-dataset"
dm = LocalDataManager(None)
# get config
cfg = load_config_data("./agent_motion_config.yaml")
print(cfg)
{'format_version': 4, 'model_params': {'model_architecture': 'resnet50', 'history_num_frames': 50, 'future_num_frames': 50, 'step_time': 0.1, 'render_ego_history': True}, 'raster_params': {'raster_size': [224, 224], 'pixel_size': [0.5, 0.5], 'ego_center': [0.25, 0.5], 'map_type': 'py_semantic', 'satellite_map_key': 'aerial_map/aerial_map.png', 'semantic_map_key': 'semantic_map/semantic_map.pb', 'dataset_meta_key': 'meta.json', 'filter_agents_threshold': 0.5, 'disable_traffic_light_faces': False, 'set_origin_to_bottom': True}, 'train_data_loader': {'key': 'scenes/sample.zarr', 'batch_size': 4, 'shuffle': True, 'num_workers': 0}, 'val_data_loader': {'key': 'scenes/validate.zarr', 'batch_size': 2, 'shuffle': False, 'num_workers': 0}, 'train_params': {'checkpoint_every_n_steps': 1000000, 'max_num_steps': 100000, 'eval_every_n_steps': 10000}}
In [4]:
def build_model(cfg: Dict) -> torch.nn.Module:
    # load a pre-trained conv net; note the architecture is hard-coded here, so
    # cfg["model_params"]["model_architecture"] ("resnet50" above) is not consulted.
    # `pretrained=True` is the legacy torchvision API and triggers the deprecation
    # warnings shown under In [8].
    model = resnet152(pretrained=True)

    # change input channels number to match the rasterizer's output
    num_history_channels = (cfg["model_params"]["history_num_frames"] + 1) * 2
    num_in_channels = 3 + num_history_channels
    model.conv1 = nn.Conv2d(
        num_in_channels,
        model.conv1.out_channels,
        kernel_size=model.conv1.kernel_size,
        stride=model.conv1.stride,
        padding=model.conv1.padding,
        bias=False,
    )
    # change output size to (X, Y) * number of future states
    num_targets = 2 * cfg["model_params"]["future_num_frames"]
    model.fc = nn.Linear(in_features=2048, out_features=num_targets)

    return model
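In [ ]:
# Sanity check (added sketch, not part of the original run): with
# history_num_frames = 50 the rasterizer emits (50 + 1) * 2 = 102 history
# channels plus 3 semantic-map channels, so conv1 must accept 105 channels
# and the head must emit 2 * 50 = 100 values (an (x, y) pair per future step).
_check = build_model(cfg)
assert _check(torch.zeros(1, 105, 224, 224)).shape == (1, 100)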
In [5]:
def forward(data, model, device, criterion):
    inputs = data["image"].to(device)
    target_availabilities = data["target_availabilities"].unsqueeze(-1).to(device)
    targets = data["target_positions"].to(device)
    # Forward pass
    outputs = model(inputs).reshape(targets.shape)
    loss = criterion(outputs, targets)
    # not all the output steps are valid, but we can filter them out from the loss using availabilities
    loss = loss * target_availabilities
    loss = loss.mean()
    return loss, outputs
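In [ ]:
# Illustration (added; hypothetical tensors): target_availabilities zeroes out
# loss terms for future steps with no ground truth, e.g. agents that leave the
# scene before the horizon ends. Note that .mean() still divides by all steps,
# valid or not, so samples with many missing steps contribute less loss.
_avail = torch.zeros(1, 50, 1)
_avail[:, :30] = 1.0  # only the first 30 of 50 future steps are observed
_loss = nn.MSELoss(reduction="none")(torch.zeros(1, 50, 2), torch.ones(1, 50, 2))
print((_loss * _avail).mean())  # 0.6 rather than 1.0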
In [6]:
# ===== INIT DATASET
train_cfg = cfg["train_data_loader"]
rasterizer = build_rasterizer(cfg, dm)
In [7]:
train_zarr = ChunkedDataset(dm.require(train_cfg["key"])).open()
train_dataset = AgentDataset(cfg, train_zarr, rasterizer)
train_dataloader = DataLoader(train_dataset, shuffle=train_cfg["shuffle"], batch_size=train_cfg["batch_size"], 
                             num_workers=train_cfg["num_workers"])
print(train_dataset)
+------------+------------+------------+---------------+-----------------+----------------------+----------------------+----------------------+---------------------+
| Num Scenes | Num Frames | Num Agents | Num TR lights | Total Time (hr) | Avg Frames per Scene | Avg Agents per Frame | Avg Scene Time (sec) | Avg Frame frequency |
+------------+------------+------------+---------------+-----------------+----------------------+----------------------+----------------------+---------------------+
|    100     |   24838    |  1893736   |     316008    |       0.69      |        248.38        |        76.24         |        24.83         |        10.00        |
+------------+------------+------------+---------------+-----------------+----------------------+----------------------+----------------------+---------------------+
In [8]:
# ==== INIT MODEL
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)
model = build_model(cfg).to(device)
optimizer = optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.MSELoss(reduction="none")
cuda:0
c:\Users\peter\anaconda3\envs\l5kitenv\lib\site-packages\torchvision\models\_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead.
  warnings.warn(
c:\Users\peter\anaconda3\envs\l5kitenv\lib\site-packages\torchvision\models\_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=ResNet152_Weights.IMAGENET1K_V1`. You can also use `weights=ResNet152_Weights.DEFAULT` to get the most up-to-date weights.
  warnings.warn(msg)
In [9]:
# ==== TRAIN LOOP
tr_it = iter(train_dataloader)
progress_bar = tqdm(range(cfg["train_params"]["max_num_steps"]))
losses_train = []
losses_avg = []
for _ in progress_bar:
    try:
        data = next(tr_it)
    except StopIteration:
        tr_it = iter(train_dataloader)
        data = next(tr_it)
    model.train()
    torch.set_grad_enabled(True)
    loss, _ = forward(data, model, device, criterion)

    # Backward pass
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    losses_train.append(loss.item())
    losses_avg.append(np.mean(losses_train))
    progress_bar.set_description(f"loss: {loss.item()} loss(avg): {np.mean(losses_train)}")
loss: 2.2113845348358154 loss(avg): 5.946497581898172:  25%|██▍       | 24607/100000 [3:05:37<9:28:43,  2.21it/s]     
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
Cell In[9], line 21
     18 loss.backward()
     19 optimizer.step()
---> 21 losses_train.append(loss.item())
     22 losses_avg.append(np.mean(losses_train))
     23 progress_bar.set_description(f"loss: {loss.item()} loss(avg): {np.mean(losses_train)}")

KeyboardInterrupt: 
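In [ ]:
# Checkpointing sketch (added; hypothetical helper, not part of the original run).
# The interrupt above lost ~3 h of progress because nothing was saved mid-run.
# Note cfg["train_params"]["checkpoint_every_n_steps"] is 1,000,000 here, larger
# than max_num_steps, so a smaller interval would be needed for this to fire.
def maybe_checkpoint(step: int, model: nn.Module, every: int,
                     path_tpl: str = "./agent_motion_step{}.pth") -> None:
    # save a state_dict snapshot so an interrupt loses at most `every` steps
    if (step + 1) % every == 0:
        torch.save(model.state_dict(), path_tpl.format(step + 1))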
In [ ]:
# num_epochs = 1
# training_losses = []
# for epoch in range(1, num_epochs+1):
#     epoch_loss = 0
#     for batch in train_dataloader:
#         model.train()
#         torch.set_grad_enabled(True)
#         loss, _ = forward(batch, model, device, criterion)

#         optimizer.zero_grad()
#         loss.backward()
#         optimizer.step()

#         epoch_loss += loss.item()
#     avg_loss = epoch_loss / len(train_dataloader)
#     training_losses.append(avg_loss)
#     if(epoch % 10 == 0 or epoch == 1):
#         print(f"Epoch {epoch}, Average Loss: {avg_loss}")




    
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
Cell In[10], line 5
      3 for epoch in range(1, num_epochs+1):
      4     epoch_loss = 0
----> 5     for batch in train_dataloader:
      6         model.train()
      7         torch.set_grad_enabled(True)

File c:\Users\peter\anaconda3\envs\l5kitenv\lib\site-packages\torch\utils\data\dataloader.py:628, in _BaseDataLoaderIter.__next__(self)
    625 if self._sampler_iter is None:
    626     # TODO(https://github.com/pytorch/pytorch/issues/76750)
    627     self._reset()  # type: ignore[call-arg]
--> 628 data = self._next_data()
    629 self._num_yielded += 1
    630 if self._dataset_kind == _DatasetKind.Iterable and \
    631         self._IterableDataset_len_called is not None and \
    632         self._num_yielded > self._IterableDataset_len_called:

File c:\Users\peter\anaconda3\envs\l5kitenv\lib\site-packages\torch\utils\data\dataloader.py:671, in _SingleProcessDataLoaderIter._next_data(self)
    669 def _next_data(self):
    670     index = self._next_index()  # may raise StopIteration
--> 671     data = self._dataset_fetcher.fetch(index)  # may raise StopIteration
    672     if self._pin_memory:
    673         data = _utils.pin_memory.pin_memory(data, self._pin_memory_device)

File c:\Users\peter\anaconda3\envs\l5kitenv\lib\site-packages\torch\utils\data\_utils\fetch.py:58, in _MapDatasetFetcher.fetch(self, possibly_batched_index)
     56         data = self.dataset.__getitems__(possibly_batched_index)
     57     else:
---> 58         data = [self.dataset[idx] for idx in possibly_batched_index]
     59 else:
     60     data = self.dataset[possibly_batched_index]

File c:\Users\peter\anaconda3\envs\l5kitenv\lib\site-packages\torch\utils\data\_utils\fetch.py:58, in <listcomp>(.0)
     56         data = self.dataset.__getitems__(possibly_batched_index)
     57     else:
---> 58         data = [self.dataset[idx] for idx in possibly_batched_index]
     59 else:
     60     data = self.dataset[possibly_batched_index]

File c:\Users\peter\anaconda3\envs\l5kitenv\lib\site-packages\l5kit\dataset\agent.py:122, in AgentDataset.__getitem__(self, index)
    120 else:
    121     state_index = frame_index - self.cumulative_sizes[scene_index - 1]
--> 122 return self.get_frame(scene_index, state_index, track_id=track_id)

File c:\Users\peter\anaconda3\envs\l5kitenv\lib\site-packages\l5kit\dataset\ego.py:191, in EgoDataset.get_frame(self, scene_index, state_index, track_id)
    190 def get_frame(self, scene_index: int, state_index: int, track_id: Optional[int] = None) -> dict:
--> 191     data = super().get_frame(scene_index, state_index, track_id=track_id)
    192     # TODO (@lberg): this should not be here but in the rasterizer
    193     data["image"] = data["image"].transpose(2, 0, 1)  # 0,1,C -> C,0,1

File c:\Users\peter\anaconda3\envs\l5kitenv\lib\site-packages\l5kit\dataset\ego.py:70, in BaseEgoDataset.get_frame(self, scene_index, state_index, track_id)
     67 if self.cfg["raster_params"]["disable_traffic_light_faces"]:
     68     tl_faces = np.empty(0, dtype=self.dataset.tl_faces.dtype)  # completely disable traffic light faces
---> 70 data = self.sample_function(state_index, frames, self.dataset.agents, tl_faces, track_id)
     72 # add information only, so that all data keys are always preserved
     73 data["scene_index"] = scene_index

File c:\Users\peter\anaconda3\envs\l5kitenv\lib\site-packages\l5kit\sampling\agent_sampling.py:242, in generate_agent_sample(state_index, frames, agents, tl_faces, selected_track_id, render_context, history_num_frames, future_num_frames, step_time, filter_agents_threshold, rasterizer, perturbation)
    239     agent_extent_m = agent["extent"]
    240     selected_agent = agent
--> 242 input_im = rasterizer.rasterize(history_frames, history_agents, history_tl_faces, selected_agent)
    244 world_from_agent = compute_agent_pose(agent_centroid_m, agent_yaw_rad)
    245 agent_from_world = np.linalg.inv(world_from_agent)

File c:\Users\peter\anaconda3\envs\l5kitenv\lib\site-packages\l5kit\rasterization\sem_box_rasterizer.py:42, in SemBoxRasterizer.rasterize(self, history_frames, history_agents, history_tl_faces, agent)
     35 def rasterize(
     36         self,
     37         history_frames: np.ndarray,
   (...)
     40         agent: Optional[np.ndarray] = None,
     41 ) -> np.ndarray:
---> 42     im_out_box = self.box_rast.rasterize(history_frames, history_agents, history_tl_faces, agent)
     43     im_out_sem = self.sem_rast.rasterize(history_frames, history_agents, history_tl_faces, agent)
     44     return np.concatenate([im_out_box, im_out_sem], -1)

File c:\Users\peter\anaconda3\envs\l5kitenv\lib\site-packages\l5kit\rasterization\box_rasterizer.py:156, in BoxRasterizer.rasterize(self, history_frames, history_agents, history_tl_faces, agent)
    153 # combine such that the image consists of [agent_t, agent_t-1, agent_t-2, ego_t, ego_t-1, ego_t-2]
    154 out_im = np.concatenate((agents_images, ego_images), -1)
--> 156 return out_im.astype(np.float32) / 255

KeyboardInterrupt: 
In [ ]:
#save model
torch.save(model.state_dict(), "./agent_motion_model1.pth")
#load model
#model.load_state_dict(torch.load("./agent_motion_model.pth"))
Out[ ]:
<All keys matched successfully>
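In [ ]:
# Reload sketch (added): map_location makes the checkpoint loadable on a machine
# without the GPU it was saved from; the state_dict keys must match build_model.
model.load_state_dict(torch.load("./agent_motion_model1.pth", map_location=device))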
In [ ]:
plt.plot(np.arange(len(losses_train)), losses_train, label="train loss")
plt.legend()
plt.show()
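In [ ]:
# Added sketch: losses_avg (collected during the training loop above) smooths
# the noisy per-batch curve and makes the overall trend easier to read.
plt.plot(np.arange(len(losses_train)), losses_train, alpha=0.3, label="train loss")
plt.plot(np.arange(len(losses_avg)), losses_avg, label="running mean")
plt.legend()
plt.show()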
In [ ]:
# ===== GENERATE AND LOAD CHOPPED DATASET
num_frames_to_chop = 100
eval_cfg = cfg["val_data_loader"]
eval_base_path = create_chopped_dataset(dm.require(eval_cfg["key"]), cfg["raster_params"]["filter_agents_threshold"], 
                              num_frames_to_chop, cfg["model_params"]["future_num_frames"], MIN_FUTURE_STEPS)
copying: 100%|██████████| 16220/16220 [10:52<00:00, 24.87it/s]
c:\Users\peter\anaconda3\envs\l5kitenv\lib\site-packages\l5kit\evaluation\extract_ground_truth.py:52: RuntimeWarning: you're running with a custom agents_mask
  dataset = AgentDataset(cfg=cfg, zarr_dataset=zarr_dataset, rasterizer=rasterizer, agents_mask=agents_mask)
extracting GT: 100%|██████████| 94694/94694 [07:48<00:00, 201.92it/s]
In [ ]:
eval_zarr_path = str(Path(eval_base_path) / Path(dm.require(eval_cfg["key"])).name)
eval_mask_path = str(Path(eval_base_path) / "mask.npz")
eval_gt_path = str(Path(eval_base_path) / "gt.csv")

eval_zarr = ChunkedDataset(eval_zarr_path).open()
eval_mask = np.load(eval_mask_path)["arr_0"]
# ===== INIT DATASET AND LOAD MASK
eval_dataset = AgentDataset(cfg, eval_zarr, rasterizer, agents_mask=eval_mask)
eval_dataloader = DataLoader(eval_dataset, shuffle=eval_cfg["shuffle"], batch_size=4, 
                             num_workers=eval_cfg["num_workers"])
print(eval_dataset)
C:\Users\peter\AppData\Local\Temp\ipykernel_32160\1453756949.py:8: RuntimeWarning: you're running with a custom agents_mask
  eval_dataset = AgentDataset(cfg, eval_zarr, rasterizer, agents_mask=eval_mask)
+------------+------------+------------+---------------+-----------------+----------------------+----------------------+----------------------+---------------------+
| Num Scenes | Num Frames | Num Agents | Num TR lights | Total Time (hr) | Avg Frames per Scene | Avg Agents per Frame | Avg Scene Time (sec) | Avg Frame frequency |
+------------+------------+------------+---------------+-----------------+----------------------+----------------------+----------------------+---------------------+
|   16220    |  1622000   | 125423254  |    11733321   |      45.06      |        100.00        |        77.33         |        10.00         |        10.00        |
+------------+------------+------------+---------------+-----------------+----------------------+----------------------+----------------------+---------------------+
In [ ]:
# ==== EVAL LOOP
model.eval()
torch.set_grad_enabled(False)

# store information for evaluation
future_coords_offsets_pd = []
timestamps = []
agent_ids = []

progress_bar = tqdm(eval_dataloader)
for data in progress_bar:
    _, outputs = forward(data, model, device, criterion)

    # convert agent-frame predictions into world-frame offsets from each agent's centroid
    agents_coords = outputs.cpu().numpy()
    world_from_agents = data["world_from_agent"].numpy()
    centroids = data["centroid"].numpy()
    coords_offset = transform_points(agents_coords, world_from_agents) - centroids[:, None, :2]

    future_coords_offsets_pd.append(coords_offset)
    timestamps.append(data["timestamp"].numpy().copy())
    agent_ids.append(data["track_id"].numpy().copy())
    
100%|██████████| 23674/23674 [2:25:50<00:00,  2.71it/s]  
In [ ]:
pred_path = f"{gettempdir()}/pred.csv"
In [ ]:
write_pred_csv(pred_path,
               timestamps=np.concatenate(timestamps),
               track_ids=np.concatenate(agent_ids),
               coords=np.concatenate(future_coords_offsets_pd),
              )
In [ ]:
metrics = compute_metrics_csv(eval_gt_path, pred_path, [neg_multi_log_likelihood, time_displace])
for metric_name, metric_mean in metrics.items():
    print(metric_name, metric_mean)
neg_multi_log_likelihood 175.45676550093557
time_displace [0.06773729 0.11469305 0.17395884 0.19997748 0.20411925 0.22879742
 0.26271531 0.29943946 0.33569181 0.37513012 0.42009098 0.46598532
 0.50938015 0.55164097 0.593019   0.63808795 0.67606026 0.72140343
 0.76299364 0.80569364 0.84873197 0.88992806 0.93171807 0.97412277
 1.01663126 1.05784709 1.1016372  1.1421511  1.18651499 1.22979849
 1.2737417  1.32533333 1.37266788 1.41942745 1.46950437 1.5177337
 1.56511116 1.61363376 1.66083676 1.71677312 1.76480592 1.81612993
 1.86786494 1.92937666 1.9854049  2.03929472 2.08680542 2.14062817
 2.19937759 2.25933752]

Run notes, neg_multi_log_likelihood by training configuration (lower is better):
- 1 epoch, 0 history frames: 7634
- 100 steps, 0 history frames: 4917
- 100 steps, 10 history frames: 3819
- 100 steps, 50 history frames: 933
- 100 steps, 50 history frames, resnet152: 1292
- 100 steps, 50 history frames, resnet152: 1575
- ~1/14 epoch on validate, 50 history frames, resnet152: 175

In [ ]:
import matplotlib.lines as mlines
import matplotlib.patches as patches
model.eval()
torch.set_grad_enabled(False)

# build a dict to retrieve future trajectories from GT
gt_rows = {}
for row in read_gt_csv(eval_gt_path):
    gt_rows[row["track_id"] + row["timestamp"]] = row["coord"]

eval_ego_dataset = EgoDataset(cfg, eval_dataset.dataset, rasterizer)

# for frame_number in range(99, len(eval_zarr.frames), 100):  # start from last frame of scene_0 and increase by 100
for frame_number in range(99, 10000, 100):  # start from last frame of scene_0 and increase by 100
    agent_indices = eval_dataset.get_frame_indices(frame_number) 
    if not len(agent_indices):
        continue

    # get AV point-of-view frame
    data_ego = eval_ego_dataset[frame_number]
    im_ego = rasterizer.to_rgb(data_ego["image"].transpose(1, 2, 0))
    center = np.asarray(cfg["raster_params"]["ego_center"]) * cfg["raster_params"]["raster_size"]
    
    predicted_positions = []
    target_positions = []

    for v_index in agent_indices:
        data_agent = eval_dataset[v_index]

        out_net = model(torch.from_numpy(data_agent["image"]).unsqueeze(0).to(device))
        out_pos = out_net[0].reshape(-1, 2).detach().cpu().numpy()
        # store absolute world coordinates
        predicted_positions.append(transform_points(out_pos, data_agent["world_from_agent"]))
        # retrieve target positions from the GT and store as absolute coordinates
        track_id, timestamp = data_agent["track_id"], data_agent["timestamp"]
        target_positions.append(gt_rows[str(track_id) + str(timestamp)] + data_agent["centroid"][:2])


    # convert coordinates to AV point-of-view so we can draw them
    predicted_positions = transform_points(np.concatenate(predicted_positions), data_ego["raster_from_world"])
    target_positions = transform_points(np.concatenate(target_positions), data_ego["raster_from_world"])
    # draw in 0-255 RGB, overriding the imported defaults: magenta targets, cyan predictions
    draw_trajectory(im_ego, target_positions, (255, 0, 255))
    draw_trajectory(im_ego, predicted_positions, (0, 255, 255))

    # legend colors use matplotlib's 0-1 floats; "aqua" matches the cyan predictions
    target_line = mlines.Line2D([], [], color=(1.0, 0.0, 1.0), markersize=15, label='Target Trajectory')
    predicted_line = mlines.Line2D([], [], color="aqua", markersize=15, label='Predicted Trajectory')
    agent_vehicle_patch = patches.Rectangle((0, 0), 1, 0.5, facecolor='blue', edgecolor='blue', label='Agent Vehicles')
    plt.legend(handles=[target_line, predicted_line, agent_vehicle_patch], loc='upper right')


    plt.imshow(im_ego)
    plt.show()