Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/main' into add-export-task
Browse files Browse the repository at this point in the history
# Conflicts:
#	yolo/model/yolo.py
  • Loading branch information
ramonhollands committed Feb 20, 2025
2 parents a5cbf06 + 1d28355 commit bab9d4d
Show file tree
Hide file tree
Showing 7 changed files with 177 additions and 16 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ runs
*/data

# Datasets and model checkpoints
*.ckpt
*.pth
*.pt
*.trt
Expand Down
8 changes: 7 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
>
> Use of this code is at your own risk and discretion. It is advisable to consult with the project owner before deploying or integrating into any critical systems. -->

Welcome to the official implementation of YOLOv7, YOLOv9, and YOLO-RD. This repository contains the complete codebase, pre-trained models, and detailed instructions for training and deploying YOLOv9.
Welcome to the official implementation of YOLOv7[^1], YOLOv9[^2], and YOLO-RD[^3]. This repository contains the complete codebase, pre-trained models, and detailed instructions for training and deploying YOLOv9.

## TL;DR

Expand Down Expand Up @@ -133,3 +133,9 @@ Contributions to the YOLO project are welcome! See [CONTRIBUTING](docs/CONTRIBUT
}
```

[^1]: [**YOLOv7**: Trainable Bag-of-Freebies Sets New State-of-the-Art for Real-Time Object Detectors](https://arxiv.org/abs/2207.02696)

[^2]: [**YOLOv9**: Learning What You Want to Learn Using Programmable Gradient Information](https://arxiv.org/abs/2402.13616)

[^3]: [**YOLO-RD**: Introducing Relevant and Compact Explicit Knowledge to YOLO by Retriever-Dictionary](https://arxiv.org/abs/2410.15346)
1 change: 1 addition & 0 deletions yolo/config/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,7 @@ class YOLOLayer(nn.Module):
tags: str
layer_type: str
usable: bool
external: Optional[dict]


IDX_TO_ID = [
Expand Down
133 changes: 133 additions & 0 deletions yolo/config/model/v9-t.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
name: v9-t

anchor:
reg_max: 16

model:
backbone:
- Conv:
args: {out_channels: 16, kernel_size: 3, stride: 2}
source: 0
- Conv:
args: {out_channels: 32, kernel_size: 3, stride: 2}
- ELAN:
args: {out_channels: 32, part_channels: 32}

- AConv:
args: {out_channels: 64}
- RepNCSPELAN:
args:
out_channels: 64
part_channels: 64
csp_args: {repeat_num: 3}
tags: B3

- AConv:
args: {out_channels: 96}
- RepNCSPELAN:
args:
out_channels: 96
part_channels: 96
csp_args: {repeat_num: 3}
tags: B4

- AConv:
args: {out_channels: 128}
- RepNCSPELAN:
args:
out_channels: 128
part_channels: 128
csp_args: {repeat_num: 3}
tags: B5

neck:
- SPPELAN:
args: {out_channels: 128}
tags: N3

- UpSample:
args: {scale_factor: 2, mode: nearest}
- Concat:
source: [-1, B4]
- RepNCSPELAN:
args:
out_channels: 96
part_channels: 96
csp_args: {repeat_num: 3}
tags: N4

head:
- UpSample:
args: {scale_factor: 2, mode: nearest}
- Concat:
source: [-1, B3]

- RepNCSPELAN:
args:
out_channels: 64
part_channels: 64
csp_args: {repeat_num: 3}
tags: P3
- AConv:
args: {out_channels: 48}
- Concat:
source: [-1, N4]

- RepNCSPELAN:
args:
out_channels: 96
part_channels: 96
csp_args: {repeat_num: 3}
tags: P4
- AConv:
args: {out_channels: 64}
- Concat:
source: [-1, N3]

- RepNCSPELAN:
args:
out_channels: 128
part_channels: 128
csp_args: {repeat_num: 3}
tags: P5

detection:
- MultiheadDetection:
source: [P3, P4, P5]
tags: Main
output: True

auxiliary:
- SPPELAN:
source: B5
args: {out_channels: 128}
tags: A5

- UpSample:
args: {scale_factor: 2, mode: nearest}
- Concat:
source: [-1, B4]

- RepNCSPELAN:
args:
out_channels: 96
part_channels: 96
csp_args: {repeat_num: 3}
tags: A4

- UpSample:
args: {scale_factor: 2, mode: nearest}
- Concat:
source: [-1, B3]

- RepNCSPELAN:
args:
out_channels: 64
part_channels: 64
csp_args: {repeat_num: 3}
tags: A3

- MultiheadDetection:
source: [A3, A4, A5]
tags: AUX
output: True
29 changes: 17 additions & 12 deletions yolo/model/yolo.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from collections import OrderedDict
from pathlib import Path
from typing import Dict, List, Union
from typing import Dict, List, Optional, Union

import torch
from omegaconf import ListConfig, OmegaConf
Expand Down Expand Up @@ -68,30 +68,34 @@ def build_model(self, model_arch: Dict[str, List[Dict[str, Dict[str, Dict]]]]):
setattr(layer, "out_c", out_channels)
layer_idx += 1

def forward(self, x):
y = {0: x}
def forward(self, x, external: Optional[Dict] = None, shortcut: Optional[str] = None):
y = {0: x, **(external or {})}
output = dict()

# Use a simple loop instead of enumerate()
# Needed for torch export compatibility
index = 1
for layer in self.model:
index = 1
for layer in self.model:
if isinstance(layer.source, list):
model_input = [y[idx] for idx in layer.source]
else:
model_input = y[layer.source]

x = layer(model_input)

external_input = {source_name: y[source_name] for source_name in layer.external}

x = layer(model_input, **external_input)
y[-1] = x

if layer.usable:
y[index] = x

if layer.output:
output[layer.tags] = x

if layer.tags == shortcut:
return output

index += 1

return output

def get_out_channels(self, layer_type: str, layer_args: dict, output_dim: list, source: Union[int, list]):
Expand Down Expand Up @@ -123,6 +127,7 @@ def create_layer(self, layer_type: str, source: Union[int, list], layer_info: Di
setattr(layer, "in_c", kwargs.get("in_channels", None))
setattr(layer, "output", layer_info.get("output", False))
setattr(layer, "tags", layer_info.get("tags", None))
setattr(layer, "external", layer_info.get("external", []))
setattr(layer, "usable", 0)
return layer
else:
Expand Down
4 changes: 2 additions & 2 deletions yolo/tools/solver.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,10 @@ def validation_step(self, batch, batch_idx):
batch_size, images, targets, rev_tensor, img_paths = batch
H, W = images.shape[2:]
predicts = self.post_process(self.ema(images), image_size=[W, H])
self.metric.update(
mAP = self.metric(
[to_metrics_format(predict) for predict in predicts], [to_metrics_format(target) for target in targets]
)
return predicts
return predicts, mAP

def on_validation_epoch_end(self):
epoch_metrics = self.metric.compute()
Expand Down
17 changes: 16 additions & 1 deletion yolo/utils/logging_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,20 @@ def on_train_batch_end(self, trainer, pl_module, outputs, batch: Any, batch_idx:
self.progress.update(self.train_progress_bar_id, description=batch_descript)
self.refresh()

@override
@rank_zero_only
def on_validation_batch_end(self, trainer, pl_module, outputs, batch, batch_idx) -> None:
if self.is_disabled:
return
if trainer.sanity_checking:
self._update(self.val_sanity_progress_bar_id, batch_idx + 1)
elif self.val_progress_bar_id is not None:
self._update(self.val_progress_bar_id, batch_idx + 1)
_, mAP = outputs
mAP_desc = f" mAP :{mAP['map']*100:6.2f} | mAP50 :{mAP['map_50']*100:6.2f} |"
self.progress.update(self.val_progress_bar_id, description=f"[green]Valid [white]|{mAP_desc}")
self.refresh()

@override
@rank_zero_only
def on_train_end(self, trainer: "Trainer", pl_module: "LightningModule") -> None:
Expand Down Expand Up @@ -212,8 +226,9 @@ def on_validation_batch_end(self, trainer: Trainer, pl_module, outputs, batch, b
if batch_idx != 0:
return
batch_size, images, targets, rev_tensor, img_paths = batch
predicts, _ = outputs
gt_boxes = targets[0] if targets.ndim == 3 else targets
pred_boxes = outputs[0] if isinstance(outputs, list) else outputs
pred_boxes = predicts[0] if isinstance(predicts, list) else predicts
images = [images[0]]
step = trainer.current_epoch
for logger in trainer.loggers:
Expand Down

0 comments on commit bab9d4d

Please sign in to comment.