Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 45 additions & 0 deletions configs/dino/_base_/dino_focalnet.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# Base config: DETR architecture with a FocalNet backbone, a DINOTransformer
# and a DINOHead. Each top-level section below that is named after a component
# holds the arguments for that component.
architecture: DETR
# NOTE: per the original author's note, pretrain_weights is rewritten by
# FocalNet.pretrained (see ppdet/modeling/backbones/focalnet.py). Both entries
# point at the same checkpoint; keep the two URLs in sync.
pretrain_weights: https://bj.bcebos.com/v1/paddledet/models/pretrained/focalnet_large_lrf_384_fl4_pretrained.pdparams
hidden_dim: 256
use_focal_loss: True

# Wiring of the detector: which component fills each slot of the architecture.
DETR:
  backbone: FocalNet
  transformer: DINOTransformer
  detr_head: DINOHead
  post_process: DETRBBoxPostProcess

FocalNet:
  arch: 'focalnet_L_384_22k_fl4'
  out_indices: [1, 2, 3]
  pretrained: https://bj.bcebos.com/v1/paddledet/models/pretrained/focalnet_large_lrf_384_fl4_pretrained.pdparams

DINOTransformer:
  num_queries: 900
  position_embed_type: sine
  # out_indices above selects three backbone stages while num_levels is 4;
  # presumably the transformer derives the extra level itself - TODO confirm.
  num_levels: 4
  nhead: 8
  num_encoder_layers: 6
  num_decoder_layers: 6
  dim_feedforward: 2048
  dropout: 0.0
  activation: relu
  pe_temperature: 20
  pe_offset: 0.0
  num_denoising: 100
  label_noise_ratio: 0.5
  box_noise_scale: 1.0
  learnt_init_query: True

DINOHead:
  loss:
    name: DINOLoss
    loss_coeff: {class: 1, bbox: 5, giou: 2}
    aux_loss: True
    # The matcher is configured as part of the loss, with its own coefficients.
    matcher:
      name: HungarianMatcher
      matcher_coeff: {class: 2, bbox: 5, giou: 2}

DETRBBoxPostProcess:
  num_top_queries: 300
2 changes: 1 addition & 1 deletion ppdet/modeling/architectures/detr.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ def _forward(self):
body_feats = self.backbone(self.inputs)

# Transformer
pad_mask = self.inputs['pad_mask'] if self.training else None
pad_mask = self.inputs.get('pad_mask', None)
out_transformer = self.transformer(body_feats, pad_mask, self.inputs)

# DETR Head
Expand Down
3 changes: 2 additions & 1 deletion ppdet/modeling/backbones/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
from . import vision_transformer
from . import mobileone
from . import trans_encoder
from . import focalnet

from .vgg import *
from .resnet import *
Expand All @@ -57,6 +58,6 @@
from .csp_darknet import *
from .convnext import *
from .vision_transformer import *
from .vision_transformer import *
from .mobileone import *
from .trans_encoder import *
from .focalnet import *
Loading