Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 8 additions & 7 deletions configs/pose3d/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,12 @@

PaddleDetection 中提供了两种3D Pose算法(稀疏关键点),分别是适用于服务器端的大模型Metro3D和移动端的TinyPose3D。其中Metro3D基于[End-to-End Human Pose and Mesh Reconstruction with Transformers](https://arxiv.org/abs/2012.09760)进行了稀疏化改造,TinyPose3D是在TinyPose基础上修改输出3D关键点。

## 模型推荐(待补充)
## 模型推荐

|模型|适用场景|human3.6m精度|模型下载|
|:--:|:--:|:--:|:--:|
|Metro3D|服务器端|-|-|
|TinyPose3D|移动端|-|-|
|模型|适用场景|human3.6m精度(14关键点)|human3.6m精度(17关键点)|模型下载|
|:--:|:--:|:--:|:--:|:--:|
|Metro3D|服务器端|56.014|46.619|[metro3d_24kpts.pdparams](https://bj.bcebos.com/v1/paddledet/models/pose3d/metro3d_24kpts.pdparams)|
|TinyPose3D|移动端|86.381|71.223|[tinypose3d_human36M.pdparams](https://bj.bcebos.com/v1/paddledet/models/pose3d/tinypose3d_human36M.pdparams)|

注:
1. 训练数据基于 [MeshTransformer](https://github.com/microsoft/MeshTransformer) 中的训练数据。
Expand Down Expand Up @@ -137,13 +137,14 @@ CUDA_VISIBLE_DEVICES=0 python3 tools/infer.py -c configs/pose3d/metro3d_24kpts.y

我们的训练数据中包含大量自动生成的低精度数据。用户可以在使用该数据训练得到的模型基础上,标注自己的高精度目标动作数据进行finetune,即可得到相对稳定、效果较好的模型。

我们在医疗康复高精度数据上的训练效果展示如下
我们在医疗康复高精度数据上的训练效果展示如下 [高清视频](https://user-images.githubusercontent.com/31800336/218949226-22e6ab25-facb-4cc6-8eca-38d4bfd973e5.mp4)

<div align="center">
<img src="https://user-images.githubusercontent.com/31800336/218949226-22e6ab25-facb-4cc6-8eca-38d4bfd973e5.mp4" width='600'/>
<img src="https://user-images.githubusercontent.com/31800336/221747019-ceacfd64-e218-476b-a369-c6dc259816b2.gif" width='600'/>
</div>



## 引用

```
Expand Down
17 changes: 8 additions & 9 deletions configs/pose3d/tinypose3d_human36M.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,12 @@ train_width: &train_width 128
trainsize: &trainsize [*train_width, *train_height]

#####model
architecture: TinyPose3DHRNet
architecture: TinyPose3DHRHeatmapNet
pretrain_weights: https://bj.bcebos.com/v1/paddledet/models/keypoint/tinypose_128x96.pdparams

TinyPose3DHRNet:
TinyPose3DHRHeatmapNet:
backbone: LiteHRNet
post_process: HR3DNetPostProcess
fc_channel: 1024
num_joints: *num_joints
width: &width 40
loss: Pose3DLoss
Expand Down Expand Up @@ -56,17 +55,17 @@ OptimizerBuilder:
#####data
TrainDataset:
!Pose3DDataset
dataset_dir: Human3.6M
image_dirs: ["Images"]
anno_list: ['Human3.6m_train.json']
dataset_dir: dataset/traindata/
image_dirs: ["human3.6m"]
anno_list: ['pose3d/Human3.6m_train.json']
num_joints: *num_joints
test_mode: False

EvalDataset:
!Pose3DDataset
dataset_dir: Human3.6M
image_dirs: ["Images"]
anno_list: ['Human3.6m_valid.json']
dataset_dir: dataset/traindata/
image_dirs: ["human3.6m"]
anno_list: ['pose3d/Human3.6m_valid.json']
num_joints: *num_joints
test_mode: True

Expand Down
6 changes: 2 additions & 4 deletions ppdet/data/source/pose3d_cmb.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
this code is based on https://github.com/open-mmlab/mmpose
"""

import os
import cv2
import numpy as np
Expand Down Expand Up @@ -80,7 +78,7 @@ def get_mask(self, mvm_percent=0.3):
mjm_mask[indices, :] = 0.0
# return mjm_mask

num_joints = 1
num_joints = 10
mvm_mask = np.ones((num_joints, 1)).astype(np.float)
if self.test_mode == False:
num_vertices = num_joints
Expand Down
5 changes: 0 additions & 5 deletions ppdet/metrics/pose3d_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,11 +137,6 @@ def all_gather(data):


class Pose3DEval(object):
"""refer to
https://github.com/leoxiaobin/deep-high-resolution-net.pytorch
Copyright (c) Microsoft, under the MIT License.
"""

def __init__(self, output_eval, save_prediction_only=False):
super(Pose3DEval, self).__init__()
self.output_eval = output_eval
Expand Down
47 changes: 25 additions & 22 deletions ppdet/modeling/architectures/keypoint_hrnet.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def __init__(self,
use_dark=True):
"""
HRNet network, see https://arxiv.org/abs/1902.09212

Args:
backbone (nn.Layer): backbone instance
post_process (object): `HRNetPostProcess` instance
Expand Down Expand Up @@ -132,10 +132,10 @@ def __init__(self, use_dark=True):

def get_max_preds(self, heatmaps):
'''get predictions from score maps

Args:
heatmaps: numpy.ndarray([batch_size, num_joints, height, width])

Returns:
preds: numpy.ndarray([batch_size, num_joints, 2]), keypoints coords
maxvals: numpy.ndarray([batch_size, num_joints, 2]), the maximum confidence of the keypoints
Expand Down Expand Up @@ -220,12 +220,12 @@ def dark_postprocess(self, hm, coords, kernelsize):
def get_final_preds(self, heatmaps, center, scale, kernelsize=3):
"""the highest heatvalue location with a quarter offset in the
direction from the highest response to the second highest response.

Args:
heatmaps (numpy.ndarray): The predicted heatmaps
center (numpy.ndarray): The boxes center
scale (numpy.ndarray): The scale factor

Returns:
preds: numpy.ndarray([batch_size, num_joints, 2]), keypoints coords
maxvals: numpy.ndarray([batch_size, num_joints, 1]), the maximum confidence of the keypoints
Expand Down Expand Up @@ -341,10 +341,7 @@ def __init__(
self.deploy = False
self.num_joints = num_joints

self.final_conv = L.Conv2d(width, num_joints, 1, 1, 0, bias=True)
# for heatmap output
self.final_conv_new = L.Conv2d(
width, num_joints * 32, 1, 1, 0, bias=True)
self.final_conv = L.Conv2d(width, num_joints * 32, 1, 1, 0, bias=True)

@classmethod
def from_config(cls, cfg, *args, **kwargs):
Expand All @@ -356,20 +353,19 @@ def from_config(cls, cfg, *args, **kwargs):
def _forward(self):
feats = self.backbone(self.inputs) # feats:[[batch_size, 40, 32, 24]]

hrnet_outputs = self.final_conv_new(feats[0])
hrnet_outputs = self.final_conv(feats[0])
res = soft_argmax(hrnet_outputs, self.num_joints)

if self.training:
return self.loss(res, self.inputs)
else: # export model need
return res
return res

def get_loss(self):
return self._forward()
pose3d = self._forward()
loss = self.loss(pose3d, None, self.inputs)
outputs = {'loss': loss}
return outputs

def get_pred(self):
res_lst = self._forward()
outputs = {'keypoint': res_lst}
outputs = {'pose3d': res_lst}
return outputs

def flip_back(self, output_flipped, matched_parts):
Expand Down Expand Up @@ -427,16 +423,23 @@ def from_config(cls, cfg, *args, **kwargs):
return {'backbone': backbone, }

def _forward(self):
feats = self.backbone(self.inputs) # feats:[[batch_size, 40, 32, 24]]
'''
self.inputs is a dict
'''
feats = self.backbone(
self.inputs) # feats:[[batch_size, 40, width/4, height/4]]

hrnet_outputs = self.final_conv(
feats[0]) # hrnet_outputs: [batch_size, num_joints*32,32,32]

hrnet_outputs = self.final_conv(feats[0])
flatten_res = self.flatten(
hrnet_outputs) # [batch_size, 24, (height/4)*(width/4)]
hrnet_outputs) # [batch_size,num_joints*32,32*32]

res = self.fc1(flatten_res)
res = self.act1(res)
res = self.fc2(res)
res = self.act2(res)
res = self.fc3(res) # [batch_size, 24, 3]
res = self.fc3(res)

if self.training:
return self.loss(res, self.inputs)
Expand All @@ -448,7 +451,7 @@ def get_loss(self):

def get_pred(self):
res_lst = self._forward()
outputs = {'keypoint': res_lst}
outputs = {'pose3d': res_lst}
return outputs

def flip_back(self, output_flipped, matched_parts):
Expand Down
4 changes: 2 additions & 2 deletions ppdet/modeling/architectures/pose3d_metro.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def __init__(
trans_encoder='',
loss='Pose3DLoss', ):
"""
METRO network, see https://arxiv.org/abs/
Modified from METRO network, see https://arxiv.org/abs/2012.09760

Args:
backbone (nn.Layer): backbone instance
Expand All @@ -65,7 +65,7 @@ def __init__(
self.deploy = False

self.trans_encoder = trans_encoder
self.conv_learn_tokens = paddle.nn.Conv1D(49, num_joints + 1, 1)
self.conv_learn_tokens = paddle.nn.Conv1D(49, num_joints + 10, 1)
self.cam_param_fc = paddle.nn.Linear(3, 2)

@classmethod
Expand Down