Skip to content

Commit ae5507c

Browse files
authored
[pose-detection] Add support for multi-pose MoveNet models (from URL only for now) (#770)
* Add support for multi-pose MoveNet models (from URL only for now) * Fix lint errors * Use box from multi-pose model. Address reviewers' comments. * Address reviewer's comments. * Address reviewer's comments * Return Pose arrays from pose estimation calls * Update comments
1 parent be260b2 commit ae5507c

File tree

7 files changed

+471
-280
lines changed

7 files changed

+471
-280
lines changed

pose-detection/src/index.ts

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,11 +38,12 @@ const calculators = {keypointsToNormalizedKeypoints};
3838
export {calculators};
3939

4040
// MoveNet model types.
41-
import {SINGLEPOSE_LIGHTNING, SINGLEPOSE_THUNDER} from './movenet/constants';
41+
import {SINGLEPOSE_LIGHTNING, SINGLEPOSE_THUNDER, MULTIPOSE} from './movenet/constants';
4242
const movenet = {
4343
modelType: {
4444
'SINGLEPOSE_LIGHTNING': SINGLEPOSE_LIGHTNING,
45-
'SINGLEPOSE_THUNDER': SINGLEPOSE_THUNDER
45+
'SINGLEPOSE_THUNDER': SINGLEPOSE_THUNDER,
46+
'MULTIPOSE': MULTIPOSE
4647
}
4748
};
4849
export {movenet};

pose-detection/src/movenet/constants.ts

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,10 @@ import {MoveNetEstimationConfig, MoveNetModelConfig} from './types';
1919

2020
export const SINGLEPOSE_LIGHTNING = 'SinglePose.Lightning';
2121
export const SINGLEPOSE_THUNDER = 'SinglePose.Thunder';
22+
export const MULTIPOSE = 'MultiPose';
2223

23-
export const VALID_MODELS = [SINGLEPOSE_LIGHTNING, SINGLEPOSE_THUNDER];
24+
export const VALID_MODELS =
25+
[SINGLEPOSE_LIGHTNING, SINGLEPOSE_THUNDER, MULTIPOSE];
2426

2527
export const MOVENET_SINGLEPOSE_LIGHTNING_URL =
2628
'https://tfhub.dev/google/tfjs-model/movenet/singlepose/lightning/4';
@@ -29,16 +31,15 @@ export const MOVENET_SINGLEPOSE_THUNDER_URL =
2931

3032
export const MOVENET_SINGLEPOSE_LIGHTNING_RESOLUTION = 192;
3133
export const MOVENET_SINGLEPOSE_THUNDER_RESOLUTION = 256;
34+
export const MOVENET_MULTIPOSE_RESOLUTION = 320;
3235

3336
// The default configuration for loading MoveNet.
3437
export const MOVENET_CONFIG: MoveNetModelConfig = {
3538
modelType: SINGLEPOSE_LIGHTNING,
3639
enableSmoothing: true
3740
};
3841

39-
export const MOVENET_SINGLE_POSE_ESTIMATION_CONFIG: MoveNetEstimationConfig = {
40-
maxPoses: 1
41-
};
42+
export const MOVENET_ESTIMATION_CONFIG: MoveNetEstimationConfig = {};
4243

4344
export const KEYPOINT_FILTER_CONFIG = {
4445
frequency: 30,
@@ -50,3 +51,11 @@ export const KEYPOINT_FILTER_CONFIG = {
5051
};
5152
export const CROP_FILTER_ALPHA = 0.9;
5253
export const MIN_CROP_KEYPOINT_SCORE = 0.2;
54+
export const MIN_POSE_SCORE = 0.2;
55+
56+
export const NUM_KEYPOINTS = 17;
57+
export const NUM_KEYPOINT_VALUES = 3; // [y, x, score]
58+
export const MULTIPOSE_BOX_SIZE = 5; // [ymin, xmin, ymax, xmax, score]
59+
export const MULTIPOSE_BOX_SCORE_IDX = NUM_KEYPOINTS * NUM_KEYPOINT_VALUES + 4;
60+
export const MULTIPOSE_INSTANCE_SIZE =
61+
NUM_KEYPOINTS * NUM_KEYPOINT_VALUES + MULTIPOSE_BOX_SIZE;
Lines changed: 229 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,229 @@
1+
/**
2+
* @license
3+
* Copyright 2021 Google LLC. All Rights Reserved.
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* https://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
* =============================================================================
16+
*/
17+
18+
import {ImageSize} from '../calculators/interfaces/common_interfaces';
19+
import {BoundingBox} from '../calculators/interfaces/shape_interfaces';
20+
import {COCO_KEYPOINTS} from '../constants';
21+
import {Keypoint} from '../types';
22+
23+
import {MIN_CROP_KEYPOINT_SCORE} from './constants';
24+
25+
/**
26+
* Determines whether the torso of a person is visible.
* The torso counts as visible when at least one hip keypoint AND at least one
* shoulder keypoint have a score strictly greater than
* `MIN_CROP_KEYPOINT_SCORE`.
27+
*
28+
* @param keypoints An array of `Keypoint`s associated with a person.
29+
* @param keypointIndexByName A map from keypoint name to index in the keypoints
30+
* array.
* It is indexed with 'left_hip', 'right_hip', 'left_shoulder' and
* 'right_shoulder' without any guard, so all four entries must resolve to
* valid positions in `keypoints`.
31+
* @return A boolean indicating whether the torso is visible.
32+
*/
33+
export function torsoVisible(
34+
keypoints: Keypoint[],
35+
keypointIndexByName: {[index: string]: number}): boolean {
36+
return (
37+
(keypoints[keypointIndexByName['left_hip']].score >
38+
MIN_CROP_KEYPOINT_SCORE ||
39+
keypoints[keypointIndexByName['right_hip']].score >
40+
MIN_CROP_KEYPOINT_SCORE) &&
41+
(keypoints[keypointIndexByName['left_shoulder']].score >
42+
MIN_CROP_KEYPOINT_SCORE ||
43+
keypoints[keypointIndexByName['right_shoulder']].score >
44+
MIN_CROP_KEYPOINT_SCORE));
45+
}
46+
47+
/**
48+
* Calculates the maximum distance from each keypoint to the center location.
49+
* The function returns the maximum distances from the two sets of keypoints:
50+
* full 17 keypoints and 4 torso keypoints. The returned information will be
51+
* used to determine the crop size. See determineCropRegion for more detail.
52+
*
53+
* @param keypoints An array of `Keypoint`s associated with a person.
54+
* @param keypointIndexByName A map from keypoint name to index in the keypoints
55+
* array.
56+
* @param targetKeypoints Maps from joint names to coordinates.
57+
* @param centerY The Y coordinate of the center of the person.
58+
* @param centerX The X coordinate of the center of the person.
59+
* @return An array containing information about the torso and body range in the
60+
* image: [maxTorsoYrange, maxTorsoXrange, maxBodyYrange, maxBodyXrange].
61+
*/
62+
function determineTorsoAndBodyRange(
63+
keypoints: Keypoint[], keypointIndexByName: {[index: string]: number},
64+
targetKeypoints: {[index: string]: number[]}, centerY: number,
65+
centerX: number): number[] {
66+
const torsoJoints =
67+
['left_shoulder', 'right_shoulder', 'left_hip', 'right_hip'];
68+
let maxTorsoYrange = 0.0;
69+
let maxTorsoXrange = 0.0;
70+
for (let i = 0; i < torsoJoints.length; i++) {
71+
const distY = Math.abs(centerY - targetKeypoints[torsoJoints[i]][0]);
72+
const distX = Math.abs(centerX - targetKeypoints[torsoJoints[i]][1]);
73+
if (distY > maxTorsoYrange) {
74+
maxTorsoYrange = distY;
75+
}
76+
if (distX > maxTorsoXrange) {
77+
maxTorsoXrange = distX;
78+
}
79+
}
80+
let maxBodyYrange = 0.0;
81+
let maxBodyXrange = 0.0;
82+
for (const key of Object.keys(targetKeypoints)) {
83+
if (keypoints[keypointIndexByName[key]].score < MIN_CROP_KEYPOINT_SCORE) {
84+
continue;
85+
}
86+
const distY = Math.abs(centerY - targetKeypoints[key][0]);
87+
const distX = Math.abs(centerX - targetKeypoints[key][1]);
88+
if (distY > maxBodyYrange) {
89+
maxBodyYrange = distY;
90+
}
91+
if (distX > maxBodyXrange) {
92+
maxBodyXrange = distX;
93+
}
94+
}
95+
96+
return [maxTorsoYrange, maxTorsoXrange, maxBodyYrange, maxBodyXrange];
97+
}
98+
99+
/**
100+
* Determines the region to crop the image for the model to run inference on.
101+
* The algorithm uses the detected joints from the previous frame to estimate
102+
* the square region that encloses the full body of the target person and
103+
* centers at the midpoint of two hip joints. The crop size is determined by
104+
* the distances between each joint and the center point.
105+
* When the model is not confident with the four torso joint predictions, the
106+
* function returns a default crop which is the full image padded to square.
107+
*
108+
* @param currentCropRegion The crop region that was used for the current frame.
109+
* Can be null for the very first frame that is handled by the detector.
110+
* @param keypoints An array of `Keypoint`s associated with a person.
111+
* @param keypointIndexByName A map from keypoint name to index in the keypoints
112+
* array.
113+
* @param imageSize The size of the image that is being processed.
114+
* @return A `BoundingBox` that contains the new crop region.
115+
*/
116+
export function determineNextCropRegion(
117+
currentCropRegion: BoundingBox, keypoints: Keypoint[],
118+
keypointIndexByName: {[index: string]: number},
119+
imageSize: ImageSize): BoundingBox {
120+
const targetKeypoints: {[index: string]: number[]} = {};
121+
122+
for (const key of COCO_KEYPOINTS) {
123+
targetKeypoints[key] = [
124+
keypoints[keypointIndexByName[key]].y * imageSize.height,
125+
keypoints[keypointIndexByName[key]].x * imageSize.width
126+
];
127+
}
128+
129+
if (torsoVisible(keypoints, keypointIndexByName)) {
130+
const centerY =
131+
(targetKeypoints['left_hip'][0] + targetKeypoints['right_hip'][0]) / 2;
132+
const centerX =
133+
(targetKeypoints['left_hip'][1] + targetKeypoints['right_hip'][1]) / 2;
134+
135+
const [maxTorsoYrange, maxTorsoXrange, maxBodyYrange, maxBodyXrange] =
136+
determineTorsoAndBodyRange(
137+
keypoints, keypointIndexByName, targetKeypoints, centerY, centerX);
138+
139+
let cropLengthHalf = Math.max(
140+
maxTorsoXrange * 1.9, maxTorsoYrange * 1.9, maxBodyYrange * 1.2,
141+
maxBodyXrange * 1.2);
142+
143+
cropLengthHalf = Math.min(
144+
cropLengthHalf,
145+
Math.max(
146+
centerX, imageSize.width - centerX, centerY,
147+
imageSize.height - centerY));
148+
149+
const cropCorner = [centerY - cropLengthHalf, centerX - cropLengthHalf];
150+
151+
if (cropLengthHalf > Math.max(imageSize.width, imageSize.height) / 2) {
152+
return initCropRegion(currentCropRegion == null, imageSize);
153+
} else {
154+
const cropLength = cropLengthHalf * 2;
155+
return {
156+
yMin: cropCorner[0] / imageSize.height,
157+
xMin: cropCorner[1] / imageSize.width,
158+
yMax: (cropCorner[0] + cropLength) / imageSize.height,
159+
xMax: (cropCorner[1] + cropLength) / imageSize.width,
160+
height: (cropCorner[0] + cropLength) / imageSize.height -
161+
cropCorner[0] / imageSize.height,
162+
width: (cropCorner[1] + cropLength) / imageSize.width -
163+
cropCorner[1] / imageSize.width
164+
};
165+
}
166+
} else {
167+
return initCropRegion(currentCropRegion == null, imageSize);
168+
}
169+
}
170+
171+
/**
172+
* Provides initial crop region.
173+
*
174+
* The function provides the initial crop region when the algorithm cannot
175+
* reliably determine the crop region from the previous frame. There are two
176+
* scenarios:
177+
* 1) The very first frame: the function returns the best guess by cropping
178+
* a square in the middle of the image.
179+
* 2) Not enough reliable keypoints detected from the previous frame: the
180+
* function pads the full image from both sides to make it a square
181+
* image.
182+
*
183+
* @param firstFrame A boolean indicating whether we are initializing a crop
184+
* region for the very first frame.
185+
* @param imageSize The size of the image that is being processed.
186+
* @return A `BoundingBox` that contains the initial crop region.
187+
*/
188+
export function initCropRegion(
189+
firstFrame: boolean, imageSize: ImageSize): BoundingBox {
190+
let boxHeight: number, boxWidth: number, yMin: number, xMin: number;
191+
if (firstFrame) {
192+
// If it is the first frame, perform a best guess by making the square
193+
// crop at the image center to better utilize the image pixels and
194+
// create higher chance to enter the cropping loop.
195+
if (imageSize.width > imageSize.height) {
196+
boxHeight = 1.0;
197+
boxWidth = imageSize.height / imageSize.width;
198+
yMin = 0.0;
199+
xMin = (imageSize.width / 2 - imageSize.height / 2) / imageSize.width;
200+
} else {
201+
boxHeight = imageSize.width / imageSize.height;
202+
boxWidth = 1.0;
203+
yMin = (imageSize.height / 2 - imageSize.width / 2) / imageSize.height;
204+
xMin = 0.0;
205+
}
206+
} else {
207+
// No cropRegion was available from a previous estimatePoses() call, so
208+
// run the model on the full image with padding on both sides.
209+
if (imageSize.width > imageSize.height) {
210+
boxHeight = imageSize.width / imageSize.height;
211+
boxWidth = 1.0;
212+
yMin = (imageSize.height / 2 - imageSize.width / 2) / imageSize.height;
213+
xMin = 0.0;
214+
} else {
215+
boxHeight = 1.0;
216+
boxWidth = imageSize.height / imageSize.width;
217+
yMin = 0.0;
218+
xMin = (imageSize.width / 2 - imageSize.height / 2) / imageSize.width;
219+
}
220+
}
221+
return {
222+
yMin,
223+
xMin,
224+
yMax: yMin + boxHeight,
225+
xMax: xMin + boxWidth,
226+
height: boxHeight,
227+
width: boxWidth
228+
};
229+
}

0 commit comments

Comments
 (0)