|
| 1 | +/** |
| 2 | + * @license |
| 3 | + * Copyright 2021 Google LLC. All Rights Reserved. |
| 4 | + * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | + * you may not use this file except in compliance with the License. |
| 6 | + * You may obtain a copy of the License at |
| 7 | + * |
| 8 | + * https://www.apache.org/licenses/LICENSE-2.0 |
| 9 | + * |
| 10 | + * Unless required by applicable law or agreed to in writing, software |
| 11 | + * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | + * See the License for the specific language governing permissions and |
| 14 | + * limitations under the License. |
| 15 | + * ============================================================================= |
| 16 | + */ |
| 17 | + |
| 18 | +import {ImageSize} from '../calculators/interfaces/common_interfaces'; |
| 19 | +import {BoundingBox} from '../calculators/interfaces/shape_interfaces'; |
| 20 | +import {COCO_KEYPOINTS} from '../constants'; |
| 21 | +import {Keypoint} from '../types'; |
| 22 | + |
| 23 | +import {MIN_CROP_KEYPOINT_SCORE} from './constants'; |
| 24 | + |
/**
 * Checks whether enough of a person's torso was detected.
 *
 * The torso counts as visible when at least one hip keypoint and at least one
 * shoulder keypoint have a score strictly greater than
 * `MIN_CROP_KEYPOINT_SCORE`.
 *
 * @param keypoints An array of `Keypoint`s associated with a person.
 * @param keypointIndexByName A map from keypoint name to index in the keypoints
 *     array.
 * @return `true` if a hip and a shoulder both pass the score threshold,
 *     `false` otherwise.
 */
export function torsoVisible(
    keypoints: Keypoint[],
    keypointIndexByName: {[index: string]: number}): boolean {
  // Looks up a joint by name and compares its score against the threshold.
  const isConfident = (jointName: string): boolean =>
      keypoints[keypointIndexByName[jointName]].score > MIN_CROP_KEYPOINT_SCORE;

  // Hips are examined first; without a confident hip the shoulders are never
  // inspected.
  if (!(isConfident('left_hip') || isConfident('right_hip'))) {
    return false;
  }
  return isConfident('left_shoulder') || isConfident('right_shoulder');
}
| 46 | + |
/**
 * Measures how far keypoints lie from a center point along each axis.
 *
 * Two sets of maxima are produced: one over the four torso joints (both
 * shoulders and both hips, regardless of score) and one over every entry of
 * `targetKeypoints` whose score is not below `MIN_CROP_KEYPOINT_SCORE`. The
 * caller uses these values to size the crop region.
 *
 * @param keypoints An array of `Keypoint`s associated with a person.
 * @param keypointIndexByName A map from keypoint name to index in the keypoints
 *     array.
 * @param targetKeypoints Maps joint names to `[y, x]` coordinates.
 * @param centerY The Y coordinate of the center of the person.
 * @param centerX The X coordinate of the center of the person.
 * @return The four maxima, in the order
 *     [maxTorsoYrange, maxTorsoXrange, maxBodyYrange, maxBodyXrange].
 */
function determineTorsoAndBodyRange(
    keypoints: Keypoint[], keypointIndexByName: {[index: string]: number},
    targetKeypoints: {[index: string]: number[]}, centerY: number,
    centerX: number): number[] {
  let torsoReachY = 0.0;
  let torsoReachX = 0.0;
  // Torso joints are always included, whatever their score.
  for (const joint of
           ['left_shoulder', 'right_shoulder', 'left_hip', 'right_hip']) {
    const [jointY, jointX] = targetKeypoints[joint];
    const offsetY = Math.abs(centerY - jointY);
    const offsetX = Math.abs(centerX - jointX);
    torsoReachY = offsetY > torsoReachY ? offsetY : torsoReachY;
    torsoReachX = offsetX > torsoReachX ? offsetX : torsoReachX;
  }

  let bodyReachY = 0.0;
  let bodyReachX = 0.0;
  for (const joint of Object.keys(targetKeypoints)) {
    // Keypoints scoring below the threshold do not contribute to the body
    // range.
    const tooUncertain =
        keypoints[keypointIndexByName[joint]].score < MIN_CROP_KEYPOINT_SCORE;
    if (tooUncertain) {
      continue;
    }
    const [jointY, jointX] = targetKeypoints[joint];
    const offsetY = Math.abs(centerY - jointY);
    const offsetX = Math.abs(centerX - jointX);
    bodyReachY = offsetY > bodyReachY ? offsetY : bodyReachY;
    bodyReachX = offsetX > bodyReachX ? offsetX : bodyReachX;
  }

  return [torsoReachY, torsoReachX, bodyReachY, bodyReachX];
}
| 98 | + |
/**
 * Determines the region to crop the image for the model to run inference on.
 *
 * The keypoints are first scaled to pixel coordinates. When the torso is
 * visible (see `torsoVisible`), a square crop is centered on the midpoint of
 * the two hip joints. Its half-length is the largest of the torso ranges
 * multiplied by 1.9 and the body ranges multiplied by 1.2, capped by the
 * largest distance from the center to an image edge.
 *
 * When the torso is not visible, or when the resulting half-length exceeds
 * half of the longer image side, the function falls back to
 * `initCropRegion`, telling it whether `currentCropRegion` was null.
 *
 * @param currentCropRegion The crop region that was used for the current frame.
 *     Can be null for the very first frame that is handled by the detector.
 * @param keypoints An array of `Keypoint`s associated with a person, with `x`
 *     and `y` multiplied here by the image width and height respectively.
 * @param keypointIndexByName A map from keypoint name to index in the keypoints
 *     array.
 * @param imageSize The size of the image that is being processed.
 * @return A `BoundingBox` that contains the new crop region, expressed as
 *     fractions of the image height and width.
 */
export function determineNextCropRegion(
    currentCropRegion: BoundingBox|null, keypoints: Keypoint[],
    keypointIndexByName: {[index: string]: number},
    imageSize: ImageSize): BoundingBox {
  // Multipliers applied to the torso and full-body ranges when sizing the crop.
  const TORSO_EXPANSION_RATIO = 1.9;
  const BODY_EXPANSION_RATIO = 1.2;

  const isFirstFrame = currentCropRegion == null;

  // Keypoint positions in pixels, stored as [y, x].
  const targetKeypoints: {[index: string]: number[]} = {};
  for (const key of COCO_KEYPOINTS) {
    const keypoint = keypoints[keypointIndexByName[key]];
    targetKeypoints[key] =
        [keypoint.y * imageSize.height, keypoint.x * imageSize.width];
  }

  if (!torsoVisible(keypoints, keypointIndexByName)) {
    return initCropRegion(isFirstFrame, imageSize);
  }

  const centerY =
      (targetKeypoints['left_hip'][0] + targetKeypoints['right_hip'][0]) / 2;
  const centerX =
      (targetKeypoints['left_hip'][1] + targetKeypoints['right_hip'][1]) / 2;

  const [maxTorsoYrange, maxTorsoXrange, maxBodyYrange, maxBodyXrange] =
      determineTorsoAndBodyRange(
          keypoints, keypointIndexByName, targetKeypoints, centerY, centerX);

  // Never extend further than the farthest image edge from the center.
  const cropLengthHalf = Math.min(
      Math.max(
          maxTorsoXrange * TORSO_EXPANSION_RATIO,
          maxTorsoYrange * TORSO_EXPANSION_RATIO,
          maxBodyYrange * BODY_EXPANSION_RATIO,
          maxBodyXrange * BODY_EXPANSION_RATIO),
      Math.max(
          centerX, imageSize.width - centerX, centerY,
          imageSize.height - centerY));

  // A crop larger than the longer image side is replaced by the default one.
  if (cropLengthHalf > Math.max(imageSize.width, imageSize.height) / 2) {
    return initCropRegion(isFirstFrame, imageSize);
  }

  const cropLength = cropLengthHalf * 2;
  const cornerY = centerY - cropLengthHalf;
  const cornerX = centerX - cropLengthHalf;

  const yMin = cornerY / imageSize.height;
  const xMin = cornerX / imageSize.width;
  const yMax = (cornerY + cropLength) / imageSize.height;
  const xMax = (cornerX + cropLength) / imageSize.width;

  return {yMin, xMin, yMax, xMax, height: yMax - yMin, width: xMax - xMin};
}
| 170 | + |
/**
 * Builds a default square crop region.
 *
 * Used when a crop region cannot be derived from the previous frame. Two
 * layouts are possible, both expressed as fractions of the image size:
 *   - Center crop: a square as large as the shorter image side, centered along
 *     the longer side. Chosen for the first frame.
 *   - Padded: a square as large as the longer image side, so it extends
 *     beyond the image equally on both sides of the shorter dimension (the
 *     minimum coordinate is negative). Chosen for any later frame.
 *
 * @param firstFrame A boolean indicating whether we are initializing a crop
 *     region for the very first frame.
 * @param imageSize The size of the image that is being processed.
 * @return A `BoundingBox` that contains the initial crop region.
 */
export function initCropRegion(
    firstFrame: boolean, imageSize: ImageSize): BoundingBox {
  const landscape = imageSize.width > imageSize.height;

  // Center-cropping a landscape image and padding a non-landscape image give
  // the same box: full height, width scaled by height / width. The remaining
  // two cases give full width, height scaled by width / height.
  const spansFullHeight = firstFrame ? landscape : !landscape;

  let top: number, left: number, boxHeight: number, boxWidth: number;
  if (spansFullHeight) {
    top = 0.0;
    left = (imageSize.width / 2 - imageSize.height / 2) / imageSize.width;
    boxHeight = 1.0;
    boxWidth = imageSize.height / imageSize.width;
  } else {
    top = (imageSize.height / 2 - imageSize.width / 2) / imageSize.height;
    left = 0.0;
    boxHeight = imageSize.width / imageSize.height;
    boxWidth = 1.0;
  }

  return {
    yMin: top,
    xMin: left,
    yMax: top + boxHeight,
    xMax: left + boxWidth,
    height: boxHeight,
    width: boxWidth
  };
}
0 commit comments