Skip to content

Commit 431b2c2

Browse files
Acly authored and ggerganov committed
ggml-cpu : "align corners" for bilinear upscale/downscale (ggml/1285)
* add "align corners" mode for bilinear upscale, and allow downscaling
* add ggml_interpolate, deprecate ggml_upscale_ext, pass in align-corners as bit-flag
* test-backend-ops: replace ggml_upscale_ext with ggml_interpolate, add test cases for downscale and align-corners
1 parent 497be7c commit 431b2c2

File tree

4 files changed

+65
-31
lines changed

4 files changed

+65
-31
lines changed

ggml/include/ggml.h

Lines changed: 20 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1867,6 +1867,12 @@ extern "C" {
18671867
// Scaling method selector. It occupies the low 8 bits of the op's mode parameter
// (the CPU backend extracts it with `mode_flags & 0xFF`).
enum ggml_scale_mode {
18681868
GGML_SCALE_MODE_NEAREST = 0,
18691869
GGML_SCALE_MODE_BILINEAR = 1,
1870+
1871+
GGML_SCALE_MODE_COUNT // sentinel: ggml_interpolate_impl asserts (mode & 0xFF) < GGML_SCALE_MODE_COUNT
1872+
};
1873+
1874+
// Optional flags that are OR'ed into the `mode` argument together with a ggml_scale_mode value.
enum ggml_scale_flag {
1875+
GGML_SCALE_FLAG_ALIGN_CORNERS = (1 << 8) // bit 8: outside the low 8 bits (0xFF) that carry the ggml_scale_mode
18701876
};
18711877

18721878
// interpolate
@@ -1879,14 +1885,26 @@ extern "C" {
18791885

18801886
// interpolate
18811887
// interpolate scale to specified dimensions
1882-
GGML_API struct ggml_tensor * ggml_upscale_ext(
1888+
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_upscale_ext(
18831889
struct ggml_context * ctx,
18841890
struct ggml_tensor * a,
18851891
int ne0,
18861892
int ne1,
18871893
int ne2,
18881894
int ne3,
1889-
enum ggml_scale_mode mode);
1895+
enum ggml_scale_mode mode),
1896+
"use ggml_interpolate instead");
1897+
1898+
// Up- or downsamples the input to the specified size.
1899+
// 2D scale modes (e.g. bilinear) are applied to the first two dimensions.
// ne0..ne3 are the dimensions of the result tensor.
// mode is a ggml_scale_mode value, optionally OR'ed with ggml_scale_flag bits
// (e.g. GGML_SCALE_MODE_BILINEAR | GGML_SCALE_FLAG_ALIGN_CORNERS).
1900+
GGML_API struct ggml_tensor * ggml_interpolate(
1901+
struct ggml_context * ctx,
1902+
struct ggml_tensor * a,
1903+
int64_t ne0,
1904+
int64_t ne1,
1905+
int64_t ne2,
1906+
int64_t ne3,
1907+
uint32_t mode); // ggml_scale_mode [ | ggml_scale_flag...]
18901908

18911909
// pad each dimension with zeros: [x, ..., x] -> [x, ..., x, 0, ..., 0]
18921910
GGML_API struct ggml_tensor * ggml_pad(

ggml/src/ggml-cpu/ops.cpp

Lines changed: 12 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -7276,12 +7276,13 @@ static void ggml_compute_forward_upscale_f32(
72767276

72777277
GGML_TENSOR_UNARY_OP_LOCALS
72787278

7279-
const float sf0 = (float)ne0/src0->ne[0];
7280-
const float sf1 = (float)ne1/src0->ne[1];
7281-
const float sf2 = (float)ne2/src0->ne[2];
7282-
const float sf3 = (float)ne3/src0->ne[3];
7279+
float sf0 = (float)ne0/src0->ne[0];
7280+
float sf1 = (float)ne1/src0->ne[1];
7281+
float sf2 = (float)ne2/src0->ne[2];
7282+
float sf3 = (float)ne3/src0->ne[3];
72837283

7284-
const ggml_scale_mode mode = (ggml_scale_mode) ggml_get_op_params_i32(dst, 0);
7284+
const int32_t mode_flags = ggml_get_op_params_i32(dst, 0);
7285+
const ggml_scale_mode mode = (ggml_scale_mode) (mode_flags & 0xFF);
72857286

72867287
if (mode == GGML_SCALE_MODE_NEAREST) {
72877288
for (int64_t i3 = 0; i3 < ne3; i3++) {
@@ -7302,8 +7303,12 @@ static void ggml_compute_forward_upscale_f32(
73027303
}
73037304
}
73047305
} else if (mode == GGML_SCALE_MODE_BILINEAR) {
7305-
// setting a pixel offset of 0 would replicate the behavior of pytorch interpolate with align_corners=True
7306-
const float pixel_offset = 0.5f;
7306+
float pixel_offset = 0.5f;
7307+
if (mode_flags & GGML_SCALE_FLAG_ALIGN_CORNERS) {
7308+
pixel_offset = 0.0f;
7309+
sf0 = (float)(ne0 - 1) / (src0->ne[0] - 1);
7310+
sf1 = (float)(ne1 - 1) / (src0->ne[1] - 1);
7311+
}
73077312

73087313
for (int64_t i3 = 0; i3 < ne3; i3++) {
73097314
const int64_t i03 = i3 / sf3;

ggml/src/ggml.c

Lines changed: 24 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -4447,24 +4447,21 @@ struct ggml_tensor * ggml_pool_2d_back(
44474447
return result;
44484448
}
44494449

4450-
// ggml_upscale
4450+
// ggml_upscale / ggml_interpolate
44514451

4452-
static struct ggml_tensor * ggml_upscale_impl(
4452+
static struct ggml_tensor * ggml_interpolate_impl(
44534453
struct ggml_context * ctx,
44544454
struct ggml_tensor * a,
4455-
int ne0,
4456-
int ne1,
4457-
int ne2,
4458-
int ne3,
4459-
enum ggml_scale_mode mode) {
4460-
GGML_ASSERT(a->ne[0] <= ne0);
4461-
GGML_ASSERT(a->ne[1] <= ne1);
4462-
GGML_ASSERT(a->ne[2] <= ne2);
4463-
GGML_ASSERT(a->ne[3] <= ne3);
4464-
4455+
int64_t ne0,
4456+
int64_t ne1,
4457+
int64_t ne2,
4458+
int64_t ne3,
4459+
uint32_t mode) {
4460+
GGML_ASSERT((mode & 0xFF) < GGML_SCALE_MODE_COUNT);
4461+
44654462
struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type, ne0, ne1, ne2, ne3);
44664463

4467-
ggml_set_op_params_i32(result, 0, mode);
4464+
ggml_set_op_params_i32(result, 0, (int32_t)mode);
44684465

44694466
result->op = GGML_OP_UPSCALE;
44704467
result->src[0] = a;
@@ -4477,7 +4474,8 @@ struct ggml_tensor * ggml_upscale(
44774474
struct ggml_tensor * a,
44784475
int scale_factor,
44794476
enum ggml_scale_mode mode) {
4480-
return ggml_upscale_impl(ctx, a, a->ne[0] * scale_factor, a->ne[1] * scale_factor, a->ne[2], a->ne[3], mode);
4477+
GGML_ASSERT(scale_factor > 1);
4478+
return ggml_interpolate_impl(ctx, a, a->ne[0] * scale_factor, a->ne[1] * scale_factor, a->ne[2], a->ne[3], mode);
44814479
}
44824480

44834481
struct ggml_tensor * ggml_upscale_ext(
@@ -4488,7 +4486,18 @@ struct ggml_tensor * ggml_upscale_ext(
44884486
int ne2,
44894487
int ne3,
44904488
enum ggml_scale_mode mode) {
4491-
return ggml_upscale_impl(ctx, a, ne0, ne1, ne2, ne3, mode);
4489+
return ggml_interpolate_impl(ctx, a, ne0, ne1, ne2, ne3, mode);
4490+
}
4491+
4492+
// Public entry point for resizing `a` to ne0 x ne1 x ne2 x ne3.
// Forwards every argument unchanged to ggml_interpolate_impl, which asserts that the
// low 8 bits of `mode` name a valid ggml_scale_mode and creates the GGML_OP_UPSCALE node.
struct ggml_tensor * ggml_interpolate(
4493+
struct ggml_context * ctx,
4494+
struct ggml_tensor * a,
4495+
int64_t ne0,
4496+
int64_t ne1,
4497+
int64_t ne2,
4498+
int64_t ne3,
4499+
uint32_t mode) {
4500+
return ggml_interpolate_impl(ctx, a, ne0, ne1, ne2, ne3, mode);
44924501
}
44934502

44944503
// ggml_pad

tests/test-backend-ops.cpp

Lines changed: 9 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -3296,28 +3296,28 @@ struct test_upscale : public test_case {
32963296
}
32973297
};
32983298

3299-
// GGML_OP_UPSCALE (ext)
3300-
struct test_upscale_ext : public test_case {
3299+
// GGML_OP_UPSCALE (via ggml_interpolate)
3300+
struct test_interpolate : public test_case {
33013301
const ggml_type type;
33023302
const std::array<int64_t, 4> ne;
33033303
const std::array<int64_t, 4> ne_tgt;
3304-
const ggml_scale_mode mode = GGML_SCALE_MODE_NEAREST;
3304+
const uint32_t mode = GGML_SCALE_MODE_NEAREST;
33053305

33063306
std::string vars() override {
33073307
return VARS_TO_STR4(type, ne, ne_tgt, mode);
33083308
}
33093309

3310-
test_upscale_ext(ggml_type type = GGML_TYPE_F32,
3310+
test_interpolate(ggml_type type = GGML_TYPE_F32,
33113311
std::array<int64_t, 4> ne = {2, 5, 7, 11},
33123312
std::array<int64_t, 4> ne_tgt = {5, 7, 11, 13},
3313-
ggml_scale_mode mode = GGML_SCALE_MODE_NEAREST)
3313+
uint32_t mode = GGML_SCALE_MODE_NEAREST)
33143314
: type(type), ne(ne), ne_tgt(ne_tgt), mode(mode) {}
33153315

33163316
ggml_tensor * build_graph(ggml_context * ctx) override {
33173317
ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
33183318
ggml_set_name(a, "a");
33193319

3320-
ggml_tensor * out = ggml_upscale_ext(ctx, a, ne_tgt[0], ne_tgt[1],ne_tgt[2], ne_tgt[3], mode);
3320+
ggml_tensor * out = ggml_interpolate(ctx, a, ne_tgt[0], ne_tgt[1],ne_tgt[2], ne_tgt[3], mode);
33213321
ggml_set_name(out, "out");
33223322

33233323
return out;
@@ -4799,8 +4799,10 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
47994799
for (ggml_scale_mode mode : {GGML_SCALE_MODE_NEAREST, GGML_SCALE_MODE_BILINEAR}) {
48004800
test_cases.emplace_back(new test_upscale(GGML_TYPE_F32, {512, 512, 3, 2}, 2, mode));
48014801
test_cases.emplace_back(new test_upscale(GGML_TYPE_F32, {512, 512, 3, 2}, 2, mode, true));
4802-
test_cases.emplace_back(new test_upscale_ext(GGML_TYPE_F32, {2, 5, 7, 11}, {5, 7, 11, 13}, mode));
4802+
test_cases.emplace_back(new test_interpolate(GGML_TYPE_F32, {2, 5, 7, 11}, {5, 7, 11, 13}, mode));
4803+
test_cases.emplace_back(new test_interpolate(GGML_TYPE_F32, {5, 7, 11, 13}, {2, 5, 7, 11}, mode));
48034804
}
4805+
test_cases.emplace_back(new test_interpolate(GGML_TYPE_F32, {2, 5, 7, 11}, {5, 7, 11, 13}, GGML_SCALE_MODE_BILINEAR | GGML_SCALE_FLAG_ALIGN_CORNERS));
48044806

48054807
test_cases.emplace_back(new test_sum());
48064808
test_cases.emplace_back(new test_sum_rows());

0 commit comments

Comments
 (0)