Skip to content

Fix devdax_open_ipc_handle() and devdax_close_ipc_handle() #838

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Oct 31, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions src/base_alloc/base_alloc.c
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,8 @@ umf_ba_pool_t *umf_ba_create(size_t size) {
char *data_ptr = (char *)&pool->data;
size_t size_left = pool_size - offsetof(umf_ba_pool_t, data);

utils_align_ptr_size((void **)&data_ptr, &size_left, MEMORY_ALIGNMENT);
utils_align_ptr_up_size_down((void **)&data_ptr, &size_left,
MEMORY_ALIGNMENT);

// init free_lock
utils_mutex_t *mutex = utils_mutex_init(&pool->metadata.free_lock);
Expand Down Expand Up @@ -209,7 +210,8 @@ void *umf_ba_alloc(umf_ba_pool_t *pool) {
size_t size_left =
pool->metadata.pool_size - offsetof(umf_ba_next_pool_t, data);

utils_align_ptr_size((void **)&data_ptr, &size_left, MEMORY_ALIGNMENT);
utils_align_ptr_up_size_down((void **)&data_ptr, &size_left,
MEMORY_ALIGNMENT);
ba_divide_memory_into_chunks(pool, data_ptr, size_left);
}

Expand Down
4 changes: 2 additions & 2 deletions src/base_alloc/base_alloc_linear.c
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ umf_ba_linear_pool_t *umf_ba_linear_create(size_t pool_size) {
void *data_ptr = &pool->data;
size_t size_left = pool_size - offsetof(umf_ba_linear_pool_t, data);

utils_align_ptr_size(&data_ptr, &size_left, MEMORY_ALIGNMENT);
utils_align_ptr_up_size_down(&data_ptr, &size_left, MEMORY_ALIGNMENT);

pool->metadata.pool_size = pool_size;
pool->metadata.data_ptr = data_ptr;
Expand Down Expand Up @@ -149,7 +149,7 @@ void *umf_ba_linear_alloc(umf_ba_linear_pool_t *pool, size_t size) {
void *data_ptr = &new_pool->data;
size_t size_left =
new_pool->pool_size - offsetof(umf_ba_next_linear_pool_t, data);
utils_align_ptr_size(&data_ptr, &size_left, MEMORY_ALIGNMENT);
utils_align_ptr_up_size_down(&data_ptr, &size_left, MEMORY_ALIGNMENT);

pool->metadata.data_ptr = data_ptr;
pool->metadata.size_left = size_left;
Expand Down
118 changes: 56 additions & 62 deletions src/provider/provider_devdax_memory.c
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ umf_memory_provider_ops_t *umfDevDaxMemoryProviderOps(void) {
#include "utils_concurrency.h"
#include "utils_log.h"

#define NODESET_STR_BUF_LEN 1024
#define DEVDAX_PAGE_SIZE_2MB ((size_t)(2 * 1024 * 1024)) // == 2 MB

#define TLS_MSG_BUF_LEN 1024

Expand Down Expand Up @@ -228,15 +228,20 @@ static umf_result_t devdax_alloc(void *provider, size_t size, size_t alignment,
return UMF_RESULT_ERROR_INVALID_ARGUMENT;
}

// alignment must be a power of two and a multiple of sizeof(void *)
if (alignment &&
((alignment & (alignment - 1)) || (alignment % sizeof(void *)))) {
LOG_ERR("wrong alignment: %zu (not a power of 2 or a multiple of "
"sizeof(void *))",
alignment);
// alignment must be a power of two and a multiple or a divider of the page size
if (alignment && ((alignment & (alignment - 1)) ||
((alignment % DEVDAX_PAGE_SIZE_2MB) &&
(DEVDAX_PAGE_SIZE_2MB % alignment)))) {
LOG_ERR("wrong alignment: %zu (not a power of 2 or a multiple or a "
"divider of the page size (%zu))",
alignment, DEVDAX_PAGE_SIZE_2MB);
Comment on lines +231 to +237
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To ensure I understand it correctly, will 4 KB also pass this check?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, 4kB is a divider of 2MB

return UMF_RESULT_ERROR_INVALID_ARGUMENT;
}

if (IS_NOT_ALIGNED(alignment, DEVDAX_PAGE_SIZE_2MB)) {
alignment = ALIGN_UP(alignment, DEVDAX_PAGE_SIZE_2MB);
}

devdax_memory_provider_t *devdax_provider =
(devdax_memory_provider_t *)provider;

Expand Down Expand Up @@ -300,7 +305,7 @@ static umf_result_t devdax_get_recommended_page_size(void *provider,
return UMF_RESULT_ERROR_INVALID_ARGUMENT;
}

*page_size = utils_get_page_size();
*page_size = DEVDAX_PAGE_SIZE_2MB;

return UMF_RESULT_SUCCESS;
}
Expand Down Expand Up @@ -369,9 +374,11 @@ static umf_result_t devdax_allocation_merge(void *provider, void *lowPtr,
}

typedef struct devdax_ipc_data_t {
char dd_path[PATH_MAX]; // path to the /dev/dax
size_t dd_size; // size of the /dev/dax
size_t offset; // offset of the data
char path[PATH_MAX]; // path to the /dev/dax
unsigned protection; // combination of OS-specific memory protection flags
// offset of the data (from the beginning of the devdax mapping) - see devdax_get_ipc_handle()
size_t offset;
size_t length; // length of the data
} devdax_ipc_data_t;

static umf_result_t devdax_get_ipc_handle_size(void *provider, size_t *size) {
Expand All @@ -386,8 +393,6 @@ static umf_result_t devdax_get_ipc_handle_size(void *provider, size_t *size) {

static umf_result_t devdax_get_ipc_handle(void *provider, const void *ptr,
size_t size, void *providerIpcData) {
(void)size; // unused

if (provider == NULL || ptr == NULL || providerIpcData == NULL) {
return UMF_RESULT_ERROR_INVALID_ARGUMENT;
}
Expand All @@ -396,11 +401,12 @@ static umf_result_t devdax_get_ipc_handle(void *provider, const void *ptr,
(devdax_memory_provider_t *)provider;

devdax_ipc_data_t *devdax_ipc_data = (devdax_ipc_data_t *)providerIpcData;
strncpy(devdax_ipc_data->path, devdax_provider->path, PATH_MAX - 1);
devdax_ipc_data->path[PATH_MAX - 1] = '\0';
devdax_ipc_data->protection = devdax_provider->protection;
devdax_ipc_data->offset =
(size_t)((uintptr_t)ptr - (uintptr_t)devdax_provider->base);
strncpy(devdax_ipc_data->dd_path, devdax_provider->path, PATH_MAX - 1);
devdax_ipc_data->dd_path[PATH_MAX - 1] = '\0';
devdax_ipc_data->dd_size = devdax_provider->size;
devdax_ipc_data->length = size;

return UMF_RESULT_SUCCESS;
}
Expand All @@ -416,16 +422,9 @@ static umf_result_t devdax_put_ipc_handle(void *provider,
devdax_ipc_data_t *devdax_ipc_data = (devdax_ipc_data_t *)providerIpcData;

// verify the path of the /dev/dax
if (strncmp(devdax_ipc_data->dd_path, devdax_provider->path, PATH_MAX)) {
if (strncmp(devdax_ipc_data->path, devdax_provider->path, PATH_MAX)) {
LOG_ERR("devdax path mismatch (local: %s, ipc: %s)",
devdax_provider->path, devdax_ipc_data->dd_path);
return UMF_RESULT_ERROR_INVALID_ARGUMENT;
}

// verify the size of the /dev/dax
if (devdax_ipc_data->dd_size != devdax_provider->size) {
LOG_ERR("devdax size mismatch (local: %zu, ipc: %zu)",
devdax_provider->size, devdax_ipc_data->dd_size);
devdax_provider->path, devdax_ipc_data->path);
return UMF_RESULT_ERROR_INVALID_ARGUMENT;
}

Expand All @@ -438,58 +437,54 @@ static umf_result_t devdax_open_ipc_handle(void *provider,
return UMF_RESULT_ERROR_INVALID_ARGUMENT;
}

devdax_memory_provider_t *devdax_provider =
(devdax_memory_provider_t *)provider;
devdax_ipc_data_t *devdax_ipc_data = (devdax_ipc_data_t *)providerIpcData;

// verify it is the same devdax - first verify the path
if (strncmp(devdax_ipc_data->dd_path, devdax_provider->path, PATH_MAX)) {
LOG_ERR("devdax path mismatch (local: %s, ipc: %s)",
devdax_provider->path, devdax_ipc_data->dd_path);
return UMF_RESULT_ERROR_INVALID_ARGUMENT;
}

// verify the size of the /dev/dax
if (devdax_ipc_data->dd_size != devdax_provider->size) {
LOG_ERR("devdax size mismatch (local: %zu, ipc: %zu)",
devdax_provider->size, devdax_ipc_data->dd_size);
return UMF_RESULT_ERROR_INVALID_ARGUMENT;
}

umf_result_t ret = UMF_RESULT_SUCCESS;
int fd = utils_devdax_open(devdax_provider->path);
int fd = utils_devdax_open(devdax_ipc_data->path);
if (fd == -1) {
LOG_PERR("opening a devdax (%s) failed", devdax_provider->path);
LOG_PERR("opening the devdax (%s) failed", devdax_ipc_data->path);
return UMF_RESULT_ERROR_INVALID_ARGUMENT;
}

unsigned map_sync_flag = 0;
utils_translate_mem_visibility_flag(UMF_MEM_MAP_SYNC, &map_sync_flag);

// It is just a workaround for case when
// devdax_alloc() was called with the size argument
// that is not a multiplier of DEVDAX_PAGE_SIZE_2MB.
size_t offset_aligned = devdax_ipc_data->offset;
size_t length_aligned = devdax_ipc_data->length;
utils_align_ptr_down_size_up((void **)&offset_aligned, &length_aligned,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we really need this call to the utils_align_ptr_down_size_up function? Offset and length should be aligned during the allocation? I mean that IPC handle represents allocation that is done by this memory provider and it already should be page-aligned. Probably here and in the devdax_open_ipc_handle we need to check that offset and length are page-aligned.

Copy link
Contributor Author

@ldorau ldorau Oct 25, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, it is needed. alloc() aligns pointer's address to the alignment given as the alloc()'s parameter, not to the page size, but mmap()requires the page size alignment.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, I got it. In case of arbitrary alignment such logic makes sense, but does arbitrary alignment in case of any memory provider (not only devdax) makes sense. I can assume that alignment always is a multiply of the page size. Did I miss something?

Copy link
Contributor Author

@ldorau ldorau Oct 25, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

User-requested alignment (given as alloc(size, alignment)) "must be a power of two and a multiple of sizeof(void *)." as POSIX_MEMALIGN(3) states, so it does not have to be a multiply of the page size.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, but the memory provider API is closer to the mmap.
The umfPoolMalloc() is similar to regular malloc, but memory providers are responsible for a coarse-grain allocations.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the main issue is that we cannot require the size (in alloc()) to be a multiple of 2MB - we cannot force jemalloc or any other pool manager or a user of a memory provider to call alloc() only with size being a multiple of 2MB.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think with alloc function everything is easy, we can support 4KB allocations because we just update the offset and do actual mapping at the init phase. The question is about ope_ip_handle how should we implement it.

Copy link
Contributor Author

@ldorau ldorau Oct 30, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@vinser52 please review the new version - there is no up-alignment in open/close_IPC_handle any more
(The devdax CI jobs pass: https://github.com/oneapi-src/unified-memory-framework/actions/runs/11595816096)

Copy link
Contributor Author

@ldorau ldorau Oct 30, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@vinser52 Done.

DEVDAX_PAGE_SIZE_2MB);

// mmap /dev/dax with the MAP_SYNC xor MAP_SHARED flag (if MAP_SYNC fails)
char *base = utils_mmap_file(NULL, devdax_provider->size,
devdax_provider->protection, map_sync_flag, fd,
0 /* offset */);
if (base == NULL) {
char *addr =
utils_mmap_file(NULL, length_aligned, devdax_ipc_data->protection,
map_sync_flag, fd, offset_aligned);
if (addr == NULL) {
devdax_store_last_native_error(UMF_DEVDAX_RESULT_ERROR_ALLOC_FAILED,
errno);

LOG_PERR("devdax mapping failed (path: %s, size: %zu, protection: %i, "
"fd: %i)",
devdax_provider->path, devdax_provider->size,
devdax_provider->protection, fd);
ret = UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC;
"fd: %i, offset: %zu)",
devdax_ipc_data->path, length_aligned,
devdax_ipc_data->protection, fd, offset_aligned);

*ptr = NULL;
(void)utils_close_fd(fd);

return UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC;
}

LOG_DEBUG("devdax mapped (path: %s, size: %zu, protection: %i, fd: %i, "
"offset: %zu)",
devdax_provider->path, devdax_provider->size,
devdax_provider->protection, fd, devdax_ipc_data->offset);
"offset: %zu) to address %p",
devdax_ipc_data->path, length_aligned,
devdax_ipc_data->protection, fd, offset_aligned, addr);

(void)utils_close_fd(fd);
*ptr = addr;

*ptr = base + devdax_ipc_data->offset;
(void)utils_close_fd(fd);

return ret;
return UMF_RESULT_SUCCESS;
}

static umf_result_t devdax_close_ipc_handle(void *provider, void *ptr,
Expand All @@ -498,16 +493,15 @@ static umf_result_t devdax_close_ipc_handle(void *provider, void *ptr,
return UMF_RESULT_ERROR_INVALID_ARGUMENT;
}

devdax_memory_provider_t *devdax_provider =
(devdax_memory_provider_t *)provider;
size = ALIGN_UP(size, DEVDAX_PAGE_SIZE_2MB);

errno = 0;
int ret = utils_munmap(devdax_provider->base, devdax_provider->size);
int ret = utils_munmap(ptr, size);
// ignore error when size == 0
if (ret && (size > 0)) {
devdax_store_last_native_error(UMF_DEVDAX_RESULT_ERROR_FREE_FAILED,
errno);
LOG_PERR("memory unmapping failed");
LOG_PERR("memory unmapping failed (ptr: %p, size: %zu)", ptr, size);

return UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC;
}
Expand Down
34 changes: 28 additions & 6 deletions src/utils/utils_common.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,20 +12,42 @@
#include "utils_assert.h"
#include "utils_common.h"

// align a pointer and a size
void utils_align_ptr_size(void **ptr, size_t *size, size_t alignment) {
// align a pointer up and a size down
void utils_align_ptr_up_size_down(void **ptr, size_t *size, size_t alignment) {
uintptr_t p = (uintptr_t)*ptr;
size_t s = *size;

// align pointer to 'alignment' bytes and adjust the size
// align the pointer up to 'alignment' bytes and adjust the size down
size_t rest = p & (alignment - 1);
if (rest) {
p += alignment - rest;
p = ALIGN_UP(p, alignment);
s -= alignment - rest;
}

ASSERT((p & (alignment - 1)) == 0);
ASSERT((s & (alignment - 1)) == 0);
ASSERT(IS_ALIGNED(p, alignment));
ASSERT(IS_ALIGNED(s, alignment));

*ptr = (void *)p;
*size = s;
}

// align a pointer down and a size up (for mmap()/munmap())
void utils_align_ptr_down_size_up(void **ptr, size_t *size, size_t alignment) {
uintptr_t p = (uintptr_t)*ptr;
size_t s = *size;

// align the pointer down to 'alignment' bytes and adjust the size up
size_t rest = p & (alignment - 1);
if (rest) {
p = ALIGN_DOWN(p, alignment);
s += rest;
}

// align the size up to 'alignment' bytes
s = ALIGN_UP(s, alignment);

ASSERT(IS_ALIGNED(p, alignment));
ASSERT(IS_ALIGNED(s, alignment));

*ptr = (void *)p;
*size = s;
Expand Down
7 changes: 5 additions & 2 deletions src/utils/utils_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -82,8 +82,11 @@ int utils_is_running_in_proxy_lib(void);

size_t utils_get_page_size(void);

// align a pointer and a size
void utils_align_ptr_size(void **ptr, size_t *size, size_t alignment);
// align a pointer up and a size down
void utils_align_ptr_up_size_down(void **ptr, size_t *size, size_t alignment);

// align a pointer down and a size up (for mmap()/munmap())
void utils_align_ptr_down_size_up(void **ptr, size_t *size, size_t alignment);

// get the current process ID
int utils_getpid(void);
Expand Down
16 changes: 11 additions & 5 deletions test/provider_devdax_memory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -190,8 +190,13 @@ INSTANTIATE_TEST_SUITE_P(devdaxProviderTest, umfProviderTest,

TEST_P(umfProviderTest, create_destroy) {}

TEST_P(umfProviderTest, alloc_page64_align_0) {
test_alloc_free_success(provider.get(), page_plus_64, 0, PURGE_NONE);
TEST_P(umfProviderTest, alloc_page_align_0) {
test_alloc_free_success(provider.get(), page_size, 0, PURGE_NONE);
}

TEST_P(umfProviderTest, alloc_2page_align_page_size) {
test_alloc_free_success(provider.get(), 2 * page_size, page_size,
PURGE_NONE);
}

TEST_P(umfProviderTest, alloc_page64_align_page_div_2) {
Expand All @@ -200,11 +205,11 @@ TEST_P(umfProviderTest, alloc_page64_align_page_div_2) {
}

TEST_P(umfProviderTest, purge_lazy) {
test_alloc_free_success(provider.get(), page_plus_64, 0, PURGE_LAZY);
test_alloc_free_success(provider.get(), page_size, 0, PURGE_LAZY);
}

TEST_P(umfProviderTest, purge_force) {
test_alloc_free_success(provider.get(), page_plus_64, 0, PURGE_FORCE);
test_alloc_free_success(provider.get(), page_size, 0, PURGE_FORCE);
}

// negative tests using test_alloc_failure
Expand All @@ -231,7 +236,8 @@ TEST_P(umfProviderTest, alloc_3_pages_WRONG_ALIGNMENT_3_pages) {
}

TEST_P(umfProviderTest, alloc_WRONG_SIZE) {
test_alloc_failure(provider.get(), -1, 0,
size_t size = (size_t)(-1) & ~(page_size - 1);
test_alloc_failure(provider.get(), size, 0,
UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC,
UMF_DEVDAX_RESULT_ERROR_ALLOC_FAILED);
}
Expand Down
Loading