48
48
int mtl_device_ref_count;
49
49
id <MTLLibrary > mtl_library;
50
50
51
+ NSLock * mtl_lock;
52
+
51
53
bool has_simdgroup_reduction;
52
54
bool has_simdgroup_mm;
53
55
bool has_residency_sets;
54
56
bool has_bfloat;
55
57
bool use_bfloat;
56
58
59
+ size_t max_size;
60
+
57
61
char name[128 ];
58
62
} g_ggml_ctx_dev_main = {
59
63
/* .mtl_device =*/ nil ,
60
64
/* .mtl_device_ref_count =*/ 0 ,
61
65
/* .mtl_library =*/ nil ,
66
+ /* .mtl_lock =*/ nil ,
62
67
/* .has_simdgroup_reduction =*/ false ,
63
68
/* .has_simdgroup_mm =*/ false ,
64
69
/* .has_residency_sets =*/ false ,
65
70
/* .has_bfloat =*/ false ,
66
71
/* .use_bfloat =*/ false ,
72
+ /* .max_size =*/ 0 ,
67
73
/* .name =*/ " " ,
68
74
};
69
75
70
76
// acquire
71
77
static id <MTLDevice > ggml_backend_metal_device_acq (struct ggml_backend_metal_device_context * ctx) {
72
78
assert (ctx != NULL );
73
79
80
+ if (ctx->mtl_lock == nil ) {
81
+ ctx->mtl_lock = [[NSLock alloc ] init ];
82
+ }
83
+
74
84
if (ctx->mtl_device == nil ) {
75
85
ctx->mtl_device = MTLCreateSystemDefaultDevice ();
76
86
}
94
104
ctx->use_bfloat = false ;
95
105
#endif
96
106
107
+ ctx->max_size = ctx->mtl_device .maxBufferLength ;
108
+
97
109
strncpy (ctx->name , [[ctx->mtl_device name ] UTF8String ], sizeof (ctx->name ) - 1 );
98
110
}
99
111
@@ -110,6 +122,11 @@ static void ggml_backend_metal_device_rel(struct ggml_backend_metal_device_conte
110
122
ctx->mtl_device_ref_count --;
111
123
112
124
if (ctx->mtl_device_ref_count == 0 ) {
125
+ if (ctx->mtl_lock ) {
126
+ [ctx->mtl_lock release ];
127
+ ctx->mtl_lock = nil ;
128
+ }
129
+
113
130
if (ctx->mtl_library ) {
114
131
[ctx->mtl_library release ];
115
132
ctx->mtl_library = nil ;
@@ -977,7 +994,7 @@ @implementation GGMLMetalClass
977
994
struct ggml_backend_metal_context * ctx = calloc (1 , sizeof (struct ggml_backend_metal_context));
978
995
struct ggml_backend_metal_device_context * ctx_dev = dev->context ;
979
996
980
- id <MTLDevice > device = ggml_backend_metal_device_acq ( ctx_dev) ;
997
+ id <MTLDevice > device = ctx_dev-> mtl_device ;
981
998
982
999
GGML_LOG_INFO (" %s : picking default device: %s \n " , __func__, [[device name ] UTF8String ]);
983
1000
@@ -991,9 +1008,16 @@ @implementation GGMLMetalClass
991
1008
ctx->d_queue = dispatch_queue_create (" ggml-metal" , DISPATCH_QUEUE_CONCURRENT);
992
1009
993
1010
// load library
994
- if (ctx_dev->mtl_library == nil ) {
995
- ctx_dev->mtl_library = ggml_metal_load_library (device, ctx_dev->use_bfloat );
1011
+ {
1012
+ [ctx_dev->mtl_lock lock ];
1013
+
1014
+ if (ctx_dev->mtl_library == nil ) {
1015
+ ctx_dev->mtl_library = ggml_metal_load_library (device, ctx_dev->use_bfloat );
1016
+ }
1017
+
1018
+ [ctx_dev->mtl_lock unlock ];
996
1019
}
1020
+
997
1021
id <MTLLibrary > metal_library = ctx_dev->mtl_library ;
998
1022
if (metal_library == nil ) {
999
1023
GGML_LOG_ERROR (" %s : error: metal library is nil\n " , __func__);
@@ -5284,7 +5308,6 @@ static void ggml_backend_metal_buffer_free_buffer(ggml_backend_buffer_t buffer)
5284
5308
}
5285
5309
5286
5310
ggml_backend_metal_buffer_rset_free (ctx);
5287
- ggml_backend_metal_device_rel (buffer->buft ->device ->context );
5288
5311
5289
5312
if (ctx->owned ) {
5290
5313
#if TARGET_OS_OSX
@@ -5393,7 +5416,10 @@ static ggml_backend_buffer_t ggml_backend_metal_buffer_type_alloc_buffer(ggml_ba
5393
5416
}
5394
5417
5395
5418
struct ggml_backend_metal_device_context * ctx_dev = (struct ggml_backend_metal_device_context *)buft->device ->context ;
5396
- id <MTLDevice > device = ggml_backend_metal_device_acq (ctx_dev);
5419
+
5420
+ GGML_ASSERT (ctx_dev->mtl_device != nil );
5421
+
5422
+ id <MTLDevice > device = ctx_dev->mtl_device ;
5397
5423
5398
5424
ctx->all_data = ggml_metal_host_malloc (size_aligned);
5399
5425
ctx->all_size = size_aligned;
@@ -5416,14 +5442,12 @@ static ggml_backend_buffer_t ggml_backend_metal_buffer_type_alloc_buffer(ggml_ba
5416
5442
if (size_aligned > 0 && (ctx->all_data == NULL || ctx->buffers [0 ].metal == nil )) {
5417
5443
GGML_LOG_ERROR (" %s : error: failed to allocate buffer, size = %8.2f MiB\n " , __func__, size_aligned / 1024.0 / 1024.0 );
5418
5444
free (ctx);
5419
- ggml_backend_metal_device_rel (ctx_dev);
5420
5445
return NULL ;
5421
5446
}
5422
5447
5423
5448
if (!ggml_backend_metal_buffer_rset_init (ctx, ctx_dev, device)) {
5424
5449
GGML_LOG_ERROR (" %s : error: failed to initialize residency set\n " , __func__);
5425
5450
free (ctx);
5426
- ggml_backend_metal_device_rel (ctx_dev);
5427
5451
return NULL ;
5428
5452
}
5429
5453
@@ -5434,17 +5458,14 @@ static ggml_backend_buffer_t ggml_backend_metal_buffer_type_alloc_buffer(ggml_ba
5434
5458
5435
5459
static size_t ggml_backend_metal_buffer_type_get_alignment (ggml_backend_buffer_type_t buft) {
5436
5460
return 32 ;
5461
+
5437
5462
GGML_UNUSED (buft);
5438
5463
}
5439
5464
5440
5465
static size_t ggml_backend_metal_buffer_type_get_max_size (ggml_backend_buffer_type_t buft) {
5441
- id <MTLDevice > device = ggml_backend_metal_device_acq (buft->device ->context );
5442
- const size_t max_size = device.maxBufferLength ;
5443
- ggml_backend_metal_device_rel (buft->device ->context );
5466
+ const size_t max_size = ((struct ggml_backend_metal_device_context *)buft->device ->context )->max_size ;
5444
5467
5445
5468
return max_size;
5446
-
5447
- GGML_UNUSED (buft);
5448
5469
}
5449
5470
5450
5471
static bool ggml_backend_metal_buffer_type_is_host (ggml_backend_buffer_type_t buft) {
@@ -5517,7 +5538,10 @@ ggml_backend_buffer_t ggml_backend_metal_buffer_from_ptr(void * data, size_t siz
5517
5538
}
5518
5539
5519
5540
struct ggml_backend_metal_device_context * ctx_dev = &g_ggml_ctx_dev_main;
5520
- id <MTLDevice > device = ggml_backend_metal_device_acq (ctx_dev);
5541
+
5542
+ GGML_ASSERT (ctx_dev->mtl_device != nil );
5543
+
5544
+ id <MTLDevice > device = ctx_dev->mtl_device ;
5521
5545
5522
5546
// the buffer fits into the max buffer size allowed by the device
5523
5547
if (size_aligned <= device.maxBufferLength ) {
@@ -5573,7 +5597,6 @@ ggml_backend_buffer_t ggml_backend_metal_buffer_from_ptr(void * data, size_t siz
5573
5597
if (!ggml_backend_metal_buffer_rset_init (ctx, ctx_dev, device)) {
5574
5598
GGML_LOG_ERROR (" %s : error: failed to initialize residency set\n " , __func__);
5575
5599
free (ctx);
5576
- ggml_backend_metal_device_rel (ctx_dev);
5577
5600
return NULL ;
5578
5601
}
5579
5602
@@ -5589,10 +5612,8 @@ ggml_backend_buffer_t ggml_backend_metal_buffer_from_ptr(void * data, size_t siz
5589
5612
}
5590
5613
5591
5614
static void ggml_backend_metal_free (ggml_backend_t backend) {
5592
- struct ggml_backend_metal_context * ctx = backend->context ;
5593
- struct ggml_backend_metal_device_context * ctx_dev = backend->device ->context ;
5615
+ struct ggml_backend_metal_context * ctx = backend->context ;
5594
5616
5595
- ggml_backend_metal_device_rel (ctx_dev);
5596
5617
ggml_metal_free (ctx);
5597
5618
5598
5619
free (backend);
@@ -5732,6 +5753,8 @@ bool ggml_backend_metal_supports_family(ggml_backend_t backend, int family) {
5732
5753
5733
5754
struct ggml_backend_metal_device_context * ctx_dev = backend->device ->context ;
5734
5755
5756
+ GGML_ASSERT (ctx_dev->mtl_device != nil );
5757
+
5735
5758
return [ctx_dev->mtl_device supportsFamily: (MTLGPUFamilyApple1 + family - 1 )];
5736
5759
}
5737
5760
@@ -5751,23 +5774,18 @@ void ggml_backend_metal_capture_next_compute(ggml_backend_t backend) {
5751
5774
}
5752
5775
5753
5776
static const char * ggml_backend_metal_device_get_description (ggml_backend_dev_t dev) {
5754
- // acq/rel just to populate ctx->name in case it hasn't been done yet
5755
5777
struct ggml_backend_metal_device_context * ctx_dev = (struct ggml_backend_metal_device_context *)dev->context ;
5756
- ggml_backend_metal_device_acq (ctx_dev);
5757
- ggml_backend_metal_device_rel (ctx_dev);
5758
5778
5759
5779
return ctx_dev->name ;
5760
5780
}
5761
5781
5762
5782
static void ggml_backend_metal_device_get_memory (ggml_backend_dev_t dev, size_t * free, size_t * total) {
5763
5783
if (@available (macOS 10.12 , iOS 16.0 , *)) {
5764
5784
struct ggml_backend_metal_device_context * ctx_dev = (struct ggml_backend_metal_device_context *)dev->context ;
5765
- id <MTLDevice > device = ggml_backend_metal_device_acq ( ctx_dev) ;
5785
+ id <MTLDevice > device = ctx_dev-> mtl_device ;
5766
5786
5767
5787
*total = device.recommendedMaxWorkingSetSize ;
5768
5788
*free = *total - device.currentAllocatedSize ;
5769
-
5770
- ggml_backend_metal_device_rel (ctx_dev);
5771
5789
} else {
5772
5790
*free = 1 ;
5773
5791
*total = 1 ;
@@ -5845,7 +5863,10 @@ static ggml_backend_buffer_t ggml_backend_metal_device_buffer_from_ptr(ggml_back
5845
5863
}
5846
5864
5847
5865
struct ggml_backend_metal_device_context * ctx_dev = (struct ggml_backend_metal_device_context *)dev->context ;
5848
- id <MTLDevice > device = ggml_backend_metal_device_acq (ctx_dev);
5866
+
5867
+ GGML_ASSERT (ctx_dev->mtl_device != nil );
5868
+
5869
+ id <MTLDevice > device = ctx_dev->mtl_device ;
5849
5870
5850
5871
// the buffer fits into the max buffer size allowed by the device
5851
5872
if (size_aligned <= device.maxBufferLength ) {
@@ -5901,7 +5922,6 @@ static ggml_backend_buffer_t ggml_backend_metal_device_buffer_from_ptr(ggml_back
5901
5922
if (!ggml_backend_metal_buffer_rset_init (ctx, ctx_dev, device)) {
5902
5923
GGML_LOG_ERROR (" %s : error: failed to initialize residency set\n " , __func__);
5903
5924
free (ctx);
5904
- ggml_backend_metal_device_rel (ctx_dev);
5905
5925
return NULL ;
5906
5926
}
5907
5927
@@ -5915,8 +5935,9 @@ static bool ggml_backend_metal_device_supports_op(ggml_backend_dev_t dev, const
5915
5935
}
5916
5936
5917
5937
static bool ggml_backend_metal_device_supports_buft (ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft) {
5918
- return buft->iface .get_name == ggml_backend_metal_buffer_type_get_name ||
5919
- buft->iface .get_name == ggml_backend_metal_buffer_from_ptr_type_get_name;
5938
+ return
5939
+ buft->iface .get_name == ggml_backend_metal_buffer_type_get_name ||
5940
+ buft->iface .get_name == ggml_backend_metal_buffer_from_ptr_type_get_name;
5920
5941
5921
5942
GGML_UNUSED (dev);
5922
5943
}
@@ -6001,8 +6022,19 @@ static ggml_backend_dev_t ggml_backend_metal_reg_device_get(ggml_backend_reg_t r
6001
6022
/* .get_proc_address = */ ggml_backend_metal_get_proc_address,
6002
6023
};
6003
6024
6025
+ // called upon program exit
6026
+ static void ggml_metal_cleanup (void ) {
6027
+ ggml_backend_metal_device_rel (&g_ggml_ctx_dev_main);
6028
+ }
6029
+
6030
+ // TODO: make thread-safe
6004
6031
ggml_backend_reg_t ggml_backend_metal_reg (void ) {
6005
- // TODO: make this thread-safe somehow?
6032
+ ggml_backend_metal_device_acq (&g_ggml_ctx_dev_main);
6033
+
6034
+ // register cleanup callback
6035
+ // TODO: not ideal, but not sure if there is a better way to do this in Objective-C
6036
+ atexit (ggml_metal_cleanup);
6037
+
6006
6038
{
6007
6039
g_ggml_backend_metal_reg = (struct ggml_backend_reg) {
6008
6040
/* .api_version = */ GGML_BACKEND_API_VERSION,
0 commit comments