Skip to content

Commit fa39b73

Browse files
DwyaneShi and Haiyang Shi authored
[Refactor] New memory layout for AIBrix KVCache (#1174)
* [Refactor] New memory layout for AIBrix KVCache - Legacy layout embedded tokens directly in the key, which could result in very long keys for cache blocks with long prefixes - New layout uses hash as the key and stores tokens as part of the value - L2Cache get operation now uses hash key and verifies token match after retrieving value Signed-off-by: Haiyang Shi <[email protected]> * [Integration] Enable new memory layout in vLLM v0.8.5 Signed-off-by: Haiyang Shi <[email protected]> * [Chore] Add hpkv dependency Signed-off-by: Haiyang Shi <[email protected]> * [Fix] Fix typing errors with python3.11 Signed-off-by: Haiyang Shi <[email protected]> * [Fix] Fix BaseKVCacheManager Signed-off-by: Haiyang Shi <[email protected]> * [Chore] Optimize L2Cache tokens comparison Signed-off-by: Haiyang Shi <[email protected]> * [Feature] KVCache layout: compact layout Signed-off-by: Haiyang Shi <[email protected]> --------- Signed-off-by: Haiyang Shi <[email protected]> Co-authored-by: Haiyang Shi <[email protected]>
1 parent 4842c7d commit fa39b73

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

42 files changed

+2021
-920
lines changed
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
# Copyright 2024 The Aibrix Team.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from abc import ABC, abstractmethod
16+
from typing import Tuple
17+
18+
from .common import CachedPyObjectBase
19+
20+
21+
class KVCacheHashable(ABC):
    """
    Interface for objects that can be used to index a KV cache block.

    Concrete subclasses must supply hashing, equality and length.
    """

    @abstractmethod
    def __hash__(self) -> int:
        """Return the hash value of this object."""
        raise NotImplementedError

    @abstractmethod
    def __eq__(self, other) -> bool:
        """Return whether this object is equal to ``other``."""
        raise NotImplementedError

    @abstractmethod
    def __len__(self) -> int:
        """Return the length of this object."""
        raise NotImplementedError
37+
38+
39+
class BaseKVCacheHashable(KVCacheHashable, CachedPyObjectBase):
    """
    Hashable base whose hash and equality are derived from the full
    ``(prefix, tokens)`` pair.

    ``__len__`` is not defined here; subclasses provide it.
    """

    def __init__(self, prefix: Tuple[int, ...] | None, tokens: Tuple[int, ...]):
        # A missing (or empty) prefix is stored as the empty tuple so that
        # hashing and comparison always operate on tuples.
        self.prefix = prefix if prefix else tuple()
        self.tokens = tokens

    def __hash__(self) -> int:
        # Hash the prefix and the tokens together as one compound key.
        compound = (self.prefix, self.tokens)
        return hash(compound)

    def __eq__(self, other) -> bool:
        # Only another BaseKVCacheHashable can compare equal.
        if isinstance(other, BaseKVCacheHashable):
            return (self.prefix, self.tokens) == (other.prefix, other.tokens)
        return False
55+
56+
57+
class TokenCacheKey(BaseKVCacheHashable):
    """
    A cache key that compounds prefix and tokens.

    Args:
        prefix (Tuple[int, ...] | None): The prefix tokens of the kv tensors.
            A falsy value is stored as an empty tuple by the base initializer.
        tokens (Tuple[int, ...]): The tokens of the kv tensors.
    """

    def __init__(self, prefix: Tuple[int, ...] | None, tokens: Tuple[int, ...]):
        super().__init__(prefix, tokens)

    def __len__(self) -> int:
        # Length of the key is the combined length of prefix and tokens.
        prefix_len = len(self.prefix)
        tokens_len = len(self.tokens)
        return prefix_len + tokens_len

0 commit comments

Comments
 (0)