Skip to content

Commit 6db13ff

Browse files
committed
chore: Update docs
1 parent af3f61b commit 6db13ff

File tree

4 files changed

+26
-6
lines changed

4 files changed

+26
-6
lines changed

.github/img/benchmark.png

40.1 KB
Loading

README.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ pip install flaxkv
7272
```python
7373
from flaxkv import FlaxKV
7474
import numpy as np
75+
import pandas as pd
7576

7677
db = FlaxKV('test_db')
7778
"""
@@ -89,6 +90,7 @@ db['a dict'] = {'a': 1, 'b': [1, 2, 3]}
8990
db['a list'] = [1, 2, 3, {'a': 1}]
9091
db[(1, 2, 3)] = [1, 2, 3]
9192
db['numpy array'] = np.random.randn(100, 100)
93+
db['df'] = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]})
9294

9395
db.setdefault('key', 'value_2')
9496
assert db['key'] == 'value'
@@ -113,7 +115,7 @@ print(len(db))
113115
### Benchmark
114116
![benchmark](.github/img/benchmark.png)
115117

116-
Test Content: Write and read traversal for N=10,000 numpy array vectors (each vector is 1000-dimensional).
118+
Test content: write N numpy array vectors (each vector is 1000-dimensional), then read them all back by traversing the database.
117119

118120
Execute the test:
119121
```bash

README_ZH.md

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@
4040

4141

4242

43-
`flaxkv` 提供了一个非常类似字典的接口,用于与高性能键值数据库进行交互。更重要的是,它作为持久化数据库提供了接近原生字典(内存)存取的性能
43+
`flaxkv` 提供了一个非常类似字典的接口,用于与高性能键值数据库进行交互。更重要的是,它作为持久化数据库提供了接近原生字典(内存)的写入性能
4444
你可以直接将它当成python字典来使用而不必担心在任何时候操作数据库时会阻塞你的用户进程。
4545

4646
---
@@ -75,6 +75,7 @@ pip install flaxkv
7575
```python
7676
from flaxkv import FlaxKV
7777
import numpy as np
78+
import pandas as pd
7879

7980
db = FlaxKV('test_db')
8081

@@ -93,6 +94,7 @@ db['a dict'] = {'a': 1, 'b': [1, 2, 3]}
9394
db['a list'] = [1, 2, 3, {'a': 1}]
9495
db[(1, 2, 3)] = [1, 2, 3]
9596
db['numpy array'] = np.random.randn(100, 100)
97+
db['df'] = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]})
9698

9799
db.setdefault('key', 'value_2')
98100
assert db['key'] == 'value'
@@ -105,7 +107,7 @@ db.pop("key1")
105107
assert 'key1' not in db
106108

107109
for key, value in db.items():
108-
print(key, value)
110+
print(key, value)
109111

110112
print(len(db))
111113
```
@@ -119,7 +121,7 @@ print(len(db))
119121
### Benchmark
120122
![benchmark](.github/img/benchmark.png)
121123

122-
测试内容:对N=10,000 条1000维的numpy array进行写入和遍历读取
124+
测试内容:对N条1000维的numpy array进行写入和遍历读取
123125

124126
执行测试:
125127
```bash

benchmark/run.py

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,17 +23,30 @@
2323

2424

2525
def prepare_data(n, key_only=False):
26-
import numpy as np
26+
global large_df
2727

2828
for i in range(n):
2929
if key_only:
3030
yield f'vector-{i}'
3131
else:
3232
yield (f'vector-{i}', np.random.rand(1000))
33+
# yield (f'vector-{i}', large_df)
34+
35+
36+
def gen_large_df():
37+
global large_df
38+
num_rows = 100_000
39+
num_cols = 10
40+
data = {
41+
f'col{i}': random.sample(range(num_rows), num_rows) for i in range(num_cols)
42+
}
43+
large_df = pd.DataFrame(data)
3344

3445

3546
@pytest.fixture(scope="session", autouse=True)
3647
def startup_and_shutdown(request):
48+
# gen_large_df()
49+
3750
process = subprocess.Popen(["flaxkv", "run"])
3851
try:
3952
wait_for_server_to_start(url="http://localhost:8000/healthz")
@@ -61,7 +74,7 @@ def process_result():
6174
"RocksDict",
6275
"Shelve",
6376
"Sqlite3",
64-
"flaxkv-LMDB",
77+
# "flaxkv-LMDB",
6578
"flaxkv-LevelDB",
6679
# "flaxkv-REMOTE",
6780
]
@@ -105,6 +118,9 @@ def benchmark(db, db_name, n=200):
105118
db.write_immediately()
106119
write_cost = mt.show_interval(f"{db_name} write")
107120

121+
if isinstance(db, BaseDBDict):
122+
db.write_immediately(block=True)
123+
108124
mt.start()
109125
for key in db.keys():
110126
...

0 commit comments

Comments
 (0)