# Original
from kvboost import KVBoost
# Build the engine from the "Qwen/Qwen2.5-3B" checkpoint identifier.
engine = KVBoost.from_pretrained("Qwen/Qwen2.5-3B")

# Warm the shared prefix a single time, up front.
engine.warm("You are a helpful assistant...")

# Later generate() calls are expected to reuse the warmed cache.
# NOTE(review): `prompt` is not defined in this snippet -- presumably it is
# supplied by the surrounding code; confirm before running standalone.
result = engine.generate(prompt)

print(result.kv_reuse_ratio)  # ✓ 80%+