Stoolap:纯Rust编写的高性能嵌入式SQL数据库
Stoolap: High-performance embedded SQL database in pure Rust

原始链接: https://github.com/stoolap/stoolap

## Stoolap:一个嵌入式 Rust SQL 数据库 Stoolap 是一个功能齐全的嵌入式 SQL 数据库,用 Rust 编写,提供内存和持久化存储选项,并完全符合 ACID 标准。 它可以轻松地通过 Cargo (`stoolap = "0.1"`) 集成到项目中,或通过 Git 从源代码构建。 主要特性包括多版本并发控制 (MVCC),具有读提交(默认)和快照隔离级别,以及查询特定时间戳或事务的历史数据能力。 Stoolap 自动优化索引 - 使用 B 树、哈希和位图索引 - 但允许显式索引定义。 它支持复杂的分析查询,包括窗口函数 (ROW_NUMBER, LAG)、递归查询 (CTEs) 和高级分组选项 (ROLLUP, CUBE, GROUPING SETS)。 基于成本的优化器和统计信息可提高查询性能,并提供一套全面的内置函数用于数据操作。 数据持久性通过预写日志 (WAL) 和定期快照来实现。 Stoolap 采用 Apache License 2.0 许可,并包含测试、linting 和文档工具。

黑客新闻 新 | 过去 | 评论 | 提问 | 展示 | 招聘 | 提交 登录 Stoolap: 用纯 Rust 编写的高性能嵌入式 SQL 数据库 (github.com/stoolap) 7 分,murat3ok 发表于 2 小时前 | 隐藏 | 过去 | 收藏 | 讨论 指南 | 常见问题 | 列表 | API | 安全 | 法律 | 申请 YC | 联系 搜索:
相关文章

原文

Stoolap is an embedded SQL database with MVCC transactions, written entirely in Rust. It supports both in-memory and persistent storage modes with full ACID compliance.

# Add to Cargo.toml
[dependencies]
stoolap = "0.1"

Or build from source:

git clone https://github.com/stoolap/stoolap.git
cd stoolap
cargo build --release
use stoolap::Database;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let db = Database::open_in_memory()?;

    db.execute("CREATE TABLE users (id INTEGER PRIMARY KEY, name TEXT)", ())?;
    db.execute("INSERT INTO users VALUES (1, 'Alice')", ())?;

    for row in db.query("SELECT * FROM users", ())? {
        let row = row?;
        println!("{}: {}", row.get::<i64>(0)?, row.get::<String>(1)?);
    }

    Ok(())
}
./stoolap                                    # In-memory REPL
./stoolap --db "file:///path/to/data"        # Persistent database
./stoolap -q "SELECT 1 + 1"                  # Execute query directly

Full multi-version concurrency control with two isolation levels:

-- Read Committed (default)
BEGIN;
UPDATE accounts SET balance = balance - 100 WHERE id = 1;
UPDATE accounts SET balance = balance + 100 WHERE id = 2;
COMMIT;

-- Snapshot Isolation
BEGIN TRANSACTION ISOLATION LEVEL SNAPSHOT;
SELECT * FROM accounts;  -- Consistent view throughout transaction
COMMIT;

Query historical data at any point in time:

-- Query data as it existed at a specific timestamp
SELECT * FROM orders AS OF TIMESTAMP '2024-01-15 10:30:00';

-- Query data as of a specific transaction
SELECT * FROM inventory AS OF TRANSACTION 1234;

-- Compare current vs historical data
SELECT
    current.price,
    historical.price AS old_price
FROM products current
JOIN products AS OF TIMESTAMP '2024-01-01' historical
    ON current.id = historical.id
WHERE current.price != historical.price;

Stoolap automatically selects optimal index types, or you can specify explicitly:

-- B-tree: Range queries, sorting, prefix matching
CREATE INDEX idx_date ON orders(created_at) USING BTREE;
SELECT * FROM orders WHERE created_at BETWEEN '2024-01-01' AND '2024-12-31';

-- Hash: O(1) equality lookups
CREATE INDEX idx_email ON users(email) USING HASH;
SELECT * FROM users WHERE email = '[email protected]';

-- Bitmap: Low-cardinality columns, efficient AND/OR
CREATE INDEX idx_status ON orders(status) USING BITMAP;
SELECT * FROM orders WHERE status = 'pending' AND priority = 'high';

-- Multi-column composite indexes
CREATE INDEX idx_lookup ON events(user_id, event_type, created_at);
SELECT * FROM events WHERE user_id = 100 AND event_type = 'click';

Full support for analytical queries:

SELECT
    employee_name,
    department,
    salary,
    ROW_NUMBER() OVER (PARTITION BY department ORDER BY salary DESC) as rank,
    salary - LAG(salary) OVER (ORDER BY hire_date) as salary_change,
    AVG(salary) OVER (PARTITION BY department) as dept_avg,
    SUM(salary) OVER (ORDER BY hire_date ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) as running_total
FROM employees;

Including recursive queries:

-- Non-recursive CTE
WITH high_value_orders AS (
    SELECT * FROM orders WHERE amount > 1000
)
SELECT customer_id, COUNT(*) FROM high_value_orders GROUP BY customer_id;

-- Recursive CTE (e.g., organizational hierarchy)
WITH RECURSIVE org_chart AS (
    SELECT id, name, manager_id, 1 as level
    FROM employees WHERE manager_id IS NULL

    UNION ALL

    SELECT e.id, e.name, e.manager_id, oc.level + 1
    FROM employees e
    JOIN org_chart oc ON e.manager_id = oc.id
)
SELECT * FROM org_chart ORDER BY level, name;
-- ROLLUP: Hierarchical subtotals
SELECT region, product, SUM(sales)
FROM sales_data
GROUP BY ROLLUP(region, product);

-- CUBE: All possible subtotal combinations
SELECT region, product, SUM(sales)
FROM sales_data
GROUP BY CUBE(region, product);

-- GROUPING SETS: Specific grouping combinations
SELECT region, product, category, SUM(sales)
FROM sales_data
GROUP BY GROUPING SETS ((region, product), (category), ());

Scalar, correlated, EXISTS, and IN subqueries:

-- Correlated subquery
SELECT * FROM employees e
WHERE salary > (SELECT AVG(salary) FROM employees WHERE department = e.department);

-- EXISTS
SELECT * FROM customers c
WHERE EXISTS (SELECT 1 FROM orders o WHERE o.customer_id = c.id AND o.amount > 1000);

-- IN with subquery
SELECT * FROM products
WHERE category_id IN (SELECT id FROM categories WHERE active = true);

Cost-based optimizer with statistics:

-- Collect table statistics
ANALYZE orders;

-- View query execution plan
EXPLAIN SELECT * FROM orders WHERE customer_id = 100;

-- View plan with actual execution statistics
EXPLAIN ANALYZE SELECT * FROM orders o
JOIN customers c ON o.customer_id = c.id
WHERE c.country = 'US';
Type Description Example
INTEGER 64-bit signed integer 42, -100
FLOAT 64-bit floating point 3.14, -0.001
TEXT UTF-8 string 'hello', '日本語'
BOOLEAN true/false TRUE, FALSE
TIMESTAMP Date and time '2024-01-15 10:30:00'
DATE Date only '2024-01-15'
TIME Time only '10:30:00'
JSON JSON data '{"key": "value"}'

UPPER, LOWER, LENGTH, TRIM, LTRIM, RTRIM, CONCAT, SUBSTRING, REPLACE, REVERSE, LEFT, RIGHT, LPAD, RPAD, REPEAT, POSITION, LOCATE, INSTR, SPLIT_PART, INITCAP, ASCII, CHR, TRANSLATE

ABS, CEIL, FLOOR, ROUND, TRUNC, SQRT, POWER, MOD, SIGN, GREATEST, LEAST, EXP, LN, LOG, LOG10, LOG2, SIN, COS, TAN, ASIN, ACOS, ATAN, ATAN2, DEGREES, RADIANS, PI, RAND, RANDOM

NOW, CURRENT_DATE, CURRENT_TIME, CURRENT_TIMESTAMP, EXTRACT, DATE_TRUNC, DATE_ADD, DATE_SUB, DATEDIFF, YEAR, MONTH, DAY, HOUR, MINUTE, SECOND, DAYOFWEEK, DAYOFYEAR, WEEK, QUARTER, TO_CHAR, TO_DATE, TO_TIMESTAMP

JSON_EXTRACT, JSON_EXTRACT_PATH, JSON_TYPE, JSON_TYPEOF, JSON_VALID, JSON_KEYS, JSON_ARRAY_LENGTH

COUNT, SUM, AVG, MIN, MAX, STDDEV, STDDEV_POP, STDDEV_SAMP, VARIANCE, VAR_POP, VAR_SAMP, STRING_AGG, ARRAY_AGG, FIRST, LAST, BIT_AND, BIT_OR, BIT_XOR, BOOL_AND, BOOL_OR

ROW_NUMBER, RANK, DENSE_RANK, NTILE, LAG, LEAD, FIRST_VALUE, LAST_VALUE, NTH_VALUE, PERCENT_RANK, CUME_DIST

COALESCE, NULLIF, CAST, CASE, IF, IIF, NVL, NVL2, DECODE, GREATEST, LEAST, GENERATE_SERIES

Stoolap uses write-ahead logging (WAL) with periodic snapshots:

# In-memory (default) - data lost on exit
./stoolap --db "memory://"

# File-based - durable storage
./stoolap --db "file:///var/lib/stoolap/data"

Features:

  • WAL: All changes logged before applied, survives crashes
  • Snapshots: Periodic full database snapshots for faster recovery
  • Index persistence: All indexes saved and restored
src/
├── api/        # Public API (Database, Connection, Rows)
├── core/       # Types (Value, Row, Schema, Error)
├── parser/     # SQL lexer and parser
├── planner/    # Query planning
├── optimizer/  # Cost-based query optimizer
├── executor/   # Query execution engine
├── functions/  # 100+ built-in functions
│   ├── scalar/     # String, math, date, JSON
│   ├── aggregate/  # COUNT, SUM, AVG, etc.
│   └── window/     # ROW_NUMBER, RANK, LAG, etc.
└── storage/    # Storage engine
    ├── mvcc/       # Multi-version concurrency control
    └── index/      # B-tree, Hash, Bitmap indexes
cargo build              # Debug build
cargo build --release    # Release build (optimized)
cargo test               # Run tests
cargo clippy             # Lint
cargo doc --open         # Generate documentation

See CONTRIBUTING.md for guidelines.

Apache License 2.0. See LICENSE.

联系我们 contact @ memedata.com