构建高性能 JSON 解析器

构建高性能 JSON 解析器
Building a high performance JSON parser

原始链接: https://dave.cheney.net/paste/gophercon-sg-2023.html

1. 阿姆达尔定律：https://en.wikipedia.org/wiki/Amdahl%27s_law 在优化工作中，了解内存分配和内存使用对整体性能的影响至关重要，因为它们往往会占用执行时间的很大一部分。这一原理在题为“Go Guru Episode #12: Dave Cheney - Speedy JSON Parsing”的 Go 演讲中得到了阐述：作者分享了优化 pkg/json 库、减少内存分配和每个 token 处理成本的经验，从而使 JSON 数据的解析和解码性能得到了实质性的提升。与其在整个实现中到处做细小的改动，不如专注于减少内存分配和词法分析（tokenization）的成本，这样才能在原始性能方面获得更大的收益。正如阿姆达尔定律所述，与解决主导因素相比，在非主导因素（例如减少指令周期时间）上花费的任何努力对整体系统性能的影响都很小。因此，在处理 JSON 解析这类内存密集型应用程序时，优先围绕这些领域进行优化可以获得更大的回报。同样，其他演讲（例如 Steven Schveighoffer 关于 IOPipe 的演讲）也强调了底层细节（尤其是与硬件资源相关的细节）对于实现高水平应用程序性能的重要性。总体而言，理解资源管理与性能之间的关系，可以指导程序员构建以最佳效率和最小内存占用为目标的系统，同时提供出色的功能和性能。

总的来说，由于资源限制、性能要求以及 Web 客户端带来的限制等因素，在分布式系统中使用 JSON 会带来独特的挑战。为了克服这些问题，可以采用最小化分配成本、避免动态内存分配以及通过流式解析分摊解析成本等策略。此外，Flatbuffers 之类的技术能够提供更高的吞吐量和更少的内存分配，可以在这方面带来显著的改进。不过，即使存在产生乱码或不完整 JSON 的风险，保持正确的 JSON 语法对于确保可靠解析和准确传输数据仍然至关重要。最后，宽容的 JSON 解析器/词法分析器可以成为处理只完成了一部分的（不完整的）JSON 字符串的有价值的工具。

原文

// stateObjectString handles a token in the position where an object key may
// appear. Two tokens are accepted:
//
//   - a token starting with '"' (a string key): the decoder moves to
//     stateObjectColon and the token is returned;
//   - '}': d.pop() is called and the next state is chosen from the result:
//     stateEnd when d.len() is zero afterwards, otherwise stateObjectComma
//     when d.pop() returned true and stateArrayComma when it returned false.
//     The brace is returned.
//
// Any other token produces an error and a nil token.
//
// tok must be non-empty; tok[0] is inspected.
//
// NOTE(review): stateObjectComma switches to this state after a comma, so a
// '}' immediately following a comma appears to be accepted here — confirm
// whether that leniency is intended.
func (d *Decoder) stateObjectString(tok []byte) ([]byte, error) {
	if tok[0] == '"' {
		d.state = stateObjectColon
		return tok, nil
	}
	if tok[0] != '}' {
		return nil, fmt.Errorf("stateObjectString: missing string key")
	}

	// Closing brace: pop first, then inspect the remaining depth.
	resumeInObject := d.pop()
	if d.len() == 0 {
		d.state = stateEnd
	} else if resumeInObject {
		d.state = stateObjectComma
	} else {
		d.state = stateArrayComma
	}
	return tok, nil
}

// stateObjectColon handles the token that follows an object key, which must
// be Colon.
//
// The colon itself is not handed back to the caller: on a match the decoder
// switches to stateObjectValue and the result of d.NextToken() is returned
// instead. Any other token yields an error together with a nil token, the
// same shape stateObjectString, stateArrayValue, stateArrayComma and
// stateValue use on their error paths.
//
// tok must be non-empty; tok[0] is inspected.
func (d *Decoder) stateObjectColon(tok []byte) ([]byte, error) {
	if tok[0] != Colon {
		// A token returned alongside a non-nil error carries no meaning, so
		// do not hand the offending token back.
		return nil, fmt.Errorf("stateObjectColon: expecting colon")
	}
	d.state = stateObjectValue
	return d.NextToken()
}

// stateObjectValue handles the token in the value position of an object
// member (the state stateObjectColon selects after a colon).
//
//   - '{' calls d.push(true) and moves to stateObjectString.
//   - '[' calls d.push(false) and moves to stateArrayValue.
//   - ',', ':', '}' and ']' can never begin a JSON value and are rejected
//     with an error and a nil token. Without this check such a token would
//     be returned as if it were a value and, for '}' and ']', the stack
//     would not be popped, so input like {"a":}} would pass without error.
//   - Any other token is treated as a scalar value: the decoder moves to
//     stateObjectComma and the token is returned.
//
// tok must be non-empty; tok[0] is inspected.
func (d *Decoder) stateObjectValue(tok []byte) ([]byte, error) {
	switch tok[0] {
	case '{':
		d.state = stateObjectString
		d.push(true)
		return tok, nil
	case '[':
		d.state = stateArrayValue
		d.push(false)
		return tok, nil
	case ',', ':', '}', ']':
		return nil, fmt.Errorf("stateObjectValue: unexpected %q, expecting value", tok[0])
	default:
		d.state = stateObjectComma
		return tok, nil
	}
}

// stateObjectComma handles the token that follows an object member's value:
// either Comma, introducing another member, or the closing brace.
//
//   - '}' calls d.pop() and selects the next state: stateEnd when d.len()
//     is zero afterwards, otherwise stateObjectComma when d.pop() returned
//     true and stateArrayComma when it returned false. The brace is
//     returned to the caller.
//   - Comma is not returned; the decoder switches to stateObjectString and
//     the result of d.NextToken() is returned instead.
//   - Any other token yields an error together with a nil token, the same
//     shape stateObjectString, stateArrayValue, stateArrayComma and
//     stateValue use on their error paths.
//
// tok must be non-empty; tok[0] is inspected.
func (d *Decoder) stateObjectComma(tok []byte) ([]byte, error) {
	switch tok[0] {
	case '}':
		inObj := d.pop()
		switch {
		case d.len() == 0:
			d.state = stateEnd
		case inObj:
			d.state = stateObjectComma
		default:
			d.state = stateArrayComma
		}
		return tok, nil
	case Comma:
		d.state = stateObjectString
		return d.NextToken()
	default:
		// A token returned alongside a non-nil error carries no meaning, so
		// do not hand the offending token back.
		return nil, fmt.Errorf("stateObjectComma: expecting comma")
	}
}

// stateArrayValue handles the token in an array element position.
//
//   - '{' calls d.push(true) and moves to stateObjectString.
//   - '[' calls d.push(false) and stays in stateArrayValue.
//   - ']' closes the array: d.pop() is called and the next state is stateEnd
//     when d.len() is zero afterwards, otherwise stateObjectComma when
//     d.pop() returned true and stateArrayComma when it returned false.
//   - ',' is rejected with "unexpected comma".
//   - ':' and '}' can never begin a JSON value and are rejected as well;
//     without this check they would be returned as if they were elements,
//     so input like [}] would pass without error.
//   - Any other token is treated as a scalar element: the decoder moves to
//     stateArrayComma and the token is returned.
//
// Every error path returns a nil token. tok must be non-empty; tok[0] is
// inspected.
//
// NOTE(review): stateArrayComma switches back to this state after a comma,
// so a ']' immediately following a comma appears to reach the ']' case and
// be accepted. Rejecting it would need a separate state — confirm whether
// that leniency is intended.
func (d *Decoder) stateArrayValue(tok []byte) ([]byte, error) {
	switch tok[0] {
	case '{':
		d.state = stateObjectString
		d.push(true)
		return tok, nil
	case '[':
		d.state = stateArrayValue
		d.push(false)
		return tok, nil
	case ']':
		inObj := d.pop()
		switch {
		case d.len() == 0:
			d.state = stateEnd
		case inObj:
			d.state = stateObjectComma
		default:
			d.state = stateArrayComma
		}
		return tok, nil
	case ',':
		return nil, fmt.Errorf("stateArrayValue: unexpected comma")
	case ':', '}':
		return nil, fmt.Errorf("stateArrayValue: unexpected %q, expecting value", tok[0])
	default:
		d.state = stateArrayComma
		return tok, nil
	}
}

// stateArrayComma handles the token that follows an array element: either
// Comma, introducing another element, or the closing bracket.
//
// On ']' d.pop() is called and the next state is chosen from the result:
// stateEnd when d.len() is zero afterwards, otherwise stateObjectComma when
// d.pop() returned true and stateArrayComma when it returned false; the
// bracket is returned. On Comma the decoder switches to stateArrayValue and
// returns the result of d.NextToken(), so the comma itself is not returned.
// Any other token produces an error (whose text includes d.stack) and a nil
// token.
//
// tok must be non-empty; tok[0] is inspected.
func (d *Decoder) stateArrayComma(tok []byte) ([]byte, error) {
	if tok[0] == ']' {
		// Pop first, then inspect the remaining depth.
		resumeInObject := d.pop()
		if d.len() == 0 {
			d.state = stateEnd
		} else if resumeInObject {
			d.state = stateObjectComma
		} else {
			d.state = stateArrayComma
		}
		return tok, nil
	}

	if tok[0] == Comma {
		d.state = stateArrayValue
		return d.NextToken()
	}

	return nil, fmt.Errorf("stateArrayComma: expected comma, %v", d.stack)
}

// stateValue handles a token in a position where a value is expected and the
// decoder is not in one of the object/array states (presumably the start of
// a document — confirm against where d.state is initialised).
//
//   - '{' calls d.push(true) and moves to stateObjectString.
//   - '[' calls d.push(false) and moves to stateArrayValue.
//   - ',' is rejected with "unexpected comma".
//   - ':', '}' and ']' can never begin a JSON value and are rejected as
//     well; without this check a lone closing bracket or colon would be
//     returned as if it were a complete value.
//   - Any other token is treated as a scalar value: the decoder moves to
//     stateEnd and the token is returned.
//
// Every error path returns a nil token. tok must be non-empty; tok[0] is
// inspected.
func (d *Decoder) stateValue(tok []byte) ([]byte, error) {
	switch tok[0] {
	case '{':
		d.state = stateObjectString
		d.push(true)
		return tok, nil
	case '[':
		d.state = stateArrayValue
		d.push(false)
		return tok, nil
	case ',':
		return nil, fmt.Errorf("stateValue: unexpected comma")
	case ':', '}', ']':
		return nil, fmt.Errorf("stateValue: unexpected %q, expecting value", tok[0])
	default:
		d.state = stateEnd
		return tok, nil
	}
}

构建高性能 JSON 解析器 Building a high performance JSON parser

构建高性能 JSON 解析器
Building a high performance JSON parser