Repository: bytedance/sonic Branch: main Commit: 3835c030aefd Files: 710 Total size: 16.9 MB Directory structure: gitextract_08lhox7g/ ├── .codespellrc ├── .github/ │ ├── ISSUE_TEMPLATE/ │ │ ├── bug_report.md │ │ └── feature_request.md │ ├── PULL_REQUEST_TEMPLATE.md │ └── workflows/ │ ├── .ignore_words │ ├── benchmark.yml │ ├── compatibility_test-windows.yml │ ├── compatibility_test.yml │ ├── fuzzing.yml │ ├── lint.yml │ ├── test-arm64.yml │ └── test-x86.yml ├── .gitignore ├── .gitmodules ├── .licenserc.yaml ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── CREDITS ├── LICENSE ├── README.md ├── README_ZH_CN.md ├── api.go ├── api_test.go ├── ast/ │ ├── api.go │ ├── api_compat.go │ ├── api_native_test.go │ ├── asm.s │ ├── buffer.go │ ├── buffer_test.go │ ├── decode.go │ ├── decode_test.go │ ├── encode.go │ ├── encode_test.go │ ├── error.go │ ├── iterator.go │ ├── iterator_test.go │ ├── node.go │ ├── node_test.go │ ├── parser.go │ ├── parser_test.go │ ├── search.go │ ├── search_test.go │ ├── stubs.go │ ├── testdata_test.go │ ├── visitor.go │ └── visitor_test.go ├── compat.go ├── compat_test.go ├── decode_test.go ├── decoder/ │ ├── decoder_compat.go │ ├── decoder_native.go │ ├── decoder_native_test.go │ ├── decoder_test.go │ └── testdata_test.go ├── docs/ │ ├── INTRODUCTION.md │ └── INTRODUCTION_ZH_CN.md ├── encode_test.go ├── encoder/ │ ├── encoder_compat.go │ ├── encoder_native.go │ ├── encoder_native_test.go │ ├── encoder_test.go │ └── testdata_test.go ├── examples/ │ └── example_stream_test.go ├── external_jsonlib_test/ │ ├── benchmark_test/ │ │ ├── ast_set_benchmark_test.go │ │ ├── decoder_stream_test.go │ │ ├── decoder_test.go │ │ ├── encoder_stream_test.go │ │ ├── encoder_test.go │ │ ├── msgpack_test.go │ │ ├── parser_test.go │ │ ├── search_test.go │ │ └── testdata_test.go │ ├── go.mod │ ├── go.sum │ └── unit_test/ │ ├── api_test.go │ ├── ast_compat_test.go │ └── decoder_stream_test.go ├── fuzz/ │ ├── ast_fuzz_test.go │ ├── corpus/ │ │ ├── htmescape3.json │ │ ├── 
htmlescap2.json │ │ ├── htmlescape.json │ │ ├── stringnumber.json │ │ ├── stringnumber2.json │ │ ├── struct.json │ │ ├── twitter.json │ │ └── twitterescaped.json │ ├── corpus.go │ ├── fuzz_test.go │ ├── go.mod │ ├── go.sum │ ├── other_fuzz_test.go │ └── struct_fuzz_test.go ├── generic_test/ │ ├── benchmark_test.go │ ├── go.mod │ ├── go.sum │ ├── sonic_test.go │ └── testdata_test.go ├── go.mod ├── go.sum ├── go.work ├── go.work.sum ├── internal/ │ ├── caching/ │ │ ├── asm.s │ │ ├── fcache.go │ │ ├── hashing.go │ │ ├── hashing_test.go │ │ ├── pcache.go │ │ └── pcache_test.go │ ├── compat/ │ │ └── warn.go │ ├── cpu/ │ │ └── features.go │ ├── decoder/ │ │ ├── api/ │ │ │ ├── decoder.go │ │ │ ├── decoder_amd64.go │ │ │ ├── decoder_arm64.go │ │ │ ├── norace_test.go │ │ │ ├── stream.go │ │ │ ├── stream_test.go │ │ │ └── testdata_test.go │ │ ├── consts/ │ │ │ └── option.go │ │ ├── errors/ │ │ │ ├── errors.go │ │ │ ├── errors_test.go │ │ │ └── fuzz_test.go │ │ ├── jitdec/ │ │ │ ├── asm.s │ │ │ ├── asm_stubs_amd64_go117.go │ │ │ ├── asm_stubs_amd64_go121.go │ │ │ ├── assembler_regabi_amd64.go │ │ │ ├── assembler_test.go │ │ │ ├── compiler.go │ │ │ ├── compiler_test.go │ │ │ ├── debug.go │ │ │ ├── decoder.go │ │ │ ├── generic_regabi_amd64.go │ │ │ ├── generic_regabi_amd64_test.s │ │ │ ├── generic_test.go │ │ │ ├── pcsp_test.go │ │ │ ├── pools.go │ │ │ ├── primitives.go │ │ │ ├── testdata_test.go │ │ │ ├── types.go │ │ │ └── utils.go │ │ └── optdec/ │ │ ├── compile_struct.go │ │ ├── compiler.go │ │ ├── const.go │ │ ├── context.go │ │ ├── decoder.go │ │ ├── errors.go │ │ ├── functor.go │ │ ├── helper.go │ │ ├── interface.go │ │ ├── map.go │ │ ├── native.go │ │ ├── native_test.go │ │ ├── node.go │ │ ├── slice.go │ │ ├── stringopts.go │ │ ├── structs.go │ │ ├── testdata_test.go │ │ └── types.go │ ├── encoder/ │ │ ├── alg/ │ │ │ ├── mapiter.go │ │ │ ├── opts.go │ │ │ ├── sort.go │ │ │ ├── sort_test.go │ │ │ ├── spec.go │ │ │ ├── spec_compat.go │ │ │ └── spec_test.go │ │ ├── 
compiler.go │ │ ├── compiler_test.go │ │ ├── encode_norace.go │ │ ├── encode_race.go │ │ ├── encoder.go │ │ ├── encoder_norace_test.go │ │ ├── encoder_test.go │ │ ├── ir/ │ │ │ └── op.go │ │ ├── omitzero_test.go │ │ ├── pools_amd64.go │ │ ├── pools_amd64_test.go │ │ ├── pools_compt.go │ │ ├── prim/ │ │ │ └── primitives.go │ │ ├── stream.go │ │ ├── stream_test.go │ │ ├── testdata_test.go │ │ ├── vars/ │ │ │ ├── cache.go │ │ │ ├── const.go │ │ │ ├── errors.go │ │ │ ├── stack.go │ │ │ └── types.go │ │ ├── vm/ │ │ │ ├── stbus.go │ │ │ ├── vm.go │ │ │ └── vm_test.go │ │ └── x86/ │ │ ├── asm_stubs_amd64_go117.go │ │ ├── asm_stubs_amd64_go121.go │ │ ├── assembler_regabi_amd64.go │ │ ├── assembler_test.go │ │ ├── debug_go116.go │ │ ├── debug_go117.go │ │ └── stbus.go │ ├── envs/ │ │ └── decode.go │ ├── jit/ │ │ ├── arch_amd64.go │ │ ├── asm.s │ │ ├── assembler_amd64.go │ │ ├── backend.go │ │ ├── backend_test.go │ │ └── runtime.go │ ├── native/ │ │ ├── avx2/ │ │ │ ├── f32toa.go │ │ │ ├── f32toa_subr.go │ │ │ ├── f32toa_text_amd64.go │ │ │ ├── f64toa.go │ │ │ ├── f64toa_subr.go │ │ │ ├── f64toa_text_amd64.go │ │ │ ├── fastfloat_test.go │ │ │ ├── fastint_test.go │ │ │ ├── get_by_path.go │ │ │ ├── get_by_path_subr.go │ │ │ ├── get_by_path_text_amd64.go │ │ │ ├── html_escape.go │ │ │ ├── html_escape_subr.go │ │ │ ├── html_escape_text_amd64.go │ │ │ ├── i64toa.go │ │ │ ├── i64toa_subr.go │ │ │ ├── i64toa_text_amd64.go │ │ │ ├── lspace.go │ │ │ ├── lspace_subr.go │ │ │ ├── lspace_text_amd64.go │ │ │ ├── native_export.go │ │ │ ├── native_test.go │ │ │ ├── parse_with_padding.go │ │ │ ├── parse_with_padding_subr.go │ │ │ ├── parse_with_padding_text_amd64.go │ │ │ ├── quote.go │ │ │ ├── quote_subr.go │ │ │ ├── quote_text_amd64.go │ │ │ ├── recover_test.go │ │ │ ├── skip_array.go │ │ │ ├── skip_array_subr.go │ │ │ ├── skip_array_text_amd64.go │ │ │ ├── skip_number.go │ │ │ ├── skip_number_subr.go │ │ │ ├── skip_number_text_amd64.go │ │ │ ├── skip_object.go │ │ │ ├── 
skip_object_subr.go │ │ │ ├── skip_object_text_amd64.go │ │ │ ├── skip_one.go │ │ │ ├── skip_one_fast.go │ │ │ ├── skip_one_fast_subr.go │ │ │ ├── skip_one_fast_text_amd64.go │ │ │ ├── skip_one_subr.go │ │ │ ├── skip_one_text_amd64.go │ │ │ ├── u64toa.go │ │ │ ├── u64toa_subr.go │ │ │ ├── u64toa_text_amd64.go │ │ │ ├── unquote.go │ │ │ ├── unquote_subr.go │ │ │ ├── unquote_text_amd64.go │ │ │ ├── validate_one.go │ │ │ ├── validate_one_subr.go │ │ │ ├── validate_one_text_amd64.go │ │ │ ├── validate_utf8.go │ │ │ ├── validate_utf8_fast.go │ │ │ ├── validate_utf8_fast_subr.go │ │ │ ├── validate_utf8_fast_text_amd64.go │ │ │ ├── validate_utf8_subr.go │ │ │ ├── validate_utf8_text_amd64.go │ │ │ ├── value.go │ │ │ ├── value_subr.go │ │ │ ├── value_text_amd64.go │ │ │ ├── vnumber.go │ │ │ ├── vnumber_subr.go │ │ │ ├── vnumber_text_amd64.go │ │ │ ├── vsigned.go │ │ │ ├── vsigned_subr.go │ │ │ ├── vsigned_text_amd64.go │ │ │ ├── vstring.go │ │ │ ├── vstring_subr.go │ │ │ ├── vstring_text_amd64.go │ │ │ ├── vunsigned.go │ │ │ ├── vunsigned_subr.go │ │ │ └── vunsigned_text_amd64.go │ │ ├── dispatch_amd64.go │ │ ├── dispatch_arm64.go │ │ ├── f32toa.tmpl │ │ ├── f64toa.tmpl │ │ ├── fastfloat_test.tmpl │ │ ├── fastint_test.tmpl │ │ ├── get_by_path.tmpl │ │ ├── html_escape.tmpl │ │ ├── i64toa.tmpl │ │ ├── lspace.tmpl │ │ ├── native_export.tmpl │ │ ├── native_test.tmpl │ │ ├── neon/ │ │ │ ├── f32toa_arm64.go │ │ │ ├── f32toa_arm64.s │ │ │ ├── f32toa_subr_arm64.go │ │ │ ├── f64toa_arm64.go │ │ │ ├── f64toa_arm64.s │ │ │ ├── f64toa_subr_arm64.go │ │ │ ├── fastfloat_arm64_test.go │ │ │ ├── fastint_arm64_test.go │ │ │ ├── get_by_path_arm64.go │ │ │ ├── get_by_path_arm64.s │ │ │ ├── get_by_path_subr_arm64.go │ │ │ ├── html_escape_arm64.go │ │ │ ├── html_escape_arm64.s │ │ │ ├── html_escape_subr_arm64.go │ │ │ ├── i64toa_arm64.go │ │ │ ├── i64toa_arm64.s │ │ │ ├── i64toa_subr_arm64.go │ │ │ ├── lspace_arm64.go │ │ │ ├── lspace_arm64.s │ │ │ ├── lspace_subr_arm64.go │ │ │ ├── 
native_arm64_test.go │ │ │ ├── native_export_arm64.go │ │ │ ├── parse_with_padding_arm64.go │ │ │ ├── parse_with_padding_arm64.s │ │ │ ├── parse_with_padding_subr_arm64.go │ │ │ ├── quote_arm64.go │ │ │ ├── quote_arm64.s │ │ │ ├── quote_subr_arm64.go │ │ │ ├── recover_arm64_test.go │ │ │ ├── skip_array_arm64.go │ │ │ ├── skip_array_arm64.s │ │ │ ├── skip_array_subr_arm64.go │ │ │ ├── skip_number_arm64.go │ │ │ ├── skip_number_arm64.s │ │ │ ├── skip_number_subr_arm64.go │ │ │ ├── skip_object_arm64.go │ │ │ ├── skip_object_arm64.s │ │ │ ├── skip_object_subr_arm64.go │ │ │ ├── skip_one_arm64.go │ │ │ ├── skip_one_arm64.s │ │ │ ├── skip_one_fast_arm64.go │ │ │ ├── skip_one_fast_arm64.s │ │ │ ├── skip_one_fast_subr_arm64.go │ │ │ ├── skip_one_subr_arm64.go │ │ │ ├── u64toa_arm64.go │ │ │ ├── u64toa_arm64.s │ │ │ ├── u64toa_subr_arm64.go │ │ │ ├── unquote_arm64.go │ │ │ ├── unquote_arm64.s │ │ │ ├── unquote_subr_arm64.go │ │ │ ├── validate_one_arm64.go │ │ │ ├── validate_one_arm64.s │ │ │ ├── validate_one_subr_arm64.go │ │ │ ├── validate_utf8_arm64.go │ │ │ ├── validate_utf8_arm64.s │ │ │ ├── validate_utf8_fast_arm64.go │ │ │ ├── validate_utf8_fast_arm64.s │ │ │ ├── validate_utf8_fast_subr_arm64.go │ │ │ ├── validate_utf8_subr_arm64.go │ │ │ ├── value_arm64.go │ │ │ ├── value_arm64.s │ │ │ ├── value_subr_arm64.go │ │ │ ├── vnumber_arm64.go │ │ │ ├── vnumber_arm64.s │ │ │ ├── vnumber_subr_arm64.go │ │ │ ├── vsigned_arm64.go │ │ │ ├── vsigned_arm64.s │ │ │ ├── vsigned_subr_arm64.go │ │ │ ├── vstring_arm64.go │ │ │ ├── vstring_arm64.s │ │ │ ├── vstring_subr_arm64.go │ │ │ ├── vunsigned_arm64.go │ │ │ ├── vunsigned_arm64.s │ │ │ └── vunsigned_subr_arm64.go │ │ ├── parse_with_padding.tmpl │ │ ├── quote.tmpl │ │ ├── recover_test.tmpl │ │ ├── skip_array.tmpl │ │ ├── skip_number.tmpl │ │ ├── skip_object.tmpl │ │ ├── skip_one.tmpl │ │ ├── skip_one_fast.tmpl │ │ ├── sse/ │ │ │ ├── f32toa.go │ │ │ ├── f32toa_subr.go │ │ │ ├── f32toa_text_amd64.go │ │ │ ├── f64toa.go │ │ │ ├── 
f64toa_subr.go │ │ │ ├── f64toa_text_amd64.go │ │ │ ├── fastfloat_test.go │ │ │ ├── fastint_test.go │ │ │ ├── get_by_path.go │ │ │ ├── get_by_path_subr.go │ │ │ ├── get_by_path_text_amd64.go │ │ │ ├── html_escape.go │ │ │ ├── html_escape_subr.go │ │ │ ├── html_escape_text_amd64.go │ │ │ ├── i64toa.go │ │ │ ├── i64toa_subr.go │ │ │ ├── i64toa_text_amd64.go │ │ │ ├── lspace.go │ │ │ ├── lspace_subr.go │ │ │ ├── lspace_text_amd64.go │ │ │ ├── native_export.go │ │ │ ├── native_test.go │ │ │ ├── parse_with_padding.go │ │ │ ├── parse_with_padding_subr.go │ │ │ ├── parse_with_padding_text_amd64.go │ │ │ ├── quote.go │ │ │ ├── quote_subr.go │ │ │ ├── quote_text_amd64.go │ │ │ ├── recover_test.go │ │ │ ├── skip_array.go │ │ │ ├── skip_array_subr.go │ │ │ ├── skip_array_text_amd64.go │ │ │ ├── skip_number.go │ │ │ ├── skip_number_subr.go │ │ │ ├── skip_number_text_amd64.go │ │ │ ├── skip_object.go │ │ │ ├── skip_object_subr.go │ │ │ ├── skip_object_text_amd64.go │ │ │ ├── skip_one.go │ │ │ ├── skip_one_fast.go │ │ │ ├── skip_one_fast_subr.go │ │ │ ├── skip_one_fast_text_amd64.go │ │ │ ├── skip_one_subr.go │ │ │ ├── skip_one_text_amd64.go │ │ │ ├── u64toa.go │ │ │ ├── u64toa_subr.go │ │ │ ├── u64toa_text_amd64.go │ │ │ ├── unquote.go │ │ │ ├── unquote_subr.go │ │ │ ├── unquote_text_amd64.go │ │ │ ├── validate_one.go │ │ │ ├── validate_one_subr.go │ │ │ ├── validate_one_text_amd64.go │ │ │ ├── validate_utf8.go │ │ │ ├── validate_utf8_fast.go │ │ │ ├── validate_utf8_fast_subr.go │ │ │ ├── validate_utf8_fast_text_amd64.go │ │ │ ├── validate_utf8_subr.go │ │ │ ├── validate_utf8_text_amd64.go │ │ │ ├── value.go │ │ │ ├── value_subr.go │ │ │ ├── value_text_amd64.go │ │ │ ├── vnumber.go │ │ │ ├── vnumber_subr.go │ │ │ ├── vnumber_text_amd64.go │ │ │ ├── vsigned.go │ │ │ ├── vsigned_subr.go │ │ │ ├── vsigned_text_amd64.go │ │ │ ├── vstring.go │ │ │ ├── vstring_subr.go │ │ │ ├── vstring_text_amd64.go │ │ │ ├── vunsigned.go │ │ │ ├── vunsigned_subr.go │ │ │ └── vunsigned_text_amd64.go 
│ │ ├── traceback_test.mock_tmpl │ │ ├── types/ │ │ │ └── types.go │ │ ├── u64toa.tmpl │ │ ├── unquote.tmpl │ │ ├── validate_one.tmpl │ │ ├── validate_utf8.tmpl │ │ ├── validate_utf8_fast.tmpl │ │ ├── value.tmpl │ │ ├── vnumber.tmpl │ │ ├── vsigned.tmpl │ │ ├── vstring.tmpl │ │ └── vunsigned.tmpl │ ├── optcaching/ │ │ ├── asm.s │ │ └── fcache.go │ ├── resolver/ │ │ ├── asm.s │ │ ├── fields.go │ │ ├── resolver.go │ │ └── resolver_test.go │ ├── rt/ │ │ ├── asm_amd64.s │ │ ├── asm_compat.s │ │ ├── assertI2I.go │ │ ├── base64_amd64.go │ │ ├── base64_compat.go │ │ ├── fastconv.go │ │ ├── fastconv_test.go │ │ ├── fastmem.go │ │ ├── fastvalue.go │ │ ├── gcwb.go │ │ ├── gcwb_legacy.go │ │ ├── gotype_go126.go │ │ ├── gotype_legacy.go │ │ ├── growslice.go │ │ ├── growslice_legacy.go │ │ ├── int48.go │ │ ├── map_go124.go │ │ ├── map_go126.go │ │ ├── map_legacy.go │ │ ├── maptype_indirectelem_go126.go │ │ ├── maptype_indirectelem_legacy.go │ │ ├── pool.go │ │ ├── pool_test.go │ │ ├── stubs.go │ │ ├── stubs_test.go │ │ ├── table.go │ │ └── types.go │ └── utils/ │ └── skip.go ├── issue_test/ │ ├── common_test.go │ ├── go.mod │ ├── go.sum │ ├── hugestruct_test.go │ ├── issue100_test.go │ ├── issue101_test.go │ ├── issue107_test.go │ ├── issue108_test.go │ ├── issue112_test.go │ ├── issue113_test.go │ ├── issue115_test.go │ ├── issue119_test.go │ ├── issue123_test.go │ ├── issue128_test.go │ ├── issue138_test.go │ ├── issue141_test.go │ ├── issue144_test.go │ ├── issue16_test.go │ ├── issue182_test.go │ ├── issue186_test.go │ ├── issue195_test.go │ ├── issue206_test.go │ ├── issue213_test.go │ ├── issue242_test.go │ ├── issue248_test.go │ ├── issue258_test.go │ ├── issue263_test.go │ ├── issue273_test.go │ ├── issue27_test.go │ ├── issue293_test.go │ ├── issue379_test.go │ ├── issue381_test.go │ ├── issue390_test.go │ ├── issue39_test.go │ ├── issue3_test.go │ ├── issue403_test.go │ ├── issue406_test.go │ ├── issue437_test.go │ ├── issue45_test.go │ ├── issue460_test.go │ ├── 
issue465_test.go │ ├── issue491_test.go │ ├── issue507_test.go │ ├── issue539_test.go │ ├── issue58_test.go │ ├── issue5_test.go │ ├── issue600_test.go │ ├── issue634_test.go │ ├── issue670_test.go │ ├── issue67_test.go │ ├── issue692_test.go │ ├── issue716_test.go │ ├── issue739_test.go │ ├── issue744_test.go │ ├── issue747_test.go │ ├── issue750_test.go │ ├── issue755_test.go │ ├── issue758_test.go │ ├── issue762_test.go │ ├── issue76_test.go │ ├── issue772_test.go │ ├── issue774_test.go │ ├── issue777_test.go │ ├── issue7_test.go │ ├── issue805_test.go │ ├── issue811_test.go │ ├── issue824_test.go │ ├── issue825_test.go │ ├── issue827_test.go │ ├── issue829_test.go │ ├── issue82_test.go │ ├── issue834_test.go │ ├── issue83_test.go │ ├── issue860_test.go │ ├── issue8_test.go │ ├── issue90_test.go │ ├── issue912_test.go │ ├── issue916_test.go │ ├── issue923_test.go │ ├── issue93_test.go │ ├── issue98_test.go │ ├── issue_recurse_test.go │ ├── plugin/ │ │ └── main.go │ ├── plugin_test.go │ ├── pretouch_test.go │ ├── race_test_go │ └── testmain_test.go ├── licenses/ │ ├── LICENSE-Drachennest │ ├── LICENSE-eisel_lemire │ ├── LICENSE-golang │ ├── LICENSE-golang-asm │ ├── LICENSE-simdjson │ └── LICENSE-yyjson ├── loader/ │ ├── funcdata.go │ ├── funcdata_compat.go │ ├── funcdata_go117.go │ ├── funcdata_go118.go │ ├── funcdata_go120.go │ ├── funcdata_go121.go │ ├── funcdata_go123.go │ ├── funcdata_go126.go │ ├── funcdata_legacy.go │ ├── go.mod │ ├── go.sum │ ├── internal/ │ │ ├── abi/ │ │ │ ├── abi.go │ │ │ ├── abi_amd64.go │ │ │ ├── abi_legacy_amd64.go │ │ │ ├── abi_regabi_amd64.go │ │ │ └── stubs.go │ │ ├── iasm/ │ │ │ ├── expr/ │ │ │ │ ├── ast.go │ │ │ │ ├── errors.go │ │ │ │ ├── ops.go │ │ │ │ ├── parser.go │ │ │ │ ├── parser_test.go │ │ │ │ ├── pools.go │ │ │ │ ├── term.go │ │ │ │ └── utils.go │ │ │ ├── obj/ │ │ │ │ ├── macho.go │ │ │ │ ├── macho_test.go │ │ │ │ └── obj.go │ │ │ ├── sync.sh │ │ │ ├── trim.py │ │ │ └── x86_64/ │ │ │ ├── arch.go │ │ │ ├── asm.s │ │ │ 
├── eface.go │ │ │ ├── encodings.go │ │ │ ├── instructions.go │ │ │ ├── instructions_table.go │ │ │ ├── instructions_test.go │ │ │ ├── operands.go │ │ │ ├── pools.go │ │ │ ├── program.go │ │ │ ├── program_test.go │ │ │ ├── registers.go │ │ │ └── utils.go │ │ └── rt/ │ │ ├── fastmem.go │ │ ├── fastvalue.go │ │ └── stackmap.go │ ├── loader.go │ ├── loader_go117_test.go │ ├── loader_latest.go │ ├── mmap_unix.go │ ├── mmap_windows.go │ ├── moduledata.go │ ├── pcdata.go │ ├── register.go │ ├── register_tango.go │ ├── register_test.go │ ├── stubs.go │ ├── wrapper.go │ └── wrapper_test.go ├── native/ │ ├── atof_eisel_lemire.h │ ├── atof_native.h │ ├── f32toa.c │ ├── f64toa.c │ ├── fastint.h │ ├── get_by_path.c │ ├── html_escape.c │ ├── i64toa.c │ ├── lspace.c │ ├── lspace.h │ ├── native.h │ ├── parse_with_padding.c │ ├── parsing.h │ ├── quote.c │ ├── scanning.h │ ├── simd.h │ ├── skip_array.c │ ├── skip_number.c │ ├── skip_object.c │ ├── skip_one.c │ ├── skip_one_fast.c │ ├── tab.h │ ├── test/ │ │ ├── xassert.h │ │ └── xprintf.h │ ├── types.h │ ├── u64toa.c │ ├── unittest/ │ │ ├── test_fastfint.c │ │ └── test_to_lower.c │ ├── unquote.c │ ├── utf8.h │ ├── utils.h │ ├── validate_one.c │ ├── validate_utf8.c │ ├── validate_utf8_fast.c │ ├── value.c │ ├── vnumber.c │ ├── vsigned.c │ ├── vstring.c │ ├── vstring.h │ └── vunsigned.c ├── option/ │ └── option.go ├── rawmessage.go ├── rfc_test.go ├── scripts/ │ ├── bench-arm.sh │ ├── bench.py │ ├── bench.sh │ ├── build-arm.sh │ ├── build-x86.sh │ ├── check_branch_name.sh │ ├── fuzz.sh │ ├── go_flags.sh │ ├── qemu.sh │ ├── test_pcsp.py │ └── test_race.sh ├── search_test.go ├── sonic.go ├── testdata/ │ ├── JSONTestSuite/ │ │ ├── LICENSE │ │ └── README.md │ ├── small.go │ ├── twitter.go │ ├── twitter.json │ └── twitterescaped.json ├── tools/ │ └── asm2arm/ │ ├── arm.py │ └── requirements.txt ├── unquote/ │ ├── unquote.go │ └── unquote_fallback.go └── utf8/ ├── utf8.go ├── utf8_fallback.go ├── utf8_native_test.go └── utf8_test.go 
================================================ FILE CONTENTS ================================================ ================================================ FILE: .codespellrc ================================================ [codespell] # ignore test files, go project names, binary files via `skip` and special var/regex via `ignore-words` skip = fuzz,*_test.tmpl,testdata,*_test.go,go.mod,go.sum,*.gz ignore-words = .github/workflows/.ignore_words check-filenames = true ================================================ FILE: .github/ISSUE_TEMPLATE/bug_report.md ================================================ --- name: Bug report about: Create a report to help us improve title: '' labels: '' assignees: '' --- **Describe the bug** A clear and concise description of what the bug is. **To Reproduce** Steps to reproduce the behavior: 1. Go to '...' 2. Click on '....' 3. Scroll down to '....' 4. See error **Expected behavior** A clear and concise description of what you expected to happen. **Screenshots** If applicable, add screenshots to help explain your problem. **Sonic version:** Please provide the version of Sonic you are using. **Environment:** The output of `go env`. **Additional context** Add any other context about the problem here. ================================================ FILE: .github/ISSUE_TEMPLATE/feature_request.md ================================================ --- name: Feature request about: Suggest an idea for this project title: '' labels: '' assignees: '' --- **Is your feature request related to a problem? Please describe.** A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] **Describe the solution you'd like** A clear and concise description of what you want to happen. **Describe alternatives you've considered** A clear and concise description of any alternative solutions or features you've considered. **Additional context** Add any other context or screenshots about the feature request here. 
================================================ FILE: .github/PULL_REQUEST_TEMPLATE.md ================================================ #### What type of PR is this? #### Check the PR title. - [ ] This PR title matches the format: \<type\>(optional scope): \<description\> - [ ] The description of this PR title is user-oriented and clear enough for others to understand. - [ ] Attach the PR updating the user documentation if the current PR requires user awareness at the usage level. [User docs repo](https://github.com/cloudwego/cloudwego.github.io) #### (Optional) Translate the PR title into Chinese. #### (Optional) More detailed description for this PR (en: English/zh: Chinese). en: zh(optional): #### (Optional) Which issue(s) this PR fixes: #### (Optional) The PR that updates user documentation: ================================================ FILE: .github/workflows/.ignore_words ================================================ socio-economic nd regArgs oders ure alse ================================================ FILE: .github/workflows/benchmark.yml ================================================ name: Benchmark on: pull_request jobs: build: strategy: matrix: os: [ubuntu-latest, ubuntu-24.04-arm] runs-on: ${{ matrix.os }} steps: - name: Clear repository run: sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE - uses: actions/checkout@v2 - name: Set up Go uses: actions/setup-go@v6 with: go-version: 1.22 - uses: actions/cache@v4 with: path: ~/go/pkg/mod key: ${{ runner.os }}-go-${{ hashFiles('**/go.sum') }} restore-keys: | ${{ runner.os }}-go- - name: Benchmark Target continue-on-error: true run: | export SONIC_NO_ASYNC_GC=1 go test -run ^$ -count=20 -benchmem -bench 'BenchmarkDecoder_(Generic|Binding)_Sonic' ./decoder >> /var/tmp/sonic_bench_target_${{ github.run_id }}.out go test -run ^$ -count=20 -benchmem -bench 'BenchmarkEncoder_(Generic|Binding)_Sonic' ./encoder >> /var/tmp/sonic_bench_target_${{ github.run_id }}.out go test -run ^$ -count=20 -benchmem -bench 
'Benchmark(Get|Set)One_Sonic|BenchmarkParseSeven_Sonic' ./ast >> /var/tmp/sonic_bench_target_${{ github.run_id }}.out - name: Clear repository run: sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE - name: Checkout main uses: actions/checkout@v2 with: ref: main - name: Benchmark main continue-on-error: true run: | export SONIC_NO_ASYNC_GC=1 go test -run ^$ -count=20 -benchmem -bench 'BenchmarkDecoder_(Generic|Binding)_Sonic' ./decoder >> /var/tmp/sonic_bench_main_${{ github.run_id }}.out go test -run ^$ -count=20 -benchmem -bench 'BenchmarkEncoder_(Generic|Binding)_Sonic' ./encoder >> /var/tmp/sonic_bench_main_${{ github.run_id }}.out go test -run ^$ -count=20 -benchmem -bench 'Benchmark(Get|Set)One_Sonic|BenchmarkParseSeven_Sonic' ./ast >> /var/tmp/sonic_bench_main_${{ github.run_id }}.out - name: Diff bench continue-on-error: true run: | ./scripts/bench.py -t 0.20 -d /var/tmp/sonic_bench_target_${{ github.run_id }}.out,/var/tmp/sonic_bench_main_${{ github.run_id }}.out x ================================================ FILE: .github/workflows/compatibility_test-windows.yml ================================================ name: Compatibility Test Windows-X64 on: pull_request jobs: build: strategy: matrix: go-version: [1.18.x, 1.21.x, 1.25.x] runs-on: windows-latest steps: - uses: actions/checkout@v2 - name: Set up Go uses: actions/setup-go@v6 with: go-version: ${{ matrix.go-version }} - uses: actions/cache@v4 with: path: ~/go/pkg/mod key: ${{ runner.os }}-go-${{ hashFiles('**/go.sum') }} restore-keys: | ${{ runner.os }}-go- - name: main run: | set GOMAXPROCS=4 go test -race ./ - name: ast run: | set GOMAXPROCS=4 go test -race ./ast - name: external run: | cd ./external_jsonlib_test set GOMAXPROCS=4 go test -race ./... 
================================================ FILE: .github/workflows/compatibility_test.yml ================================================ name: Compatibility Test Linux-X64|ARM & macOS-ARM on: pull_request jobs: build: strategy: matrix: go-version: [1.18.x, 1.19.x, 1.20.x, 1.21.x, 1.22.x, 1.23.x, 1.24.x, 1.25.x] os: [ubuntu-latest, ubuntu-24.04-arm, macos-latest] runs-on: ${{ matrix.os }} steps: - name: Clear repository run: | if [ "${{ runner.os }}" = "Linux" ]; then sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE else rm -rf $GITHUB_WORKSPACE && mkdir -p $GITHUB_WORKSPACE fi - uses: actions/checkout@v4 - name: Set up Go uses: actions/setup-go@v6 with: go-version: ${{ matrix.go-version }} - uses: actions/cache@v4 with: path: ~/go/pkg/mod key: ${{ runner.os }}-go-${{ hashFiles('**/go.sum') }} restore-keys: | ${{ runner.os }}-go- - name: main run: go test -race -gcflags="all=-l" ./ - name: decoder run: go test -race -gcflags="all=-l" ./decoder - name: encoder run: go test -race -gcflags="all=-l" ./encoder - name: ast run: go test -race -gcflags="all=-l" ./ast ================================================ FILE: .github/workflows/fuzzing.yml ================================================ name: Fuzz Test Linux-X64 on: pull_request jobs: build: strategy: max-parallel: 2 matrix: mode: [run, runopt] os: [ubuntu-latest, ubuntu-24.04-arm] exclude: - os: ubuntu-24.04-arm mode: runopt runs-on: ${{ matrix.os }} steps: - name: Clear repository run: sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE - uses: actions/checkout@v2 - name: Set up Go uses: actions/setup-go@v6 with: go-version: 1.20.x - uses: actions/cache@v4 with: path: ~/go/pkg/mod key: ${{ runner.os }}-go-${{ hashFiles('**/go.sum') }} restore-keys: | ${{ runner.os }}-go- - name: Fuzz sonic run: ./scripts/fuzz.sh ${{ matrix.mode }} ================================================ FILE: .github/workflows/lint.yml ================================================ name: Lint on: pull_request 
jobs: misc: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - name: spell check uses: codespell-project/actions-codespell@v2 with: skip: ./loader/internal/iasm/obj/macho.go,./loader/internal/iasm/x86_64/encodings.go,./loader/internal/iasm/x86_64/program.go,./loader/internal/iasm/expr/ast.go,./loader/internal/iasm/expr/errors.go ================================================ FILE: .github/workflows/test-arm64.yml ================================================ name: Unit Test on: pull_request jobs: build: permissions: contents: read id-token: write strategy: matrix: go-version: [1.20.x, 1.22.x, 1.25.x] runner_arch: [ubuntu-24.04-arm, macos-latest] runs-on: ${{ matrix.runner_arch }} steps: - name: Clear repository run: rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE - uses: actions/checkout@v4 - name: Set up Go uses: actions/setup-go@v6 with: go-version: ${{ matrix.go-version }} cache: true - name: Cache Go modules uses: actions/cache@v4 with: path: | ~/go/pkg/mod ${{ github.workspace }}/go.sum key: ${{ runner.os }}-go-${{ hashFiles('**/go.sum') }} - name: Unit Test run: | GOMAXPROCS=4 go test -race -covermode=atomic -coverprofile=coverage.txt $(go list ./... 
| grep -v -E 'loader|jit|avx|x86|sse') - name: Data Race run: | ./scripts/test_race.sh - name: Issue Test run: GOMAXPROCS=4 go test -race ./issue_test - name: Generic Test run: GOMAXPROCS=4 go test -race ./generic_test - name: Codecov uses: codecov/codecov-action@v5 with: use_oidc: true files: ./coverage.txt flags: arm,${{ matrix.runner_arch }} fail_ci_if_error: false ================================================ FILE: .github/workflows/test-x86.yml ================================================ name: Unit Test on: pull_request jobs: build: permissions: contents: read id-token: write strategy: matrix: go-version: [1.18.x, 1.21.x, 1.25.x] runner_arch: [ubuntu-latest] runs-on: ${{ matrix.runner_arch }} steps: - name: Clear repository run: rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE - uses: actions/checkout@v4 - name: Set up Go uses: actions/setup-go@v6 with: go-version: ${{ matrix.go-version }} cache: true - name: Cache Go modules uses: actions/cache@v4 with: path: | ~/go/pkg/mod ${{ github.workspace }}/go.sum key: ${{ runner.os }}-go-${{ hashFiles('**/go.sum') }} - name: Unit Test JIT run: | GOMAXPROCS=4 go test -race -covermode=atomic -coverprofile=coverage-jit.txt ./... - name: Unit Test JIT PCSP run: | GOMAXPROCS=4 go test -v ./internal/decoder/jitdec - name: Unit Test VM run: | SONIC_USE_OPTDEC=1 SONIC_USE_FASTMAP=1 SONIC_ENCODER_USE_VM=1 GOMAXPROCS=4 go test -race -covermode=atomic -coverprofile=coverage-vm.txt ./... - name: Loader Test run: | cd ./loader go test -race ./... 
- name: Data Race run: | ./scripts/test_race.sh - name: Issue Test run: GOMAXPROCS=4 go test -race ./issue_test - name: PCSP Test env: GOVERSION: ${{ matrix.go-version }} run: python3 ./scripts/test_pcsp.py - name: Generic Test JIT run: GOMAXPROCS=4 go test -race ./generic_test - name: Generic Test VM run: GOMAXPROCS=4 SONIC_USE_OPTDEC=1 SONIC_USE_FASTMAP=1 SONIC_ENCODER_USE_VM=1 go test -v -race ./generic_test - name: Codecov uses: codecov/codecov-action@v5 with: use_oidc: true files: ./coverage-jit.txt,./coverage-vm.txt flags: x86,${{ matrix.runner_arch }} fail_ci_if_error: false ================================================ FILE: .gitignore ================================================ *.o *.swp *.swm *.swn *.a *.so _obj _test *.[568vq] [568vq].out *.cgo1.go *.cgo2.c _cgo_defun.c _cgo_gotypes.go _cgo_export.* _testmain.go *.exe *.exe~ *.test *.prof *.rar *.zip *.gz *.psd *.bmd *.cfg *.pptx *.log *nohup.out *settings.pyc *.sublime-project *.sublime-workspace .DS_Store /.idea/ /.vscode/ /output/ /vendor/ /Gopkg.lock /Gopkg.toml coverage.html coverage.out coverage.xml junit.xml *.profile *.svg *.out ast/test.out ast/bench.sh !testdata/**/*.json.gz fuzz/testdata *__debug_bin* *pprof *coverage.txt .venv/ tools/venv/* ================================================ FILE: .gitmodules ================================================ [submodule "cloudwego"] path = tools/asm2asm url = https://github.com/cloudwego/asm2asm.git [submodule "tools/simde"] path = tools/simde url = https://github.com/simd-everywhere/simde.git [submodule "fuzz/go-fuzz-corpus"] path = fuzz/go-fuzz-corpus url = https://github.com/dvyukov/go-fuzz-corpus.git ================================================ FILE: .licenserc.yaml ================================================ header: license: spdx-id: Apache-2.0 copyright-owner: ByteDance Inc. 
paths: - '**/*.go' - '**/*.s' paths-ignore: - 'ast/asm.s' # empty file - 'decoder/asm.s' # empty file - 'encoder/asm.s' # empty file - 'internal/caching/asm.s' # empty file - 'internal/jit/asm.s' # empty file - 'internal/native/avx/native_amd64.s' # auto-generated by asm2asm - 'internal/native/avx/native_subr_amd64.go' # auto-generated by asm2asm - 'internal/native/avx2/native_amd64.s' # auto-generated by asm2asm - 'internal/native/avx2/native_subr_amd64.go' # auto-generated by asm2asm - 'internal/resolver/asm.s' # empty file - 'internal/rt/asm.s' # empty file - 'internal/loader/asm.s' # empty file comment: on-failure ================================================ FILE: CODE_OF_CONDUCT.md ================================================ # Contributor Covenant Code of Conduct ## Our Pledge We as members, contributors, and leaders pledge to make participation in our community a harassment-free experience for everyone, regardless of age, body size, visible or invisible disability, ethnicity, sex characteristics, gender identity and expression, level of experience, education, socio-economic status, nationality, personal appearance, race, religion, or sexual identity and orientation. We pledge to act and interact in ways that contribute to an open, welcoming, diverse, inclusive, and healthy community. 
## Our Standards Examples of behavior that contributes to a positive environment for our community include: * Demonstrating empathy and kindness toward other people * Being respectful of differing opinions, viewpoints, and experiences * Giving and gracefully accepting constructive feedback * Accepting responsibility and apologizing to those affected by our mistakes, and learning from the experience * Focusing on what is best not just for us as individuals, but for the overall community Examples of unacceptable behavior include: * The use of sexualized language or imagery, and sexual attention or advances of any kind * Trolling, insulting or derogatory comments, and personal or political attacks * Public or private harassment * Publishing others' private information, such as a physical or email address, without their explicit permission * Other conduct which could reasonably be considered inappropriate in a professional setting ## Enforcement Responsibilities Community leaders are responsible for clarifying and enforcing our standards of acceptable behavior and will take appropriate and fair corrective action in response to any behavior that they deem inappropriate, threatening, offensive, or harmful. Community leaders have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, and will communicate reasons for moderation decisions when appropriate. ## Scope This Code of Conduct applies within all community spaces, and also applies when an individual is officially representing the community in public spaces. Examples of representing our community include using an official e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. 
## Enforcement Instances of abusive, harassing, or otherwise unacceptable behavior may be reported to the community leaders responsible for enforcement at wudi.daniel@bytedance.com. All complaints will be reviewed and investigated promptly and fairly. All community leaders are obligated to respect the privacy and security of the reporter of any incident. ## Enforcement Guidelines Community leaders will follow these Community Impact Guidelines in determining the consequences for any action they deem in violation of this Code of Conduct: ### 1. Correction **Community Impact**: Use of inappropriate language or other behavior deemed unprofessional or unwelcome in the community. **Consequence**: A private, written warning from community leaders, providing clarity around the nature of the violation and an explanation of why the behavior was inappropriate. A public apology may be requested. ### 2. Warning **Community Impact**: A violation through a single incident or series of actions. **Consequence**: A warning with consequences for continued behavior. No interaction with the people involved, including unsolicited interaction with those enforcing the Code of Conduct, for a specified period of time. This includes avoiding interactions in community spaces as well as external channels like social media. Violating these terms may lead to a temporary or permanent ban. ### 3. Temporary Ban **Community Impact**: A serious violation of community standards, including sustained inappropriate behavior. **Consequence**: A temporary ban from any sort of interaction or public communication with the community for a specified period of time. No public or private interaction with the people involved, including unsolicited interaction with those enforcing the Code of Conduct, is allowed during this period. Violating these terms may lead to a permanent ban. ### 4. 
Permanent Ban **Community Impact**: Demonstrating a pattern of violation of community standards, including sustained inappropriate behavior, harassment of an individual, or aggression toward or disparagement of classes of individuals. **Consequence**: A permanent ban from any sort of public interaction within the community. ## Attribution This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 2.0, available at https://www.contributor-covenant.org/version/2/0/code_of_conduct.html. Community Impact Guidelines were inspired by [Mozilla's code of conduct enforcement ladder](https://github.com/mozilla/diversity). [homepage]: https://www.contributor-covenant.org For answers to common questions about this code of conduct, see the FAQ at https://www.contributor-covenant.org/faq. Translations are available at https://www.contributor-covenant.org/translations. ================================================ FILE: CONTRIBUTING.md ================================================ # How to Contribute ## Your First Pull Request We use GitHub for our codebase. You can start by reading [How To Pull Request](https://docs.github.com/en/github/collaborating-with-issues-and-pull-requests/about-pull-requests). ## Without Semantic Versioning We keep the stable code in branch `main` like `golang.org/x`. Development is based on branch `develop`. We promise **Forward Compatibility** by adding a new package directory with suffix `v2/v3` when the code has breaking changes. ## Branch Organization We use [git-flow](https://nvie.com/posts/a-successful-git-branching-model/) as our branch organization, also known as [FDD](https://en.wikipedia.org/wiki/Feature-driven_development). ## Bugs ### 1. How to Find Known Issues We are using [GitHub Issues](https://github.com/bytedance/sonic/issues) for our public bugs. We keep a close eye on this and try to make it clear when we have an internal fix in progress. Before filing a new issue, try to make sure your problem doesn’t already exist.
### 2. Reporting New Issues Providing reduced test code is the recommended way to report issues. It can be placed in: - The issue itself - [Golang Playground](https://play.golang.org/) ### 3. Security Bugs Please do not report security bugs in public issues, so that they can be disclosed safely. Contact us via [Support Email](mailto:sonic@bytedance.com) ## How to Get in Touch - [Email](mailto:wudi.daniel@bytedance.com) ## Submit a Pull Request Before you submit your Pull Request (PR) consider the following guidelines: 1. Search [GitHub](https://github.com/bytedance/sonic/pulls) for an open or closed PR that relates to your submission. You don't want to duplicate existing efforts. 2. Be sure that an issue describes the problem you're fixing, or documents the design for the feature you'd like to add. Discussing the design upfront helps to ensure that we're ready to accept your work. 3. [Fork](https://docs.github.com/en/github/getting-started-with-github/fork-a-repo) the bytedance/sonic repo. 4. In your forked repository, make your changes in a new git branch: ``` git checkout -b bugfix/security_bug develop ``` 5. Create your patch, including appropriate test cases. 6. Follow our [Style Guides](#code-style-guides). 7. Commit your changes using a descriptive commit message that follows [AngularJS Git Commit Message Conventions](https://docs.google.com/document/d/1QrDFcIiPjSLDn3EL15IJygNPiHORgU1_OOAqWjiDU5Y/edit). Adherence to these conventions is necessary because release notes will be automatically generated from these messages. 8. Push your branch to GitHub: ``` git push origin bugfix/security_bug ``` 9. In GitHub, send a pull request to `sonic:main` Note: you must use one of `optimize/feature/bugfix/doc/ci/test/refactor` followed by a slash (`/`) as the branch prefix. Your PR title and commit message should follow https://www.conventionalcommits.org/. ## Contribution Prerequisites - Our development environment keeps up with [Go Official](https://golang.org/project/).
- You need to fully check your changes with lint tools before submitting your pull request: [gofmt](https://golang.org/pkg/cmd/gofmt/) & [golangci-lint](https://github.com/golangci/golangci-lint) - You are familiar with [GitHub](https://github.com) - You may need to be familiar with [Actions](https://github.com/features/actions) (our default workflow tool). ## Code Style Guides See [Go Code Review Comments](https://github.com/golang/go/wiki/CodeReviewComments). Good resources: - [Effective Go](https://golang.org/doc/effective_go) - [Pingcap General advice](https://pingcap.github.io/style-guide/general.html) - [Uber Go Style Guide](https://github.com/uber-go/guide/blob/master/style.md) ================================================ FILE: CREDITS ================================================ ================================================ FILE: LICENSE ================================================ Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." 
"Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. 
You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. 
Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. 
Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ================================================ FILE: README.md ================================================ # Sonic English | [中文](README_ZH_CN.md) A blazingly fast JSON serializing & deserializing library, accelerated by JIT (just-in-time compiling) and SIMD (single-instruction-multiple-data). ## Requirement - Go: 1.18~1.25 - Notice: Go1.24.0 is not supported due to the [issue](https://github.com/golang/go/issues/71672), please use higher go version or add build tag `--ldflags="-checklinkname=0"` - OS: Linux / MacOS / Windows - CPU: AMD64 / (ARM64, need go1.20 above) ## Features - Runtime object binding without code generation - Complete APIs for JSON value manipulation - Fast, fast, fast! ## APIs see [go.dev](https://pkg.go.dev/github.com/bytedance/sonic) ## Benchmarks For **all sizes** of json and **all scenarios** of usage, **Sonic performs best**. 
- [Medium](https://github.com/bytedance/sonic/blob/main/decoder/testdata_test.go#L19) (13KB, 300+ key, 6 layers) ```powershell goversion: 1.17.1 goos: darwin goarch: amd64 cpu: Intel(R) Core(TM) i9-9880H CPU @ 2.30GHz BenchmarkEncoder_Generic_Sonic-16 32393 ns/op 402.40 MB/s 11965 B/op 4 allocs/op BenchmarkEncoder_Generic_Sonic_Fast-16 21668 ns/op 601.57 MB/s 10940 B/op 4 allocs/op BenchmarkEncoder_Generic_JsonIter-16 42168 ns/op 309.12 MB/s 14345 B/op 115 allocs/op BenchmarkEncoder_Generic_GoJson-16 65189 ns/op 199.96 MB/s 23261 B/op 16 allocs/op BenchmarkEncoder_Generic_StdLib-16 106322 ns/op 122.60 MB/s 49136 B/op 789 allocs/op BenchmarkEncoder_Binding_Sonic-16 6269 ns/op 2079.26 MB/s 14173 B/op 4 allocs/op BenchmarkEncoder_Binding_Sonic_Fast-16 5281 ns/op 2468.16 MB/s 12322 B/op 4 allocs/op BenchmarkEncoder_Binding_JsonIter-16 20056 ns/op 649.93 MB/s 9488 B/op 2 allocs/op BenchmarkEncoder_Binding_GoJson-16 8311 ns/op 1568.32 MB/s 9481 B/op 1 allocs/op BenchmarkEncoder_Binding_StdLib-16 16448 ns/op 792.52 MB/s 9479 B/op 1 allocs/op BenchmarkEncoder_Parallel_Generic_Sonic-16 6681 ns/op 1950.93 MB/s 12738 B/op 4 allocs/op BenchmarkEncoder_Parallel_Generic_Sonic_Fast-16 4179 ns/op 3118.99 MB/s 10757 B/op 4 allocs/op BenchmarkEncoder_Parallel_Generic_JsonIter-16 9861 ns/op 1321.84 MB/s 14362 B/op 115 allocs/op BenchmarkEncoder_Parallel_Generic_GoJson-16 18850 ns/op 691.52 MB/s 23278 B/op 16 allocs/op BenchmarkEncoder_Parallel_Generic_StdLib-16 45902 ns/op 283.97 MB/s 49174 B/op 789 allocs/op BenchmarkEncoder_Parallel_Binding_Sonic-16 1480 ns/op 8810.09 MB/s 13049 B/op 4 allocs/op BenchmarkEncoder_Parallel_Binding_Sonic_Fast-16 1209 ns/op 10785.23 MB/s 11546 B/op 4 allocs/op BenchmarkEncoder_Parallel_Binding_JsonIter-16 6170 ns/op 2112.58 MB/s 9504 B/op 2 allocs/op BenchmarkEncoder_Parallel_Binding_GoJson-16 3321 ns/op 3925.52 MB/s 9496 B/op 1 allocs/op BenchmarkEncoder_Parallel_Binding_StdLib-16 3739 ns/op 3486.49 MB/s 9480 B/op 1 allocs/op 
BenchmarkDecoder_Generic_Sonic-16 66812 ns/op 195.10 MB/s 57602 B/op 723 allocs/op BenchmarkDecoder_Generic_Sonic_Fast-16 54523 ns/op 239.07 MB/s 49786 B/op 313 allocs/op BenchmarkDecoder_Generic_StdLib-16 124260 ns/op 104.90 MB/s 50869 B/op 772 allocs/op BenchmarkDecoder_Generic_JsonIter-16 91274 ns/op 142.81 MB/s 55782 B/op 1068 allocs/op BenchmarkDecoder_Generic_GoJson-16 88569 ns/op 147.17 MB/s 66367 B/op 973 allocs/op BenchmarkDecoder_Binding_Sonic-16 32557 ns/op 400.38 MB/s 28302 B/op 137 allocs/op BenchmarkDecoder_Binding_Sonic_Fast-16 28649 ns/op 455.00 MB/s 24999 B/op 34 allocs/op BenchmarkDecoder_Binding_StdLib-16 111437 ns/op 116.97 MB/s 10576 B/op 208 allocs/op BenchmarkDecoder_Binding_JsonIter-16 35090 ns/op 371.48 MB/s 14673 B/op 385 allocs/op BenchmarkDecoder_Binding_GoJson-16 28738 ns/op 453.59 MB/s 22039 B/op 49 allocs/op BenchmarkDecoder_Parallel_Generic_Sonic-16 12321 ns/op 1057.91 MB/s 57233 B/op 723 allocs/op BenchmarkDecoder_Parallel_Generic_Sonic_Fast-16 10644 ns/op 1224.64 MB/s 49362 B/op 313 allocs/op BenchmarkDecoder_Parallel_Generic_StdLib-16 57587 ns/op 226.35 MB/s 50874 B/op 772 allocs/op BenchmarkDecoder_Parallel_Generic_JsonIter-16 38666 ns/op 337.12 MB/s 55789 B/op 1068 allocs/op BenchmarkDecoder_Parallel_Generic_GoJson-16 30259 ns/op 430.79 MB/s 66370 B/op 974 allocs/op BenchmarkDecoder_Parallel_Binding_Sonic-16 5965 ns/op 2185.28 MB/s 27747 B/op 137 allocs/op BenchmarkDecoder_Parallel_Binding_Sonic_Fast-16 5170 ns/op 2521.31 MB/s 24715 B/op 34 allocs/op BenchmarkDecoder_Parallel_Binding_StdLib-16 27582 ns/op 472.58 MB/s 10576 B/op 208 allocs/op BenchmarkDecoder_Parallel_Binding_JsonIter-16 13571 ns/op 960.51 MB/s 14685 B/op 385 allocs/op BenchmarkDecoder_Parallel_Binding_GoJson-16 10031 ns/op 1299.51 MB/s 22111 B/op 49 allocs/op BenchmarkGetOne_Sonic-16 3276 ns/op 3975.78 MB/s 24 B/op 1 allocs/op BenchmarkGetOne_Gjson-16 9431 ns/op 1380.81 MB/s 0 B/op 0 allocs/op BenchmarkGetOne_Jsoniter-16 51178 ns/op 254.46 MB/s 27936 B/op 647 
allocs/op BenchmarkGetOne_Parallel_Sonic-16 216.7 ns/op 60098.95 MB/s 24 B/op 1 allocs/op BenchmarkGetOne_Parallel_Gjson-16 1076 ns/op 12098.62 MB/s 0 B/op 0 allocs/op BenchmarkGetOne_Parallel_Jsoniter-16 17741 ns/op 734.06 MB/s 27945 B/op 647 allocs/op BenchmarkSetOne_Sonic-16 9571 ns/op 1360.61 MB/s 1584 B/op 17 allocs/op BenchmarkSetOne_Sjson-16 36456 ns/op 357.22 MB/s 52180 B/op 9 allocs/op BenchmarkSetOne_Jsoniter-16 79475 ns/op 163.86 MB/s 45862 B/op 964 allocs/op BenchmarkSetOne_Parallel_Sonic-16 850.9 ns/op 15305.31 MB/s 1584 B/op 17 allocs/op BenchmarkSetOne_Parallel_Sjson-16 18194 ns/op 715.77 MB/s 52247 B/op 9 allocs/op BenchmarkSetOne_Parallel_Jsoniter-16 33560 ns/op 388.05 MB/s 45892 B/op 964 allocs/op BenchmarkLoadNode/LoadAll()-16 11384 ns/op 1143.93 MB/s 6307 B/op 25 allocs/op BenchmarkLoadNode_Parallel/LoadAll()-16 5493 ns/op 2370.68 MB/s 7145 B/op 25 allocs/op BenchmarkLoadNode/Interface()-16 17722 ns/op 734.85 MB/s 13323 B/op 88 allocs/op BenchmarkLoadNode_Parallel/Interface()-16 10330 ns/op 1260.70 MB/s 15178 B/op 88 allocs/op ``` - [Small](https://github.com/bytedance/sonic/blob/main/testdata/small.go) (400B, 11 keys, 3 layers) ![small benchmarks](./docs/imgs/bench-small.png) - [Large](https://github.com/bytedance/sonic/blob/main/testdata/twitter.json) (635KB, 10000+ key, 6 layers) ![large benchmarks](./docs/imgs/bench-large.png) See [bench.sh](https://github.com/bytedance/sonic/blob/main/scripts/bench.sh) for benchmark codes. ## How it works See [INTRODUCTION.md](./docs/INTRODUCTION.md). ## Usage ### Marshal/Unmarshal Default behaviors are mostly consistent with `encoding/json`, except HTML escaping form (see [Escape HTML](https://github.com/bytedance/sonic/blob/main/README.md#escape-html)) and `SortKeys` feature (optional support see [Sort Keys](https://github.com/bytedance/sonic/blob/main/README.md#sort-keys)) that is **NOT** in conformity to [RFC8259](https://datatracker.ietf.org/doc/html/rfc8259). 
```go import "github.com/bytedance/sonic" var data YourSchema // Marshal output, err := sonic.Marshal(&data) // Unmarshal err = sonic.Unmarshal(output, &data) ``` ### Streaming IO Sonic supports decoding json from `io.Reader` or encoding objects into `io.Writer`, aiming at handling multiple values as well as reducing memory consumption. - encoder ```go var o1 = map[string]interface{}{ "a": "b", } var o2 = 1 var w = bytes.NewBuffer(nil) var enc = sonic.ConfigDefault.NewEncoder(w) enc.Encode(o1) enc.Encode(o2) fmt.Println(w.String()) // Output: // {"a":"b"} // 1 ``` - decoder ```go var o = map[string]interface{}{} var r = strings.NewReader(`{"a":"b"}{"1":"2"}`) var dec = sonic.ConfigDefault.NewDecoder(r) dec.Decode(&o) dec.Decode(&o) fmt.Printf("%+v", o) // Output: // map[1:2 a:b] ``` ### Use Number/Use Int64 ```go import "github.com/bytedance/sonic/decoder" var input = `1` var data interface{} // default float64 dc := decoder.NewDecoder(input) dc.Decode(&data) // data == float64(1) // use json.Number dc = decoder.NewDecoder(input) dc.UseNumber() dc.Decode(&data) // data == json.Number("1") // use int64 dc = decoder.NewDecoder(input) dc.UseInt64() dc.Decode(&data) // data == int64(1) root, err := sonic.GetFromString(input) // Get json.Number jn := root.Number() jm := root.InterfaceUseNumber().(json.Number) // jn == jm // Get float64 fn := root.Float64() fm := root.Interface().(float64) // fn == fm ``` ### Sort Keys On account of the performance loss from sorting (roughly 10%), sonic doesn't enable this feature by default.
If your component depends on it to work (like [zstd](https://github.com/facebook/zstd)), use it like this: ```go import "github.com/bytedance/sonic" import "github.com/bytedance/sonic/encoder" // Binding map only m := map[string]interface{}{} v, err := encoder.Encode(m, encoder.SortMapKeys) // Or ast.Node.SortKeys() before marshal root, err := sonic.Get(JSON) err = root.SortKeys() ``` ### Escape HTML On account of the performance loss (roughly 15%), sonic doesn't enable this feature by default. You can use the `encoder.EscapeHTML` option to enable this feature (aligned with `encoding/json.HTMLEscape`). ```go import "github.com/bytedance/sonic/encoder" v := map[string]string{"&&":"<>"} ret, err := encoder.Encode(v, encoder.EscapeHTML) // ret == `{"\u0026\u0026":"\u003c\u003e"}` ``` ### Compact Format Sonic encodes primitive objects (struct/map...) as compact-format JSON by default, except when marshaling `json.RawMessage` or `json.Marshaler`: sonic validates their output JSON but **DOES NOT** compact it, for performance reasons. We provide the option `encoder.CompactMarshaler` to add the compacting process. ### Print Error If there is invalid syntax in the input JSON, sonic will return `decoder.SyntaxError`, which supports pretty-printing of the error position ```go import "github.com/bytedance/sonic" import "github.com/bytedance/sonic/decoder" var data interface{} err := sonic.UnmarshalString("[[[}]]", &data) if err != nil { /* One line by default */ println(err.Error()) // "Syntax error at index 3: invalid char\n\n\t[[[}]]\n\t...^..\n" /* Pretty print */ if e, ok := err.(decoder.SyntaxError); ok { /*Syntax error at index 3: invalid char [[[}]] ...^.. 
*/ print(e.Description()) } else if me, ok := err.(*decoder.MismatchTypeError); ok { // decoder.MismatchTypeError is new to Sonic v1.6.0 print(me.Description()) } } ``` #### Mismatched Types [Sonic v1.6.0] If there is a **mismatch-typed** value for a given key, sonic will report `decoder.MismatchTypeError` (if there are many, it reports the last one), but still skips the wrong value and keeps decoding the following JSON. ```go import "github.com/bytedance/sonic" import "github.com/bytedance/sonic/decoder" var data = struct{ A int B int }{} err := sonic.UnmarshalString(`{"A":"1","B":1}`, &data) println(err.Error()) // Mismatch type int with value string "at index 5: mismatched type with value\n\n\t{\"A\":\"1\",\"B\":1}\n\t.....^.........\n" fmt.Printf("%+v", data) // {A:0 B:1} ``` ### Ast.Node Sonic/ast.Node is a completely self-contained AST for JSON. It implements both serialization and deserialization and provides robust APIs for obtaining and modifying generic data. #### Get/Index Search partial JSON by the given paths, each of which must be a non-negative integer, a string, or nil ```go import "github.com/bytedance/sonic" input := []byte(`{"key1":[{},{"key2":{"key3":[1,2,3]}}]}`) // no path, returns entire json root, err := sonic.Get(input) raw := root.Raw() // == string(input) // multiple paths root, err = sonic.Get(input, "key1", 1, "key2") sub := root.Get("key3").Index(2).Int64() // == 3 ``` **Tip**: since `Index()` uses offset to locate data, which is much faster than scanning like `Get()`, we suggest you use it as much as possible. Sonic also provides another API, `IndexOrGet()`, which uses the offset underneath while also ensuring the key matches. #### SearchOption `Searcher` provides some options for users to meet different needs: ```go opts := ast.SearchOption{ CopyReturn: true ... } val, err := sonic.GetWithOptions(JSON, opts, "key") ``` - CopyReturn Instructs the searcher to copy the result JSON string instead of referencing the input.
This can help to reduce memory usage if you cache the results. - ConcurrentRead Since `ast.Node` uses a `Lazy-Load` design, it doesn't support concurrent reads by default. If you want to read it concurrently, please specify this option. - ValidateJSON Instructs the searcher to validate the entire JSON. This option is enabled by default, which slows down the search a little. #### Set/Unset Modify the json content by Set()/Unset() ```go import "github.com/bytedance/sonic" // Set exist, err := root.Set("key4", NewBool(true)) // exist == false alias1 := root.Get("key4") println(alias1.Valid()) // true alias2 := root.Index(1) println(alias1 == alias2) // true // Unset exist, err = root.UnsetByIndex(1) // exist == true println(root.Get("key4").Check()) // "value not exist" ``` #### Serialize To encode `ast.Node` as json, use `MarshalJSON()` or `json.Marshal()` (MUST pass the node's pointer) ```go import ( "encoding/json" "github.com/bytedance/sonic" ) buf, err := root.MarshalJSON() println(string(buf)) // {"key1":[{},{"key2":{"key3":[1,2,3]}}]} exp, err := json.Marshal(&root) // WARN: use pointer println(string(buf) == string(exp)) // true ``` #### APIs - validation: `Check()`, `Error()`, `Valid()`, `Exist()` - searching: `Index()`, `Get()`, `IndexPair()`, `IndexOrGet()`, `GetByPath()` - go-type casting: `Int64()`, `Float64()`, `String()`, `Number()`, `Bool()`, `Map[UseNumber|UseNode]()`, `Array[UseNumber|UseNode]()`, `Interface[UseNumber|UseNode]()` - go-type packing: `NewRaw()`, `NewNumber()`, `NewNull()`, `NewBool()`, `NewString()`, `NewObject()`, `NewArray()` - iteration: `Values()`, `Properties()`, `ForEach()`, `SortKeys()` - modification: `Set()`, `SetByIndex()`, `Add()` ### Ast.Visitor Sonic provides an advanced API for fully parsing JSON into non-standard types (neither `struct` nor `map[string]interface{}`) without using any intermediate representation (`ast.Node` or `interface{}`).
For example, you might have the following types which are like `interface{}` but actually not `interface{}`: ```go type UserNode interface {} // the following types implement the UserNode interface. type ( UserNull struct{} UserBool struct{ Value bool } UserInt64 struct{ Value int64 } UserFloat64 struct{ Value float64 } UserString struct{ Value string } UserObject struct{ Value map[string]UserNode } UserArray struct{ Value []UserNode } ) ``` Sonic provides the following API to return **the preorder traversal of a JSON AST**. The `ast.Visitor` is a SAX style interface which is used in some C++ JSON library. You should implement `ast.Visitor` by yourself and pass it to `ast.Preorder()` method. In your visitor you can make your custom types to represent JSON values. There may be an O(n) space container (such as stack) in your visitor to record the object / array hierarchy. ```go func Preorder(str string, visitor Visitor, opts *VisitorOptions) error type Visitor interface { OnNull() error OnBool(v bool) error OnString(v string) error OnInt64(v int64, n json.Number) error OnFloat64(v float64, n json.Number) error OnObjectBegin(capacity int) error OnObjectKey(key string) error OnObjectEnd() error OnArrayBegin(capacity int) error OnArrayEnd() error } ``` See [ast/visitor.go](https://github.com/bytedance/sonic/blob/main/ast/visitor.go) for detailed usage. We also implement a demo visitor for `UserNode` in [ast/visitor_test.go](https://github.com/bytedance/sonic/blob/main/ast/visitor_test.go). ## Compatibility For developers who want to use sonic to meet different scenarios, we provide some integrated configs as `sonic.API` - `ConfigDefault`: the sonic's default config (`EscapeHTML=false`,`SortKeys=false`...) to run sonic fast meanwhile ensure security. - `ConfigStd`: the std-compatible config (`EscapeHTML=true`,`SortKeys=true`...) - `ConfigFastest`: the fastest config (`NoQuoteTextMarshaler=true`) to run on sonic as fast as possible. 
Sonic **DOES NOT** guarantee support for all environments, due to the difficulty of developing high-performance code. In environments that sonic does not support, the implementation will fall back to `encoding/json`. Thus the above configs will all be equal to `ConfigStd`. ## Tips ### Pretouch Since Sonic uses [golang-asm](https://github.com/twitchyliquid64/golang-asm) as a JIT assembler, which is NOT very suitable for runtime compiling, first-hit running of a huge schema may cause request-timeout or even process-OOM. For better stability, we advise **using `Pretouch()` for huge-schema or compact-memory applications** before `Marshal()/Unmarshal()`. ```go import ( "reflect" "github.com/bytedance/sonic" "github.com/bytedance/sonic/option" ) func init() { var v HugeStruct // For most large types (nesting depth <= option.DefaultMaxInlineDepth) err := sonic.Pretouch(reflect.TypeOf(v)) // with more CompileOption... err := sonic.Pretouch(reflect.TypeOf(v), // If the type is nested too deeply (nesting depth > option.DefaultMaxInlineDepth), // you can set compile recursive loops in Pretouch for better stability in JIT. option.WithCompileRecursiveDepth(loop), // For a large nested struct, try to set a smaller depth to reduce compiling time. option.WithCompileMaxInlineDepth(depth), ) } ``` ### Copy string When decoding **string values without any escaped characters**, sonic references them from the original JSON buffer instead of mallocing a new buffer to copy. This helps a lot for CPU performance but may leave the whole JSON buffer in memory as long as the decoded objects are being used. In practice, we found the extra memory introduced by referencing the JSON buffer is usually 20% ~ 80% of the decoded objects. Once an application holds these objects for a long time (for example, cache the decoded objects for reusing), its in-use memory on the server may go up.
- `Config.CopyString`/`decoder.CopyString()`: We provide the option for `Decode()` / `Unmarshal()` users to choose not to reference the JSON buffer, which may cause a decline in CPU performance to some degree. - `GetFromStringNoCopy()`: For memory safety, `sonic.Get()` / `sonic.GetFromString()` now copies return JSON. If users want to get json more quickly and not care about memory usage, you can use `GetFromStringNoCopy()` to return a JSON directly referenced from source. ### Pass string or []byte? For alignment to `encoding/json`, we provide API to pass `[]byte` as an argument, but the string-to-bytes copy is conducted at the same time considering safety, which may lose performance when the origin JSON is huge. Therefore, you can use `UnmarshalString()` and `GetFromString()` to pass a string, as long as your origin data is a string or **nocopy-cast** is safe for your []byte. We also provide API `MarshalString()` for convenient **nocopy-cast** of encoded JSON []byte, which is safe since sonic's output bytes is always duplicated and unique. ### Accelerate `encoding.TextMarshaler` To ensure data security, sonic.Encoder quotes and escapes string values from `encoding.TextMarshaler` interfaces by default, which may degrade performance much if most of your data is in form of them. We provide `encoder.NoQuoteTextMarshaler` to skip these operations, which means you **MUST** ensure their output string escaped and quoted following [RFC8259](https://datatracker.ietf.org/doc/html/rfc8259). ### Better performance for generic data In **fully-parsed** scenario, `Unmarshal()` performs better than `Get()`+`Node.Interface()`. But if you only have a part of the schema for specific json, you can combine `Get()` and `Unmarshal()` together: ```go import "github.com/bytedance/sonic" node, err := sonic.GetFromString(_TwitterJson, "statuses", 3, "user") var user User // your partial schema... 
err = sonic.UnmarshalString(node.Raw(), &user) ``` Even if you don't have any schema, use `ast.Node` as the container of generic values instead of `map` or `interface`: ```go import "github.com/bytedance/sonic" root, err := sonic.GetFromString(_TwitterJson) user := root.GetByPath("statuses", 3, "user") // === root.Get("statuses").Index(3).Get("user") err = user.Check() // err = user.LoadAll() // only call this when you want to use 'user' concurrently... go someFunc(user) ``` Why? Because `ast.Node` stores its children using `array`: - `Array`'s performance is **much better** than `Map` when Inserting (Deserialize) and Scanning (Serialize) data; - **Hashing** (`map[x]`) is not as efficient as **Indexing** (`array[x]`), which `ast.Node` can conduct on **both array and object**; - Using `Interface()`/`Map()` means Sonic must parse all the underlying values, while `ast.Node` can parse them **on demand**. **CAUTION:** `ast.Node` **DOESN'T** guarantee concurrency safety directly, due to its **lazy-load** design. However, you can call `Node.Load()`/`Node.LoadAll()` to achieve that, which may reduce performance, though it is still faster than converting to `map` or `interface{}`. ### Ast.Node or Ast.Visitor? For generic data, `ast.Node` should be enough for your needs in most cases. However, `ast.Node` is designed for partially processing a JSON string. It has some special designs such as lazy-load which might not be suitable for directly parsing the whole JSON string like `Unmarshal()`. Although `ast.Node` is better than `map` or `interface{}`, it's also a kind of intermediate representation after all if your final types are customized and you have to convert the above types to your custom types after parsing. For better performance, in the previous case `ast.Visitor` will be the better choice. It performs JSON decoding like `Unmarshal()` and you can directly use your final types to represent a JSON AST without any intermediate representations.
But `ast.Visitor` is not a very handy API. You might need to write a lot of code to implement your visitor and carefully maintain the tree hierarchy during decoding. Please read the comments in [ast/visitor.go](https://github.com/bytedance/sonic/blob/main/ast/visitor.go) carefully if you decide to use this API. ### Buffer Size Sonic uses memory pools in many places like `encoder.Encode`, `ast.Node.MarshalJSON` to improve performance, which may produce more memory usage (in-use) when the server's load is high. See [issue 614](https://github.com/bytedance/sonic/issues/614). Therefore, we introduce some options to let users control the behavior of the memory pool. See [option](https://pkg.go.dev/github.com/bytedance/sonic@v1.11.9/option#pkg-variables) package. ### Faster JSON Skip For security, sonic uses the [FSM](native/skip_one.c) algorithm to validate JSON when decoding raw JSON or encoding `json.Marshaler`, which is much slower (1~10x) than the [SIMD-searching-pair](native/skip_one_fast.c) algorithm. If you have many redundant JSON values and DO NOT NEED to strictly validate JSON correctness, you can enable the options below: - `Config.NoValidateJSONSkip`: for faster skipping JSON when decoding, such as unknown fields, json.Unmarshaler(json.RawMessage), mismatched values, and redundant array elements - `Config.NoValidateJSONMarshaler`: avoid validating JSON when encoding `json.Marshaler` - `SearchOptions.ValidateJSON`: indicates whether to validate the located JSON value when calling `Get` ## JSON-Path Support (GJSON) [tidwall/gjson](https://github.com/tidwall/gjson) has provided a comprehensive and popular JSON-Path API, and a lot of older code relies heavily on it. Therefore, we provide a wrapper library, which combines gjson's API with sonic's SIMD algorithm to boost up the performance. See [cloudwego/gjson](https://github.com/cloudwego/gjson). ## Community Sonic is a subproject of [CloudWeGo](https://www.cloudwego.io/). We are committed to building a cloud native ecosystem.
================================================ FILE: README_ZH_CN.md ================================================ # Sonic [English](README.md) | 中文 一个速度奇快的 JSON 序列化/反序列化库,由 JIT (即时编译)和 SIMD (单指令流多数据流)加速。 ## 依赖 - Go: 1.18~1.25 - 注意:Go1.24.0 由于 [issue](https://github.com/golang/go/issues/71672) 不可用,请升级到更高 Go 版本,或添加编译选项 `--ldflags="-checklinkname=0"` - OS: Linux / MacOS / Windows - CPU: AMD64 / (ARM64, 需要 Go1.20 以上) ## 接口 详见 [go.dev](https://pkg.go.dev/github.com/bytedance/sonic) ## 特色 - 运行时对象绑定,无需代码生成 - 完备的 JSON 操作 API - 快,更快,还要更快! ## 基准测试 对于**所有大小**的 json 和**所有使用场景**, **Sonic 表现均为最佳**。 - [中型](https://github.com/bytedance/sonic/blob/main/decoder/testdata_test.go#L19) (13kB, 300+ 键, 6 层) ```powershell goversion: 1.17.1 goos: darwin goarch: amd64 cpu: Intel(R) Core(TM) i9-9880H CPU @ 2.30GHz BenchmarkEncoder_Generic_Sonic-16 32393 ns/op 402.40 MB/s 11965 B/op 4 allocs/op BenchmarkEncoder_Generic_Sonic_Fast-16 21668 ns/op 601.57 MB/s 10940 B/op 4 allocs/op BenchmarkEncoder_Generic_JsonIter-16 42168 ns/op 309.12 MB/s 14345 B/op 115 allocs/op BenchmarkEncoder_Generic_GoJson-16 65189 ns/op 199.96 MB/s 23261 B/op 16 allocs/op BenchmarkEncoder_Generic_StdLib-16 106322 ns/op 122.60 MB/s 49136 B/op 789 allocs/op BenchmarkEncoder_Binding_Sonic-16 6269 ns/op 2079.26 MB/s 14173 B/op 4 allocs/op BenchmarkEncoder_Binding_Sonic_Fast-16 5281 ns/op 2468.16 MB/s 12322 B/op 4 allocs/op BenchmarkEncoder_Binding_JsonIter-16 20056 ns/op 649.93 MB/s 9488 B/op 2 allocs/op BenchmarkEncoder_Binding_GoJson-16 8311 ns/op 1568.32 MB/s 9481 B/op 1 allocs/op BenchmarkEncoder_Binding_StdLib-16 16448 ns/op 792.52 MB/s 9479 B/op 1 allocs/op BenchmarkEncoder_Parallel_Generic_Sonic-16 6681 ns/op 1950.93 MB/s 12738 B/op 4 allocs/op BenchmarkEncoder_Parallel_Generic_Sonic_Fast-16 4179 ns/op 3118.99 MB/s 10757 B/op 4 allocs/op BenchmarkEncoder_Parallel_Generic_JsonIter-16 9861 ns/op 1321.84 MB/s 14362 B/op 115 allocs/op BenchmarkEncoder_Parallel_Generic_GoJson-16 18850 ns/op 691.52 MB/s 23278 B/op 16 
allocs/op BenchmarkEncoder_Parallel_Generic_StdLib-16 45902 ns/op 283.97 MB/s 49174 B/op 789 allocs/op BenchmarkEncoder_Parallel_Binding_Sonic-16 1480 ns/op 8810.09 MB/s 13049 B/op 4 allocs/op BenchmarkEncoder_Parallel_Binding_Sonic_Fast-16 1209 ns/op 10785.23 MB/s 11546 B/op 4 allocs/op BenchmarkEncoder_Parallel_Binding_JsonIter-16 6170 ns/op 2112.58 MB/s 9504 B/op 2 allocs/op BenchmarkEncoder_Parallel_Binding_GoJson-16 3321 ns/op 3925.52 MB/s 9496 B/op 1 allocs/op BenchmarkEncoder_Parallel_Binding_StdLib-16 3739 ns/op 3486.49 MB/s 9480 B/op 1 allocs/op BenchmarkDecoder_Generic_Sonic-16 66812 ns/op 195.10 MB/s 57602 B/op 723 allocs/op BenchmarkDecoder_Generic_Sonic_Fast-16 54523 ns/op 239.07 MB/s 49786 B/op 313 allocs/op BenchmarkDecoder_Generic_StdLib-16 124260 ns/op 104.90 MB/s 50869 B/op 772 allocs/op BenchmarkDecoder_Generic_JsonIter-16 91274 ns/op 142.81 MB/s 55782 B/op 1068 allocs/op BenchmarkDecoder_Generic_GoJson-16 88569 ns/op 147.17 MB/s 66367 B/op 973 allocs/op BenchmarkDecoder_Binding_Sonic-16 32557 ns/op 400.38 MB/s 28302 B/op 137 allocs/op BenchmarkDecoder_Binding_Sonic_Fast-16 28649 ns/op 455.00 MB/s 24999 B/op 34 allocs/op BenchmarkDecoder_Binding_StdLib-16 111437 ns/op 116.97 MB/s 10576 B/op 208 allocs/op BenchmarkDecoder_Binding_JsonIter-16 35090 ns/op 371.48 MB/s 14673 B/op 385 allocs/op BenchmarkDecoder_Binding_GoJson-16 28738 ns/op 453.59 MB/s 22039 B/op 49 allocs/op BenchmarkDecoder_Parallel_Generic_Sonic-16 12321 ns/op 1057.91 MB/s 57233 B/op 723 allocs/op BenchmarkDecoder_Parallel_Generic_Sonic_Fast-16 10644 ns/op 1224.64 MB/s 49362 B/op 313 allocs/op BenchmarkDecoder_Parallel_Generic_StdLib-16 57587 ns/op 226.35 MB/s 50874 B/op 772 allocs/op BenchmarkDecoder_Parallel_Generic_JsonIter-16 38666 ns/op 337.12 MB/s 55789 B/op 1068 allocs/op BenchmarkDecoder_Parallel_Generic_GoJson-16 30259 ns/op 430.79 MB/s 66370 B/op 974 allocs/op BenchmarkDecoder_Parallel_Binding_Sonic-16 5965 ns/op 2185.28 MB/s 27747 B/op 137 allocs/op 
BenchmarkDecoder_Parallel_Binding_Sonic_Fast-16 5170 ns/op 2521.31 MB/s 24715 B/op 34 allocs/op BenchmarkDecoder_Parallel_Binding_StdLib-16 27582 ns/op 472.58 MB/s 10576 B/op 208 allocs/op BenchmarkDecoder_Parallel_Binding_JsonIter-16 13571 ns/op 960.51 MB/s 14685 B/op 385 allocs/op BenchmarkDecoder_Parallel_Binding_GoJson-16 10031 ns/op 1299.51 MB/s 22111 B/op 49 allocs/op BenchmarkGetOne_Sonic-16 3276 ns/op 3975.78 MB/s 24 B/op 1 allocs/op BenchmarkGetOne_Gjson-16 9431 ns/op 1380.81 MB/s 0 B/op 0 allocs/op BenchmarkGetOne_Jsoniter-16 51178 ns/op 254.46 MB/s 27936 B/op 647 allocs/op BenchmarkGetOne_Parallel_Sonic-16 216.7 ns/op 60098.95 MB/s 24 B/op 1 allocs/op BenchmarkGetOne_Parallel_Gjson-16 1076 ns/op 12098.62 MB/s 0 B/op 0 allocs/op BenchmarkGetOne_Parallel_Jsoniter-16 17741 ns/op 734.06 MB/s 27945 B/op 647 allocs/op BenchmarkSetOne_Sonic-16 9571 ns/op 1360.61 MB/s 1584 B/op 17 allocs/op BenchmarkSetOne_Sjson-16 36456 ns/op 357.22 MB/s 52180 B/op 9 allocs/op BenchmarkSetOne_Jsoniter-16 79475 ns/op 163.86 MB/s 45862 B/op 964 allocs/op BenchmarkSetOne_Parallel_Sonic-16 850.9 ns/op 15305.31 MB/s 1584 B/op 17 allocs/op BenchmarkSetOne_Parallel_Sjson-16 18194 ns/op 715.77 MB/s 52247 B/op 9 allocs/op BenchmarkSetOne_Parallel_Jsoniter-16 33560 ns/op 388.05 MB/s 45892 B/op 964 allocs/op BenchmarkLoadNode/LoadAll()-16 11384 ns/op 1143.93 MB/s 6307 B/op 25 allocs/op BenchmarkLoadNode_Parallel/LoadAll()-16 5493 ns/op 2370.68 MB/s 7145 B/op 25 allocs/op BenchmarkLoadNode/Interface()-16 17722 ns/op 734.85 MB/s 13323 B/op 88 allocs/op BenchmarkLoadNode_Parallel/Interface()-16 10330 ns/op 1260.70 MB/s 15178 B/op 88 allocs/op ``` - [小型](https://github.com/bytedance/sonic/blob/main/testdata/small.go) (400B, 11 个键, 3 层) ![small benchmarks](./docs/imgs/bench-small.png) - [大型](https://github.com/bytedance/sonic/blob/main/testdata/twitter.json) (635kB, 10000+ 个键, 6 层) ![large benchmarks](./docs/imgs/bench-large.png) 要查看基准测试代码,请参阅 
[bench.sh](https://github.com/bytedance/sonic/blob/main/scripts/bench.sh) 。 ## 工作原理 请参阅 [INTRODUCTION_ZH_CN.md](./docs/INTRODUCTION_ZH_CN.md). ## 使用方式 ### 序列化/反序列化 默认的行为基本上与 `encoding/json` 相一致,除了 HTML 转义形式(参见 [Escape HTML](https://github.com/bytedance/sonic/blob/main/README.md#escape-html)) 和 `SortKeys` 功能(参见 [Sort Keys](https://github.com/bytedance/sonic/blob/main/README.md#sort-keys))**没有**遵循 [RFC8259](https://datatracker.ietf.org/doc/html/rfc8259) 。 ```go import "github.com/bytedance/sonic" var data YourSchema // Marshal output, err := sonic.Marshal(&data) // Unmarshal err := sonic.Unmarshal(output, &data) ``` ### 流式输入输出 Sonic 支持解码 `io.Reader` 中输入的 json,或将对象编码为 json 后输出至 `io.Writer`,以处理多个值并减少内存消耗。 - 编码器 ```go var o1 = map[string]interface{}{ "a": "b", } var o2 = 1 var w = bytes.NewBuffer(nil) var enc = sonic.ConfigDefault.NewEncoder(w) enc.Encode(o1) enc.Encode(o2) fmt.Println(w.String()) // Output: // {"a":"b"} // 1 ``` - 解码器 ```go var o = map[string]interface{}{} var r = strings.NewReader(`{"a":"b"}{"1":"2"}`) var dec = sonic.ConfigDefault.NewDecoder(r) dec.Decode(&o) dec.Decode(&o) fmt.Printf("%+v", o) // Output: // map[1:2 a:b] ``` ### 使用 `Number` / `int64` ```go import "github.com/bytedance/sonic/decoder" var input = `1` var data interface{} // default float64 dc := decoder.NewDecoder(input) dc.Decode(&data) // data == float64(1) // use json.Number dc = decoder.NewDecoder(input) dc.UseNumber() dc.Decode(&data) // data == json.Number("1") // use int64 dc = decoder.NewDecoder(input) dc.UseInt64() dc.Decode(&data) // data == int64(1) root, err := sonic.GetFromString(input) // Get json.Number jn := root.Number() jm := root.InterfaceUseNumber().(json.Number) // jn == jm // Get float64 fn := root.Float64() fm := root.Interface().(float64) // jn == jm ``` ### 对键排序 考虑到排序带来的性能损失(约 10% ), sonic 默认不会启用这个功能。如果你的组件依赖这个行为(如 [zstd](https://github.com/facebook/zstd)) ,可以仿照下面的例子: ```go import "github.com/bytedance/sonic" import "github.com/bytedance/sonic/encoder" // 
Binding map only m := map[string]interface{}{} v, err := encoder.Encode(m, encoder.SortMapKeys) // Or ast.Node.SortKeys() before marshal var root := sonic.Get(JSON) err := root.SortKeys() ``` ### HTML 转义 考虑到性能损失(约15%), sonic 默认不会启用这个功能。你可以使用 `encoder.EscapeHTML` 选项来开启(与 `encoding/json.HTMLEscape` 行为一致)。 ```go import "github.com/bytedance/sonic" v := map[string]string{"&&":"<>"} ret, err := Encode(v, EscapeHTML) // ret == `{"\u0026\u0026":{"X":"\u003c\u003e"}}` ``` ### 紧凑格式 Sonic 默认将基本类型( `struct` , `map` 等)编码为紧凑格式的 JSON ,除非使用 `json.RawMessage` or `json.Marshaler` 进行编码: sonic 确保输出的 JSON 合法,但出于性能考虑,**不会**加工成紧凑格式。我们提供选项 `encoder.CompactMarshaler` 来添加此过程, ### 打印错误 如果输入的 JSON 存在无效的语法,sonic 将返回 `decoder.SyntaxError`,该错误支持错误位置的美化输出。 ```go import "github.com/bytedance/sonic" import "github.com/bytedance/sonic/decoder" var data interface{} err := sonic.UnmarshalString("[[[}]]", &data) if err != nil { /* One line by default */ println(e.Error()) // "Syntax error at index 3: invalid char\n\n\t[[[}]]\n\t...^..\n" /* Pretty print */ if e, ok := err.(decoder.SyntaxError); ok { /*Syntax error at index 3: invalid char [[[}]] ...^.. 
*/ print(e.Description()) } else if me, ok := err.(*decoder.MismatchTypeError); ok { // decoder.MismatchTypeError is new to Sonic v1.6.0 print(me.Description()) } } ``` #### 类型不匹配 [Sonic v1.6.0] 如果给定键中存在**类型不匹配**的值, sonic 会抛出 `decoder.MismatchTypeError` (如果有多个,只会报告最后一个),但仍会跳过错误的值并解码下一个 JSON 。 ```go import "github.com/bytedance/sonic" import "github.com/bytedance/sonic/decoder" var data = struct{ A int B int }{} err := UnmarshalString(`{"A":"1","B":1}`, &data) println(err.Error()) // Mismatch type int with value string "at index 5: mismatched type with value\n\n\t{\"A\":\"1\",\"B\":1}\n\t.....^.........\n" fmt.Printf("%+v", data) // {A:0 B:1} ``` ### `Ast.Node` Sonic/ast.Node 是完全独立的 JSON 抽象语法树库。它实现了序列化和反序列化,并提供了获取和修改JSON数据的鲁棒的 API。 #### 查找/索引 通过给定的路径搜索 JSON 片段,路径必须为非负整数,字符串或 `nil` 。 ```go import "github.com/bytedance/sonic" input := []byte(`{"key1":[{},{"key2":{"key3":[1,2,3]}}]}`) // no path, returns entire json root, err := sonic.Get(input) raw := root.Raw() // == string(input) // multiple paths root, err := sonic.Get(input, "key1", 1, "key2") sub := root.Get("key3").Index(2).Int64() // == 3 ``` **注意**:由于 `Index()` 使用偏移量来定位数据,比使用扫描的 `Get()` 要快的多,建议尽可能的使用 `Index` 。 Sonic 也提供了另一个 API, `IndexOrGet()` ,以偏移量为基础并且也确保键的匹配。 #### 查找选项 `ast.Searcher`提供了一些选项,以满足用户的不同需求: ```go opts := ast.SearchOption{CopyReturn: true…} val, err := sonic.GetWithOptions(JSON, opts, "key") ``` - CopyReturn 指示搜索器复制结果JSON字符串,而不是从输入引用。如果用户缓存结果,这有助于减少内存使用 - ConcurentRead 因为`ast.Node`使用`Lazy-Load`设计,默认不支持并发读取。如果您想同时读取,请指定它。 - ValidateJSON 指示搜索器来验证整个JSON。默认情况下启用该选项, 但是对于查找速度有一定影响。 #### 修改 使用 `Set()` / `Unset()` 修改 json 的内容 ```go import "github.com/bytedance/sonic" // Set exist, err := root.Set("key4", NewBool(true)) // exist == false alias1 := root.Get("key4") println(alias1.Valid()) // true alias2 := root.Index(1) println(alias1 == alias2) // true // Unset exist, err := root.UnsetByIndex(1) // exist == true println(root.Get("key4").Check()) // "value not exist" ``` #### 序列化 要将 `ast.Node` 编码为 json 
,使用 `MarshalJson()` 或者 `json.Marshal()` (必须传递指向节点的指针) ```go import ( "encoding/json" "github.com/bytedance/sonic" ) buf, err := root.MarshalJson() println(string(buf)) // {"key1":[{},{"key2":{"key3":[1,2,3]}}]} exp, err := json.Marshal(&root) // WARN: use pointer println(string(buf) == string(exp)) // true ``` #### APIs - 合法性检查: `Check()`, `Error()`, `Valid()`, `Exist()` - 索引: `Index()`, `Get()`, `IndexPair()`, `IndexOrGet()`, `GetByPath()` - 转换至 go 内置类型: `Int64()`, `Float64()`, `String()`, `Number()`, `Bool()`, `Map[UseNumber|UseNode]()`, `Array[UseNumber|UseNode]()`, `Interface[UseNumber|UseNode]()` - go 类型打包: `NewRaw()`, `NewNumber()`, `NewNull()`, `NewBool()`, `NewString()`, `NewObject()`, `NewArray()` - 迭代: `Values()`, `Properties()`, `ForEach()`, `SortKeys()` - 修改: `Set()`, `SetByIndex()`, `Add()` ### `Ast.Visitor` Sonic 提供了一个高级的 API 用于直接全量解析 JSON 到非标准容器里 (既不是 `struct` 也不是 `map[string]interface{}`) 且不需要借助任何中间表示 (`ast.Node` 或 `interface{}`)。举个例子,你可能定义了下述的类型,它们看起来像 `interface{}`,但实际上并不是: ```go type UserNode interface {} // the following types implement the UserNode interface. 
type ( UserNull struct{} UserBool struct{ Value bool } UserInt64 struct{ Value int64 } UserFloat64 struct{ Value float64 } UserString struct{ Value string } UserObject struct{ Value map[string]UserNode } UserArray struct{ Value []UserNode } ) ``` Sonic 提供了下述的 API 来返回 **“对 JSON AST 的前序遍历”**。`ast.Visitor` 是一个 SAX 风格的接口,这在某些 C++ 的 JSON 解析库中被使用到。你需要自己实现一个 `ast.Visitor`,将它传递给 `ast.Preorder()` 方法。在你的实现中你可以使用自定义的类型来表示 JSON 的值。在你的 `ast.Visitor` 中,可能需要有一个 O(n) 空间复杂度的容器(比如说栈)来记录 object / array 的层级。 ```go func Preorder(str string, visitor Visitor, opts *VisitorOptions) error type Visitor interface { OnNull() error OnBool(v bool) error OnString(v string) error OnInt64(v int64, n json.Number) error OnFloat64(v float64, n json.Number) error OnObjectBegin(capacity int) error OnObjectKey(key string) error OnObjectEnd() error OnArrayBegin(capacity int) error OnArrayEnd() error } ``` 详细用法参看 [ast/visitor.go](https://github.com/bytedance/sonic/blob/main/ast/visitor.go),我们还为 `UserNode` 实现了一个示例 `ast.Visitor`,你可以在 [ast/visitor_test.go](https://github.com/bytedance/sonic/blob/main/ast/visitor_test.go) 中找到它。 ## 兼容性 对于想要使用sonic来满足不同场景的开发人员,我们提供了一些集成配置: - `ConfigDefault`: sonic的默认配置 (`EscapeHTML=false`, `SortKeys=false`…) 保证性能同时兼顾安全性。 - `ConfigStd`: 与 `encoding/json` 保证完全兼容的配置 - `ConfigFastest`: 最快的配置(`NoQuoteTextMarshaler=true...`) 保证性能最优但是会缺少一些安全性检查(validate UTF8 等) Sonic **不**确保支持所有环境,由于开发高性能代码的困难。在不支持sonic的环境中,实现将回落到 `encoding/json`。因此上述配置将全部等于`ConfigStd`。 ## 注意事项 ### 预热 由于 Sonic 使用 [golang-asm](https://github.com/twitchyliquid64/golang-asm) 作为 JIT 汇编器,这个库并不适用于运行时编译,第一次运行一个大型模式可能会导致请求超时甚至进程内存溢出。为了更好地稳定性,我们建议在运行大型模式或在内存有限的应用中,在使用 `Marshal()/Unmarshal()` 前运行 `Pretouch()`。 ```go import ( "reflect" "github.com/bytedance/sonic" "github.com/bytedance/sonic/option" ) func init() { var v HugeStruct // For most large types (nesting depth <= option.DefaultMaxInlineDepth) err := sonic.Pretouch(reflect.TypeOf(v)) // with more CompileOption... 
err := sonic.Pretouch(reflect.TypeOf(v), // If the type is too deep nesting (nesting depth > option.DefaultMaxInlineDepth), // you can set compile recursive loops in Pretouch for better stability in JIT. option.WithCompileRecursiveDepth(loop), // For a large nested struct, try to set a smaller depth to reduce compiling time. option.WithCompileMaxInlineDepth(depth), ) } ``` ### 拷贝字符串 当解码 **没有转义字符的字符串**时, sonic 会从原始的 JSON 缓冲区内引用而不是复制到新的一个缓冲区中。这对 CPU 的性能方面很有帮助,但是可能因此在解码后对象仍在使用的时候将整个 JSON 缓冲区保留在内存中。实践中我们发现,通过引用 JSON 缓冲区引入的额外内存通常是解码后对象的 20% 至 80% ,一旦应用长期保留这些对象(如缓存以备重用),服务器所使用的内存可能会增加。我们提供了选项 `decoder.CopyString()` 供用户选择,不引用 JSON 缓冲区。这可能在一定程度上降低 CPU 性能。 ### 传递字符串还是字节数组? 为了和 `encoding/json` 保持一致,我们提供了传递 `[]byte` 作为参数的 API ,但考虑到安全性,字符串到字节的复制是同时进行的,这在原始 JSON 非常大时可能会导致性能损失。因此,你可以使用 `UnmarshalString()` 和 `GetFromString()` 来传递字符串,只要你的原始数据是字符串,或**零拷贝类型转换**对于你的字节数组是安全的。我们也提供了 `MarshalString()` 的 API ,以便对编码的 JSON 字节数组进行**零拷贝类型转换**,因为 sonic 输出的字节始终是重复并且唯一的,所以这样是安全的。 ### 加速 `encoding.TextMarshaler` 为了保证数据安全性, `sonic.Encoder` 默认会对来自 `encoding.TextMarshaler` 接口的字符串进行引用和转义,如果大部分数据都是这种形式那可能会导致很大的性能损失。我们提供了 `encoder.NoQuoteTextMarshaler` 选项来跳过这些操作,但你**必须**保证他们的输出字符串依照 [RFC8259](https://datatracker.ietf.org/doc/html/rfc8259) 进行了转义和引用。 ### 泛型的性能优化 在 **完全解析**的场景下, `Unmarshal()` 表现得比 `Get()`+`Node.Interface()` 更好。但是如果你只有特定 JSON 的部分模式,你可以将 `Get()` 和 `Unmarshal()` 结合使用: ```go import "github.com/bytedance/sonic" node, err := sonic.GetFromString(_TwitterJson, "statuses", 3, "user") var user User // your partial schema... err = sonic.UnmarshalString(node.Raw(), &user) ``` 甚至如果你没有任何模式,可以用 `ast.Node` 代替 `map` 或 `interface` 作为泛型的容器: ```go import "github.com/bytedance/sonic" root, err := sonic.GetFromString(_TwitterJson) user := root.GetByPath("statuses", 3, "user") // === root.Get("status").Index(3).Get("user") err = user.Check() // err = user.LoadAll() // only call this when you want to use 'user' concurrently... 
go someFunc(user) ``` 为什么?因为 `ast.Node` 使用 `array` 来存储其子节点: - 在插入(反序列化)和扫描(序列化)数据时,`Array` 的性能比 `Map` **好得多**; - **哈希**(`map[x]`)的效率不如**索引**(`array[x]`)高效,而 `ast.Node` 可以在数组和对象上使用索引; - 使用 `Interface()` / `Map()` 意味着 sonic 必须解析所有的底层值,而 `ast.Node` 可以**按需解析**它们。 **注意**:由于 `ast.Node` 的惰性加载设计,其**不能**直接保证并发安全性,但你可以调用 `Node.Load()` / `Node.LoadAll()` 来实现并发安全。尽管可能会带来性能损失,但仍比转换成 `map` 或 `interface{}` 更为高效。 ### 使用 `ast.Node` 还是 `ast.Visitor`? 对于泛型数据的解析,`ast.Node` 在大多数场景上应该能够满足你的需求。 然而,`ast.Node` 是一种针对部分解析 JSON 而设计的泛型容器,它包含一些特殊设计,比如惰性加载,如果你希望像 `Unmarshal()` 那样直接解析整个 JSON,这些设计可能并不合适。尽管 `ast.Node` 相较于 `map` 或 `interface{}` 来说是更好的一种泛型容器,但它毕竟也是一种中间表示,如果你的最终类型是自定义的,你还得在解析完成后将上述类型转化成你自定义的类型。 在上述场景中,如果想要有更极致的性能,`ast.Visitor` 会是更好的选择。它采用和 `Unmarshal()` 类似的形式解析 JSON,并且你可以直接使用你的最终类型去表示 JSON AST,而不需要经过额外的任何中间表示。 但是,`ast.Visitor` 并不是一个很易用的 API。你可能需要写大量的代码去实现自己的 `ast.Visitor`,并且需要在解析过程中仔细维护树的层级。如果你决定要使用这个 API,请先仔细阅读 [ast/visitor.go](https://github.com/bytedance/sonic/blob/main/ast/visitor.go) 中的注释。 ### 缓冲区大小 Sonic在许多地方使用内存池,如`encoder.Encode`, `ast.Node.MarshalJSON`等来提高性能,这可能会在服务器负载高时产生更多的内存使用(in-use)。参见[issue 614](https://github.com/bytedance/sonic/issues/614)。因此,我们引入了一些选项来让用户配置内存池的行为。参见[option](https://pkg.go.dev/github.com/bytedance/sonic@v1.11.9/option#pkg-variables)包。 ### 更快的 JSON Skip 为了安全起见,在跳过原始JSON 时,sonic decoder 默认使用[FSM](native/skip_one.c)算法扫描来跳过同时校验 JSON。它相比[SIMD-searching-pair](native/skip_one_fast.c)算法跳过要慢得多(1~10倍)。如果用户有很多冗余的JSON值,并且不需要严格验证JSON的正确性,你可以启用以下选项: - `Config.NoValidateJSONSkip`: 用于在解码时更快地跳过JSON,例如未知字段,`json.RawMessage`,不匹配的值和冗余的数组元素等 - `Config.NoValidateJSONMarshaler`: 编码 `json.Marshaler` 时避免验证其输出的 JSON - `SearchOptions.ValidateJSON`: 指示当`Get`时是否验证定位的JSON值 ## 社区 Sonic 是 [CloudWeGo](https://www.cloudwego.io/) 下的一个子项目。我们致力于构建云原生生态系统。 ================================================ FILE: api.go ================================================ /* * Copyright 2021 ByteDance Inc.
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package sonic import ( "io" "github.com/bytedance/sonic/ast" "github.com/bytedance/sonic/internal/rt" ) const ( // UseStdJSON indicates you are using fallback implementation (encoding/json) UseStdJSON = iota // UseSonicJSON indicates you are using real sonic implementation UseSonicJSON ) // APIKind is the kind of API, 0 is std json, 1 is sonic. const APIKind = apiKind // Config is a combination of sonic/encoder.Options and sonic/decoder.Options type Config struct { // EscapeHTML indicates encoder to escape all HTML characters // after serializing into JSON (see https://pkg.go.dev/encoding/json#HTMLEscape). // WARNING: This hurts performance A LOT, USE WITH CARE. EscapeHTML bool // SortMapKeys indicates encoder that the keys of a map needs to be sorted // before serializing into JSON. // WARNING: This hurts performance A LOT, USE WITH CARE. SortMapKeys bool // CompactMarshaler indicates encoder that the output JSON from json.Marshaler // is always compact and needs no validation CompactMarshaler bool // NoQuoteTextMarshaler indicates encoder that the output text from encoding.TextMarshaler // is always escaped string and needs no quoting NoQuoteTextMarshaler bool // NoNullSliceOrMap indicates encoder that all empty Array or Object are encoded as '[]' or '{}', // instead of 'null' NoNullSliceOrMap bool // UseInt64 indicates decoder to unmarshal an integer into an interface{} as an // int64 instead of as a float64. 
UseInt64 bool // UseNumber indicates decoder to unmarshal a number into an interface{} as a // json.Number instead of as a float64. UseNumber bool // UseUnicodeErrors indicates decoder to return an error when encountering invalid // UTF-8 escape sequences. UseUnicodeErrors bool // DisallowUnknownFields indicates decoder to return an error when the destination // is a struct and the input contains object keys which do not match any // non-ignored, exported fields in the destination. DisallowUnknownFields bool // CopyString indicates decoder to decode string values by copying instead of referring. CopyString bool // ValidateString indicates decoder and encoder to validate string values: decoder will return errors // when unescaped control chars (\u0000-\u001f) appear in the string value of JSON. ValidateString bool // NoValidateJSONMarshaler indicates that the encoder should not validate the output string // after encoding the JSONMarshaler to JSON. NoValidateJSONMarshaler bool // NoValidateJSONSkip indicates the decoder should not validate the JSON value when skipping it, // such as unknown-fields, mismatched-type, redundant elements. NoValidateJSONSkip bool // NoEncoderNewline indicates that the encoder should not add a newline after every message. NoEncoderNewline bool // EncodeNullForInfOrNan indicates encoder to encode Infinity or NaN floats as `null`, // instead of returning an error. EncodeNullForInfOrNan bool // CaseSensitive indicates that the decoder should not ignore the case of object keys. CaseSensitive bool } var ( // ConfigDefault is the default config of APIs, aiming at efficiency and safety. ConfigDefault = Config{}.Froze() // ConfigStd is the standard config of APIs, aiming at being compatible with encoding/json. ConfigStd = Config{ EscapeHTML: true, SortMapKeys: true, CompactMarshaler: true, CopyString: true, ValidateString: true, }.Froze() // ConfigFastest is the fastest config of APIs, aiming at speed.
ConfigFastest = Config{ NoValidateJSONMarshaler: true, NoValidateJSONSkip: true, }.Froze() ) // API is a binding of specific config. // This interface is inspired by github.com/json-iterator/go, // and has the same behaviors under equivalent config. type API interface { // MarshalToString returns the JSON encoding string of v. MarshalToString(v interface{}) (string, error) // Marshal returns the JSON encoding bytes of v. Marshal(v interface{}) ([]byte, error) // MarshalIndent returns the JSON encoding bytes with indent and prefix. MarshalIndent(v interface{}, prefix, indent string) ([]byte, error) // UnmarshalFromString parses the JSON-encoded string and stores the result in the value pointed to by v. UnmarshalFromString(str string, v interface{}) error // Unmarshal parses the JSON-encoded bytes and stores the result in the value pointed to by v. Unmarshal(data []byte, v interface{}) error // NewEncoder creates an Encoder holding writer. NewEncoder(writer io.Writer) Encoder // NewDecoder creates a Decoder holding reader. NewDecoder(reader io.Reader) Decoder // Valid validates the JSON-encoded bytes and reports whether they are valid. Valid(data []byte) bool } // Encoder encodes JSON into an io.Writer. type Encoder interface { // Encode writes the JSON encoding of val to the stream, followed by a newline character. Encode(val interface{}) error // SetEscapeHTML specifies whether problematic HTML characters // should be escaped inside JSON quoted strings. // The default behavior is NOT to escape. SetEscapeHTML(on bool) // SetIndent instructs the encoder to format each subsequent encoded value // as if indented by the package-level function Indent(dst, src, prefix, indent). // Calling SetIndent("", "") disables indentation. SetIndent(prefix, indent string) } // Decoder decodes JSON from an io.Reader. type Decoder interface { // Decode reads the next JSON-encoded value from its input and stores it in the value pointed to by val.
Decode(val interface{}) error // Buffered returns a reader of the data remaining in the Decoder's buffer. // The reader is valid until the next call to Decode. Buffered() io.Reader // DisallowUnknownFields causes the Decoder to return an error when the destination is a struct // and the input contains object keys which do not match any non-ignored, exported fields in the destination. DisallowUnknownFields() // More reports whether there is another element in the current array or object being parsed. More() bool // UseNumber causes the Decoder to unmarshal a number into an interface{} as a Number instead of as a float64. UseNumber() } // Marshal returns the JSON encoding bytes of val. func Marshal(val interface{}) ([]byte, error) { return ConfigDefault.Marshal(val) } // MarshalIndent is like Marshal but applies Indent to format the output. // Each JSON element in the output will begin on a new line beginning with prefix // followed by one or more copies of indent according to the indentation nesting. func MarshalIndent(v interface{}, prefix, indent string) ([]byte, error) { return ConfigDefault.MarshalIndent(v, prefix, indent) } // MarshalString returns the JSON encoding string of val. func MarshalString(val interface{}) (string, error) { return ConfigDefault.MarshalToString(val) } // Unmarshal parses the JSON-encoded data and stores the result in the value pointed to by val. // NOTICE: This API copies the given buffer by default, // if you want to pass JSON more efficiently, use UnmarshalString instead. func Unmarshal(buf []byte, val interface{}) error { return ConfigDefault.Unmarshal(buf, val) } // UnmarshalString is like Unmarshal, except buf is a string. func UnmarshalString(buf string, val interface{}) error { return ConfigDefault.UnmarshalFromString(buf, val) } // Get searches and locates the given path from src json, // and returns an ast.Node representing the partial JSON.
// // Each path arg must be integer or string: // - Integer is target index(>=0), means searching current node as array. // - String is target key, means searching current node as object. // // Notice: It expects the src json is **Well-formed** and **Immutable** when calling, // otherwise it may return unexpected result. // Considering memory safety, the returned JSON is **Copied** from the input func Get(src []byte, path ...interface{}) (ast.Node, error) { return GetCopyFromString(rt.Mem2Str(src), path...) } // GetWithOptions searches and locates the given path from src json, // with specific options of ast.Searcher func GetWithOptions(src []byte, opts ast.SearchOptions, path ...interface{}) (ast.Node, error) { s := ast.NewSearcher(rt.Mem2Str(src)) s.SearchOptions = opts return s.GetByPath(path...) } // GetFromString is same with Get except src is string. // // WARNING: The returned JSON is **Referenced** from the input. // Caching or long-time holding the returned node may cause OOM. // If your src is big, consider use GetFromStringCopy(). func GetFromString(src string, path ...interface{}) (ast.Node, error) { return ast.NewSearcher(src).GetByPath(path...) } // GetCopyFromString is same with Get except src is string func GetCopyFromString(src string, path ...interface{}) (ast.Node, error) { return ast.NewSearcher(src).GetByPathCopy(path...) } // Valid reports whether data is a valid JSON encoding. func Valid(data []byte) bool { return ConfigDefault.Valid(data) } // Valid reports whether data is a valid JSON encoding. func ValidString(data string) bool { return ConfigDefault.Valid(rt.Str2Mem(data)) } ================================================ FILE: api_test.go ================================================ /* * Copyright 2021 ByteDance Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package sonic

import (
	"testing"

	"github.com/stretchr/testify/require"
)

// TestValid checks Valid against nil input and a table of malformed and
// well-formed JSON documents.
func TestValid(t *testing.T) {
	require.False(t, Valid(nil))

	testCase := []struct {
		data     string
		expected bool
	}{
		// inputs expected to be rejected
		{``, false},
		{`s`, false},
		{`{`, false},
		{`[`, false},
		{`[1,2`, false},
		{`{"so":nic"}`, false},

		// inputs expected to be accepted
		{`null`, true},
		{`""`, true},
		{`1`, true},
		{`"sonic"`, true},
		{`{}`, true},
		{`[]`, true},
		{`[1,2]`, true},
		{`{"so":"nic"}`, true},
	}
	for _, tc := range testCase {
		// tc.data is passed as the failure message so the offending input is reported.
		require.Equal(t, tc.expected, Valid([]byte(tc.data)), tc.data)
	}
}

// TestIdent marshals an anonymous struct with MarshalIndent and compares the
// output with a fixed literal.
//
// NOTE(review): the indent argument and the expected raw string appear to have
// had their whitespace collapsed in this copy of the file — verify against the
// repository before relying on them.
func TestIdent(t *testing.T) {
	foo := struct {
		Name string
		Age  int
	}{
		Name: "sonic",
		Age:  20,
	}
	out, err := MarshalIndent(&foo, "", " ")
	require.Nil(t, err)
	require.Equal(t, `{ "Name": "sonic", "Age": 20 }`, string(out))
}

================================================
FILE: ast/api.go
================================================
//go:build (amd64 && go1.17 && !go1.27) || (arm64 && go1.20 && !go1.27)
// +build amd64,go1.17,!go1.27 arm64,go1.20,!go1.27

/*
 * Copyright 2022 ByteDance Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
*/

package ast

import (
	"runtime"
	"unsafe"

	"github.com/bytedance/sonic/encoder"
	"github.com/bytedance/sonic/internal/encoder/alg"
	"github.com/bytedance/sonic/internal/native"
	"github.com/bytedance/sonic/internal/native/types"
	"github.com/bytedance/sonic/internal/rt"
	"github.com/bytedance/sonic/utf8"
)

// typeByte is the Type field obtained by unpacking an interface that holds byte(0).
var typeByte = rt.UnpackEface(byte(0)).Type

// quote appends the quoted form of val to *buf using alg.Quote.
func quote(buf *[]byte, val string) {
	*buf = alg.Quote(*buf, val, false)
}

// decodeValue runs native.Value on the parser's source starting at self.p,
// stores the returned position back into self.p and returns the filled state.
//
// When self.dbuf is nil the F_USE_NUMBER flag is passed; otherwise the flag is
// cleared and dbuf is handed to the native code with capacity MaxDigitNums.
func (self *Parser) decodeValue() (val types.JsonState) {
	// view the source string as its raw (pointer, length) header
	sv := (*rt.GoString)(unsafe.Pointer(&self.s))
	flag := types.F_USE_NUMBER
	if self.dbuf != nil {
		flag = 0
		val.Dbuf = self.dbuf
		val.Dcap = types.MaxDigitNums
	}
	self.p = native.Value(sv.Ptr, sv.Len, self.p, &val, uint64(flag))
	return
}

// skip calls native.SkipOne with a state machine taken from
// types.NewStateMachine and released again afterwards.
//
// A negative result is negated into a types.ParsingError and returned together
// with the current position; otherwise the result is returned with error 0.
func (self *Parser) skip() (int, types.ParsingError) {
	fsm := types.NewStateMachine()
	start := native.SkipOne(&self.s, &self.p, fsm, 0)
	types.FreeStateMachine(fsm)

	if start < 0 {
		return self.p, types.ParsingError(-start)
	}
	return start, 0
}

// encodeInterface appends the encoding of self.packAny() to *buf via
// encoder.EncodeInto with the NoEncoderNewline option.
func (self *Node) encodeInterface(buf *[]byte) error {
	//WARN: NOT compatible with json.Encoder
	return encoder.EncodeInto(buf, self.packAny(), encoder.NoEncoderNewline)
}

// skipFast is like skip but calls native.SkipOneFast, which takes no state machine.
func (self *Parser) skipFast() (int, types.ParsingError) {
	start := native.SkipOneFast(&self.s, &self.p)
	if start < 0 {
		return self.p, types.ParsingError(-start)
	}
	return start, 0
}

// getByPath calls native.GetByPath with the given path. A state machine is
// allocated (and freed after the call) only when validate is true; otherwise
// nil is passed.
//
// A negative result is negated into a types.ParsingError and returned together
// with the current position; otherwise the result is returned with error 0.
func (self *Parser) getByPath(validate bool, path ...interface{}) (int, types.ParsingError) {
	var fsm *types.StateMachine
	if validate {
		fsm = types.NewStateMachine()
	}
	start := native.GetByPath(&self.s, &self.p, &path, fsm)
	if validate {
		types.FreeStateMachine(fsm)
	}
	// &path was handed to native code; keep the slice reachable until the call has returned
	runtime.KeepAlive(path)
	if start < 0 {
		return self.p, types.ParsingError(-start)
	}
	return start, 0
}

// validate_utf8 reports the result of sonic's utf8.ValidateString for str.
func validate_utf8(str string) bool {
	return utf8.ValidateString(str)
}

================================================
FILE: ast/api_compat.go
================================================
//go:build (!amd64 && !arm64) || go1.27 || !go1.17 || (arm64 && !go1.20)
// +build !amd64,!arm64 go1.27 !go1.17 arm64,!go1.20

/*
 *
Copyright 2022 ByteDance Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package ast import ( "encoding/json" "unicode/utf8" "github.com/bytedance/sonic/internal/compat" "github.com/bytedance/sonic/internal/native/types" ) func init() { compat.Warn("sonic/ast") } func quote(buf *[]byte, val string) { quoteString(buf, val) } func (self *Parser) decodeValue() (val types.JsonState) { e, v := decodeValue(self.s, self.p, self.dbuf == nil) if e < 0 { return v } self.p = e return v } func (self *Parser) skip() (int, types.ParsingError) { e, s := skipValue(self.s, self.p) if e < 0 { return self.p, types.ParsingError(-e) } self.p = e return s, 0 } func (self *Parser) skipFast() (int, types.ParsingError) { e, s := skipValueFast(self.s, self.p) if e < 0 { return self.p, types.ParsingError(-e) } self.p = e return s, 0 } func (self *Node) encodeInterface(buf *[]byte) error { out, err := json.Marshal(self.packAny()) if err != nil { return err } *buf = append(*buf, out...) 
return nil } func (self *Parser) getByPath(validate bool, path ...interface{}) (int, types.ParsingError) { for _, p := range path { if idx, ok := p.(int); ok && idx >= 0 { if err := self.searchIndex(idx); err != 0 { return self.p, err } } else if key, ok := p.(string); ok { if err := self.searchKey(key); err != 0 { return self.p, err } } else { panic("path must be either int(>=0) or string") } } var start int var e types.ParsingError if validate { start, e = self.skip() } else { start, e = self.skipFast() } if e != 0 { return self.p, e } return start, 0 } func validate_utf8(str string) bool { return utf8.ValidString(str) } ================================================ FILE: ast/api_native_test.go ================================================ //go:build (amd64 && go1.17 && !go1.27) || (arm64 && go1.20 && !go1.27) // +build amd64,go1.17,!go1.27 arm64,go1.20,!go1.27 /* * Copyright 2022 ByteDance Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/

package ast

import (
	"encoding/json"
	"fmt"
	"reflect"
	"runtime"
	"runtime/debug"
	"testing"

	"github.com/bytedance/sonic/encoder"
	"github.com/stretchr/testify/require"
)

// TestSortNodeTwitter compares Node.SortKeys followed by MarshalJSON with
// encoder.Encode using SortMapKeys on the same document. It is skipped when
// encoder.EnableFallback is set.
func TestSortNodeTwitter(t *testing.T) {
	if encoder.EnableFallback {
		return
	}
	root, err := NewSearcher(_TwitterJson).GetByPath()
	if err != nil {
		t.Fatal(err)
	}
	obj, err := root.MapUseNumber()
	if err != nil {
		t.Fatal(err)
	}
	// expected output: the generic map encoded with sorted keys
	exp, err := encoder.Encode(obj, encoder.SortMapKeys|encoder.NoEncoderNewline)
	if err != nil {
		t.Fatal(err)
	}
	var expObj interface{}
	require.NoError(t, json.Unmarshal(exp, &expObj))

	// actual output: the node sorted in place, then marshaled
	if err := root.SortKeys(true); err != nil {
		t.Fatal(err)
	}
	act, err := root.MarshalJSON()
	if err != nil {
		t.Fatal(err)
	}
	var actObj interface{}
	require.NoError(t, json.Unmarshal(act, &actObj))
	// compare decoded values first, then length, then the exact bytes
	require.Equal(t, expObj, actObj)
	require.Equal(t, len(exp), len(act))
	require.Equal(t, string(exp), string(act))
}

// TestNodeAny exercises SetAny, AddAny and SetAnyByIndex starting from a zero
// Node and checks the marshaled JSON after each mutation.
func TestNodeAny(t *testing.T) {
	empty := Node{}
	_, err := empty.SetAny("any", map[string]interface{}{"a": []int{0}})
	if err != nil {
		t.Fatal(err)
	}
	if m, err := empty.Get("any").Interface(); err != nil {
		t.Fatal(err)
	} else if v, ok := m.(map[string]interface{}); !ok {
		t.Fatal(v)
	}
	if buf, err := empty.MarshalJSON(); err != nil {
		t.Fatal(err)
	} else if string(buf) != `{"any":{"a":[0]}}` {
		t.Fatal(string(buf))
	}
	if _, err := empty.Set("any2", Node{}); err != nil {
		t.Fatal(err)
	}
	if err := empty.Get("any2").AddAny(nil); err != nil {
		t.Fatal(err)
	}
	if buf, err := empty.MarshalJSON(); err != nil {
		t.Fatal(err)
	} else if string(buf) != `{"any":{"a":[0]},"any2":[null]}` {
		t.Fatal(string(buf))
	}
	if _, err := empty.Get("any2").SetAnyByIndex(0, NewNumber("-0.0")); err != nil {
		t.Fatal(err)
	}
	if buf, err := empty.MarshalJSON(); err != nil {
		t.Fatal(err)
	} else if string(buf) != `{"any":{"a":[0]},"any2":[-0.0]}` {
		t.Fatal(string(buf))
	}
}

// TestTypeCast2 invokes a Node method by name through reflection and checks
// both of its return values against the table.
func TestTypeCast2(t *testing.T) {
	type tcase struct {
		method string
		node   Node
		exp    interface{}
		err    error
	}
	var cases = []tcase{
		{"Raw", NewAny(""), "\"\"", nil},
	}

	for i, c := range cases {
		fmt.Println(i, c)
		// the method is looked up on *Node, hence the address of c.node
		rt := reflect.ValueOf(&c.node)
		m := rt.MethodByName(c.method)
		rets := m.Call([]reflect.Value{})
		if len(rets) != 2 {
			t.Fatal(i, rets)
		}
		require.Equal(t, c.exp, rets[0].Interface())
		v := rets[1].Interface()
		if v != c.err {
			t.Fatal(i, v)
		}
	}
}

// TestStackAny wraps the value returned by stackObj in NewAny, forces a GC
// cycle and then checks that the node still marshals to "1".
func TestStackAny(t *testing.T) {
	var obj = stackObj()
	any := NewAny(obj)
	fmt.Printf("any: %#v\n", any)
	runtime.GC()
	debug.FreeOSMemory()
	println("finish GC")
	buf, err := any.MarshalJSON()
	println("finish marshal")
	if err != nil {
		t.Fatal(err)
	}
	if string(buf) != "1" {
		t.Fatal(string(buf))
	}
}

// Test_Export checks _GetByPath, _SkipFast and _ValidSyntax on the first
// element of small arrays, including elements that are syntactically broken.
func Test_Export(t *testing.T) {
	type args struct {
		src  string
		path []interface{}
	}
	tests := []struct {
		name      string
		args      args
		wantStart int
		wantEnd   int
		wantTyp   int
		wantErr   bool
		wantValid bool
	}{
		{"bool", args{`[true ,2]`, []interface{}{0}}, 1, 5, V_TRUE, false, true},
		{"bool", args{`[t2ue ,2]`, []interface{}{0}}, 1, 5, V_TRUE, false, false},
		{"number", args{`[1 ,2]`, []interface{}{0}}, 1, 2, V_NUMBER, false, true},
		{"number", args{`[1w ,2]`, []interface{}{0}}, 1, 3, V_NUMBER, false, false},
		{"string", args{`[" " ,2]`, []interface{}{0}}, 1, 4, V_STRING, false, true},
		{"string", args{`[" "] ,2]`, []interface{}{0}}, 1, 4, V_STRING, false, true},
		{"object", args{`[{"":""} ,2]`, []interface{}{0}}, 1, 8, V_OBJECT, false, true},
		{"object", args{`[{x} ,2]`, []interface{}{0}}, 1, 4, V_OBJECT, false, false},
		{"array", args{`[[{}] ,2]`, []interface{}{0}}, 1, 5, V_ARRAY, false, true},
		{"array", args{`[[xx] ,2]`, []interface{}{0}}, 1, 5, V_ARRAY, false, false},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			gotStart, gotEnd, gotTyp, err := _GetByPath(tt.args.src, tt.args.path...)
			if (err != nil) != tt.wantErr {
				t.Errorf("_GetByPath() error = %v, wantErr %v", err, tt.wantErr)
				return
			}
			if gotStart != tt.wantStart {
				t.Errorf("_GetByPath() gotStart = %v, want %v", gotStart, tt.wantStart)
			}
			if gotEnd != tt.wantEnd {
				t.Errorf("_GetByPath() gotEnd = %v, want %v", gotEnd, tt.wantEnd)
			}
			if gotTyp != tt.wantTyp {
				t.Errorf("_GetByPath() gotTyp = %v, want %v", gotTyp, tt.wantTyp)
			}
			// skipping from the known start must yield the same span
			gotStart, gotEnd, err = _SkipFast(tt.args.src, tt.wantStart)
			if (err != nil) != tt.wantErr {
				t.Errorf("_SkipFast() error = %v, wantErr %v", err, tt.wantErr)
				return
			}
			if gotStart != tt.wantStart {
				t.Errorf("_SkipFast() gotStart = %v, want %v", gotStart, tt.wantStart)
			}
			if gotEnd != tt.wantEnd {
				t.Errorf("_SkipFast() gotEnd = %v, want %v", gotEnd, tt.wantEnd)
			}
			// the located span is validated separately
			valid := _ValidSyntax(tt.args.src[tt.wantStart:tt.wantEnd])
			if valid != tt.wantValid {
				t.Errorf("_ValidSyntax() gotValid = %v, want %v", valid, tt.wantValid)
			}
		})
	}
}

================================================
FILE: ast/asm.s
================================================

================================================
FILE: ast/buffer.go
================================================
/**
 * Copyright 2023 ByteDance Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
*/ package ast import ( "sort" "unsafe" "github.com/bytedance/sonic/internal/caching" ) type nodeChunk [_DEFAULT_NODE_CAP]Node type linkedNodes struct { head nodeChunk tail []*nodeChunk size int } func (self *linkedNodes) Cap() int { if self == nil { return 0 } return (len(self.tail) + 1) * _DEFAULT_NODE_CAP } func (self *linkedNodes) Len() int { if self == nil { return 0 } return self.size } func (self *linkedNodes) At(i int) *Node { if self == nil { return nil } if i >= 0 && i < self.size && i < _DEFAULT_NODE_CAP { return &self.head[i] } else if i >= _DEFAULT_NODE_CAP && i < self.size { a, b := i/_DEFAULT_NODE_CAP-1, i%_DEFAULT_NODE_CAP if a < len(self.tail) { return &self.tail[a][b] } } return nil } func (self *linkedNodes) MoveOne(source int, target int) { if source == target { return } if source < 0 || source >= self.size || target < 0 || target >= self.size { return } // reserve source n := *self.At(source) if source < target { // move every element (source,target] one step back for i := source; i < target; i++ { *self.At(i) = *self.At(i + 1) } } else { // move every element [target,source) one step forward for i := source; i > target; i-- { *self.At(i) = *self.At(i - 1) } } // set target *self.At(target) = n } func (self *linkedNodes) Pop() { if self == nil || self.size == 0 { return } self.Set(self.size-1, Node{}) self.size-- } func (self *linkedNodes) Push(v Node) { self.Set(self.size, v) } func (self *linkedNodes) Set(i int, v Node) { if i < _DEFAULT_NODE_CAP { self.head[i] = v if self.size <= i { self.size = i + 1 } return } a, b := i/_DEFAULT_NODE_CAP-1, i%_DEFAULT_NODE_CAP if a < 0 { self.head[b] = v } else { self.growTailLength(a + 1) var n = &self.tail[a] if *n == nil { *n = new(nodeChunk) } (*n)[b] = v } if self.size <= i { self.size = i + 1 } } func (self *linkedNodes) growTailLength(l int) { if l <= len(self.tail) { return } c := cap(self.tail) for c < l { c += 1 + c>>_APPEND_GROW_SHIFT } if c == cap(self.tail) { self.tail = self.tail[:l] return } 
tmp := make([]*nodeChunk, l, c) copy(tmp, self.tail) self.tail = tmp } func (self *linkedNodes) ToSlice(con []Node) { if len(con) < self.size { return } i := (self.size - 1) a, b := i/_DEFAULT_NODE_CAP-1, i%_DEFAULT_NODE_CAP if a < 0 { copy(con, self.head[:b+1]) return } else { copy(con, self.head[:]) con = con[_DEFAULT_NODE_CAP:] } for i := 0; i < a; i++ { copy(con, self.tail[i][:]) con = con[_DEFAULT_NODE_CAP:] } copy(con, self.tail[a][:b+1]) } func (self *linkedNodes) FromSlice(con []Node) { self.size = len(con) i := self.size - 1 a, b := i/_DEFAULT_NODE_CAP-1, i%_DEFAULT_NODE_CAP if a < 0 { copy(self.head[:b+1], con) return } else { copy(self.head[:], con) con = con[_DEFAULT_NODE_CAP:] } if cap(self.tail) <= a { c := (a + 1) + (a+1)>>_APPEND_GROW_SHIFT self.tail = make([]*nodeChunk, a+1, c) } self.tail = self.tail[:a+1] for i := 0; i < a; i++ { self.tail[i] = new(nodeChunk) copy(self.tail[i][:], con) con = con[_DEFAULT_NODE_CAP:] } self.tail[a] = new(nodeChunk) copy(self.tail[a][:b+1], con) } type pairChunk [_DEFAULT_NODE_CAP]Pair type linkedPairs struct { index map[uint64]int head pairChunk tail []*pairChunk size int } func (self *linkedPairs) BuildIndex() { if self.index == nil { self.index = make(map[uint64]int, self.size) } for i := 0; i < self.size; i++ { p := self.At(i) self.index[p.hash] = i } } func (self *linkedPairs) Cap() int { if self == nil { return 0 } return (len(self.tail) + 1) * _DEFAULT_NODE_CAP } func (self *linkedPairs) Len() int { if self == nil { return 0 } return self.size } func (self *linkedPairs) At(i int) *Pair { if self == nil { return nil } if i >= 0 && i < _DEFAULT_NODE_CAP && i < self.size { return &self.head[i] } else if i >= _DEFAULT_NODE_CAP && i < self.size { a, b := i/_DEFAULT_NODE_CAP-1, i%_DEFAULT_NODE_CAP if a < len(self.tail) { return &self.tail[a][b] } } return nil } func (self *linkedPairs) Push(v Pair) { self.Set(self.size, v) } func (self *linkedPairs) Pop() { if self == nil || self.size == 0 { return } 
self.Unset(self.size - 1) self.size-- } func (self *linkedPairs) Unset(i int) { if self.index != nil { p := self.At(i) delete(self.index, p.hash) } self.set(i, Pair{}) } func (self *linkedPairs) Set(i int, v Pair) { if self.index != nil { h := v.hash self.index[h] = i } self.set(i, v) } func (self *linkedPairs) set(i int, v Pair) { if i < _DEFAULT_NODE_CAP { self.head[i] = v if self.size <= i { self.size = i + 1 } return } a, b := i/_DEFAULT_NODE_CAP-1, i%_DEFAULT_NODE_CAP if a < 0 { self.head[b] = v } else { self.growTailLength(a + 1) var n = &self.tail[a] if *n == nil { *n = new(pairChunk) } (*n)[b] = v } if self.size <= i { self.size = i + 1 } } func (self *linkedPairs) growTailLength(l int) { if l <= len(self.tail) { return } c := cap(self.tail) for c < l { c += 1 + c>>_APPEND_GROW_SHIFT } if c == cap(self.tail) { self.tail = self.tail[:l] return } tmp := make([]*pairChunk, l, c) copy(tmp, self.tail) self.tail = tmp } // linear search func (self *linkedPairs) Get(key string) (*Pair, int) { if self.index != nil { // fast-path i, ok := self.index[caching.StrHash(key)] if ok { n := self.At(i) if n.Key == key { return n, i } // hash conflicts goto linear_search } else { return nil, -1 } } linear_search: for i := 0; i < self.size; i++ { if n := self.At(i); n.Key == key { return n, i } } return nil, -1 } func (self *linkedPairs) ToSlice(con []Pair) { if len(con) < self.size { return } i := self.size - 1 a, b := i/_DEFAULT_NODE_CAP-1, i%_DEFAULT_NODE_CAP if a < 0 { copy(con, self.head[:b+1]) return } else { copy(con, self.head[:]) con = con[_DEFAULT_NODE_CAP:] } for i := 0; i < a; i++ { copy(con, self.tail[i][:]) con = con[_DEFAULT_NODE_CAP:] } copy(con, self.tail[a][:b+1]) } func (self *linkedPairs) ToMap(con map[string]Node) { for i := 0; i < self.size; i++ { n := self.At(i) con[n.Key] = n.Value } } func (self *linkedPairs) copyPairs(to []Pair, from []Pair, l int) { copy(to, from) if self.index != nil { for i := 0; i < l; i++ { // NOTICE: in case of user not pass 
hash, just cal it h := caching.StrHash(from[i].Key) from[i].hash = h self.index[h] = i } } } func (self *linkedPairs) FromSlice(con []Pair) { self.size = len(con) i := self.size - 1 a, b := i/_DEFAULT_NODE_CAP-1, i%_DEFAULT_NODE_CAP if a < 0 { self.copyPairs(self.head[:b+1], con, b+1) return } else { self.copyPairs(self.head[:], con, len(self.head)) con = con[_DEFAULT_NODE_CAP:] } if cap(self.tail) <= a { c := (a + 1) + (a+1)>>_APPEND_GROW_SHIFT self.tail = make([]*pairChunk, a+1, c) } self.tail = self.tail[:a+1] for i := 0; i < a; i++ { self.tail[i] = new(pairChunk) self.copyPairs(self.tail[i][:], con, len(self.tail[i])) con = con[_DEFAULT_NODE_CAP:] } self.tail[a] = new(pairChunk) self.copyPairs(self.tail[a][:b+1], con, b+1) } func (self *linkedPairs) Less(i, j int) bool { return lessFrom(self.At(i).Key, self.At(j).Key, 0) } func (self *linkedPairs) Swap(i, j int) { a, b := self.At(i), self.At(j) if self.index != nil { self.index[a.hash] = j self.index[b.hash] = i } *a, *b = *b, *a } func (self *linkedPairs) Sort() { sort.Stable(self) } // Compare two strings from the pos d. 
func lessFrom(a, b string, d int) bool { l := len(a) if l > len(b) { l = len(b) } for i := d; i < l; i++ { if a[i] == b[i] { continue } return a[i] < b[i] } return len(a) < len(b) } type parseObjectStack struct { parser Parser v linkedPairs } type parseArrayStack struct { parser Parser v linkedNodes } func newLazyArray(p *Parser) Node { s := new(parseArrayStack) s.parser = *p return Node{ t: _V_ARRAY_LAZY, p: unsafe.Pointer(s), } } func newLazyObject(p *Parser) Node { s := new(parseObjectStack) s.parser = *p return Node{ t: _V_OBJECT_LAZY, p: unsafe.Pointer(s), } } func (self *Node) getParserAndArrayStack() (*Parser, *parseArrayStack) { stack := (*parseArrayStack)(self.p) return &stack.parser, stack } func (self *Node) getParserAndObjectStack() (*Parser, *parseObjectStack) { stack := (*parseObjectStack)(self.p) return &stack.parser, stack } ================================================ FILE: ast/buffer_test.go ================================================ /** * Copyright 2023 ByteDance Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/

package ast

import (
	"strconv"
	"testing"

	"github.com/stretchr/testify/require"
)

// makeNodes returns l nodes, each created with NewBool(true).
func makeNodes(l int) []Node {
	r := make([]Node, l)
	for i := 0; i < l; i++ {
		r[i] = NewBool(true)
	}
	return r
}

// makePairs returns l pairs keyed "0".."l-1", each with the value NewBool(true).
func makePairs(l int) []Pair {
	r := make([]Pair, l)
	for i := 0; i < l; i++ {
		r[i] = NewPair(strconv.Itoa(i), NewBool(true))
	}
	return r
}

// Test_linkedPairs_Push loads a list with FromSlice, pushes one pair and
// compares the ToSlice result, including the case where the push lands just
// past _DEFAULT_NODE_CAP elements.
func Test_linkedPairs_Push(t *testing.T) {
	type args struct {
		in  []Pair
		v   Pair
		exp []Pair
	}
	tests := []struct {
		name string
		args args
	}{
		{
			name: "add empty",
			args: args{
				in:  []Pair{},
				v:   NewPair("a", NewBool(true)),
				exp: []Pair{NewPair("a", NewBool(true))},
			},
		},
		{
			name: "add one",
			args: args{
				in:  []Pair{NewPair("a", NewBool(false))},
				v:   NewPair("b", NewBool(true)),
				exp: []Pair{NewPair("a", NewBool(false)), NewPair("b", NewBool(true))},
			},
		},
		{
			name: "add _DEFAULT_NODE_CAP",
			args: args{
				in:  makePairs(_DEFAULT_NODE_CAP),
				v:   NewPair(strconv.Itoa(_DEFAULT_NODE_CAP), NewBool(true)),
				exp: makePairs(_DEFAULT_NODE_CAP + 1),
			},
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			self := &linkedPairs{}
			self.FromSlice(tt.args.in)
			self.Push(tt.args.v)
			act := make([]Pair, self.Len())
			self.ToSlice(act)
			require.Equal(t, tt.args.exp, act)
		})
	}
}

// Test_linkedNodes_Push is the linkedNodes counterpart of Test_linkedPairs_Push.
func Test_linkedNodes_Push(t *testing.T) {
	type args struct {
		in  []Node
		v   Node
		exp []Node
	}
	tests := []struct {
		name string
		args args
	}{
		{
			name: "add empty",
			args: args{
				in:  []Node{},
				v:   NewBool(true),
				exp: []Node{NewBool(true)},
			},
		},
		{
			name: "add one",
			args: args{
				in:  []Node{NewBool(false)},
				v:   NewBool(true),
				exp: []Node{NewBool(false), NewBool(true)},
			},
		},
		{
			name: "add _DEFAULT_NODE_CAP",
			args: args{
				in:  makeNodes(_DEFAULT_NODE_CAP),
				v:   NewBool(true),
				exp: makeNodes(_DEFAULT_NODE_CAP + 1),
			},
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			self := &linkedNodes{}
			self.FromSlice(tt.args.in)
			self.Push(tt.args.v)
			act := make([]Node, self.Len())
			self.ToSlice(act)
			require.Equal(t, tt.args.exp, act)
		})
	}
}

// Test_linkedNodes_Pop loads a list with FromSlice, pops once and compares the
// ToSlice result; popping an empty list must leave it empty.
func Test_linkedNodes_Pop(t *testing.T) {
	type args struct {
		in  []Node
		exp []Node
	}
	tests := []struct {
		name string
		args args
	}{
		{
			name: "remove empty",
			args: args{
				in:  []Node{},
				exp: []Node{},
			},
		},
		{
			name: "remove one",
			args: args{
				in:  []Node{NewBool(false)},
				exp: []Node{},
			},
		},
		{
			// NOTE(review): despite its name this case removes one element from a full head chunk.
			name: "add _DEFAULT_NODE_CAP",
			args: args{
				in:  makeNodes(_DEFAULT_NODE_CAP),
				exp: makeNodes(_DEFAULT_NODE_CAP - 1),
			},
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			self := &linkedNodes{}
			self.FromSlice(tt.args.in)
			self.Pop()
			act := make([]Node, self.Len())
			self.ToSlice(act)
			require.Equal(t, tt.args.exp, act)
		})
	}
}

// Test_linkedNodes_MoveOne checks MoveOne for an out-of-range source, equal
// indexes, and moves in both directions.
func Test_linkedNodes_MoveOne(t *testing.T) {
	type args struct {
		in     []Node
		source int
		target int
		exp    []Node
	}
	tests := []struct {
		name string
		args args
	}{
		{
			name: "over index",
			args: args{
				in:     []Node{NewBool(true)},
				source: 1,
				target: 0,
				exp:    []Node{NewBool(true)},
			},
		},
		{
			name: "equal index",
			args: args{
				in:     []Node{NewBool(true)},
				source: 0,
				target: 0,
				exp:    []Node{NewBool(true)},
			},
		},
		{
			name: "forward index",
			args: args{
				in:     []Node{NewString("a"), NewString("b"), NewString("c")},
				source: 0,
				target: 2,
				exp:    []Node{NewString("b"), NewString("c"), NewString("a")},
			},
		},
		{
			name: "backward index",
			args: args{
				in:     []Node{NewString("a"), NewString("b"), NewString("c")},
				source: 2,
				target: 1,
				exp:    []Node{NewString("a"), NewString("c"), NewString("b")},
			},
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			self := &linkedNodes{}
			self.FromSlice(tt.args.in)
			self.MoveOne(tt.args.source, tt.args.target)
			act := make([]Node, self.Len())
			self.ToSlice(act)
			require.Equal(t, tt.args.exp, act)
		})
	}
}

================================================
FILE: ast/decode.go
================================================
/*
 * Copyright 2022 ByteDance Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package ast import ( "encoding/base64" "runtime" "strconv" "unsafe" "github.com/bytedance/sonic/internal/native/types" "github.com/bytedance/sonic/internal/rt" "github.com/bytedance/sonic/internal/utils" "github.com/bytedance/sonic/unquote" ) var bytesNull = []byte("null") const ( strNull = "null" bytesTrue = "true" bytesFalse = "false" bytesObject = "{}" bytesArray = "[]" ) //go:nocheckptr func skipBlank(src string, pos int) int { se := uintptr(rt.IndexChar(src, len(src))) sp := uintptr(rt.IndexChar(src, pos)) for sp < se { if !utils.IsSpace(*(*byte)(unsafe.Pointer(sp))) { break } sp += 1 } if sp >= se { return -int(types.ERR_EOF) } runtime.KeepAlive(src) return int(sp - uintptr(rt.IndexChar(src, 0))) } func decodeNull(src string, pos int) (ret int) { ret = pos + 4 if ret > len(src) { return -int(types.ERR_EOF) } if src[pos:ret] == strNull { return ret } else { return -int(types.ERR_INVALID_CHAR) } } func decodeTrue(src string, pos int) (ret int) { ret = pos + 4 if ret > len(src) { return -int(types.ERR_EOF) } if src[pos:ret] == bytesTrue { return ret } else { return -int(types.ERR_INVALID_CHAR) } } func decodeFalse(src string, pos int) (ret int) { ret = pos + 5 if ret > len(src) { return -int(types.ERR_EOF) } if src[pos:ret] == bytesFalse { return ret } return -int(types.ERR_INVALID_CHAR) } //go:nocheckptr func decodeString(src string, pos int) (ret int, v string) { ret, ep := skipString(src, pos) if ep == -1 { (*rt.GoString)(unsafe.Pointer(&v)).Ptr = rt.IndexChar(src, pos+1) (*rt.GoString)(unsafe.Pointer(&v)).Len = ret - pos - 2 return ret, v } result, err := 
unquote.String(src[pos:ret]) if err != 0 { return -int(types.ERR_INVALID_CHAR), "" } runtime.KeepAlive(src) return ret, result } func decodeBinary(src string, pos int) (ret int, v []byte) { var vv string ret, vv = decodeString(src, pos) if ret < 0 { return ret, nil } var err error v, err = base64.StdEncoding.DecodeString(vv) if err != nil { return -int(types.ERR_INVALID_CHAR), nil } return ret, v } func isDigit(c byte) bool { return c >= '0' && c <= '9' } //go:nocheckptr func decodeInt64(src string, pos int) (ret int, v int64, err error) { sp := uintptr(rt.IndexChar(src, pos)) ss := uintptr(sp) se := uintptr(rt.IndexChar(src, len(src))) if uintptr(sp) >= se { return -int(types.ERR_EOF), 0, nil } if c := *(*byte)(unsafe.Pointer(sp)); c == '-' { sp += 1 } if sp == se { return -int(types.ERR_EOF), 0, nil } for ; sp < se; sp += uintptr(1) { if !isDigit(*(*byte)(unsafe.Pointer(sp))) { break } } if sp < se { if c := *(*byte)(unsafe.Pointer(sp)); c == '.' || c == 'e' || c == 'E' { return -int(types.ERR_INVALID_NUMBER_FMT), 0, nil } } var vv string ret = int(uintptr(sp) - uintptr((*rt.GoString)(unsafe.Pointer(&src)).Ptr)) (*rt.GoString)(unsafe.Pointer(&vv)).Ptr = unsafe.Pointer(ss) (*rt.GoString)(unsafe.Pointer(&vv)).Len = ret - pos v, err = strconv.ParseInt(vv, 10, 64) if err != nil { //NOTICE: allow overflow here if err.(*strconv.NumError).Err == strconv.ErrRange { return ret, 0, err } return -int(types.ERR_INVALID_CHAR), 0, err } runtime.KeepAlive(src) return ret, v, nil } func isNumberChars(c byte) bool { return (c >= '0' && c <= '9') || c == '+' || c == '-' || c == 'e' || c == 'E' || c == '.' 
}

// decodeFloat64 parses the number starting at src[pos] as a float64.
//
// It accepts one optional leading '-', then consumes bytes for as long as
// isNumberChars reports true and hands that span to strconv.ParseFloat.
//
// Returns:
//   - ret: index just past the consumed bytes, or a negated types error code
//     (ERR_EOF when nothing is left to read, ERR_INVALID_CHAR when
//     ParseFloat rejects the span).
//   - v:   the parsed value; 0 whenever err is non-nil.
//   - err: the strconv error, if any. A strconv.ErrRange error is returned
//     together with a non-negative ret, so overflow is distinguishable from
//     a malformed number. The EOF cases return a nil err.
//
//go:nocheckptr
func decodeFloat64(src string, pos int) (ret int, v float64, err error) {
	sp := uintptr(rt.IndexChar(src, pos))
	// ss remembers where the number starts; sp is advanced while scanning.
	ss := uintptr(sp)
	se := uintptr(rt.IndexChar(src, len(src)))
	if uintptr(sp) >= se {
		return -int(types.ERR_EOF), 0, nil
	}

	// optional sign
	if c := *(*byte)(unsafe.Pointer(sp)); c == '-' {
		sp += 1
	}
	// a lone '-' at the very end of the input
	if sp == se {
		return -int(types.ERR_EOF), 0, nil
	}

	// advance to the first byte that cannot belong to a number
	for ; sp < se; sp += uintptr(1) {
		if !isNumberChars(*(*byte)(unsafe.Pointer(sp))) {
			break
		}
	}

	// Build a string header over src[pos:ret] without copying.
	var vv string
	ret = int(uintptr(sp) - uintptr((*rt.GoString)(unsafe.Pointer(&src)).Ptr))
	(*rt.GoString)(unsafe.Pointer(&vv)).Ptr = unsafe.Pointer(ss)
	(*rt.GoString)(unsafe.Pointer(&vv)).Len = ret - pos

	v, err = strconv.ParseFloat(vv, 64)
	if err != nil {
		//NOTICE: allow overflow here
		if err.(*strconv.NumError).Err == strconv.ErrRange {
			return ret, 0, err
		}
		return -int(types.ERR_INVALID_CHAR), 0, err
	}

	runtime.KeepAlive(src)
	return ret, v, nil
}

// decodeValue skips leading blanks and decodes the JSON value that starts
// there, dispatching on its first byte.
//
// Returns ret, the index just past what was consumed (or a negative error
// code), and v, whose Vt is the value type on success or the same negative
// code on failure.
//
//   - null / true / false: fully consumed.
//   - string: fully skipped; Iv is the index of the first byte after the
//     opening quote and Ep is skipString's ep result (index of the first
//     backslash, or -1).
//   - '{' / '[': only the opening bracket is consumed (ret == pos+1).
//   - number: with skipnum the number is only skipped (skipNumber) and
//     reported as V_DOUBLE with Ep set to its start. Otherwise decodeInt64 is
//     tried first and decodeFloat64 is used only when decodeInt64 reports
//     ERR_INVALID_NUMBER_FMT. Errors returned by both parsers are discarded;
//     only the position/code is used.
func decodeValue(src string, pos int, skipnum bool) (ret int, v types.JsonState) {
	pos = skipBlank(src, pos)
	if pos < 0 {
		return pos, types.JsonState{Vt: types.ValueType(pos)}
	}
	switch c := src[pos]; c {
	case 'n':
		ret = decodeNull(src, pos)
		if ret < 0 {
			return ret, types.JsonState{Vt: types.ValueType(ret)}
		}
		return ret, types.JsonState{Vt: types.V_NULL}
	case '"':
		var ep int
		ret, ep = skipString(src, pos)
		if ret < 0 {
			return ret, types.JsonState{Vt: types.ValueType(ret)}
		}
		return ret, types.JsonState{Vt: types.V_STRING, Iv: int64(pos + 1), Ep: ep}
	case '{':
		return pos + 1, types.JsonState{Vt: types.V_OBJECT}
	case '[':
		return pos + 1, types.JsonState{Vt: types.V_ARRAY}
	case 't':
		ret = decodeTrue(src, pos)
		if ret < 0 {
			return ret, types.JsonState{Vt: types.ValueType(ret)}
		}
		return ret, types.JsonState{Vt: types.V_TRUE}
	case 'f':
		ret = decodeFalse(src, pos)
		if ret < 0 {
			return ret, types.JsonState{Vt: types.ValueType(ret)}
		}
		return ret, types.JsonState{Vt: types.V_FALSE}
	case '-', '+', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
		if skipnum {
			ret = skipNumber(src, pos)
			if ret >= 0 {
				return ret, types.JsonState{Vt: types.V_DOUBLE, Iv: 0, Ep: pos}
			} else {
				return ret, types.JsonState{Vt: types.ValueType(ret)}
			}
		} else {
			var iv int64
			ret, iv, _ = decodeInt64(src, pos)
			if ret >= 0 {
				return ret, types.JsonState{Vt: types.V_INTEGER, Iv: iv, Ep: pos}
			} else if ret != -int(types.ERR_INVALID_NUMBER_FMT) {
				return ret, types.JsonState{Vt: types.ValueType(ret)}
			}
			// not an integer literal: retry as a float
			var fv float64
			ret, fv, _ = decodeFloat64(src, pos)
			if ret >= 0 {
				return ret, types.JsonState{Vt: types.V_DOUBLE, Dv: fv, Ep: pos}
			} else {
				return ret, types.JsonState{Vt: types.ValueType(ret)}
			}
		}
	default:
		return -int(types.ERR_INVALID_CHAR), types.JsonState{Vt: -types.ValueType(types.ERR_INVALID_CHAR)}
	}
}

// skipNumber forwards to utils.SkipNumber and returns its result unchanged.
//
//go:nocheckptr
func skipNumber(src string, pos int) (ret int) {
	return utils.SkipNumber(src, pos)
}

// skipString skips the JSON string whose opening quote is at src[pos].
//
// Returns ret, the index just past the closing quote, and ep, the index of
// the first backslash inside the string (-1 when there is none).
// On failure ep is -1 and ret is:
//   - -ERR_EOF when fewer than two bytes remain at pos or no closing quote is
//     found before the end of src;
//   - -ERR_INVALID_CHAR when src[pos] is not a quote.
//
//go:nocheckptr
func skipString(src string, pos int) (ret int, ep int) {
	if pos+1 >= len(src) {
		return -int(types.ERR_EOF), -1
	}

	sp := uintptr(rt.IndexChar(src, pos))
	se := uintptr(rt.IndexChar(src, len(src)))

	// the value does not start with a quote
	if *(*byte)(unsafe.Pointer(sp)) != '"' {
		return -int(types.ERR_INVALID_CHAR), -1
	}
	sp += 1

	ep = -1
	for sp < se {
		c := *(*byte)(unsafe.Pointer(sp))
		if c == '\\' {
			// record only the first escape position
			if ep == -1 {
				ep = int(uintptr(sp) - uintptr((*rt.GoString)(unsafe.Pointer(&src)).Ptr))
			}
			// skip the backslash together with the byte it escapes
			sp += 2
			continue
		}
		sp += 1
		if c == '"' {
			return int(uintptr(sp) - uintptr((*rt.GoString)(unsafe.Pointer(&src)).Ptr)), ep
		}
	}

	runtime.KeepAlive(src)
	// closing quote not found before EOF
	return -int(types.ERR_EOF), -1
}

// skipPair skips a bracketed region that opens with lchar at src[pos] and
// ends at the matching rchar, returning the index just past that rchar.
//
// Only nesting depth is tracked: quotes toggle an in-string flag so brackets
// inside strings are not counted, and a backslash skips the byte after it.
// Nothing else about the enclosed text is checked.
//
// Errors: -ERR_EOF when fewer than two bytes remain at pos; -ERR_INVALID_CHAR
// when src[pos] is not lchar or the input ends before the brackets balance.
//
//go:nocheckptr
func skipPair(src string, pos int, lchar byte, rchar byte) (ret int) {
	if pos+1 >= len(src) {
		return -int(types.ERR_EOF)
	}

	sp := uintptr(rt.IndexChar(src, pos))
	se := uintptr(rt.IndexChar(src, len(src)))

	if *(*byte)(unsafe.Pointer(sp)) != lchar {
		return -int(types.ERR_INVALID_CHAR)
	}

	sp += 1
	nbrace := 1
	inquote := false

	for sp < se {
		c := *(*byte)(unsafe.Pointer(sp))
		if c == '\\' {
			sp += 2
			continue
		} else if c == '"' {
			inquote = !inquote
		} else if c == lchar {
			if !inquote {
				nbrace += 1
			}
		} else if c == rchar {
			if !inquote {
				nbrace -= 1
				if nbrace == 0 {
					// step past the closing bracket before leaving the loop
					sp += 1
					break
				}
			}
		}
		sp += 1
	}

	if nbrace != 0 {
		return -int(types.ERR_INVALID_CHAR)
	}

	runtime.KeepAlive(src)
	return int(uintptr(sp) - uintptr((*rt.GoString)(unsafe.Pointer(&src)).Ptr))
}

// skipValueFast skips one JSON value after leading blanks. Objects and arrays
// are skipped with skipPair, i.e. by bracket matching only.
//
// Returns ret, the index just past the value (or a negative error code), and
// start, the index where the value begins (-1 when skipping blanks failed).
func skipValueFast(src string, pos int) (ret int, start int) {
	pos = skipBlank(src, pos)
	if pos < 0 {
		return pos, -1
	}
	switch c := src[pos]; c {
	case 'n':
		ret = decodeNull(src, pos)
	case '"':
		ret, _ = skipString(src, pos)
	case '{':
		ret = skipPair(src, pos, '{', '}')
	case '[':
		ret = skipPair(src, pos, '[', ']')
	case 't':
		ret = decodeTrue(src, pos)
	case 'f':
		ret = decodeFalse(src, pos)
	case '-', '+', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
		ret = skipNumber(src, pos)
	default:
		ret = -int(types.ERR_INVALID_CHAR)
	}
	return ret, pos
}

// skipValue skips one JSON value after leading blanks. Unlike skipValueFast,
// objects and arrays are walked element by element through skipObject and
// skipArray.
//
// Returns ret, the index just past the value (or a negative error code), and
// start, the index where the value begins (-1 when skipping blanks failed).
func skipValue(src string, pos int) (ret int, start int) {
	pos = skipBlank(src, pos)
	if pos < 0 {
		return pos, -1
	}
	switch c := src[pos]; c {
	case 'n':
		ret = decodeNull(src, pos)
	case '"':
		ret, _ = skipString(src, pos)
	case '{':
		ret, _ = skipObject(src, pos)
	case '[':
		ret, _ = skipArray(src, pos)
	case 't':
		ret = decodeTrue(src, pos)
	case 'f':
		ret = decodeFalse(src, pos)
	case '-', '+', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
		ret = skipNumber(src, pos)
	default:
		ret = -int(types.ERR_INVALID_CHAR)
	}
	return ret, pos
}

// skipObject skips a JSON object, checking the key / ':' / value / ','
// sequence as it goes.
//
// Returns ret, the index just past the closing '}', and start, the index of
// the opening '{'. On failure ret is a negative error code and start is -1.
func skipObject(src string, pos int) (ret int, start int) {
	start = skipBlank(src, pos)
	if start < 0 {
		return start, -1
	}

	if src[start] != '{' {
		return -int(types.ERR_INVALID_CHAR), -1
	}

	pos = start + 1
	pos = skipBlank(src, pos)
	if pos < 0 {
		return pos, -1
	}
	// empty object
	if src[pos] == '}' {
		return pos + 1, start
	}

	for {
		// key
		pos, _ = skipString(src, pos)
		if pos < 0 {
			return pos, -1
		}

		pos = skipBlank(src, pos)
		if pos < 0 {
			return pos, -1
		}
		if src[pos] != ':' {
			return -int(types.ERR_INVALID_CHAR), -1
		}

		// value
		pos++
		pos, _ = skipValue(src, pos)
		if pos < 0 {
			return pos, -1
		}

		// either the end of the object or a separator must follow
		pos = skipBlank(src, pos)
		if pos < 0 {
			return pos, -1
		}
		if src[pos] == '}' {
			return pos + 1, start
		}
		if src[pos] != ',' {
			return -int(types.ERR_INVALID_CHAR), -1
		}

		pos++
		pos = skipBlank(src, pos)
		if pos < 0 {
			return pos, -1
		}

	}
}

// skipArray skips a JSON array, checking the value / ',' sequence as it goes.
//
// Returns ret, the index just past the closing ']', and start, the index of
// the opening '['. On failure ret is a negative error code and start is -1.
func skipArray(src string, pos int) (ret int, start int) {
	start = skipBlank(src, pos)
	if start < 0 {
		return start, -1
	}

	if src[start] != '[' {
		return -int(types.ERR_INVALID_CHAR), -1
	}

	pos = start + 1
	pos = skipBlank(src, pos)
	if pos < 0 {
		return pos, -1
	}
	// empty array
	if src[pos] == ']' {
		return pos + 1, start
	}

	for {
		// skipValue skips leading blanks itself
		pos, _ = skipValue(src, pos)
		if pos < 0 {
			return pos, -1
		}

		pos = skipBlank(src, pos)
		if pos < 0 {
			return pos, -1
		}
		if src[pos] == ']' {
			return pos + 1, start
		}
		if src[pos] != ',' {
			return -int(types.ERR_INVALID_CHAR), -1
		}
		pos++
	}
}

// _DecodeString decodes the JSON string that starts at pos and returns it as
// a Go string.
//   - needEsc selects whether escape sequences are unquoted; when false the
//     text between the quotes is returned as is
//   - validStr enables UTF-8 validation of the text between the quotes
//   - ret is the parser position after the string, or a negative error code
//   - hasEsc reports whether the source string contains an escape sequence
//
// A value that is not a string yields -_ERR_UNSUPPORT_TYPE.
func _DecodeString(src string, pos int, needEsc bool, validStr bool) (v string, ret int, hasEsc bool) {
	p := NewParserObj(src)
	p.p = pos
	switch val := p.decodeValue(); val.Vt {
	case types.V_STRING:
		// text between the quotes: p.p is just past the closing quote
		str := p.s[val.Iv : p.p-1]
		if validStr && !validate_utf8(str) {
			return "", -int(types.ERR_INVALID_UTF8), false
		}
		/* fast path: no escape sequence */
		if val.Ep == -1 {
			return str, p.p, false
		} else if !needEsc {
			return str, p.p, true
		}
		/* unquote the string */
		out, err := unquote.String(str)
		/* check for errors */
		if err != 0 {
			return "", -int(err), true
		} else {
			return out, p.p, true
		}
	default:
		return "", -int(_ERR_UNSUPPORT_TYPE), false
	}
}

================================================
FILE: ast/decode_test.go
================================================
/*
 * Copyright 2022 ByteDance Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package ast import ( "testing" "unicode/utf8" "github.com/bytedance/sonic/internal/rt" ) func Test_DecodeString(t *testing.T) { type args struct { src string pos int needEsc bool validStr bool } invalidstr := rt.Mem2Str([]byte{'"', 193, 255, '"'}) println(utf8.ValidString(invalidstr)) tests := []struct { name string args args wantV string wantRet int wantHasEsc bool }{ {"empty", args{`""`, 0, false, false}, "", 2, false}, {"one", args{`"1"`, 0, false, false}, "1", 3, false}, {"escape", args{`"\\"`, 0, false, false}, `\\`, 4, true}, {"escape", args{`"\\"`, 0, true, true}, `\`, 4, true}, {"uft8", args{`"\u263a"`, 0, false, false}, `\u263a`, 8, true}, {"uft8", args{`"\u263a"`, 0, true, true}, `☺`, 8, true}, {"invalid uft8", args{`"\xx"`, 0, false, false}, `\xx`, 5, true}, {"invalid escape", args{`"\xx"`, 0, false, true}, `\xx`, 5, true}, {"invalid escape", args{`"\xx"`, 0, true, true}, ``, -3, true}, {"invalid string", args{invalidstr, 0, false, false}, invalidstr[1:3], 4, false}, {"invalid string", args{invalidstr, 0, true, true}, "", -10, false}, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { gotV, gotRet, gotHasEsc := _DecodeString(tt.args.src, tt.args.pos, tt.args.needEsc, tt.args.validStr) if gotV != tt.wantV { t.Errorf("_DecodeString() gotV = %v, want %v", gotV, tt.wantV) } if gotRet != tt.wantRet { t.Errorf("_DecodeString() gotRet = %v, want %v", gotRet, tt.wantRet) } if gotHasEsc != tt.wantHasEsc { t.Errorf("_DecodeString() gotHasEsc = %v, want %v", gotHasEsc, tt.wantHasEsc) } }) } } ================================================ FILE: 
ast/encode.go ================================================ /* * Copyright 2021 ByteDance Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package ast import ( "sync" "unicode/utf8" "github.com/bytedance/gopkg/lang/dirtmake" "github.com/bytedance/sonic/internal/rt" "github.com/bytedance/sonic/option" ) func quoteString(e *[]byte, s string) { *e = append(*e, '"') start := 0 for i := 0; i < len(s); { if b := s[i]; b < utf8.RuneSelf { if rt.SafeSet[b] { i++ continue } if start < i { *e = append(*e, s[start:i]...) } *e = append(*e, '\\') switch b { case '\\', '"': *e = append(*e, b) case '\n': *e = append(*e, 'n') case '\r': *e = append(*e, 'r') case '\t': *e = append(*e, 't') default: // This encodes bytes < 0x20 except for \t, \n and \r. // If escapeHTML is set, it also escapes <, >, and & // because they can lead to security holes when // user-controlled strings are rendered into JSON // and served to some browsers. *e = append(*e, `u00`...) *e = append(*e, rt.Hex[b>>4]) *e = append(*e, rt.Hex[b&0xF]) } i++ start = i continue } c, size := utf8.DecodeRuneInString(s[i:]) // if c == utf8.RuneError && size == 1 { // if start < i { // e.Write(s[start:i]) // } // e.WriteString(`\ufffd`) // i += size // start = i // continue // } if c == '\u2028' || c == '\u2029' { if start < i { *e = append(*e, s[start:i]...) } *e = append(*e, `\u202`...) *e = append(*e, rt.Hex[c&0xF]) i += size start = i continue } i += size } if start < len(s) { *e = append(*e, s[start:]...) 
} *e = append(*e, '"') } var bytesPool = sync.Pool{} func (self *Node) MarshalJSON() ([]byte, error) { if self == nil { return bytesNull, nil } // fast path for raw node if self.isRaw() { return rt.Str2Mem(self.toString()), nil } buf := newBuffer() err := self.encode(buf) if err != nil { freeBuffer(buf) return nil, err } var ret []byte if !rt.CanSizeResue(cap(*buf)) { ret = *buf } else { ret = dirtmake.Bytes(len(*buf), len(*buf)) copy(ret, *buf) freeBuffer(buf) } return ret, err } func newBuffer() *[]byte { if ret := bytesPool.Get(); ret != nil { return ret.(*[]byte) } else { buf := make([]byte, 0, option.DefaultAstBufferSize) return &buf } } func freeBuffer(buf *[]byte) { if !rt.CanSizeResue(cap(*buf)) { return } *buf = (*buf)[:0] bytesPool.Put(buf) } func (self *Node) encode(buf *[]byte) error { if self.isRaw() { return self.encodeRaw(buf) } switch int(self.itype()) { case V_NONE: return ErrNotExist case V_ERROR: return self.Check() case V_NULL: return self.encodeNull(buf) case V_TRUE: return self.encodeTrue(buf) case V_FALSE: return self.encodeFalse(buf) case V_ARRAY: return self.encodeArray(buf) case V_OBJECT: return self.encodeObject(buf) case V_STRING: return self.encodeString(buf) case V_NUMBER: return self.encodeNumber(buf) case V_ANY: return self.encodeInterface(buf) default: return ErrUnsupportType } } func (self *Node) encodeRaw(buf *[]byte) error { lock := self.rlock() if !self.isRaw() { self.runlock() return self.encode(buf) } raw := self.toString() if lock { self.runlock() } *buf = append(*buf, raw...) return nil } func (self *Node) encodeNull(buf *[]byte) error { *buf = append(*buf, strNull...) return nil } func (self *Node) encodeTrue(buf *[]byte) error { *buf = append(*buf, bytesTrue...) return nil } func (self *Node) encodeFalse(buf *[]byte) error { *buf = append(*buf, bytesFalse...) return nil } func (self *Node) encodeNumber(buf *[]byte) error { str := self.toString() *buf = append(*buf, str...) 
return nil } func (self *Node) encodeString(buf *[]byte) error { if self.l == 0 { *buf = append(*buf, '"', '"') return nil } quote(buf, self.toString()) return nil } func (self *Node) encodeArray(buf *[]byte) error { if self.isLazy() { if err := self.skipAllIndex(); err != nil { return err } } nb := self.len() if nb == 0 { *buf = append(*buf, bytesArray...) return nil } *buf = append(*buf, '[') var started bool for i := 0; i < nb; i++ { n := self.nodeAt(i) if !n.Exists() { continue } if started { *buf = append(*buf, ',') } started = true if err := n.encode(buf); err != nil { return err } } *buf = append(*buf, ']') return nil } func (self *Pair) encode(buf *[]byte) error { if len(*buf) == 0 { *buf = append(*buf, '"', '"', ':') return self.Value.encode(buf) } quote(buf, self.Key) *buf = append(*buf, ':') return self.Value.encode(buf) } func (self *Node) encodeObject(buf *[]byte) error { if self.isLazy() { if err := self.skipAllKey(); err != nil { return err } } nb := self.len() if nb == 0 { *buf = append(*buf, bytesObject...) return nil } *buf = append(*buf, '{') var started bool for i := 0; i < nb; i++ { n := self.pairAt(i) if n == nil || !n.Value.Exists() { continue } if started { *buf = append(*buf, ',') } started = true if err := n.encode(buf); err != nil { return err } } *buf = append(*buf, '}') return nil } ================================================ FILE: ast/encode_test.go ================================================ /* * Copyright 2021 ByteDance Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
* See the License for the specific language governing permissions and * limitations under the License. */ package ast import ( "encoding/json" "runtime" "strings" "sync" "testing" "github.com/bytedance/sonic/internal/native/types" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) func TestGC_Encode(t *testing.T) { if debugSyncGC { return } root, err := NewSearcher(_TwitterJson).GetByPath() if err != nil { t.Fatal(err) } root.LoadAll() _, err = root.MarshalJSON() if err != nil { t.Fatal(err) } wg := &sync.WaitGroup{} N := 10000 for i := 0; i < N; i++ { wg.Add(1) go func(wg *sync.WaitGroup) { defer wg.Done() root, err := NewSearcher(_TwitterJson).GetByPath() if err != nil { t.Error(err) return } root.Load() _, err = root.MarshalJSON() if err != nil { t.Error(err) return } runtime.GC() }(wg) } wg.Wait() } func TestEncodeValue(t *testing.T) { obj := new(_TwitterStruct) if err := json.Unmarshal([]byte(_TwitterJson), obj); err != nil { t.Fatal(err) } // buf, err := encoder.Encode(obj, encoder.EscapeHTML|encoder.SortMapKeys) buf, err := json.Marshal(obj) if err != nil { t.Fatal(err) } quote, err := json.Marshal(_TwitterJson) if err != nil { t.Fatal(err) } type Case struct { node Node exp string err bool } input := []Case{ {NewNull(), "null", false}, {NewBool(true), "true", false}, {NewBool(false), "false", false}, {NewNumber("0.0"), "0.0", false}, {NewString(""), `""`, false}, {NewString(`\"\"`), `"\\\"\\\""`, false}, {NewString(_TwitterJson), string(quote), false}, {NewArray([]Node{}), "[]", false}, {NewArray([]Node{NewString(""), NewNull()}), `["",null]`, false}, {NewArray([]Node{NewBool(true), NewString("true"), NewString("\t")}), `[true,"true","\t"]`, false}, {NewObject([]Pair{NewPair("a", NewNull()), NewPair("b", NewNumber("0"))}), `{"a":null,"b":0}`, false}, {NewObject([]Pair{NewPair("\ta", NewString("\t")), NewPair("\bb", NewString("\b")), NewPair("\nb", NewString("\n")), NewPair("\ra", NewString("\r"))}), 
`{"\ta":"\t","\u0008b":"\u0008","\nb":"\n","\ra":"\r"}`, false}, {NewObject([]Pair{}), `{}`, false}, {NewObject([]Pair{Pair{Key: "", Value: NewNull()}}), `{"":null}`, false}, {NewBytes([]byte("hello, world")), `"aGVsbG8sIHdvcmxk"`, false}, {NewAny(obj), string(buf), false}, {NewRaw(`[{ }]`), "[{}]", false}, {Node{}, "", true}, {Node{t: types.ValueType(1)}, "", true}, } for i, c := range input { t.Log(i) buf, err := json.Marshal(&c.node) if c.err { if err == nil { t.Fatal(i) } continue } if err != nil { t.Fatal(i, err) } assert.Equal(t, c.exp, string(buf)) } } func BenchmarkNil(b *testing.B) { for i := 0; i < b.N; i++ { null := (*Node)(nil) _, _ = null.MarshalJSON() } } func TestEncodeNode(t *testing.T) { null := (*Node)(nil) js, err := null.MarshalJSON() if err != nil { t.Fatal(err) } if string(js) != "null" { t.Fatal(string(js)) } data := `{"a":[{},[],-0.1,true,false,null,""],"b":0,"c":true,"d":false,"e":null,"g":""}` root, e := NewSearcher(data).GetByPath() if e != nil { t.Fatal(root) } ret, err := root.MarshalJSON() if err != nil { t.Fatal(err) } if string(ret) != data { t.Fatal(string(ret)) } root.skipAllKey() ret, err = root.MarshalJSON() if err != nil { t.Fatal(err) } if string(ret) != data { t.Fatal(string(ret)) } root.Load() ret, err = root.MarshalJSON() if err != nil { t.Fatal(err) } if string(ret) != data { t.Fatal(string(ret)) } } type SortableNode struct { sorted bool *Node } func (j *SortableNode) UnmarshalJSON(data []byte) error { j.Node = new(Node) return j.Node.UnmarshalJSON(data) } func (j *SortableNode) MarshalJSON() ([]byte, error) { if !j.sorted { j.Node.SortKeys(true) j.sorted = true } return j.Node.MarshalJSON() } func TestMarshalSort(t *testing.T) { var data = `{"d":3,"a":{"c":1,"b":2},"e":null}` var obj map[string]*SortableNode require.NoError(t, json.Unmarshal([]byte(data), &obj)) out, err := json.Marshal(obj) require.NoError(t, err) require.Equal(t, `{"a":{"b":2,"c":1},"d":3,"e":null}`, string(out)) out, err = json.Marshal(obj) 
require.NoError(t, err) require.Equal(t, `{"a":{"b":2,"c":1},"d":3,"e":null}`, string(out)) } func BenchmarkEncodeRaw_Sonic(b *testing.B) { data := _TwitterJson root, e := NewSearcher(data).GetByPath() if e != nil { b.Fatal(root) } _, err := root.MarshalJSON() if err != nil { b.Fatal(err) } b.SetBytes(int64(len(data))) b.ResetTimer() for i := 0; i < b.N; i++ { _, err := root.MarshalJSON() if err != nil { b.Fatal(err) } } } func BenchmarkEncodeSkip_Sonic(b *testing.B) { data := _TwitterJson root, e := NewParser(data).Parse() if e != 0 { b.Fatal(root) } root.skipAllKey() _, err := root.MarshalJSON() if err != nil { b.Fatal(err) } b.SetBytes(int64(len(data))) b.ResetTimer() for i := 0; i < b.N; i++ { _, err := root.MarshalJSON() if err != nil { b.Fatal(err) } } } func BenchmarkEncodeLoad_Sonic(b *testing.B) { data := _TwitterJson root, e := NewParser(data).Parse() if e != 0 { b.Fatal(root) } root.Load() _, err := root.MarshalJSON() if err != nil { b.Fatal(err) } b.SetBytes(int64(len(data))) b.ResetTimer() for i := 0; i < b.N; i++ { _, err := root.MarshalJSON() if err != nil { b.Fatal(err) } } } func TestEncodeNone(t *testing.T) { n := NewObject([]Pair{{Key: "a", Value: Node{}}}) out, err := n.MarshalJSON() require.NoError(t, err) require.Equal(t, "{}", string(out)) n = NewObject([]Pair{{Key: "a", Value: NewNull()}, {Key: "b", Value: Node{}}}) out, err = n.MarshalJSON() require.NoError(t, err) require.Equal(t, `{"a":null}`, string(out)) n = NewArray([]Node{Node{}}) out, err = n.MarshalJSON() require.NoError(t, err) require.Equal(t, "[]", string(out)) n = NewArray([]Node{NewNull(), Node{}}) out, err = n.MarshalJSON() require.NoError(t, err) require.Equal(t, `[null]`, string(out)) } type Path = []interface{} type testGetApi struct { json string path Path } type checkError func(error) bool func isSyntaxError(err error) bool { if err == nil { return false } return strings.HasPrefix(err.Error(), `"Syntax error at index`) } func isEmptySource(err error) bool { if err == nil 
{ return false } return strings.Contains(err.Error(), "no sources available") } func isErrNotExist(err error) bool { return err == ErrNotExist } func isErrUnsupportType(err error) bool { return err == ErrUnsupportType } func testSyntaxJson(t *testing.T, json string, path ...interface{}) { search := NewSearcher(json) _, err := search.GetByPath(path...) assert.True(t, isSyntaxError(err)) } func TestGetFromEmptyJson(t *testing.T) { tests := []testGetApi{ {"", nil}, {"", Path{}}, {"", Path{""}}, {"", Path{0}}, {"", Path{"", ""}}, } for _, test := range tests { f := func(t *testing.T) { search := NewSearcher(test.json) _, err := search.GetByPath(test.path...) assert.True(t, isEmptySource(err)) } t.Run(test.json, f) } } func TestGetFromSyntaxError(t *testing.T) { tests := []testGetApi{ {" \r\n\f\t", Path{}}, {"123.", Path{}}, {"+124", Path{}}, {"-", Path{}}, {"-e123", Path{}}, {"-1.e123", Path{}}, {"-12e456.1", Path{}}, {"-12e.1", Path{}}, {"[", Path{}}, {"{", Path{}}, {"[}", Path{}}, {"{]", Path{}}, {"{,}", Path{}}, {"[,]", Path{}}, {"tru", Path{}}, {"fals", Path{}}, {"nul", Path{}}, {`{"a":"`, Path{"a"}}, {`{"`, Path{}}, {`"`, Path{}}, {`"\"`, Path{}}, {`"\\\"`, Path{}}, {`"hello`, Path{}}, {`{{}}`, Path{}}, {`{[]}`, Path{}}, {`{:,}`, Path{}}, {`{test:error}`, Path{}}, {`{":true}`, Path{}}, {`{"" false}`, Path{}}, {`{ "" : "false }`, Path{}}, {`{"":"",}`, Path{}}, {`{ " test : true}`, Path{}}, {`{ "test" : tru }`, Path{}}, {`{ "test" : true , }`, Path{}}, {`{ {"test" : true , } }`, Path{}}, {`{"test":1. }`, Path{}}, {`{"\\\""`, Path{}}, {`{"\\\"":`, Path{}}, {`{"\\\":",""}`, Path{}}, {`[{]`, Path{}}, {`[tru]`, Path{}}, {`[-1.]`, Path{}}, {`[[]`, Path{}}, {`[[],`, Path{}}, {`[ true , false , [ ]`, Path{}}, {`[true, false, [],`, Path{}}, {`[true, false, [],]`, Path{}}, {`{"key": [true, false, []], "key2": {{}}`, Path{}}, } for _, test := range tests { f := func(t *testing.T) { testSyntaxJson(t, test.json, test.path...) path := append(Path{"key"}, test.path...) 
testSyntaxJson(t, `{"key":`+test.json, path...) path = append(Path{""}, test.path...) testSyntaxJson(t, `{"":`+test.json, path...) path = append(Path{1}, test.path...) testSyntaxJson(t, `["",`+test.json, path...) } t.Run(test.json, f) } } // NOTE: GetByPath API not validate the undemanded fields for performance. func TestGetWithInvalidUndemandedField(t *testing.T) { type Any = interface{} tests := []struct { json string path Path exp Any }{ {"-0xyz", Path{}, Any(float64(-0))}, {"-12e4xyz", Path{}, Any(float64(-12e4))}, {"truex", Path{}, Any(true)}, {"false,", Path{}, Any(false)}, {`{"a":{,xxx},"b":true}`, Path{"b"}, Any(true)}, {`{"a":[,xxx],"b":true}`, Path{"b"}, Any(true)}, } for _, test := range tests { f := func(t *testing.T) { search := NewSearcher(test.json) node, err := search.GetByPath(test.path...) assert.NoError(t, err) v, err := node.Interface() assert.NoError(t, err) assert.Equal(t, v, test.exp) } t.Run(test.json, f) } } func TestGet_InvalidPathType(t *testing.T) { assert.Panics(t, assert.PanicTestFunc(func() { data := `{"a":[{"b":true}]}` s := NewSearcher(data) s.GetByPath("a", true) s = NewSearcher(data) s.GetByPath("a", nil) s = NewSearcher(data) s.GetByPath("a", -1) })) } ================================================ FILE: ast/error.go ================================================ package ast import ( "fmt" "strings" "unsafe" "github.com/bytedance/sonic/internal/native/types" ) func newError(err types.ParsingError, msg string) *Node { return &Node{ t: V_ERROR, l: uint(err), p: unsafe.Pointer(&msg), } } func newErrorPair(err SyntaxError) *Pair { return &Pair{0, "", *newSyntaxError(err)} } // Error returns error message if the node is invalid func (self Node) Error() string { if self.t != V_ERROR { return "" } else { return *(*string)(self.p) } } func newSyntaxError(err SyntaxError) *Node { msg := err.Description() return &Node{ t: V_ERROR, l: uint(err.Code), p: unsafe.Pointer(&msg), } } func (self *Parser) syntaxError(err types.ParsingError) 
SyntaxError { return SyntaxError{ Pos: self.p, Src: self.s, Code: err, } } func unwrapError(err error) *Node { if se, ok := err.(*Node); ok { return se } else if sse, ok := err.(Node); ok { return &sse } else { msg := err.Error() return &Node{ t: V_ERROR, p: unsafe.Pointer(&msg), } } } type SyntaxError struct { Pos int Src string Code types.ParsingError Msg string } func (self SyntaxError) Error() string { return fmt.Sprintf("%q", self.Description()) } func (self SyntaxError) Description() string { return "Syntax error " + self.description() } func (self SyntaxError) description() string { i := 16 p := self.Pos - i q := self.Pos + i /* check for empty source */ if self.Src == "" { return fmt.Sprintf("no sources available, the input json is empty: %#v", self) } /* prevent slicing before the beginning */ if p < 0 { p, q, i = 0, q-p, i+p } /* prevent slicing beyond the end */ if n := len(self.Src); q > n { n = q - n q = len(self.Src) /* move the left bound if possible */ if p > n { i += n p -= n } } /* left and right length */ x := clamp_zero(i) y := clamp_zero(q - p - i - 1) /* compose the error description */ return fmt.Sprintf( "at index %d: %s\n\n\t%s\n\t%s^%s\n", self.Pos, self.Message(), self.Src[p:q], strings.Repeat(".", x), strings.Repeat(".", y), ) } func (self SyntaxError) Message() string { if self.Msg == "" { return self.Code.Message() } return self.Msg } func clamp_zero(v int) int { if v < 0 { return 0 } else { return v } } ================================================ FILE: ast/iterator.go ================================================ /* * Copyright 2021 ByteDance Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package ast import ( "fmt" "github.com/bytedance/sonic/internal/caching" "github.com/bytedance/sonic/internal/native/types" ) type Pair struct { hash uint64 Key string Value Node } func NewPair(key string, val Node) Pair { return Pair{ hash: caching.StrHash(key), Key: key, Value: val, } } // Values returns iterator for array's children traversal func (self *Node) Values() (ListIterator, error) { if err := self.should(types.V_ARRAY); err != nil { return ListIterator{}, err } return self.values(), nil } func (self *Node) values() ListIterator { return ListIterator{Iterator{p: self}} } // Properties returns iterator for object's children traversal func (self *Node) Properties() (ObjectIterator, error) { if err := self.should(types.V_OBJECT); err != nil { return ObjectIterator{}, err } return self.properties(), nil } func (self *Node) properties() ObjectIterator { return ObjectIterator{Iterator{p: self}} } type Iterator struct { i int p *Node } func (self *Iterator) Pos() int { return self.i } func (self *Iterator) Len() int { return self.p.len() } // HasNext reports if it is the end of iteration or has error. 
func (self *Iterator) HasNext() bool { if !self.p.isLazy() { return self.p.Valid() && self.i < self.p.len() } else if self.p.t == _V_ARRAY_LAZY { return self.p.skipNextNode().Valid() } else if self.p.t == _V_OBJECT_LAZY { pair := self.p.skipNextPair() if pair == nil { return false } return pair.Value.Valid() } return false } // ListIterator is specialized iterator for V_ARRAY type ListIterator struct { Iterator } // ObjectIterator is specialized iterator for V_ARRAY type ObjectIterator struct { Iterator } func (self *ListIterator) next() *Node { next_start: if !self.HasNext() { return nil } else { n := self.p.nodeAt(self.i) self.i++ if !n.Exists() { goto next_start } return n } } // Next scans through children of underlying V_ARRAY, // copies each child to v, and returns .HasNext(). func (self *ListIterator) Next(v *Node) bool { n := self.next() if n == nil { return false } *v = *n return true } func (self *ObjectIterator) next() *Pair { next_start: if !self.HasNext() { return nil } else { n := self.p.pairAt(self.i) self.i++ if n == nil || !n.Value.Exists() { goto next_start } return n } } // Next scans through children of underlying V_OBJECT, // copies each child to v, and returns .HasNext(). func (self *ObjectIterator) Next(p *Pair) bool { n := self.next() if n == nil { return false } *p = *n return true } // Sequence represents scanning path of single-layer nodes. // Index indicates the value's order in both V_ARRAY and V_OBJECT json. // Key is the value's key (for V_OBJECT json only, otherwise it will be nil). type Sequence struct { Index int Key *string // Level int } // String is string representation of one Sequence func (s Sequence) String() string { k := "" if s.Key != nil { k = *s.Key } return fmt.Sprintf("Sequence(%d, %q)", s.Index, k) } type Scanner func(path Sequence, node *Node) bool // ForEach scans one V_OBJECT node's children from JSON head to tail, // and pass the Sequence and Node of corresponding JSON value. 
// // Especially, if the node is not V_ARRAY or V_OBJECT, // the node itself will be returned and Sequence.Index == -1. // // NOTICE: An unset node WON'T trigger sc, but its index still counts into Path.Index func (self *Node) ForEach(sc Scanner) error { if err := self.checkRaw(); err != nil { return err } switch self.itype() { case types.V_ARRAY: iter, err := self.Values() if err != nil { return err } v := iter.next() for v != nil { if !sc(Sequence{iter.i - 1, nil}, v) { return nil } v = iter.next() } case types.V_OBJECT: iter, err := self.Properties() if err != nil { return err } v := iter.next() for v != nil { if !sc(Sequence{iter.i - 1, &v.Key}, &v.Value) { return nil } v = iter.next() } default: if self.Check() != nil { return self } sc(Sequence{-1, nil}, self) } return nil } ================================================ FILE: ast/iterator_test.go ================================================ /* * Copyright 2021 ByteDance Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/

package ast

import (
	"fmt"
	"strconv"
	"testing"

	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
)

// getTestIteratorSample builds a JSON document of the form
// {"array":[0,1,...], "object":{"k0":0,"k1":1,...}} holding loop entries
// in each container, and returns it together with loop.
func getTestIteratorSample(loop int) (string, int) {
	var data []int
	var v1 = ""
	var v2 = ""
	for i := 0; i < loop; i++ {
		data = append(data, i*i)
		v1 += strconv.Itoa(i)
		v2 += `"k` + strconv.Itoa(i) + `":` + strconv.Itoa(i)
		if i != loop-1 {
			v1 += `,`
			v2 += `,`
		}
	}
	return `{"array":[` + v1 + `], "object":{` + v2 + `}}`, loop
}

// TestForEach checks the order of paths and nodes reported by ForEach,
// including a nested ForEach over the "array" child.
func TestForEach(t *testing.T) {
	pathes := []Sequence{}
	values := []*Node{}
	sc := func(path Sequence, node *Node) bool {
		pathes = append(pathes, path)
		values = append(values, node)
		// descend one level into the array child so its elements are recorded too
		if path.Key != nil && *path.Key == "array" {
			node.ForEach(func(path Sequence, node *Node) bool {
				pathes = append(pathes, path)
				values = append(values, node)
				return true
			})
		}
		return true
	}

	str, _ := getTestIteratorSample(3)
	fmt.Println(str)
	root, err := NewSearcher(str).GetByPath()
	if err != nil {
		t.Fatal(err)
	}
	err = root.ForEach(sc)
	if err != nil {
		t.Fatal(err)
	}

	eObjKey := "object"
	eArrKey := "array"
	expPath := []Sequence{
		{0, &eArrKey},
		{0, nil},
		{1, nil},
		{2, nil},
		{1, &eObjKey},
	}
	expValue := []*Node{
		root.Get("array"),
		root.GetByPath("array", 0),
		root.GetByPath("array", 1),
		root.GetByPath("array", 2),
		root.Get("object"),
	}
	// fmt.Printf("pathes:%+v\n", pathes)
	// fmt.Printf("values:%+v\n", values)
	assert.Equal(t, expPath, pathes)
	assert.Equal(t, expValue, values)
}

// TestRawIterator iterates an array and an object obtained through
// NewSearcher(...).GetByPath and checks values, keys, Pos() and Len().
func TestRawIterator(t *testing.T) {
	str, loop := getTestIteratorSample(_DEFAULT_NODE_CAP)
	fmt.Println(str)

	root, err := NewSearcher(str).GetByPath("array")
	if err != nil {
		t.Fatal(err)
	}
	ai, _ := root.Values()
	i := int64(0)
	for ai.HasNext() {
		v := &Node{}
		if !ai.Next(v) {
			t.Fatalf("no next")
		}
		x, _ := v.Int64()
		if i < int64(loop) && x != i {
			t.Fatalf("exp:%v, got:%v", i, v)
		}
		// Pos() is one past the element just returned
		if i != int64(ai.Pos())-1 || i >= int64(ai.Len()) {
			t.Fatal(i)
		}
		i++
	}
	if i != int64(loop) {
		t.Fatal(i)
	}

	root, err = NewSearcher(str).GetByPath("object")
	if err != nil {
		t.Fatal(err)
	}
	mi, _ := root.Properties()
	i = int64(0)
	for mi.HasNext() {
		v := &Pair{}
		if !mi.Next(v) {
			t.Fatalf("no next")
		}
		x, _ := v.Value.Int64()
		if i < int64(loop) && (x != i || v.Key != fmt.Sprintf("k%d", i)) {
			vv, _ := v.Value.Interface()
			t.Fatalf("exp:%v, got:%v", i, vv)
		}
		if i != int64(mi.Pos())-1 || i >= int64(mi.Len()) {
			t.Fatal(i)
		}
		i++
	}
	if i != int64(loop) {
		t.Fatal(i)
	}
}

// TestIterator runs the same checks as TestRawIterator on a tree produced by
// NewParser(...).Parse, and also checks that an empty object has no next pair.
func TestIterator(t *testing.T) {
	str, loop := getTestIteratorSample(_DEFAULT_NODE_CAP)
	fmt.Println(str)

	root, err := NewParser(str).Parse()
	if err != 0 {
		t.Fatal(err)
	}
	ai, _ := root.Get("array").Values()
	i := int64(0)
	for ai.HasNext() {
		v := &Node{}
		if !ai.Next(v) {
			t.Fatalf("no next")
		}
		x, _ := v.Int64()
		if i < int64(loop) && x != i {
			t.Fatalf("exp:%v, got:%v", i, v)
		}
		if i != int64(ai.Pos())-1 || i >= int64(ai.Len()) {
			t.Fatal(i)
		}
		i++
	}
	if i != int64(loop) {
		t.Fatal(i)
	}

	root, err = NewParser(str).Parse()
	if err != 0 {
		t.Fatal(err)
	}
	mi, _ := root.Get("object").Properties()
	i = int64(0)
	for mi.HasNext() {
		v := &Pair{}
		if !mi.Next(v) {
			t.Fatalf("no next")
		}
		x, _ := v.Value.Int64()
		if i < int64(loop) && (x != i || v.Key != fmt.Sprintf("k%d", i)) {
			vv, _ := v.Value.Interface()
			t.Fatalf("exp:%v, got:%v", i, vv)
		}
		if i != int64(mi.Pos())-1 || i >= int64(mi.Len()) {
			t.Fatal(i)
		}
		i++
	}
	if i != int64(loop) {
		t.Fatal(i)
	}

	// an empty object must report no children
	str, _ = getTestIteratorSample(0)
	root, err = NewParser(str).Parse()
	if err != 0 {
		t.Fatal(err)
	}
	mi, _ = root.Get("object").Properties()
	if mi.HasNext() {
		t.Fatalf("should not have next")
	}
}

// TestExist checks Exists() on present and missing nodes, and that children
// removed with Unset/UnsetByIndex are neither visited by ForEach nor
// serialized by Raw().
func TestExist(t *testing.T) {
	n := NewRaw(`null`)
	if !n.Exists() {
		t.Fatal()
	}

	nn := n.Get("xx")
	if nn.Exists() {
		t.Fatal()
	}

	root := NewRaw(`{"a":1, "b":[1,2], "c":{"1":1, "2":2}}`)
	if !root.Exists() {
		t.Fatal()
	}

	exi, err := root.Unset("a")
	if !exi || err != nil {
		t.Fatal(exi, err)
	}

	root.ForEach(func(path Sequence, node *Node) bool {
		// "a" was unset above and must not be reported
		if path.Key != nil && *path.Key == "a" {
			t.Fatal()
		}
		if path.Index == 0 {
			if *path.Key != "b" {
				t.Fatal()
			}
			exi, err := node.UnsetByIndex(1)
			if !exi || err != nil {
				t.Fatal(exi, err)
			}
			node.ForEach(func(path Sequence, node *Node) bool {
				if path.Index == 1 {
					t.Fatal()
				}
				return true
			})
		}
		if path.Index == 1 {
			if *path.Key != "c" {
				t.Fatal()
			}
			exi, err := node.UnsetByIndex(1)
			if !exi || err != nil {
				t.Fatal(exi, err)
			}
			node.ForEach(func(path Sequence, node *Node) bool {
				if path.Index == 1 {
					t.Fatal()
				}
				return true
			})
		}
		return true
	})

	out, err := root.Raw()
	if err != nil {
		t.Fatal(err)
	}
	require.Equal(t, `{"b":[1],"c":{"1":1}}`, out)
}

// BenchmarkArrays measures Array() conversion followed by a range loop.
func BenchmarkArrays(b *testing.B) {
	for i := 0; i < b.N; i++ {
		root, err := NewSearcher(_TwitterJson).GetByPath("statuses", 1, "entities", "hashtags")
		if err != nil {
			b.Fatal(err)
		}
		a, _ := root.Array()
		for _, v := range a {
			_ = v
		}
	}
}

// BenchmarkListIterator measures traversal of the same array through ListIterator.
func BenchmarkListIterator(b *testing.B) {
	for i := 0; i < b.N; i++ {
		root, err := NewSearcher(_TwitterJson).GetByPath("statuses", 1, "entities", "hashtags")
		if err != nil {
			b.Fatal(err)
		}
		it, _ := root.Values()
		for it.HasNext() {
			v := &Node{}
			if !it.Next(v) {
				b.Fatalf("no value")
			}
		}
	}
}

// BenchmarkMap measures Map() conversion followed by a range loop.
func BenchmarkMap(b *testing.B) {
	for i := 0; i < b.N; i++ {
		root, err := NewSearcher(_TwitterJson).GetByPath("statuses", 1, "user")
		if err != nil {
			b.Fatal(err)
		}
		m, _ := root.Map()
		for k, v := range m {
			_ = v
			_ = k
		}
	}
}

// BenchmarkObjectIterator measures traversal of the same object through ObjectIterator.
func BenchmarkObjectIterator(b *testing.B) {
	for i := 0; i < b.N; i++ {
		root, err := NewSearcher(_TwitterJson).GetByPath("statuses", 1, "user")
		if err != nil {
			b.Fatal(err)
		}
		it, _ := root.Properties()
		for it.HasNext() {
			v := &Pair{}
			if !it.Next(v) {
				b.Fatalf("no value")
			}
		}
	}
}

================================================
FILE: ast/node.go
================================================
/*
 * Copyright 2021 ByteDance Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package ast

import (
	"encoding/json"
	"fmt"
	"strconv"
	"sync"
	"sync/atomic"
	"unsafe"

	"github.com/bytedance/sonic/internal/native/types"
	"github.com/bytedance/sonic/internal/rt"
)

// Internal type tags. _V_LAZY and _V_RAW are flag bits OR-ed onto a base
// type; _MASK_LAZY and _MASK_RAW are the masks Type()/itype() apply to
// recover the base type from a tagged value.
const (
	_V_NONE        types.ValueType = 0
	_V_NODE_BASE   types.ValueType = 1 << 5
	_V_LAZY        types.ValueType = 1 << 7
	_V_RAW         types.ValueType = 1 << 8
	_V_NUMBER                      = _V_NODE_BASE + 1
	_V_ANY                         = _V_NODE_BASE + 2
	_V_ARRAY_LAZY                  = _V_LAZY | types.V_ARRAY
	_V_OBJECT_LAZY                 = _V_LAZY | types.V_OBJECT
	_MASK_LAZY                     = _V_LAZY - 1
	_MASK_RAW                      = _V_RAW - 1
)

// Public node type values, as returned by Node.Type() and Node.TypeSafe().
const (
	V_NONE   = 0
	V_ERROR  = 1
	V_NULL   = int(types.V_NULL)
	V_TRUE   = int(types.V_TRUE)
	V_FALSE  = int(types.V_FALSE)
	V_ARRAY  = int(types.V_ARRAY)
	V_OBJECT = int(types.V_OBJECT)
	V_STRING = int(types.V_STRING)
	V_NUMBER = int(_V_NUMBER)
	V_ANY    = int(_V_ANY)
)

// Node is one JSON value.
//
//   - t is the type tag, possibly carrying the _V_LAZY / _V_RAW flag bits
//   - l is the length reported by Len() for arrays, objects and strings
//   - p points to the payload; its concrete type depends on t
//     (for example *linkedNodes for V_ARRAY, *linkedPairs for V_OBJECT)
//   - m is allocated by Load() — presumably to guard concurrent access; confirm
//     against the locking helpers defined elsewhere in the package
type Node struct {
	t types.ValueType
	l uint
	p unsafe.Pointer
	m *sync.RWMutex
}

// UnmarshalJSON is just an adapter to json.Unmarshaler.
// If you want better performance, use Searcher.GetByPath() directly func (self *Node) UnmarshalJSON(data []byte) (err error) { *self = newRawNode(rt.Mem2Str(data), switchRawType(data[0]), false) return nil } /** Node Type Accessor **/ // Type returns json type represented by the node // It will be one of bellows: // // V_NONE = 0 (empty node, key not exists) // V_ERROR = 1 (error node) // V_NULL = 2 (json value `null`, key exists) // V_TRUE = 3 (json value `true`) // V_FALSE = 4 (json value `false`) // V_ARRAY = 5 (json value array) // V_OBJECT = 6 (json value object) // V_STRING = 7 (json value string) // V_NUMBER = 33 (json value number ) // V_ANY = 34 (golang interface{}) // // Deprecated: not concurrent safe. Use TypeSafe instead func (self Node) Type() int { return int(self.t & _MASK_LAZY & _MASK_RAW) } // Type concurrently-safe returns json type represented by the node // It will be one of bellows: // // V_NONE = 0 (empty node, key not exists) // V_ERROR = 1 (error node) // V_NULL = 2 (json value `null`, key exists) // V_TRUE = 3 (json value `true`) // V_FALSE = 4 (json value `false`) // V_ARRAY = 5 (json value array) // V_OBJECT = 6 (json value object) // V_STRING = 7 (json value string) // V_NUMBER = 33 (json value number ) // V_ANY = 34 (golang interface{}) func (self *Node) TypeSafe() int { return int(self.loadt() & _MASK_LAZY & _MASK_RAW) } func (self *Node) itype() types.ValueType { return self.t & _MASK_LAZY & _MASK_RAW } // Exists returns false only if the self is nil or empty node V_NONE func (self *Node) Exists() bool { if self == nil { return false } t := self.loadt() return t != V_ERROR && t != _V_NONE } // Valid reports if self is NOT V_ERROR or nil func (self *Node) Valid() bool { if self == nil { return false } return self.loadt() != V_ERROR } // Check checks if the node itself is valid, and return: // - ErrNotExist If the node is nil // - Its underlying error If the node is V_ERROR func (self *Node) Check() error { if self == nil { return 
ErrNotExist } else if self.loadt() != V_ERROR { return nil } else { return self } } func (self *Node) checkFast() error { if self == nil { return ErrNotExist } else if self.t != V_ERROR { return nil } else { return self } } // isRaw returns true if node's underlying value is raw json // // Deprecated: not concurrent safe func (self Node) IsRaw() bool { return self.t&_V_RAW != 0 } // IsRaw returns true if node's underlying value is raw json func (self *Node) isRaw() bool { return self.loadt()&_V_RAW != 0 } func (self *Node) isLazy() bool { return self != nil && self.t&_V_LAZY != 0 } func (self *Node) isAny() bool { return self != nil && self.loadt() == _V_ANY } /** Simple Value Methods **/ // Raw returns json representation of the node, func (self *Node) Raw() (string, error) { if self == nil { return "", ErrNotExist } lock := self.rlock() if !self.isRaw() { if lock { self.runlock() } buf, err := self.MarshalJSON() return rt.Mem2Str(buf), err } ret := self.toString() if lock { self.runlock() } return ret, nil } func (self *Node) checkRaw() error { if self == nil { return ErrNotExist } t := self.loadt() if t == V_ERROR { return self } if t&_V_RAW != 0 { self.parseRaw(false) } return self.checkFast() } // Bool returns bool value represented by this node, // including types.V_TRUE|V_FALSE|V_NUMBER|V_STRING|V_ANY|V_NULL, // V_NONE will return error func (self *Node) Bool() (bool, error) { if err := self.checkRaw(); err != nil { return false, err } switch self.t { case types.V_TRUE: return true, nil case types.V_FALSE: return false, nil case types.V_NULL: return false, nil case _V_NUMBER: if i, err := self.toInt64(); err == nil { return i != 0, nil } else if f, err := self.toFloat64(); err == nil { return f != 0, nil } else { return false, err } case types.V_STRING: return strconv.ParseBool(self.toString()) case _V_ANY: any := self.packAny() switch v := any.(type) { case bool: return v, nil case int: return v != 0, nil case int8: return v != 0, nil case int16: return v 
!= 0, nil case int32: return v != 0, nil case int64: return v != 0, nil case uint: return v != 0, nil case uint8: return v != 0, nil case uint16: return v != 0, nil case uint32: return v != 0, nil case uint64: return v != 0, nil case float32: return v != 0, nil case float64: return v != 0, nil case string: return strconv.ParseBool(v) case json.Number: if i, err := v.Int64(); err == nil { return i != 0, nil } else if f, err := v.Float64(); err == nil { return f != 0, nil } else { return false, err } default: return false, ErrUnsupportType } default: return false, ErrUnsupportType } } // Int64 casts the node to int64 value, // including V_NUMBER|V_TRUE|V_FALSE|V_ANY|V_STRING // V_NONE it will return error func (self *Node) Int64() (int64, error) { if err := self.checkRaw(); err != nil { return 0, err } switch self.t { case _V_NUMBER, types.V_STRING: if i, err := self.toInt64(); err == nil { return i, nil } else if f, err := self.toFloat64(); err == nil { return int64(f), nil } else { return 0, err } case types.V_TRUE: return 1, nil case types.V_FALSE: return 0, nil case types.V_NULL: return 0, nil case _V_ANY: any := self.packAny() switch v := any.(type) { case bool: if v { return 1, nil } else { return 0, nil } case int: return int64(v), nil case int8: return int64(v), nil case int16: return int64(v), nil case int32: return int64(v), nil case int64: return int64(v), nil case uint: return int64(v), nil case uint8: return int64(v), nil case uint16: return int64(v), nil case uint32: return int64(v), nil case uint64: return int64(v), nil case float32: return int64(v), nil case float64: return int64(v), nil case string: if i, err := strconv.ParseInt(v, 10, 64); err == nil { return i, nil } else if f, err := strconv.ParseFloat(v, 64); err == nil { return int64(f), nil } else { return 0, err } case json.Number: if i, err := v.Int64(); err == nil { return i, nil } else if f, err := v.Float64(); err == nil { return int64(f), nil } else { return 0, err } default: return 0, 
ErrUnsupportType } default: return 0, ErrUnsupportType } } // StrictInt64 exports underlying int64 value, including V_NUMBER, V_ANY func (self *Node) StrictInt64() (int64, error) { if err := self.checkRaw(); err != nil { return 0, err } switch self.t { case _V_NUMBER: return self.toInt64() case _V_ANY: any := self.packAny() switch v := any.(type) { case int: return int64(v), nil case int8: return int64(v), nil case int16: return int64(v), nil case int32: return int64(v), nil case int64: return int64(v), nil case uint: return int64(v), nil case uint8: return int64(v), nil case uint16: return int64(v), nil case uint32: return int64(v), nil case uint64: return int64(v), nil case json.Number: if i, err := v.Int64(); err == nil { return i, nil } else { return 0, err } default: return 0, ErrUnsupportType } default: return 0, ErrUnsupportType } } func castNumber(v bool) json.Number { if v { return json.Number("1") } else { return json.Number("0") } } // Number casts node to float64, // including V_NUMBER|V_TRUE|V_FALSE|V_ANY|V_STRING|V_NULL, // V_NONE it will return error func (self *Node) Number() (json.Number, error) { if err := self.checkRaw(); err != nil { return json.Number(""), err } switch self.t { case _V_NUMBER: return self.toNumber(), nil case types.V_STRING: if _, err := self.toInt64(); err == nil { return self.toNumber(), nil } else if _, err := self.toFloat64(); err == nil { return self.toNumber(), nil } else { return json.Number(""), err } case types.V_TRUE: return json.Number("1"), nil case types.V_FALSE: return json.Number("0"), nil case types.V_NULL: return json.Number("0"), nil case _V_ANY: any := self.packAny() switch v := any.(type) { case bool: return castNumber(v), nil case int: return castNumber(v != 0), nil case int8: return castNumber(v != 0), nil case int16: return castNumber(v != 0), nil case int32: return castNumber(v != 0), nil case int64: return castNumber(v != 0), nil case uint: return castNumber(v != 0), nil case uint8: return castNumber(v 
!= 0), nil case uint16: return castNumber(v != 0), nil case uint32: return castNumber(v != 0), nil case uint64: return castNumber(v != 0), nil case float32: return castNumber(v != 0), nil case float64: return castNumber(v != 0), nil case string: if _, err := strconv.ParseFloat(v, 64); err == nil { return json.Number(v), nil } else { return json.Number(""), err } case json.Number: return v, nil default: return json.Number(""), ErrUnsupportType } default: return json.Number(""), ErrUnsupportType } } // Number exports underlying float64 value, including V_NUMBER, V_ANY of json.Number func (self *Node) StrictNumber() (json.Number, error) { if err := self.checkRaw(); err != nil { return json.Number(""), err } switch self.t { case _V_NUMBER: return self.toNumber(), nil case _V_ANY: if v, ok := self.packAny().(json.Number); ok { return v, nil } else { return json.Number(""), ErrUnsupportType } default: return json.Number(""), ErrUnsupportType } } // String cast node to string, // including V_NUMBER|V_TRUE|V_FALSE|V_ANY|V_STRING|V_NULL, // V_NONE it will return error func (self *Node) String() (string, error) { if err := self.checkRaw(); err != nil { return "", err } switch self.t { case types.V_NULL: return "", nil case types.V_TRUE: return "true", nil case types.V_FALSE: return "false", nil case types.V_STRING, _V_NUMBER: return self.toString(), nil case _V_ANY: any := self.packAny() switch v := any.(type) { case bool: return strconv.FormatBool(v), nil case int: return strconv.Itoa(v), nil case int8: return strconv.Itoa(int(v)), nil case int16: return strconv.Itoa(int(v)), nil case int32: return strconv.Itoa(int(v)), nil case int64: return strconv.Itoa(int(v)), nil case uint: return strconv.Itoa(int(v)), nil case uint8: return strconv.Itoa(int(v)), nil case uint16: return strconv.Itoa(int(v)), nil case uint32: return strconv.Itoa(int(v)), nil case uint64: return strconv.Itoa(int(v)), nil case float32: return strconv.FormatFloat(float64(v), 'g', -1, 64), nil case float64: 
return strconv.FormatFloat(float64(v), 'g', -1, 64), nil case string: return v, nil case json.Number: return v.String(), nil default: return "", ErrUnsupportType } default: return "", ErrUnsupportType } } // StrictString returns string value (unescaped), including V_STRING, V_ANY of string. // In other cases, it will return empty string. func (self *Node) StrictString() (string, error) { if err := self.checkRaw(); err != nil { return "", err } switch self.t { case types.V_STRING: return self.toString(), nil case _V_ANY: if v, ok := self.packAny().(string); ok { return v, nil } else { return "", ErrUnsupportType } default: return "", ErrUnsupportType } } // Float64 cast node to float64, // including V_NUMBER|V_TRUE|V_FALSE|V_ANY|V_STRING|V_NULL, // V_NONE it will return error func (self *Node) Float64() (float64, error) { if err := self.checkRaw(); err != nil { return 0.0, err } switch self.t { case _V_NUMBER, types.V_STRING: return self.toFloat64() case types.V_TRUE: return 1.0, nil case types.V_FALSE: return 0.0, nil case types.V_NULL: return 0.0, nil case _V_ANY: any := self.packAny() switch v := any.(type) { case bool: if v { return 1.0, nil } else { return 0.0, nil } case int: return float64(v), nil case int8: return float64(v), nil case int16: return float64(v), nil case int32: return float64(v), nil case int64: return float64(v), nil case uint: return float64(v), nil case uint8: return float64(v), nil case uint16: return float64(v), nil case uint32: return float64(v), nil case uint64: return float64(v), nil case float32: return float64(v), nil case float64: return float64(v), nil case string: if f, err := strconv.ParseFloat(v, 64); err == nil { return float64(f), nil } else { return 0, err } case json.Number: if f, err := v.Float64(); err == nil { return float64(f), nil } else { return 0, err } default: return 0, ErrUnsupportType } default: return 0.0, ErrUnsupportType } } func (self *Node) StrictBool() (bool, error) { if err := self.checkRaw(); err != nil { 
return false, err } switch self.t { case types.V_TRUE: return true, nil case types.V_FALSE: return false, nil case _V_ANY: any := self.packAny() switch v := any.(type) { case bool: return v, nil default: return false, ErrUnsupportType } default: return false, ErrUnsupportType } } // Float64 exports underlying float64 value, including V_NUMBER, V_ANY func (self *Node) StrictFloat64() (float64, error) { if err := self.checkRaw(); err != nil { return 0.0, err } switch self.t { case _V_NUMBER: return self.toFloat64() case _V_ANY: any := self.packAny() switch v := any.(type) { case float32: return float64(v), nil case float64: return float64(v), nil default: return 0, ErrUnsupportType } default: return 0.0, ErrUnsupportType } } /** Sequential Value Methods **/ // Len returns children count of a array|object|string node // WARN: For partially loaded node, it also works but only counts the parsed children func (self *Node) Len() (int, error) { if err := self.checkRaw(); err != nil { return 0, err } if self.t == types.V_ARRAY || self.t == types.V_OBJECT || self.t == _V_ARRAY_LAZY || self.t == _V_OBJECT_LAZY || self.t == types.V_STRING { return int(self.l), nil } else if self.t == _V_NONE || self.t == types.V_NULL { return 0, nil } else { return 0, ErrUnsupportType } } func (self *Node) len() int { return int(self.l) } // Cap returns malloc capacity of a array|object node for children func (self *Node) Cap() (int, error) { if err := self.checkRaw(); err != nil { return 0, err } switch self.t { case types.V_ARRAY: return (*linkedNodes)(self.p).Cap(), nil case types.V_OBJECT: return (*linkedPairs)(self.p).Cap(), nil case _V_ARRAY_LAZY: return (*parseArrayStack)(self.p).v.Cap(), nil case _V_OBJECT_LAZY: return (*parseObjectStack)(self.p).v.Cap(), nil case _V_NONE, types.V_NULL: return 0, nil default: return 0, ErrUnsupportType } } // Set sets the node of given key under self, and reports if the key has existed. 
// // If self is V_NONE or V_NULL, it becomes V_OBJECT and sets the node at the key. func (self *Node) Set(key string, node Node) (bool, error) { if err := self.checkRaw(); err != nil { return false, err } // check the node, not use Check() to avoid unescape the node parameter if node.t == V_ERROR { return false, node } if self.t == _V_NONE || self.t == types.V_NULL { *self = NewObject([]Pair{NewPair(key, node)}) return false, nil } else if self.itype() != types.V_OBJECT { return false, ErrUnsupportType } p := self.Get(key) if !p.Exists() { // self must be fully-loaded here if self.len() == 0 { *self = newObject(new(linkedPairs)) } s := (*linkedPairs)(self.p) s.Push(NewPair(key, node)) self.l++ return false, nil } else if err := p.Check(); err != nil { return false, err } *p = node return true, nil } // SetAny wraps val with V_ANY node, and Set() the node. func (self *Node) SetAny(key string, val interface{}) (bool, error) { return self.Set(key, NewAny(val)) } // Unset REMOVE (soft) the node of given key under object parent, and reports if the key has existed. func (self *Node) Unset(key string) (bool, error) { if err := self.should(types.V_OBJECT); err != nil { return false, err } // NOTICE: must get accurate length before deduct if err := self.skipAllKey(); err != nil { return false, err } p, i := self.skipKey(key) if !p.Exists() { return false, nil } else if err := p.Check(); err != nil { return false, err } self.removePairAt(i) return true, nil } // SetByIndex sets the node of given index, and reports if the key has existed. // // The index must be within self's children. 
func (self *Node) SetByIndex(index int, node Node) (bool, error) { if err := self.checkRaw(); err != nil { return false, err } if err := node.Check(); err != nil { return false, err } if index == 0 && (self.t == _V_NONE || self.t == types.V_NULL) { *self = NewArray([]Node{node}) return false, nil } p := self.Index(index) if !p.Exists() { return false, ErrNotExist } else if err := p.Check(); err != nil { return false, err } *p = node return true, nil } // SetAny wraps val with V_ANY node, and SetByIndex() the node. func (self *Node) SetAnyByIndex(index int, val interface{}) (bool, error) { return self.SetByIndex(index, NewAny(val)) } // UnsetByIndex REMOVE (softly) the node of given index. // // WARN: this will change address of elements, which is a dangerous action. // Use Unset() for object or Pop() for array instead. func (self *Node) UnsetByIndex(index int) (bool, error) { if err := self.checkRaw(); err != nil { return false, err } var p *Node it := self.itype() if it == types.V_ARRAY { if err := self.skipAllIndex(); err != nil { return false, err } p = self.nodeAt(index) } else if it == types.V_OBJECT { if err := self.skipAllKey(); err != nil { return false, err } pr := self.pairAt(index) if pr == nil { return false, ErrNotExist } p = &pr.Value } else { return false, ErrUnsupportType } if !p.Exists() { return false, ErrNotExist } // last elem if index == self.len()-1 { return true, self.Pop() } // not last elem, self.len() change but linked-chunk not change if it == types.V_ARRAY { self.removeNode(index) } else if it == types.V_OBJECT { self.removePair(index) } return true, nil } // Add appends the given node under self. // // If self is V_NONE or V_NULL, it becomes V_ARRAY and sets the node at index 0. 
func (self *Node) Add(node Node) error { if err := self.checkRaw(); err != nil { return err } if self != nil && (self.t == _V_NONE || self.t == types.V_NULL) { *self = NewArray([]Node{node}) return nil } if err := self.should(types.V_ARRAY); err != nil { return err } s, err := self.unsafeArray() if err != nil { return err } // Notice: array won't have unset node in tail s.Push(node) self.l++ return nil } // Pop remove the last child of the V_Array or V_Object node. func (self *Node) Pop() error { if err := self.checkRaw(); err != nil { return err } if it := self.itype(); it == types.V_ARRAY { s, err := self.unsafeArray() if err != nil { return err } // remove tail unset nodes for i := s.Len() - 1; i >= 0; i-- { if s.At(i).Exists() { s.Pop() self.l-- break } s.Pop() } } else if it == types.V_OBJECT { s, err := self.unsafeMap() if err != nil { return err } // remove tail unset nodes for i := s.Len() - 1; i >= 0; i-- { if p := s.At(i); p != nil && p.Value.Exists() { s.Pop() self.l-- break } s.Pop() } } else { return ErrUnsupportType } return nil } // Move moves the child at src index to dst index, // meanwhile slides siblings from src+1 to dst. // // WARN: this will change address of elements, which is a dangerous action. func (self *Node) Move(dst, src int) error { if err := self.should(types.V_ARRAY); err != nil { return err } s, err := self.unsafeArray() if err != nil { return err } // check if any unset node exists if l := s.Len(); self.len() != l { di, si := dst, src // find real pos of src and dst for i := 0; i < l; i++ { if s.At(i).Exists() { di-- si-- } if di == -1 { dst = i di-- } if si == -1 { src = i si-- } if di == -2 && si == -2 { break } } } s.MoveOne(src, dst) return nil } // AddAny wraps val with V_ANY node, and Add() the node. func (self *Node) AddAny(val interface{}) error { return self.Add(NewAny(val)) } // GetByPath load given path on demands, // which only ensure nodes before this path got parsed. 
// // Note, the api expects the json is well-formed at least, // otherwise it may return unexpected result. func (self *Node) GetByPath(path ...interface{}) *Node { if !self.Valid() { return self } var s = self for _, p := range path { switch p := p.(type) { case int: s = s.Index(p) if !s.Valid() { return s } case string: s = s.Get(p) if !s.Valid() { return s } default: panic("path must be either int or string") } } return s } // Get loads given key of an object node on demands func (self *Node) Get(key string) *Node { if err := self.should(types.V_OBJECT); err != nil { return unwrapError(err) } n, _ := self.skipKey(key) return n } // Index indexies node at given idx, // node type CAN be either V_OBJECT or V_ARRAY func (self *Node) Index(idx int) *Node { if err := self.checkRaw(); err != nil { return unwrapError(err) } it := self.itype() if it == types.V_ARRAY { return self.skipIndex(idx) } else if it == types.V_OBJECT { pr := self.skipIndexPair(idx) if pr == nil { return newError(_ERR_NOT_FOUND, "value not exists") } return &pr.Value } else { return newError(_ERR_UNSUPPORT_TYPE, fmt.Sprintf("unsupported type: %v", self.itype())) } } // IndexPair indexies pair at given idx, // node type MUST be either V_OBJECT func (self *Node) IndexPair(idx int) *Pair { if err := self.should(types.V_OBJECT); err != nil { return nil } return self.skipIndexPair(idx) } func (self *Node) indexOrGet(idx int, key string) (*Node, int) { if err := self.should(types.V_OBJECT); err != nil { return unwrapError(err), idx } pr := self.skipIndexPair(idx) if pr != nil && pr.Key == key { return &pr.Value, idx } return self.skipKey(key) } // IndexOrGet firstly use idx to index a value and check if its key matches // If not, then use the key to search value func (self *Node) IndexOrGet(idx int, key string) *Node { node, _ := self.indexOrGet(idx, key) return node } // IndexOrGetWithIdx attempts to retrieve a node by index and key, returning the node and its correct index. 
// If the key does not match at the given index, it searches by key and returns the node with its updated index. func (self *Node) IndexOrGetWithIdx(idx int, key string) (*Node, int) { return self.indexOrGet(idx, key) } /** Generic Value Converters **/ // Map loads all keys of an object node func (self *Node) Map() (map[string]interface{}, error) { if self.isAny() { any := self.packAny() if v, ok := any.(map[string]interface{}); ok { return v, nil } else { return nil, ErrUnsupportType } } if err := self.should(types.V_OBJECT); err != nil { return nil, err } if err := self.loadAllKey(false); err != nil { return nil, err } return self.toGenericObject() } // MapUseNumber loads all keys of an object node, with numeric nodes cast to json.Number func (self *Node) MapUseNumber() (map[string]interface{}, error) { if self.isAny() { any := self.packAny() if v, ok := any.(map[string]interface{}); ok { return v, nil } else { return nil, ErrUnsupportType } } if err := self.should(types.V_OBJECT); err != nil { return nil, err } if err := self.loadAllKey(false); err != nil { return nil, err } return self.toGenericObjectUseNumber() } // MapUseNode scans both parsed and non-parsed children nodes, // and map them by their keys func (self *Node) MapUseNode() (map[string]Node, error) { if self.isAny() { any := self.packAny() if v, ok := any.(map[string]Node); ok { return v, nil } else { return nil, ErrUnsupportType } } if err := self.should(types.V_OBJECT); err != nil { return nil, err } if err := self.skipAllKey(); err != nil { return nil, err } return self.toGenericObjectUseNode() } // MapUnsafe exports the underlying pointer to its children map // WARN: don't use it unless you know what you are doing // // Deprecated: this API now returns copied nodes instead of directly reference, // func (self *Node) UnsafeMap() ([]Pair, error) { // if err := self.should(types.V_OBJECT, "an object"); err != nil { // return nil, err // } // if err := self.skipAllKey(); err != nil { // return nil, 
err // } // return self.toGenericObjectUsePair() // } //go:nocheckptr func (self *Node) unsafeMap() (*linkedPairs, error) { if err := self.skipAllKey(); err != nil { return nil, err } if self.p == nil { *self = newObject(new(linkedPairs)) } return (*linkedPairs)(self.p), nil } // SortKeys sorts children of a V_OBJECT node in ascending key-order. // If recurse is true, it recursively sorts children's children as long as a V_OBJECT node is found. func (self *Node) SortKeys(recurse bool) error { // check raw node first if err := self.checkRaw(); err != nil { return err } if self.itype() == types.V_OBJECT { return self.sortKeys(recurse) } else if self.itype() == types.V_ARRAY { var err error err2 := self.ForEach(func(path Sequence, node *Node) bool { it := node.itype() if it == types.V_ARRAY || it == types.V_OBJECT { err = node.SortKeys(recurse) if err != nil { return false } } return true }) if err != nil { return err } return err2 } else { return nil } } func (self *Node) sortKeys(recurse bool) (err error) { // check raw node first if err := self.checkRaw(); err != nil { return err } ps, err := self.unsafeMap() if err != nil { return err } ps.Sort() if recurse { var sc Scanner sc = func(path Sequence, node *Node) bool { if node.itype() == types.V_OBJECT { if err := node.sortKeys(recurse); err != nil { return false } } if node.itype() == types.V_ARRAY { if err := node.ForEach(sc); err != nil { return false } } return true } if err := self.ForEach(sc); err != nil { return err } } return nil } // Array loads all indexes of an array node func (self *Node) Array() ([]interface{}, error) { if self.isAny() { any := self.packAny() if v, ok := any.([]interface{}); ok { return v, nil } else { return nil, ErrUnsupportType } } if err := self.should(types.V_ARRAY); err != nil { return nil, err } if err := self.loadAllIndex(false); err != nil { return nil, err } return self.toGenericArray() } // ArrayUseNumber loads all indexes of an array node, with numeric nodes cast to 
json.Number func (self *Node) ArrayUseNumber() ([]interface{}, error) { if self.isAny() { any := self.packAny() if v, ok := any.([]interface{}); ok { return v, nil } else { return nil, ErrUnsupportType } } if err := self.should(types.V_ARRAY); err != nil { return nil, err } if err := self.loadAllIndex(false); err != nil { return nil, err } return self.toGenericArrayUseNumber() } // ArrayUseNode copies both parsed and non-parsed children nodes, // and indexes them by original order func (self *Node) ArrayUseNode() ([]Node, error) { if self.isAny() { any := self.packAny() if v, ok := any.([]Node); ok { return v, nil } else { return nil, ErrUnsupportType } } if err := self.should(types.V_ARRAY); err != nil { return nil, err } if err := self.skipAllIndex(); err != nil { return nil, err } return self.toGenericArrayUseNode() } // ArrayUnsafe exports the underlying pointer to its children array // WARN: don't use it unless you know what you are doing // // Deprecated: this API now returns copied nodes instead of directly reference, // which has no difference with ArrayUseNode // func (self *Node) UnsafeArray() ([]Node, error) { // if err := self.should(types.V_ARRAY, "an array"); err != nil { // return nil, err // } // if err := self.skipAllIndex(); err != nil { // return nil, err // } // return self.toGenericArrayUseNode() // } func (self *Node) unsafeArray() (*linkedNodes, error) { if err := self.skipAllIndex(); err != nil { return nil, err } if self.p == nil { *self = newArray(new(linkedNodes)) } return (*linkedNodes)(self.p), nil } // Interface loads all children under all paths from this node, // and converts itself as generic type. 
// WARN: all numeric nodes are cast to float64 func (self *Node) Interface() (interface{}, error) { if err := self.checkRaw(); err != nil { return nil, err } switch self.t { case V_ERROR: return nil, self.Check() case types.V_NULL: return nil, nil case types.V_TRUE: return true, nil case types.V_FALSE: return false, nil case types.V_ARRAY: return self.toGenericArray() case types.V_OBJECT: return self.toGenericObject() case types.V_STRING: return self.toString(), nil case _V_NUMBER: v, err := self.toFloat64() if err != nil { return nil, err } return v, nil case _V_ARRAY_LAZY: if err := self.loadAllIndex(false); err != nil { return nil, err } return self.toGenericArray() case _V_OBJECT_LAZY: if err := self.loadAllKey(false); err != nil { return nil, err } return self.toGenericObject() case _V_ANY: switch v := self.packAny().(type) { case Node: return v.Interface() case *Node: return v.Interface() default: return v, nil } default: return nil, ErrUnsupportType } } func (self *Node) packAny() interface{} { return *(*interface{})(self.p) } // InterfaceUseNumber works same with Interface() // except numeric nodes are cast to json.Number func (self *Node) InterfaceUseNumber() (interface{}, error) { if err := self.checkRaw(); err != nil { return nil, err } switch self.t { case V_ERROR: return nil, self.Check() case types.V_NULL: return nil, nil case types.V_TRUE: return true, nil case types.V_FALSE: return false, nil case types.V_ARRAY: return self.toGenericArrayUseNumber() case types.V_OBJECT: return self.toGenericObjectUseNumber() case types.V_STRING: return self.toString(), nil case _V_NUMBER: return self.toNumber(), nil case _V_ARRAY_LAZY: if err := self.loadAllIndex(false); err != nil { return nil, err } return self.toGenericArrayUseNumber() case _V_OBJECT_LAZY: if err := self.loadAllKey(false); err != nil { return nil, err } return self.toGenericObjectUseNumber() case _V_ANY: return self.packAny(), nil default: return nil, ErrUnsupportType } } // InterfaceUseNode 
clone itself as a new node, // or its children as map[string]Node (or []Node) func (self *Node) InterfaceUseNode() (interface{}, error) { if err := self.checkRaw(); err != nil { return nil, err } switch self.t { case types.V_ARRAY: return self.toGenericArrayUseNode() case types.V_OBJECT: return self.toGenericObjectUseNode() case _V_ARRAY_LAZY: if err := self.skipAllIndex(); err != nil { return nil, err } return self.toGenericArrayUseNode() case _V_OBJECT_LAZY: if err := self.skipAllKey(); err != nil { return nil, err } return self.toGenericObjectUseNode() default: return *self, self.Check() } } // LoadAll loads the node's children // and ensure all its children can be READ concurrently (include its children's children) func (self *Node) LoadAll() error { return self.Load() } // Load loads the node's children as parsed. // and ensure all its children can be READ concurrently (include its children's children) func (self *Node) Load() error { switch self.t { case _V_ARRAY_LAZY: self.loadAllIndex(true) case _V_OBJECT_LAZY: self.loadAllKey(true) case V_ERROR: return self case V_NONE: return nil } if self.m == nil { self.m = new(sync.RWMutex) } return self.checkRaw() } /**---------------------------------- Internal Helper Methods ----------------------------------**/ func (self *Node) should(t types.ValueType) error { if err := self.checkRaw(); err != nil { return err } if self.itype() != t { return ErrUnsupportType } return nil } func (self *Node) nodeAt(i int) *Node { var p *linkedNodes if self.isLazy() { _, stack := self.getParserAndArrayStack() p = &stack.v } else { p = (*linkedNodes)(self.p) if l := p.Len(); l != self.len() { // some nodes got unset, iterate to skip them for j := 0; j < l; j++ { v := p.At(j) if v.Exists() { i-- } if i < 0 { return v } } return nil } } return p.At(i) } func (self *Node) pairAt(i int) *Pair { var p *linkedPairs if self.isLazy() { _, stack := self.getParserAndObjectStack() p = &stack.v } else { p = (*linkedPairs)(self.p) if l := 
p.Len(); l != self.len() { // some nodes got unset, iterate to skip them for j := 0; j < l; j++ { v := p.At(j) if v != nil && v.Value.Exists() { i-- } if i < 0 { return v } } return nil } } return p.At(i) } func (self *Node) skipAllIndex() error { if !self.isLazy() { return nil } var err types.ParsingError parser, stack := self.getParserAndArrayStack() parser.skipValue = true parser.noLazy = true *self, err = parser.decodeArray(&stack.v) if err != 0 { return parser.ExportError(err) } return nil } func (self *Node) skipAllKey() error { if !self.isLazy() { return nil } var err types.ParsingError parser, stack := self.getParserAndObjectStack() parser.skipValue = true parser.noLazy = true *self, err = parser.decodeObject(&stack.v) if err != 0 { return parser.ExportError(err) } return nil } func (self *Node) skipKey(key string) (*Node, int) { nb := self.len() lazy := self.isLazy() if nb > 0 { /* linear search */ var p *Pair var i int if lazy { s := (*parseObjectStack)(self.p) p, i = s.v.Get(key) } else { p, i = (*linkedPairs)(self.p).Get(key) } if p != nil { return &p.Value, i } } /* not found */ if !lazy { return nil, -1 } // lazy load for last, i := self.skipNextPair(), nb; last != nil; last, i = self.skipNextPair(), i+1 { if last.Value.Check() != nil { return &last.Value, -1 } if last.Key == key { return &last.Value, i } } return nil, -1 } func (self *Node) skipIndex(index int) *Node { nb := self.len() if nb > index { v := self.nodeAt(index) return v } if !self.isLazy() { return nil } // lazy load for last := self.skipNextNode(); last != nil; last = self.skipNextNode() { if last.Check() != nil { return last } if self.len() > index { return last } } return nil } func (self *Node) skipIndexPair(index int) *Pair { nb := self.len() if nb > index { return self.pairAt(index) } if !self.isLazy() { return nil } // lazy load for last := self.skipNextPair(); last != nil; last = self.skipNextPair() { if last.Value.Check() != nil { return last } if self.len() > index { return 
last } } return nil } func (self *Node) loadAllIndex(loadOnce bool) error { if !self.isLazy() { return nil } var err types.ParsingError parser, stack := self.getParserAndArrayStack() if !loadOnce { parser.noLazy = true } else { parser.loadOnce = true } *self, err = parser.decodeArray(&stack.v) if err != 0 { return parser.ExportError(err) } return nil } func (self *Node) loadAllKey(loadOnce bool) error { if !self.isLazy() { return nil } var err types.ParsingError parser, stack := self.getParserAndObjectStack() if !loadOnce { parser.noLazy = true *self, err = parser.decodeObject(&stack.v) } else { parser.loadOnce = true *self, err = parser.decodeObject(&stack.v) } if err != 0 { return parser.ExportError(err) } return nil } func (self *Node) removeNode(i int) { node := self.nodeAt(i) if node == nil { return } *node = Node{} // NOTICE: not be consistent with linkedNode.Len() self.l-- } func (self *Node) removePair(i int) { last := self.pairAt(i) if last == nil { return } *last = Pair{} // NOTICE: should be consistent with linkedPair.Len() self.l-- } func (self *Node) removePairAt(i int) { p := (*linkedPairs)(self.p).At(i) if p == nil { return } *p = Pair{} // NOTICE: should be consistent with linkedPair.Len() self.l-- } func (self *Node) toGenericArray() ([]interface{}, error) { nb := self.len() if nb == 0 { return []interface{}{}, nil } ret := make([]interface{}, 0, nb) /* convert each item */ it := self.values() for v := it.next(); v != nil; v = it.next() { vv, err := v.Interface() if err != nil { return nil, err } ret = append(ret, vv) } /* all done */ return ret, nil } func (self *Node) toGenericArrayUseNumber() ([]interface{}, error) { nb := self.len() if nb == 0 { return []interface{}{}, nil } ret := make([]interface{}, 0, nb) /* convert each item */ it := self.values() for v := it.next(); v != nil; v = it.next() { vv, err := v.InterfaceUseNumber() if err != nil { return nil, err } ret = append(ret, vv) } /* all done */ return ret, nil } func (self *Node) 
toGenericArrayUseNode() ([]Node, error) { var nb = self.len() if nb == 0 { return []Node{}, nil } var s = (*linkedNodes)(self.p) var out = make([]Node, nb) s.ToSlice(out) return out, nil } func (self *Node) toGenericObject() (map[string]interface{}, error) { nb := self.len() if nb == 0 { return map[string]interface{}{}, nil } ret := make(map[string]interface{}, nb) /* convert each item */ it := self.properties() for v := it.next(); v != nil; v = it.next() { vv, err := v.Value.Interface() if err != nil { return nil, err } ret[v.Key] = vv } /* all done */ return ret, nil } func (self *Node) toGenericObjectUseNumber() (map[string]interface{}, error) { nb := self.len() if nb == 0 { return map[string]interface{}{}, nil } ret := make(map[string]interface{}, nb) /* convert each item */ it := self.properties() for v := it.next(); v != nil; v = it.next() { vv, err := v.Value.InterfaceUseNumber() if err != nil { return nil, err } ret[v.Key] = vv } /* all done */ return ret, nil } func (self *Node) toGenericObjectUseNode() (map[string]Node, error) { var nb = self.len() if nb == 0 { return map[string]Node{}, nil } var s = (*linkedPairs)(self.p) var out = make(map[string]Node, nb) s.ToMap(out) /* all done */ return out, nil } /**------------------------------------ Factory Methods ------------------------------------**/ var ( nullNode = Node{t: types.V_NULL} trueNode = Node{t: types.V_TRUE} falseNode = Node{t: types.V_FALSE} ) // NewRaw creates a node of raw json. // If the input json is invalid, NewRaw returns a error Node. func NewRaw(json string) Node { parser := NewParserObj(json) start, err := parser.skip() if err != 0 { return *newError(err, err.Message()) } it := switchRawType(parser.s[start]) if it == _V_NONE { return Node{} } return newRawNode(parser.s[start:parser.p], it, false) } // NewRawConcurrentRead creates a node of raw json, which can be READ // (GetByPath/Get/Index/GetOrIndex/Int64/Bool/Float64/String/Number/Interface/Array/Map/Raw/MarshalJSON) concurrently. 
// If the input json is invalid, NewRaw returns a error Node. func NewRawConcurrentRead(json string) Node { parser := NewParserObj(json) start, err := parser.skip() if err != 0 { return *newError(err, err.Message()) } it := switchRawType(parser.s[start]) if it == _V_NONE { return Node{} } return newRawNode(parser.s[start:parser.p], it, true) } // NewAny creates a node of type V_ANY if any's type isn't Node or *Node, // which stores interface{} and can be only used for `.Interface()`\`.MarshalJSON()`. func NewAny(any interface{}) Node { switch n := any.(type) { case Node: return n case *Node: return *n default: return Node{ t: _V_ANY, p: unsafe.Pointer(&any), } } } // NewBytes encodes given src with Base64 (RFC 4648), and creates a node of type V_STRING. func NewBytes(src []byte) Node { if len(src) == 0 { panic("empty src bytes") } out := rt.EncodeBase64ToString(src) return NewString(out) } // NewNull creates a node of type V_NULL func NewNull() Node { return Node{ p: nil, t: types.V_NULL, } } // NewBool creates a node of type bool: // // If v is true, returns V_TRUE node // If v is false, returns V_FALSE node func NewBool(v bool) Node { var t = types.V_FALSE if v { t = types.V_TRUE } return Node{ p: nil, t: t, } } // NewNumber creates a json.Number node // v must be a decimal string complying with RFC8259 func NewNumber(v string) Node { return Node{ l: uint(len(v)), p: rt.StrPtr(v), t: _V_NUMBER, } } func (node *Node) toNumber() json.Number { return json.Number(rt.StrFrom(node.p, int64(node.l))) } func (self *Node) toString() string { return rt.StrFrom(self.p, int64(self.l)) } func (node *Node) toFloat64() (float64, error) { ret, err := node.toNumber().Float64() if err != nil { return 0, err } return ret, nil } func (node *Node) toInt64() (int64, error) { ret, err := node.toNumber().Int64() if err != nil { return 0, err } return ret, nil } func newBytes(v []byte) Node { return Node{ t: types.V_STRING, p: mem2ptr(v), l: uint(len(v)), } } // NewString creates a node 
of type V_STRING. // v is considered to be a valid UTF-8 string, // which means it won't be validated and unescaped. // when the node is encoded to json, v will be escaped. func NewString(v string) Node { return Node{ t: types.V_STRING, p: rt.StrPtr(v), l: uint(len(v)), } } // NewArray creates a node of type V_ARRAY, // using v as its underlying children func NewArray(v []Node) Node { s := new(linkedNodes) s.FromSlice(v) return newArray(s) } const _Threshold_Index = 16 func newArray(v *linkedNodes) Node { return Node{ t: types.V_ARRAY, l: uint(v.Len()), p: unsafe.Pointer(v), } } func (self *Node) setArray(v *linkedNodes) { self.t = types.V_ARRAY self.l = uint(v.Len()) self.p = unsafe.Pointer(v) } // NewObject creates a node of type V_OBJECT, // using v as its underlying children func NewObject(v []Pair) Node { s := new(linkedPairs) s.FromSlice(v) return newObject(s) } func newObject(v *linkedPairs) Node { if v.size > _Threshold_Index { v.BuildIndex() } return Node{ t: types.V_OBJECT, l: uint(v.Len()), p: unsafe.Pointer(v), } } func (self *Node) setObject(v *linkedPairs) { if v.size > _Threshold_Index { v.BuildIndex() } self.t = types.V_OBJECT self.l = uint(v.Len()) self.p = unsafe.Pointer(v) } func (self *Node) parseRaw(full bool) { lock := self.lock() defer self.unlock() if !self.isRaw() { return } raw := self.toString() parser := NewParserObj(raw) var e types.ParsingError if full { parser.noLazy = true *self, e = parser.Parse() } else if lock { var n Node parser.noLazy = true parser.loadOnce = true n, e = parser.Parse() self.assign(n) } else { *self, e = parser.Parse() } if e != 0 { *self = *newSyntaxError(parser.syntaxError(e)) } } func (self *Node) assign(n Node) { self.l = n.l self.p = n.p atomic.StoreInt64(&self.t, n.t) } ================================================ FILE: ast/node_test.go ================================================ /* * Copyright 2021 ByteDance Inc. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package ast import ( "bytes" "encoding/json" "errors" "fmt" "reflect" "strconv" "strings" "testing" "github.com/bytedance/sonic/internal/native/types" "github.com/bytedance/sonic/internal/rt" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) func TestNodeSortKeys(t *testing.T) { var src = `{"b":1,"a":2,"c":3}` root, err := NewSearcher(src).GetByPath() if err != nil { t.Fatal(err) } obj, err := root.MapUseNumber() if err != nil { t.Fatal(err) } exp, err := json.Marshal(obj) if err != nil { t.Fatal(err) } if err := root.SortKeys(true); err != nil { t.Fatal(err) } act, err := root.MarshalJSON() if err != nil { t.Fatal(err) } assert.Equal(t, len(exp), len(act)) assert.Equal(t, string(exp), string(act)) src = `[[1], {"b":1,"a":2,"c":3}, [], {}, [{"b":1,"a":2,"c":3,"d":[],"e":{}}]]` root, err = NewSearcher(src).GetByPath() if err != nil { t.Fatal(err) } vv, err := root.Interface() if err != nil { t.Fatal(err) } exp, err = json.Marshal(vv) if err != nil { t.Fatal(err) } if err := root.SortKeys(true); err != nil { t.Fatal(err) } act, err = root.MarshalJSON() if err != nil { t.Fatal(err) } assert.Equal(t, string(exp), string(act)) } func BenchmarkNodeSortKeys(b *testing.B) { root, err := NewSearcher(_TwitterJson).GetByPath() if err != nil { b.Fatal(err) } // if err := root.LoadAll(); err != nil { // b.Fatal(err) // } b.Run("single", func(b *testing.B) { r := root.Get("statuses") if r.Check() != nil { 
b.Fatal(r.Error()) } b.SetBytes(int64(len(_TwitterJson))) b.ResetTimer() for i := 0; i < b.N; i++ { _ = root.SortKeys(false) } }) b.Run("recurse", func(b *testing.B) { b.SetBytes(int64(len(_TwitterJson))) b.ResetTimer() for i := 0; i < b.N; i++ { _ = root.SortKeys(true) } }) } func TestNodeSortKeys2(t *testing.T) { root, err := NewSearcher(_TwitterJson).GetByPath() if err != nil { t.Fatal(err) } // if err := root.LoadAll(); err != nil { // b.Fatal(err) // } t.Run("single", func(t *testing.T) { r := root.Get("statuses") if r.Check() != nil { t.Fatal(r.Error()) } require.NoError(t, root.SortKeys(false)) }) t.Run("recurse", func(t *testing.T) { require.NoError(t, root.SortKeys(true)) }) } //go:noinline func stackObj() interface{} { var a int = 1 return rt.UnpackEface(a).Pack() } func TestLoadAll(t *testing.T) { e := Node{} err := e.Load() if err != nil { t.Fatal(err) } err = e.LoadAll() if err != nil { t.Fatal(err) } root, err := NewSearcher(`{"a":{"1":[1],"2":2},"b":[{"1":1},2],"c":[1,2]}`).GetByPath() if err != nil { t.Fatal(err) } if err = root.Load(); err != nil { t.Fatal(err) } if l, _ := root.Len(); l != 3 { t.Fatal(root.Len()) } c := root.Get("c") if !c.isRaw() { t.Fatal(err) } err = c.LoadAll() if err != nil { t.Fatal(err) } if l, _ := c.Len(); l != 2 { t.Fatal(c.Len()) } c1 := c.nodeAt(0) if n, err := c1.Int64(); err != nil || n != 1 { t.Fatal(n, err) } a := root.pairAt(0) if a.Key != "a" { t.Fatal(a.Key) } else if !a.Value.isRaw() { t.Fatal(a.Value.itype()) } else if n, err := a.Value.Len(); n != 2 || err != nil { t.Fatal(n, err) } if err := a.Value.Load(); err != nil { t.Fatal(err) } if l, _ := a.Value.Len(); l != 2 { t.Fatal(a.Value.Len()) } a1 := a.Value.Get("1") if !a1.isRaw() { t.Fatal(a1) } a.Value.LoadAll() if l, _ := a1.Len(); a1.t != types.V_ARRAY || l != 1 { t.Fatal(a1.t) } b := root.pairAt(1) if b.Key != "b" { t.Fatal(b.Key) } else if !b.Value.isRaw() { t.Fatal(b.Value.itype()) } else if n, err := b.Value.Len(); n != 2 || err != nil { t.Fatal(n, 
err) } if err := b.Value.Load(); err != nil { t.Fatal(err) } if l, _ := b.Value.Len(); l != 2 { t.Fatal(b.Value.Len()) } b1 := b.Value.Index(0) if !b1.isRaw() { t.Fatal(b1) } b.Value.LoadAll() if l, _ := b1.Len(); b1.t != types.V_OBJECT || l != 1 { t.Fatal(a1.Len()) } } func TestIndexPair(t *testing.T) { root, _ := NewParser(`{"a":1,"b":2}`).Parse() a := root.IndexPair(0) if a == nil || a.Key != "a" { t.Fatal(a) } b := root.IndexPair(1) if b == nil || b.Key != "b" { t.Fatal(b) } c := root.IndexPair(2) if c != nil { t.Fatal(c) } } func TestIndexOrGet(t *testing.T) { root, _ := NewParser(`{"a":1,"b":2}`).Parse() a := root.IndexOrGet(0, "a") if v, err := a.Int64(); err != nil || v != int64(1) { t.Fatal(a) } a = root.IndexOrGet(0, "b") if v, err := a.Int64(); err != nil || v != int64(2) { t.Fatal(a) } a = root.IndexOrGet(0, "c") if a.Valid() { t.Fatal(a) } } func TestIndexOrGetWithIdx(t *testing.T) { root, _ := NewParser(`{"a":1,"b":2}`).Parse() b, idx := root.IndexOrGetWithIdx(0, "b") if v, err := b.Int64(); err != nil || v != int64(2) { t.Fatal(b, idx) } if idx != 1 { t.Fatal(b, idx) } } func TestTypeCast(t *testing.T) { type tcase struct { method string node Node exp interface{} err error } var nonEmptyErr error = errors.New("") a1 := NewAny(1) lazyArray, _ := NewParser("[" + strings.Repeat("1,", _DEFAULT_NODE_CAP) + "1]").Parse() lazyObject, _ := NewParser(`{"0":0,"1":1,"2":2,"3":3,"4":4,"5":5,"6":6,"7":7,"8":8,"9":9,"10":10,"11":11,"12":12,"13":13,"14":14,"15":15,"16":16}`).Parse() var cases = []tcase{ {"Interface", Node{}, interface{}(nil), ErrUnsupportType}, {"Interface", NewAny(NewNumber("1")), float64(1), nil}, {"Interface", NewAny(int64(1)), int64(1), nil}, {"Interface", NewNumber("1"), float64(1), nil}, {"InterfaceUseNode", Node{}, Node{}, nil}, {"InterfaceUseNode", a1, a1, nil}, {"InterfaceUseNode", NewNumber("1"), NewNumber("1"), nil}, {"InterfaceUseNumber", Node{}, interface{}(nil), ErrUnsupportType}, {"InterfaceUseNumber", NewAny(1), 1, nil}, 
{"InterfaceUseNumber", NewNumber("1"), json.Number("1"), nil}, {"Map", Node{}, map[string]interface{}(nil), ErrUnsupportType}, {"Map", NewAny(map[string]Node{"a": NewNumber("1")}), map[string]interface{}(nil), ErrUnsupportType}, {"Map", NewAny(map[string]interface{}{"a": 1}), map[string]interface{}{"a": 1}, nil}, {"Map", NewObject([]Pair{NewPair("a", NewNumber("1"))}), map[string]interface{}{"a": float64(1.0)}, nil}, {"MapUseNode", Node{}, map[string]Node(nil), ErrUnsupportType}, {"MapUseNode", NewAny(map[string]interface{}{"a": 1}), map[string]Node(nil), ErrUnsupportType}, {"MapUseNode", NewAny(map[string]Node{"a": NewNumber("1")}), map[string]Node{"a": NewNumber("1")}, nil}, {"MapUseNode", NewObject([]Pair{NewPair("a", NewNumber("1"))}), map[string]Node{"a": NewNumber("1")}, nil}, {"MapUseNumber", Node{}, map[string]interface{}(nil), ErrUnsupportType}, {"MapUseNumber", NewAny(map[string]interface{}{"a": 1}), map[string]interface{}{"a": 1}, nil}, {"MapUseNumber", NewObject([]Pair{NewPair("a", NewNumber("1"))}), map[string]interface{}{"a": json.Number("1")}, nil}, {"Array", Node{}, []interface{}(nil), ErrUnsupportType}, {"Array", NewAny([]interface{}{1}), []interface{}{1}, nil}, {"Array", NewArray([]Node{NewNumber("1")}), []interface{}{float64(1.0)}, nil}, {"ArrayUseNode", Node{}, []Node(nil), ErrUnsupportType}, {"ArrayUseNode", NewAny([]interface{}{1}), []Node(nil), ErrUnsupportType}, {"ArrayUseNode", NewAny([]Node{NewNumber("1")}), []Node{NewNumber("1")}, nil}, {"ArrayUseNode", NewArray([]Node{NewNumber("1")}), []Node{NewNumber("1")}, nil}, {"ArrayUseNumber", Node{}, []interface{}(nil), ErrUnsupportType}, {"ArrayUseNumber", NewAny([]interface{}{1}), []interface{}{1}, nil}, {"ArrayUseNumber", NewAny([]Node{NewNumber("1")}), []interface{}(nil), ErrUnsupportType}, {"ArrayUseNumber", NewArray([]Node{NewNumber("1")}), []interface{}{json.Number("1")}, nil}, {"Raw", Node{}, "", ErrNotExist}, {"Raw", NewRaw(" "), "", nonEmptyErr}, {"Raw", NewRaw(" [ ] "), "[ ]", nil}, 
{"Raw", NewRaw("[ ]"), "[ ]", nil}, {"Raw", NewRaw(` { "a" : [ true, -1.2e34 ] } `), `{ "a" : [ true, -1.2e34 ] }`, nil}, {"Raw", NewRaw(` { "a" : [ true, -1.2e34 ] `), "", nonEmptyErr}, {"Raw", NewRaw(` { "a" : [ true, -1.2e34 }`), "", nonEmptyErr}, {"Raw", NewBool(true), "true", nil}, {"Raw", NewNumber("-0.0"), "-0.0", nil}, {"Raw", NewString(""), `""`, nil}, {"Raw", NewBytes([]byte("hello, world")), `"aGVsbG8sIHdvcmxk"`, nil}, {"Bool", Node{}, false, ErrUnsupportType}, {"Bool", NewAny(true), true, nil}, {"Bool", NewAny(false), false, nil}, {"Bool", NewAny(int(0)), false, nil}, {"Bool", NewAny(int8(1)), true, nil}, {"Bool", NewAny(int16(1)), true, nil}, {"Bool", NewAny(int32(1)), true, nil}, {"Bool", NewAny(int64(1)), true, nil}, {"Bool", NewAny(uint(1)), true, nil}, {"Bool", NewAny(uint16(1)), true, nil}, {"Bool", NewAny(uint32(1)), true, nil}, {"Bool", NewAny(uint64(1)), true, nil}, {"Bool", NewAny(float64(0)), false, nil}, {"Bool", NewAny(float32(1)), true, nil}, {"Bool", NewAny(float64(1)), true, nil}, {"Bool", NewAny(json.Number("0")), false, nil}, {"Bool", NewAny(json.Number("1")), true, nil}, {"Bool", NewAny(json.Number("1.1")), true, nil}, {"Bool", NewAny(json.Number("+x1.1")), false, nonEmptyErr}, {"Bool", NewAny(string("0")), false, nil}, {"Bool", NewAny(string("t")), true, nil}, {"Bool", NewAny([]byte{0}), false, nonEmptyErr}, {"Bool", NewRaw("true"), true, nil}, {"Bool", NewRaw("false"), false, nil}, {"Bool", NewRaw("null"), false, nil}, {"Bool", NewString(`true`), true, nil}, {"Bool", NewString(`false`), false, nil}, {"Bool", NewString(``), false, nonEmptyErr}, {"Bool", NewNumber("2"), true, nil}, {"Bool", NewNumber("-2.1"), true, nil}, {"Bool", NewNumber("-x-2.1"), false, nonEmptyErr}, {"StrictBool", NewBool(false), false, nil}, {"StrictBool", NewBool(true), true, nil}, {"StrictBool", NewRaw(`null`), false, ErrUnsupportType}, {"StrictBool", NewAny(true), true, nil}, {"StrictBool", NewAny(false), false, nil}, {"StrictBool", NewAny(int(1)), false, 
ErrUnsupportType}, {"Int64", NewRaw("true"), int64(1), nil}, {"Int64", NewRaw("false"), int64(0), nil}, {"Int64", NewRaw("\"1\""), int64(1), nil}, {"Int64", NewRaw("\"1.1\""), int64(1), nil}, {"Int64", NewRaw("\"1.0\""), int64(1), nil}, {"Int64", NewNumber("+x.0"), int64(0), nonEmptyErr}, {"Int64", NewAny(false), int64(0), nil}, {"Int64", NewAny(true), int64(1), nil}, {"Int64", NewAny(int(1)), int64(1), nil}, {"Int64", NewAny(int8(1)), int64(1), nil}, {"Int64", NewAny(int16(1)), int64(1), nil}, {"Int64", NewAny(int32(1)), int64(1), nil}, {"Int64", NewAny(int64(1)), int64(1), nil}, {"Int64", NewAny(uint(1)), int64(1), nil}, {"Int64", NewAny(uint8(1)), int64(1), nil}, {"Int64", NewAny(uint32(1)), int64(1), nil}, {"Int64", NewAny(uint64(1)), int64(1), nil}, {"Int64", NewAny(float32(1)), int64(1), nil}, {"Int64", NewAny(float64(1)), int64(1), nil}, {"Int64", NewAny("1"), int64(1), nil}, {"Int64", NewAny("1.1"), int64(1), nil}, {"Int64", NewAny("+1x.1"), int64(0), nonEmptyErr}, {"Int64", NewAny(json.Number("1")), int64(1), nil}, {"Int64", NewAny(json.Number("1.1")), int64(1), nil}, {"Int64", NewAny(json.Number("+1x.1")), int64(0), nonEmptyErr}, {"Int64", NewAny([]byte{0}), int64(0), ErrUnsupportType}, {"Int64", Node{}, int64(0), ErrUnsupportType}, {"Int64", NewRaw("0"), int64(0), nil}, {"Int64", NewRaw("null"), int64(0), nil}, {"StrictInt64", NewRaw("true"), int64(0), ErrUnsupportType}, {"StrictInt64", NewRaw("false"), int64(0), ErrUnsupportType}, {"StrictInt64", NewAny(int(0)), int64(0), nil}, {"StrictInt64", NewAny(int8(0)), int64(0), nil}, {"StrictInt64", NewAny(int16(0)), int64(0), nil}, {"StrictInt64", NewAny(int32(0)), int64(0), nil}, {"StrictInt64", NewAny(int64(0)), int64(0), nil}, {"StrictInt64", NewAny(uint(0)), int64(0), nil}, {"StrictInt64", NewAny(uint8(0)), int64(0), nil}, {"StrictInt64", NewAny(uint32(0)), int64(0), nil}, {"StrictInt64", NewAny(uint64(0)), int64(0), nil}, {"StrictInt64", Node{}, int64(0), ErrUnsupportType}, {"StrictInt64", NewRaw("0"), 
int64(0), nil}, {"StrictInt64", NewRaw("null"), int64(0), ErrUnsupportType}, {"Float64", NewRaw("true"), float64(1), nil}, {"Float64", NewRaw("false"), float64(0), nil}, {"Float64", NewRaw("\"1.0\""), float64(1.0), nil}, {"Float64", NewRaw("\"xx\""), float64(0), nonEmptyErr}, {"Float64", Node{}, float64(0), ErrUnsupportType}, {"Float64", NewAny(false), float64(0), nil}, {"Float64", NewAny(true), float64(1), nil}, {"Float64", NewAny(int(1)), float64(1), nil}, {"Float64", NewAny(int8(1)), float64(1), nil}, {"Float64", NewAny(int16(1)), float64(1), nil}, {"Float64", NewAny(int32(1)), float64(1), nil}, {"Float64", NewAny(int64(1)), float64(1), nil}, {"Float64", NewAny(uint(1)), float64(1), nil}, {"Float64", NewAny(uint8(1)), float64(1), nil}, {"Float64", NewAny(uint32(1)), float64(1), nil}, {"Float64", NewAny(uint64(1)), float64(1), nil}, {"Float64", NewAny(float32(1)), float64(1), nil}, {"Float64", NewAny(float64(1)), float64(1), nil}, {"Float64", NewAny("1.1"), float64(1.1), nil}, {"Float64", NewAny("+1x.1"), float64(0), nonEmptyErr}, {"Float64", NewAny(json.Number("0")), float64(0), nil}, {"Float64", NewAny(json.Number("x")), float64(0), nonEmptyErr}, {"Float64", NewAny([]byte{0}), float64(0), ErrUnsupportType}, {"Float64", NewRaw("0.0"), float64(0.0), nil}, {"Float64", NewRaw("1"), float64(1.0), nil}, {"Float64", NewRaw("null"), float64(0.0), nil}, {"StrictFloat64", NewRaw("true"), float64(0), ErrUnsupportType}, {"StrictFloat64", NewRaw("false"), float64(0), ErrUnsupportType}, {"StrictFloat64", Node{}, float64(0), ErrUnsupportType}, {"StrictFloat64", NewAny(float32(0)), float64(0), nil}, {"StrictFloat64", NewAny(float64(0)), float64(0), nil}, {"StrictFloat64", NewRaw("0.0"), float64(0.0), nil}, {"StrictFloat64", NewRaw("null"), float64(0.0), ErrUnsupportType}, {"Number", Node{}, json.Number(""), ErrUnsupportType}, {"Number", NewAny(false), json.Number("0"), nil}, {"Number", NewAny(true), json.Number("1"), nil}, {"Number", NewAny(int(1)), json.Number("1"), nil}, 
{"Number", NewAny(int8(1)), json.Number("1"), nil}, {"Number", NewAny(int16(1)), json.Number("1"), nil}, {"Number", NewAny(int32(1)), json.Number("1"), nil}, {"Number", NewAny(int64(1)), json.Number("1"), nil}, {"Number", NewAny(uint(1)), json.Number("1"), nil}, {"Number", NewAny(uint8(1)), json.Number("1"), nil}, {"Number", NewAny(uint32(1)), json.Number("1"), nil}, {"Number", NewAny(uint64(1)), json.Number("1"), nil}, {"Number", NewAny(float32(1)), json.Number("1"), nil}, {"Number", NewAny(float64(1)), json.Number("1"), nil}, {"Number", NewAny("1.1"), json.Number("1.1"), nil}, {"Number", NewAny("+1x.1"), json.Number(""), nonEmptyErr}, {"Number", NewAny(json.Number("0")), json.Number("0"), nil}, {"Number", NewAny(json.Number("x")), json.Number("x"), nil}, {"Number", NewAny(json.Number("+1x.1")), json.Number("+1x.1"), nil}, {"Number", NewAny([]byte{0}), json.Number(""), ErrUnsupportType}, {"Number", NewRaw("x"), json.Number(""), nonEmptyErr}, {"Number", NewRaw("0.0"), json.Number("0.0"), nil}, {"Number", NewRaw("\"1\""), json.Number("1"), nil}, {"Number", NewRaw("\"1.1\""), json.Number("1.1"), nil}, {"Number", NewRaw("\"0.x0\""), json.Number(""), nonEmptyErr}, {"Number", NewRaw("{]"), json.Number(""), nonEmptyErr}, {"Number", NewRaw("true"), json.Number("1"), nil}, {"Number", NewRaw("false"), json.Number("0"), nil}, {"Number", NewRaw("null"), json.Number("0"), nil}, {"StrictNumber", NewRaw("true"), json.Number(""), ErrUnsupportType}, {"StrictNumber", NewRaw("false"), json.Number(""), ErrUnsupportType}, {"StrictNumber", Node{}, json.Number(""), ErrUnsupportType}, {"StrictNumber", NewAny(json.Number("0")), json.Number("0"), nil}, {"StrictNumber", NewRaw("0.0"), json.Number("0.0"), nil}, {"StrictNumber", NewRaw("null"), json.Number(""), ErrUnsupportType}, {"String", Node{}, "", ErrUnsupportType}, {"String", NewAny(`\u263a`), `\u263a`, nil}, {"String", NewRaw(`"\u263a"`), `☺`, nil}, {"String", NewString(`\u263a`), `\u263a`, nil}, {"String", NewRaw(`0.0`), "0.0", nil}, 
{"String", NewRaw(`true`), "true", nil}, {"String", NewRaw(`false`), "false", nil}, {"String", NewRaw(`null`), "", nil}, {"String", NewAny(false), "false", nil}, {"String", NewAny(true), "true", nil}, {"String", NewAny(int(1)), "1", nil}, {"String", NewAny(int8(1)), "1", nil}, {"String", NewAny(int16(1)), "1", nil}, {"String", NewAny(int32(1)), "1", nil}, {"String", NewAny(int64(1)), "1", nil}, {"String", NewAny(uint(1)), "1", nil}, {"String", NewAny(uint8(1)), "1", nil}, {"String", NewAny(uint32(1)), "1", nil}, {"String", NewAny(uint64(1)), "1", nil}, {"String", NewAny(float32(1)), "1", nil}, {"String", NewAny(float64(1)), "1", nil}, {"String", NewAny("1.1"), "1.1", nil}, {"String", NewAny("+1x.1"), "+1x.1", nil}, {"String", NewAny(json.Number("0")), ("0"), nil}, {"String", NewAny(json.Number("x")), ("x"), nil}, {"String", NewAny([]byte{0}), (""), ErrUnsupportType}, {"StrictString", Node{}, "", ErrUnsupportType}, {"StrictString", NewAny(`\u263a`), `\u263a`, nil}, {"StrictString", NewRaw(`"\u263a"`), `☺`, nil}, {"StrictString", NewString(`\u263a`), `\u263a`, nil}, {"StrictString", NewRaw(`0.0`), "", ErrUnsupportType}, {"StrictString", NewRaw(`true`), "", ErrUnsupportType}, {"StrictString", NewRaw(`false`), "", ErrUnsupportType}, {"StrictString", NewRaw(`null`), "", ErrUnsupportType}, {"Len", Node{}, 0, nil}, {"Len", NewAny(0), 0, ErrUnsupportType}, {"Len", NewNull(), 0, nil}, {"Len", NewRaw(`"1"`), 1, nil}, {"Len", NewRaw(`[1]`), 0, nil}, {"Len", NewArray([]Node{NewNull()}), 1, nil}, {"Len", lazyArray, 0, nil}, {"Len", NewRaw(`{"a":1}`), 0, nil}, {"Len", lazyObject, 0, nil}, {"Cap", Node{}, 0, nil}, {"Cap", NewAny(0), 0, ErrUnsupportType}, {"Cap", NewNull(), 0, nil}, {"Cap", NewRaw(`[1]`), _DEFAULT_NODE_CAP, nil}, {"Cap", NewObject([]Pair{NewPair("", NewNull())}), _DEFAULT_NODE_CAP, nil}, {"Cap", NewRaw(`{"a":1}`), _DEFAULT_NODE_CAP, nil}, } lazyArray.skipAllIndex() lazyObject.skipAllKey() cases = append(cases, tcase{"Len", lazyArray, _DEFAULT_NODE_CAP + 1, nil}, 
tcase{"Len", lazyObject, 17, nil}, tcase{"Cap", lazyArray, _DEFAULT_NODE_CAP * 2, nil}, tcase{"Cap", lazyObject, _DEFAULT_NODE_CAP * 2, nil}, ) for i, c := range cases { fmt.Println(i, c) rt := reflect.ValueOf(&c.node) m := rt.MethodByName(c.method) rets := m.Call([]reflect.Value{}) if len(rets) != 2 { t.Error(i, rets) } if !reflect.DeepEqual(rets[0].Interface(), c.exp) { t.Error(i, rets[0].Interface(), c.exp) } v := rets[1].Interface() if c.err == nonEmptyErr { if reflect.ValueOf(v).IsNil() { t.Error(i, v) } } else if v != c.err { t.Error(i, v) } } } func TestCheckError_Nil(t *testing.T) { nill := (*Node)(nil) if nill.Valid() || nill.Check() == nil { t.Fail() } if nill.Get("").Check() == nil { t.Fatal() } if nill.GetByPath("").Check() == nil { t.Fatal() } if nill.Index(1).Check() == nil { t.Fatal() } if nill.IndexOrGet(1, "a").Check() == nil { t.Fatal() } if nill.IndexPair(1) != nil { t.Fatal() } if _, err := nill.Set("", Node{}); err == nil { t.Fatal() } if _, err := nill.SetByIndex(1, Node{}); err == nil { t.Fatal() } if _, err := nill.SetAny("", 1); err == nil { t.Fatal() } if _, err := nill.SetAnyByIndex(1, 1); err == nil { t.Fatal() } if _, err := nill.Unset(""); err == nil { t.Fatal() } if _, err := nill.UnsetByIndex(1); err == nil { t.Fatal() } if err := nill.Add(Node{}); err == nil { t.Fatal() } if err := nill.AddAny(1); err == nil { t.Fatal() } } func TestCheckError_None(t *testing.T) { nill := Node{} if !nill.Valid() || nill.Check() != nil { t.Fail() } if nill.Get("").Check() == nil { t.Fatal() } if nill.GetByPath("").Check() == nil { t.Fatal() } if nill.Index(1).Check() == nil { t.Fatal() } if nill.IndexOrGet(1, "a").Check() == nil { t.Fatal() } if nill.IndexPair(1) != nil { t.Fatal() } nill = Node{} if _, err := nill.Set("a", Node{}); err != nil { t.Fatal() } nill = Node{} if _, err := nill.SetByIndex(0, Node{}); err != nil { t.Fatal() } nill = Node{} if _, err := nill.SetByIndex(1, Node{}); err == nil { t.Fatal() } nill = Node{} if _, err := 
nill.SetAny("a", 1); err != nil { t.Fatal() } nill = Node{} if _, err := nill.SetAnyByIndex(0, 1); err != nil { t.Fatal() } nill = Node{} if _, err := nill.SetAnyByIndex(1, 1); err == nil { t.Fatal() } nill = Node{} if _, err := nill.Unset(""); err == nil { t.Fatal() } nill = Node{} if _, err := nill.UnsetByIndex(1); err == nil { t.Fatal() } nill = Node{} if err := nill.Add(Node{}); err != nil { t.Fatal() } nill = Node{} if err := nill.AddAny(1); err != nil { t.Fatal() } } func TestCheckError_Error(t *testing.T) { nill := newError(types.ERR_EOF, "") if nill.Valid() || nill.Check() == nil { t.Fail() } if nill.Get("").Check() == nil { t.Fatal() } if nill.GetByPath("").Check() == nil { t.Fatal() } if nill.Index(1).Check() == nil { t.Fatal() } if nill.IndexOrGet(1, "a").Check() == nil { t.Fatal() } if nill.IndexPair(1) != nil { t.Fatal() } if _, err := nill.Set("", Node{}); err == nil { t.Fatal() } if _, err := nill.SetByIndex(1, Node{}); err == nil { t.Fatal() } if _, err := nill.SetAny("", 1); err == nil { t.Fatal() } if _, err := nill.SetAnyByIndex(1, 1); err == nil { t.Fatal() } if _, err := nill.Unset(""); err == nil { t.Fatal() } if _, err := nill.UnsetByIndex(1); err == nil { t.Fatal() } if err := nill.Add(Node{}); err == nil { t.Fatal() } if err := nill.AddAny(1); err == nil { t.Fatal() } } func TestCheckError_Empty(t *testing.T) { empty := Node{} if !empty.Valid() || empty.Check() != nil || empty.Error() != "" { t.Fatal() } n := newRawNode("[hello]", types.V_ARRAY, false) n.parseRaw(false) if n.Check() != nil { t.Fatal(n.Check()) } n = newRawNode("[hello]", types.V_ARRAY, false) n.parseRaw(true) p := NewParser("[hello]") p.noLazy = true p.skipValue = false _, x := p.Parse() if n.Error() != newSyntaxError(p.syntaxError(x)).Error() { t.Fatal(n.Check()) } s, err := NewParser(`{"a":{}, "b":talse, "c":{}}`).Parse() if err != 0 { t.Fatal(err) } root := s.GetByPath() // fmt.Println(root.Check()) a := root.Get("a") if a.Check() != nil { t.Fatal(a.Check()) } c := 
root.Get("c") if c.Check() == nil { t.Fatal() } fmt.Println(c.Check()) _, e := a.Properties() if e != nil { t.Fatal(e) } exist, e := a.Set("d", newRawNode("x", types.V_OBJECT, false)) if exist || e != nil { t.Fatal(err) } if a.len() != 1 { t.Fail() } d := a.Get("d").Get("") if d.Check() == nil { t.Fatal(d) } exist, e = a.Set("e", newRawNode("[}", types.V_ARRAY, false)) if e != nil { t.Fatal(e) } if a.len() != 2 { t.Fail() } d = a.Index(1).Index(0) if d.Check() == nil { t.Fatal(d) } it, e := root.Interface() if e == nil { t.Fatal(it) } fmt.Println(e) } func TestIndex(t *testing.T) { root, derr := NewParser(_TwitterJson).Parse() if derr != 0 { t.Fatalf("decode failed: %v", derr.Error()) } status := root.GetByPath("statuses", 0) x, _ := status.Index(4).String() y, _ := status.Get("id_str").String() if x != y { t.Fail() } } func TestUnset(t *testing.T) { root, derr := NewParser(_TwitterJson).Parse() if derr != 0 { t.Fatalf("decode failed: %v", derr.Error()) } entities := root.GetByPath("statuses", 0, "entities") if !entities.Exists() || entities.Check() != nil { t.Fatal(entities.Check().Error()) } exist, err := entities.Unset("urls") if !exist || err != nil { t.Fatal() } e := entities.Get("urls") if e.Exists() { t.Fatal() } if entities.len() != 2 { t.Fatal(entities.len()) } es, err := entities.Interface() require.NoError(t, err) require.Equal(t, map[string]interface{}{ "hashtags": []interface{}{ map[string]interface{}{ "text": "freebandnames", "indices": []interface{}{ float64(20), float64(34), }, }, }, "user_mentions": []interface{}{}, }, es) out, err := entities.MarshalJSON() require.NoError(t, err) println(string(out)) buf := bytes.NewBuffer(nil) require.NoError(t, json.Compact(buf, out)) require.Equal(t, `{"hashtags":[{"text":"freebandnames","indices":[20,34]}],"user_mentions":[]}`, buf.String()) entities.Set("urls", NewString("a")) e = entities.Get("urls") x, _ := e.String() if !e.Exists() || x != "a" { t.Fatal() } out, err = entities.MarshalJSON() 
require.NoError(t, err) buf = bytes.NewBuffer(nil) json.Compact(buf, out) require.Equal(t, `{"hashtags":[{"text":"freebandnames","indices":[20,34]}],"user_mentions":[],"urls":"a"}`, buf.String()) // reload entities *entities = NewRaw(string(out)) hashtags := entities.Get("hashtags").Index(0) hashtags.Set("text2", NewRaw(`{}`)) exist, err = hashtags.Unset("indices") // NOTICE: Unset() won't change node.Len() here if !exist || err != nil || hashtags.len() != 2 { t.Fatal(hashtags.len()) } y, _ := hashtags.Get("text").String() if y != "freebandnames" { t.Fatal() } if hashtags.Get("text2").Type() != V_OBJECT { t.Fatal() } entities.Load() exist, err = entities.UnsetByIndex(entities.len() - 1) if !exist || err != nil { t.Fatal() } if entities.len() != 2 { t.Fatal(entities.len()) } e = entities.Index(entities.len()) if e.Exists() { t.Fatal() } ums := entities.Get("user_mentions") ums.Add(NewNull()) ums.Add(NewBool(false)) ums.Add(NewBool(true)) if ums.len() != 3 { t.Fatal() } exist, err = ums.UnsetByIndex(1) if !exist || err != nil { t.Fatal() } require.Equal(t, 2, ums.len()) umses, err := ums.Interface() require.NoError(t, err) require.Equal(t, []interface{}{interface{}(nil), true}, umses) v1, _ := ums.Index(0).Interface() v2, _ := ums.Index(1).Interface() // NOTICE: unset index 1 still can be find here v3, _ := ums.Index(2).Interface() if v1 != nil { t.Fatal() } if v2 != true { t.Fatal() } if v3 != nil { t.Fatal() } out, err = entities.MarshalJSON() require.NoError(t, err) require.Equal(t, `{"hashtags":[{"text":"freebandnames","text2":{}}],"user_mentions":[null,true]}`, string(out)) } // func TestUnsafeNode(t *testing.T) { // str, loop := getTestIteratorSample(_DEFAULT_NODE_CAP) // root, err := NewSearcher(str).GetByPath("array") // if err != nil { // t.Fatal(err) // } // a, _ := root.UnsafeArray() // if len(a) != loop { // t.Fatalf("exp:%v, got:%v", loop, len(a)) // } // for i := int64(0); i= 0; i-- { root.GetByPath("statuses", 3).Set("id_str"+strconv.Itoa(i), app) } 
val, _ = root.GetByPath("statuses", 3, "id_str0").Int64() if val != 111 { t.Fatalf("exp: %+v, got: %+v", 111, val) } nroot, derr := NewParser(`{"a":[0.1,true,0,"name",{"b":"c"}]}`).Parse() if derr != 0 { t.Fatalf("decode failed: %v", derr.Error()) } root.GetByPath("statuses", 3).Set("id_str2", nroot) val2, _ := root.GetByPath("statuses", 3, "id_str2", "a", 4, "b").String() if val2 != "c" { t.Fatalf("exp:%+v, got:%+v", "c", val2) } } func TestNodeSetByIndex(t *testing.T) { root, derr := NewParser(_TwitterJson).Parse() if derr != 0 { t.Fatalf("decode failed: %v", derr.Error()) } app, _ := NewParser("111").Parse() st := root.GetByPath("statuses") st.SetByIndex(0, app) st = root.GetByPath("statuses") val := st.Index(0) x, _ := val.Int64() if x != 111 { t.Fatalf("exp: %+v, got: %+v", 111, val) } nroot, derr := NewParser(`{"a":[0.1,true,0,"name",{"b":"c"}]}`).Parse() if derr != 0 { t.Fatalf("decode failed: %v", derr.Error()) } root.GetByPath("statuses").SetByIndex(0, nroot) val2, _ := root.GetByPath("statuses", 0, "a", 4, "b").String() if val2 != "c" { t.Fatalf("exp:%+v, got:%+v", "c", val2) } } func TestNodeAdd(t *testing.T) { root, derr := NewParser(_TwitterJson).Parse() if derr != 0 { t.Fatalf("decode failed: %v", derr.Error()) } app, _ := NewParser("111").Parse() for i, _ := root.GetByPath("statuses").Cap(); i >= 0; i-- { root.GetByPath("statuses").Add(app) } val, _ := root.GetByPath("statuses", 4).Int64() if val != 111 { t.Fatalf("exp: %+v, got: %+v", 111, val) } val, _ = root.GetByPath("statuses", root.GetByPath("statuses").len()-1).Int64() if val != 111 { t.Fatalf("exp: %+v, got: %+v", 111, val) } nroot, derr := NewParser(`{"a":[0.1,true,0,"name",{"b":"c"}]}`).Parse() if derr != 0 { t.Fatalf("decode failed: %v", derr.Error()) } root.GetByPath("statuses").Add(nroot) val2, _ := root.GetByPath("statuses", root.GetByPath("statuses").len()-1, "a", 4, "b").String() if val2 != "c" { t.Fatalf("exp:%+v, got:%+v", "c", val2) } } func BenchmarkLoadNode(b *testing.B) { 
b.Run("Interface()", func(b *testing.B) { b.SetBytes(int64(len(_TwitterJson))) b.ResetTimer() for i := 0; i < b.N; i++ { root, err := NewSearcher(_TwitterJson).GetByPath("statuses", 0) if err != nil { b.Fatal(err) } _, _ = root.Interface() } }) b.Run("LoadAll()", func(b *testing.B) { b.SetBytes(int64(len(_TwitterJson))) b.ResetTimer() for i := 0; i < b.N; i++ { root, err := NewSearcher(_TwitterJson).GetByPath("statuses", 0) if err != nil { b.Fatal(err) } _ = root.LoadAll() } }) b.Run("InterfaceUseNode()", func(b *testing.B) { b.SetBytes(int64(len(_TwitterJson))) b.ResetTimer() for i := 0; i < b.N; i++ { root, err := NewSearcher(_TwitterJson).GetByPath("statuses", 0) if err != nil { b.Fatal(err) } _, _ = root.InterfaceUseNode() } }) b.Run("Load()", func(b *testing.B) { b.SetBytes(int64(len(_TwitterJson))) b.ResetTimer() for i := 0; i < b.N; i++ { root, err := NewSearcher(_TwitterJson).GetByPath("statuses", 0) if err != nil { b.Fatal(err) } _ = root.Load() } }) } func BenchmarkLoadNode_Parallel(b *testing.B) { b.Run("Interface()", func(b *testing.B) { b.SetBytes(int64(len(_TwitterJson))) b.ResetTimer() b.RunParallel(func(pb *testing.PB) { for pb.Next() { root, err := NewSearcher(_TwitterJson).GetByPath("statuses", 0) if err != nil { b.Fatal(err) } _, _ = root.Interface() } }) }) b.Run("LoadAll()", func(b *testing.B) { b.SetBytes(int64(len(_TwitterJson))) b.ResetTimer() b.RunParallel(func(pb *testing.PB) { for pb.Next() { root, err := NewSearcher(_TwitterJson).GetByPath("statuses", 0) if err != nil { b.Fatal(err) } _ = root.LoadAll() } }) }) b.Run("InterfaceUseNode()", func(b *testing.B) { b.SetBytes(int64(len(_TwitterJson))) b.ResetTimer() b.RunParallel(func(pb *testing.PB) { for pb.Next() { root, err := NewSearcher(_TwitterJson).GetByPath("statuses", 0) if err != nil { b.Fatal(err) } _, _ = root.InterfaceUseNode() } }) }) b.Run("Load()", func(b *testing.B) { b.SetBytes(int64(len(_TwitterJson))) b.ResetTimer() b.RunParallel(func(pb *testing.PB) { for pb.Next() { 
root, err := NewSearcher(_TwitterJson).GetByPath("statuses", 0) if err != nil { b.Fatal(err) } _ = root.Load() } }) }) } func BenchmarkNodeGetByPath(b *testing.B) { root, derr := NewParser(_TwitterJson).Parse() if derr != 0 { b.Fatalf("decode failed: %v", derr.Error()) } _, _ = root.GetByPath("statuses", 3, "entities", "hashtags", 0, "text").String() b.ResetTimer() b.RunParallel(func(pb *testing.PB) { for pb.Next() { _, _ = root.GetByPath("statuses", 3, "entities", "hashtags", 0, "text").String() } }) } func BenchmarkStructGetByPath(b *testing.B) { var root = _TwitterStruct{} err := json.Unmarshal([]byte(_TwitterJson), &root) if err != nil { b.Fatalf("unmarshal failed: %v", err) } b.ResetTimer() b.RunParallel(func(pb *testing.PB) { for pb.Next() { _ = root.Statuses[3].Entities.Hashtags[0].Text } }) } func BenchmarkNodeIndex(b *testing.B) { root, derr := NewParser(_TwitterJson).Parse() if derr != 0 { b.Fatalf("decode failed: %v", derr.Error()) } node := root.Get("statuses").Index(3).Get("entities").Get("hashtags").Index(0) node.Set("test1", NewNumber("1")) node.Set("test2", NewNumber("2")) node.Set("test3", NewNumber("3")) node.Set("test4", NewNumber("4")) node.Set("test5", NewNumber("5")) b.ResetTimer() for i := 0; i < b.N; i++ { node.Index(2) } } func BenchmarkStructIndex(b *testing.B) { type T struct { A Node B Node C Node D Node E Node } var obj = new(T) b.ResetTimer() for i := 0; i < b.N; i++ { _ = obj.C } } func BenchmarkSliceIndex(b *testing.B) { var obj = []Node{Node{}, Node{}, Node{}, Node{}, Node{}} b.ResetTimer() for i := 0; i < b.N; i++ { _ = obj[2] } } func BenchmarkMapIndex(b *testing.B) { var obj = map[string]interface{}{"test1": Node{}, "test2": Node{}, "test3": Node{}, "test4": Node{}, "test5": Node{}} b.ResetTimer() for i := 0; i < b.N; i++ { for k := range obj { if k == "test3" { break } } } } func BenchmarkNodeGet(b *testing.B) { var N = 5 var half = "test" + strconv.Itoa(N/2+1) root, derr := NewParser(_TwitterJson).Parse() if derr != 0 { 
b.Fatalf("decode failed: %v", derr.Error()) } node := root.Get("statuses").Index(3).Get("entities").Get("hashtags").Index(0) for i := 0; i < N; i++ { node.Set("test"+strconv.Itoa(i), NewNumber(strconv.Itoa(i))) } b.ResetTimer() for i := 0; i < b.N; i++ { _ = node.Get(half) } } func BenchmarkSliceGet(b *testing.B) { var obj = []string{"test1", "test2", "test3", "test4", "test5"} str := "test3" b.ResetTimer() for i := 0; i < b.N; i++ { for _, k := range obj { if k == str { break } } } } func BenchmarkMapGet(b *testing.B) { root, derr := NewParser(_TwitterJson).Parse() if derr != 0 { b.Fatalf("decode failed: %v", derr.Error()) } node := root.Get("statuses").Index(3).Get("entities").Get("hashtags").Index(0) node.Set("test1", NewNumber("1")) node.Set("test2", NewNumber("2")) node.Set("test3", NewNumber("3")) node.Set("test4", NewNumber("4")) node.Set("test5", NewNumber("5")) m, _ := node.Map() b.ResetTimer() for i := 0; i < b.N; i++ { _ = m["test3"] } } func BenchmarkNodeSet(b *testing.B) { root, derr := NewParser(_TwitterJson).Parse() if derr != 0 { b.Fatalf("decode failed: %v", derr.Error()) } node := root.Get("statuses").Index(3).Get("entities").Get("hashtags").Index(0) node.Set("test1", NewNumber("1")) node.Set("test2", NewNumber("2")) node.Set("test3", NewNumber("3")) node.Set("test4", NewNumber("4")) node.Set("test5", NewNumber("5")) n := NewNull() b.ResetTimer() for i := 0; i < b.N; i++ { node.Set("test3", n) } } func BenchmarkMapSet(b *testing.B) { root, derr := NewParser(_TwitterJson).Parse() if derr != 0 { b.Fatalf("decode failed: %v", derr.Error()) } node := root.Get("statuses").Index(3).Get("entities").Get("hashtags").Index(0) node.Set("test1", NewNumber("1")) node.Set("test2", NewNumber("2")) node.Set("test3", NewNumber("3")) node.Set("test4", NewNumber("4")) node.Set("test5", NewNumber("5")) m, _ := node.Map() n := NewNull() b.ResetTimer() for i := 0; i < b.N; i++ { m["test3"] = n } } func BenchmarkNodeSetByIndex(b *testing.B) { root, derr := 
NewParser(_TwitterJson).Parse() if derr != 0 { b.Fatalf("decode failed: %v", derr.Error()) } node := root.Get("statuses").Index(3).Get("entities").Get("hashtags") node.Add(NewNumber("1")) node.Add(NewNumber("2")) node.Add(NewNumber("3")) node.Add(NewNumber("4")) node.Add(NewNumber("5")) n := NewNull() b.ResetTimer() for i := 0; i < b.N; i++ { node.SetByIndex(2, n) } } func BenchmarkSliceSetByIndex(b *testing.B) { root, derr := NewParser(_TwitterJson).Parse() if derr != 0 { b.Fatalf("decode failed: %v", derr.Error()) } node := root.Get("statuses").Index(3).Get("entities").Get("hashtags") node.Add(NewNumber("1")) node.Add(NewNumber("2")) node.Add(NewNumber("3")) node.Add(NewNumber("4")) node.Add(NewNumber("5")) m, _ := node.Array() n := NewNull() b.ResetTimer() for i := 0; i < b.N; i++ { m[2] = n } } func BenchmarkStructSetByIndex(b *testing.B) { type T struct { A Node B Node C Node D Node E Node } var obj = new(T) n := NewNull() b.ResetTimer() for i := 0; i < b.N; i++ { obj.C = n } } func BenchmarkNodeUnset(b *testing.B) { root, derr := NewParser(_TwitterJson).Parse() if derr != 0 { b.Fatalf("decode failed: %v", derr.Error()) } node := root.Get("statuses").Index(3).Get("entities").Get("hashtags").Index(0) node.Set("test1", NewNumber("1")) node.Set("test2", NewNumber("2")) node.Set("test3", NewNumber("3")) node.Set("test4", NewNumber("4")) node.Set("test5", NewNumber("5")) b.ResetTimer() for i := 0; i < b.N; i++ { node.Unset("test3") } } func BenchmarkMapUnset(b *testing.B) { root, derr := NewParser(_TwitterJson).Parse() if derr != 0 { b.Fatalf("decode failed: %v", derr.Error()) } node := root.Get("statuses").Index(3).Get("entities").Get("hashtags").Index(0) node.Set("test1", NewNumber("1")) node.Set("test2", NewNumber("2")) node.Set("test3", NewNumber("3")) node.Set("test4", NewNumber("4")) node.Set("test5", NewNumber("5")) m, _ := node.Map() b.ResetTimer() for i := 0; i < b.N; i++ { delete(m, "test3") } } func BenchmarkNodUnsetByIndex(b *testing.B) { root, derr := 
NewParser(_TwitterJson).Parse() if derr != 0 { b.Fatalf("decode failed: %v", derr.Error()) } node := root.Get("statuses").Index(3).Get("entities").Get("hashtags") node.Add(NewNumber("1")) node.Add(NewNumber("2")) node.Add(NewNumber("3")) node.Add(NewNumber("4")) node.Add(NewNumber("5")) b.ResetTimer() for i := 0; i < b.N; i++ { node.UnsetByIndex(2) } } func BenchmarkSliceUnsetByIndex(b *testing.B) { root, derr := NewParser(_TwitterJson).Parse() if derr != 0 { b.Fatalf("decode failed: %v", derr.Error()) } node := root.Get("statuses").Index(3).Get("entities").Get("hashtags") node.Add(NewNumber("1")) node.Add(NewNumber("2")) node.Add(NewNumber("3")) node.Add(NewNumber("4")) node.Add(NewNumber("5")) m, _ := node.Array() b.ResetTimer() for i := 0; i < b.N; i++ { for i := 3; i < 5; i++ { m[i-1] = m[i] } } } func BenchmarkNodeAdd(b *testing.B) { n := NewObject([]Pair{NewPair("test", NewNumber("1"))}) b.ResetTimer() for i := 0; i < b.N; i++ { node := NewArray([]Node{}) node.Add(n) } } func BenchmarkSliceAdd(b *testing.B) { n := NewObject([]Pair{NewPair("test", NewNumber("1"))}) b.ResetTimer() for i := 0; i < b.N; i++ { node := []Node{} node = append(node, n) } } func BenchmarkMapAdd(b *testing.B) { n := NewObject([]Pair{NewPair("test", NewNumber("1"))}) b.ResetTimer() for i := 0; i < b.N; i++ { node := map[string]Node{} node["test3"] = n } } func TestNode_Move(t *testing.T) { var us = NewRaw(`["a","1","b","c"]`) if ex, e := us.UnsetByIndex(1); !ex || e != nil { t.Fail() } var us2 = NewRaw(`["a","b","c","1"]`) if ex, e := us2.UnsetByIndex(3); !ex || e != nil { t.Fail() } tests := []struct { name string in Node src int dst int out Node wantErr bool }{ { name: "over index", in: NewArray([]Node{}), src: 0, dst: 1, out: NewArray([]Node{}), wantErr: false, }, { name: "equal index", in: NewArray([]Node{NewBool(true)}), src: 0, dst: 0, out: NewArray([]Node{NewBool(true)}), wantErr: false, }, { name: "forward", in: NewArray([]Node{NewString("a"), NewString("b"), NewString("c")}), 
src: 0, dst: 2, out: NewArray([]Node{NewString("b"), NewString("c"), NewString("a")}), wantErr: false, }, { name: "backward", in: NewArray([]Node{NewString("a"), NewString("b"), NewString("c")}), src: 2, dst: 0, out: NewArray([]Node{NewString("c"), NewString("a"), NewString("b")}), wantErr: false, }, { name: "lazy", in: NewRaw(`["a","b","c"]`), src: 2, dst: 0, out: NewArray([]Node{NewString("c"), NewString("a"), NewString("b")}), wantErr: false, }, { name: "unset back", in: us, src: 2, dst: 0, out: NewArray([]Node{NewString("c"), NewString("a"), NewString("b")}), wantErr: false, }, { name: "unset forward", in: us2, src: 0, dst: 2, out: NewArray([]Node{NewString("b"), NewString("c"), NewString("a")}), wantErr: false, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { err := tt.in.Move(tt.dst, tt.src) require.NoError(t, err) ej, _ := tt.out.MarshalJSON() aj, _ := tt.in.MarshalJSON() require.Equal(t, string(ej), string(aj)) }) } } func TestNode_Pop(t *testing.T) { var us = NewRaw(`[1,2,3]`) if ex, e := us.UnsetByIndex(0); !ex || e != nil { t.Fail() } var us2 = NewRaw(`[1,2,3]`) if ex, e := us2.UnsetByIndex(2); !ex || e != nil { t.Fail() } tests := []struct { name string in Node out Node wantErr bool }{ { name: "empty", in: NewArray([]Node{}), out: NewArray([]Node{}), wantErr: false, }, { name: "one", in: NewArray([]Node{NewString("a")}), out: NewArray([]Node{}), wantErr: false, }, { name: "raw", in: NewRaw(`[1]`), out: NewArray([]Node{}), wantErr: false, }, { name: "unset head", in: us, out: NewRaw(`[2]`), wantErr: false, }, { name: "unset tail", in: us2, out: NewRaw(`[1]`), wantErr: false, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { if err := tt.in.Pop(); (err != nil) != tt.wantErr { t.Errorf("Node.Pop() error = %v, wantErr %v", err, tt.wantErr) } ej, _ := tt.out.MarshalJSON() aj, _ := tt.in.MarshalJSON() require.Equal(t, string(ej), string(aj)) }) } } ================================================ FILE: ast/parser.go 
================================================ /* * Copyright 2021 ByteDance Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package ast import ( "fmt" "sync" "sync/atomic" "github.com/bytedance/sonic/internal/native/types" "github.com/bytedance/sonic/internal/rt" "github.com/bytedance/sonic/internal/utils" "github.com/bytedance/sonic/unquote" ) const ( _DEFAULT_NODE_CAP int = 16 _APPEND_GROW_SHIFT = 1 ) const ( _ERR_NOT_FOUND types.ParsingError = 33 _ERR_UNSUPPORT_TYPE types.ParsingError = 34 ) var ( // ErrNotExist means both key and value doesn't exist ErrNotExist error = newError(_ERR_NOT_FOUND, "value not exists") // ErrUnsupportType means API on the node is unsupported ErrUnsupportType error = newError(_ERR_UNSUPPORT_TYPE, "unsupported type") ) type Parser struct { p int s string noLazy bool loadOnce bool skipValue bool dbuf *byte } /** Parser Private Methods **/ func (self *Parser) delim() types.ParsingError { n := len(self.s) p := self.lspace(self.p) /* check for EOF */ if p >= n { return types.ERR_EOF } /* check for the delimiter */ if self.s[p] != ':' { return types.ERR_INVALID_CHAR } /* update the read pointer */ self.p = p + 1 return 0 } func (self *Parser) object() types.ParsingError { n := len(self.s) p := self.lspace(self.p) /* check for EOF */ if p >= n { return types.ERR_EOF } /* check for the delimiter */ if self.s[p] != '{' { return types.ERR_INVALID_CHAR } /* update the read pointer */ self.p = p + 1 return 0 } func (self *Parser) array() 
types.ParsingError { n := len(self.s) p := self.lspace(self.p) /* check for EOF */ if p >= n { return types.ERR_EOF } /* check for the delimiter */ if self.s[p] != '[' { return types.ERR_INVALID_CHAR } /* update the read pointer */ self.p = p + 1 return 0 } func (self *Parser) lspace(sp int) int { ns := len(self.s) for ; sp < ns && utils.IsSpace(self.s[sp]); sp += 1 { } return sp } func (self *Parser) backward() { for ; self.p >= 0 && utils.IsSpace(self.s[self.p]); self.p -= 1 { } } func (self *Parser) decodeArray(ret *linkedNodes) (Node, types.ParsingError) { sp := self.p ns := len(self.s) /* check for EOF */ if self.p = self.lspace(sp); self.p >= ns { return Node{}, types.ERR_EOF } /* check for empty array */ if self.s[self.p] == ']' { self.p++ return Node{t: types.V_ARRAY}, 0 } /* allocate array space and parse every element */ for { var val Node var err types.ParsingError if self.skipValue { /* skip the value */ var start int if start, err = self.skipFast(); err != 0 { return Node{}, err } if self.p > ns { return Node{}, types.ERR_EOF } t := switchRawType(self.s[start]) if t == _V_NONE { return Node{}, types.ERR_INVALID_CHAR } val = newRawNode(self.s[start:self.p], t, false) } else { /* decode the value */ if val, err = self.Parse(); err != 0 { return Node{}, err } } /* add the value to result */ ret.Push(val) self.p = self.lspace(self.p) /* check for EOF */ if self.p >= ns { return Node{}, types.ERR_EOF } /* check for the next character */ switch self.s[self.p] { case ',': self.p++ case ']': self.p++ return newArray(ret), 0 default: // if val.isLazy() { // return newLazyArray(self, ret), 0 // } return Node{}, types.ERR_INVALID_CHAR } } } func (self *Parser) decodeObject(ret *linkedPairs) (Node, types.ParsingError) { sp := self.p ns := len(self.s) /* check for EOF */ if self.p = self.lspace(sp); self.p >= ns { return Node{}, types.ERR_EOF } /* check for empty object */ if self.s[self.p] == '}' { self.p++ return Node{t: types.V_OBJECT}, 0 } /* decode each pair 
*/ for { var val Node var njs types.JsonState var err types.ParsingError /* decode the key */ if njs = self.decodeValue(); njs.Vt != types.V_STRING { return Node{}, types.ERR_INVALID_CHAR } /* extract the key */ idx := self.p - 1 key := self.s[njs.Iv:idx] /* check for escape sequence */ if njs.Ep != -1 { if key, err = unquote.String(key); err != 0 { return Node{}, err } } /* expect a ':' delimiter */ if err = self.delim(); err != 0 { return Node{}, err } if self.skipValue { /* skip the value */ var start int if start, err = self.skipFast(); err != 0 { return Node{}, err } if self.p > ns { return Node{}, types.ERR_EOF } t := switchRawType(self.s[start]) if t == _V_NONE { return Node{}, types.ERR_INVALID_CHAR } val = newRawNode(self.s[start:self.p], t, false) } else { /* decode the value */ if val, err = self.Parse(); err != 0 { return Node{}, err } } /* add the value to result */ // FIXME: ret's address may change here, thus previous referred node in ret may be invalid !! ret.Push(NewPair(key, val)) self.p = self.lspace(self.p) /* check for EOF */ if self.p >= ns { return Node{}, types.ERR_EOF } /* check for the next character */ switch self.s[self.p] { case ',': self.p++ case '}': self.p++ return newObject(ret), 0 default: // if val.isLazy() { // return newLazyObject(self, ret), 0 // } return Node{}, types.ERR_INVALID_CHAR } } } func (self *Parser) decodeString(iv int64, ep int) (Node, types.ParsingError) { p := self.p - 1 s := self.s[iv:p] /* fast path: no escape sequence */ if ep == -1 { return NewString(s), 0 } /* unquote the string */ out, err := unquote.String(s) /* check for errors */ if err != 0 { return Node{}, err } else { return newBytes(rt.Str2Mem(out)), 0 } } /** Parser Interface **/ func (self *Parser) Pos() int { return self.p } // Parse returns a ast.Node representing the parser's JSON. 
// NOTICE: the specific parsing lazy dependens parser's option // It only parse first layer and first child for Object or Array be default func (self *Parser) Parse() (Node, types.ParsingError) { switch val := self.decodeValue(); val.Vt { case types.V_EOF: return Node{}, types.ERR_EOF case types.V_NULL: return nullNode, 0 case types.V_TRUE: return trueNode, 0 case types.V_FALSE: return falseNode, 0 case types.V_STRING: return self.decodeString(val.Iv, val.Ep) case types.V_ARRAY: s := self.p - 1 if p := skipBlank(self.s, self.p); p >= self.p && self.s[p] == ']' { self.p = p + 1 return Node{t: types.V_ARRAY}, 0 } if self.noLazy { if self.loadOnce { self.noLazy = false } return self.decodeArray(new(linkedNodes)) } // NOTICE: loadOnce always keep raw json for object or array if self.loadOnce { self.p = s s, e := self.skipFast() if e != 0 { return Node{}, e } return newRawNode(self.s[s:self.p], types.V_ARRAY, true), 0 } return newLazyArray(self), 0 case types.V_OBJECT: s := self.p - 1 if p := skipBlank(self.s, self.p); p >= self.p && self.s[p] == '}' { self.p = p + 1 return Node{t: types.V_OBJECT}, 0 } // NOTICE: loadOnce always keep raw json for object or array if self.noLazy { if self.loadOnce { self.noLazy = false } return self.decodeObject(new(linkedPairs)) } if self.loadOnce { self.p = s s, e := self.skipFast() if e != 0 { return Node{}, e } return newRawNode(self.s[s:self.p], types.V_OBJECT, true), 0 } return newLazyObject(self), 0 case types.V_DOUBLE: return NewNumber(self.s[val.Ep:self.p]), 0 case types.V_INTEGER: return NewNumber(self.s[val.Ep:self.p]), 0 default: return Node{}, types.ParsingError(-val.Vt) } } func (self *Parser) searchKey(match string) types.ParsingError { ns := len(self.s) if err := self.object(); err != 0 { return err } /* check for EOF */ if self.p = self.lspace(self.p); self.p >= ns { return types.ERR_EOF } /* check for empty object */ if self.s[self.p] == '}' { self.p++ return _ERR_NOT_FOUND } var njs types.JsonState var err 
types.ParsingError /* decode each pair */ for { /* decode the key */ if njs = self.decodeValue(); njs.Vt != types.V_STRING { return types.ERR_INVALID_CHAR } /* extract the key */ idx := self.p - 1 key := self.s[njs.Iv:idx] /* check for escape sequence */ if njs.Ep != -1 { if key, err = unquote.String(key); err != 0 { return err } } /* expect a ':' delimiter */ if err = self.delim(); err != 0 { return err } /* skip value */ if key != match { if _, err = self.skipFast(); err != 0 { return err } } else { return 0 } /* check for EOF */ self.p = self.lspace(self.p) if self.p >= ns { return types.ERR_EOF } /* check for the next character */ switch self.s[self.p] { case ',': self.p++ case '}': self.p++ return _ERR_NOT_FOUND default: return types.ERR_INVALID_CHAR } } } func (self *Parser) searchIndex(idx int) types.ParsingError { ns := len(self.s) if err := self.array(); err != 0 { return err } /* check for EOF */ if self.p = self.lspace(self.p); self.p >= ns { return types.ERR_EOF } /* check for empty array */ if self.s[self.p] == ']' { self.p++ return _ERR_NOT_FOUND } var err types.ParsingError /* allocate array space and parse every element */ for i := 0; i < idx; i++ { /* decode the value */ if _, err = self.skipFast(); err != 0 { return err } /* check for EOF */ self.p = self.lspace(self.p) if self.p >= ns { return types.ERR_EOF } /* check for the next character */ switch self.s[self.p] { case ',': self.p++ case ']': self.p++ return _ERR_NOT_FOUND default: return types.ERR_INVALID_CHAR } } return 0 } func (self *Node) skipNextNode() *Node { if !self.isLazy() { return nil } parser, stack := self.getParserAndArrayStack() ret := &stack.v sp := parser.p ns := len(parser.s) /* check for EOF */ if parser.p = parser.lspace(sp); parser.p >= ns { return newSyntaxError(parser.syntaxError(types.ERR_EOF)) } /* check for empty array */ if parser.s[parser.p] == ']' { parser.p++ self.setArray(ret) return nil } var val Node /* skip the value */ if start, err := parser.skipFast(); err 
!= 0 { return newSyntaxError(parser.syntaxError(err)) } else { t := switchRawType(parser.s[start]) if t == _V_NONE { return newSyntaxError(parser.syntaxError(types.ERR_INVALID_CHAR)) } val = newRawNode(parser.s[start:parser.p], t, false) } /* add the value to result */ ret.Push(val) self.l++ parser.p = parser.lspace(parser.p) /* check for EOF */ if parser.p >= ns { return newSyntaxError(parser.syntaxError(types.ERR_EOF)) } /* check for the next character */ switch parser.s[parser.p] { case ',': parser.p++ return ret.At(ret.Len() - 1) case ']': parser.p++ self.setArray(ret) return ret.At(ret.Len() - 1) default: return newSyntaxError(parser.syntaxError(types.ERR_INVALID_CHAR)) } } func (self *Node) skipNextPair() *Pair { if !self.isLazy() { return nil } parser, stack := self.getParserAndObjectStack() ret := &stack.v sp := parser.p ns := len(parser.s) /* check for EOF */ if parser.p = parser.lspace(sp); parser.p >= ns { return newErrorPair(parser.syntaxError(types.ERR_EOF)) } /* check for empty object */ if parser.s[parser.p] == '}' { parser.p++ self.setObject(ret) return nil } /* decode one pair */ var val Node var njs types.JsonState var err types.ParsingError /* decode the key */ if njs = parser.decodeValue(); njs.Vt != types.V_STRING { return newErrorPair(parser.syntaxError(types.ERR_INVALID_CHAR)) } /* extract the key */ idx := parser.p - 1 key := parser.s[njs.Iv:idx] /* check for escape sequence */ if njs.Ep != -1 { if key, err = unquote.String(key); err != 0 { return newErrorPair(parser.syntaxError(err)) } } /* expect a ':' delimiter */ if err = parser.delim(); err != 0 { return newErrorPair(parser.syntaxError(err)) } /* skip the value */ if start, err := parser.skipFast(); err != 0 { return newErrorPair(parser.syntaxError(err)) } else { t := switchRawType(parser.s[start]) if t == _V_NONE { return newErrorPair(parser.syntaxError(types.ERR_INVALID_CHAR)) } val = newRawNode(parser.s[start:parser.p], t, false) } /* add the value to result */ ret.Push(NewPair(key, 
val)) self.l++ parser.p = parser.lspace(parser.p) /* check for EOF */ if parser.p >= ns { return newErrorPair(parser.syntaxError(types.ERR_EOF)) } /* check for the next character */ switch parser.s[parser.p] { case ',': parser.p++ return ret.At(ret.Len() - 1) case '}': parser.p++ self.setObject(ret) return ret.At(ret.Len() - 1) default: return newErrorPair(parser.syntaxError(types.ERR_INVALID_CHAR)) } } /** Parser Factory **/ // Loads parse all json into interface{} func Loads(src string) (int, interface{}, error) { ps := &Parser{s: src} np, err := ps.Parse() /* check for errors */ if err != 0 { return 0, nil, ps.ExportError(err) } else { x, err := np.Interface() if err != nil { return 0, nil, err } return ps.Pos(), x, nil } } // LoadsUseNumber parse all json into interface{}, with numeric nodes cast to json.Number func LoadsUseNumber(src string) (int, interface{}, error) { ps := &Parser{s: src} np, err := ps.Parse() /* check for errors */ if err != 0 { return 0, nil, err } else { x, err := np.InterfaceUseNumber() if err != nil { return 0, nil, err } return ps.Pos(), x, nil } } // NewParser returns pointer of new allocated parser func NewParser(src string) *Parser { return &Parser{s: src} } // NewParser returns new allocated parser func NewParserObj(src string) Parser { return Parser{s: src} } // decodeNumber controls if parser decodes the number values instead of skip them // // WARN: once you set decodeNumber(true), please set decodeNumber(false) before you drop the parser // otherwise the memory CANNOT be reused func (self *Parser) decodeNumber(decode bool) { if !decode && self.dbuf != nil { types.FreeDbuf(self.dbuf) self.dbuf = nil return } if decode && self.dbuf == nil { self.dbuf = types.NewDbuf() } } // ExportError converts types.ParsingError to std Error func (self *Parser) ExportError(err types.ParsingError) error { if err == _ERR_NOT_FOUND { return ErrNotExist } return fmt.Errorf("%q", SyntaxError{ Pos: self.p, Src: self.s, Code: err, }.Description()) } 
func backward(src string, i int) int { for ; i >= 0 && utils.IsSpace(src[i]); i-- { } return i } func newRawNode(str string, typ types.ValueType, lock bool) Node { ret := Node{ t: typ | _V_RAW, p: rt.StrPtr(str), l: uint(len(str)), } if lock { ret.m = new(sync.RWMutex) } return ret } var typeJumpTable = [256]types.ValueType{ '"': types.V_STRING, '-': _V_NUMBER, '0': _V_NUMBER, '1': _V_NUMBER, '2': _V_NUMBER, '3': _V_NUMBER, '4': _V_NUMBER, '5': _V_NUMBER, '6': _V_NUMBER, '7': _V_NUMBER, '8': _V_NUMBER, '9': _V_NUMBER, '[': types.V_ARRAY, 'f': types.V_FALSE, 'n': types.V_NULL, 't': types.V_TRUE, '{': types.V_OBJECT, } func switchRawType(c byte) types.ValueType { return typeJumpTable[c] } func (self *Node) loadt() types.ValueType { return (types.ValueType)(atomic.LoadInt64(&self.t)) } func (self *Node) lock() bool { if m := self.m; m != nil { m.Lock() return true } return false } func (self *Node) unlock() { if m := self.m; m != nil { m.Unlock() } } func (self *Node) rlock() bool { if m := self.m; m != nil { m.RLock() return true } return false } func (self *Node) runlock() { if m := self.m; m != nil { m.RUnlock() } } ================================================ FILE: ast/parser_test.go ================================================ /* * Copyright 2021 ByteDance Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ package ast import ( "encoding/json" "os" "runtime" "runtime/debug" "sync" "testing" "time" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) var ( debugSyncGC = os.Getenv("SONIC_SYNC_GC") != "" debugAsyncGC = os.Getenv("SONIC_NO_ASYNC_GC") == "" ) func TestMain(m *testing.M) { go func() { if !debugAsyncGC { return } println("Begin GC looping...") for { runtime.GC() debug.FreeOSMemory() } println("stop GC looping!") }() time.Sleep(time.Millisecond) m.Run() } func TestGC_Parse(t *testing.T) { if debugSyncGC { return } _, _, err := Loads(_TwitterJson) if err != nil { t.Fatal(err) } wg := &sync.WaitGroup{} N := 1000 for i := 0; i < N; i++ { wg.Add(1) go func(wg *sync.WaitGroup) { defer wg.Done() _, _, err := Loads(_TwitterJson) if err != nil { t.Error(err) return } runtime.GC() }(wg) } wg.Wait() } func runDecoderTest(t *testing.T, src string, expect interface{}) { vv, err := NewParser(src).Parse() if err != 0 { panic(err) } x, _ := vv.Interface() assert.Equal(t, expect, x) } func runDecoderTestUseNumber(t *testing.T, src string, expect interface{}) { vv, err := NewParser(src).Parse() if err != 0 { panic(err) } vvv, _ := vv.InterfaceUseNumber() switch vvv.(type) { case json.Number: assert.Equal(t, expect, n2f64(vvv.(json.Number))) case []interface{}: x := vvv.([]interface{}) for i, e := range x { if ev, ok := e.(json.Number); ok { x[i] = n2f64(ev) } } assert.Equal(t, expect, x) case map[string]interface{}: x := vvv.(map[string]interface{}) for k, v := range x { if ev, ok := v.(json.Number); ok { x[k] = n2f64(ev) } } assert.Equal(t, expect, x) } } func n2f64(i json.Number) float64 { x, err := i.Float64() if err != nil { panic(err) } return x } func TestParser_Basic(t *testing.T) { runDecoderTest(t, `null`, nil) runDecoderTest(t, `true`, true) runDecoderTest(t, `false`, false) runDecoderTest(t, `"hello, world \\ \/ \b \f \n \r \t \u666f 测试中文"`, "hello, world \\ / \b \f \n \r \t \u666f 测试中文") runDecoderTest(t, `"\ud83d\ude00"`, "😀") 
runDecoderTest(t, `0`, float64(0)) runDecoderTest(t, `-0`, float64(0)) runDecoderTest(t, `123456`, float64(123456)) runDecoderTest(t, `-12345`, float64(-12345)) runDecoderTest(t, `0.2`, 0.2) runDecoderTest(t, `1.2`, 1.2) runDecoderTest(t, `-0.2`, -0.2) runDecoderTest(t, `-1.2`, -1.2) runDecoderTest(t, `0e12`, 0e12) runDecoderTest(t, `0e+12`, 0e+12) runDecoderTest(t, `0e-12`, 0e-12) runDecoderTest(t, `-0e12`, -0e12) runDecoderTest(t, `-0e+12`, -0e+12) runDecoderTest(t, `-0e-12`, -0e-12) runDecoderTest(t, `2e12`, 2e12) runDecoderTest(t, `2E12`, 2e12) runDecoderTest(t, `2e+12`, 2e+12) runDecoderTest(t, `2e-12`, 2e-12) runDecoderTest(t, `-2e12`, -2e12) runDecoderTest(t, `-2e+12`, -2e+12) runDecoderTest(t, `-2e-12`, -2e-12) runDecoderTest(t, `0.2e12`, 0.2e12) runDecoderTest(t, `0.2e+12`, 0.2e+12) runDecoderTest(t, `0.2e-12`, 0.2e-12) runDecoderTest(t, `-0.2e12`, -0.2e12) runDecoderTest(t, `-0.2e+12`, -0.2e+12) runDecoderTest(t, `-0.2e-12`, -0.2e-12) runDecoderTest(t, `1.2e12`, 1.2e12) runDecoderTest(t, `1.2e+12`, 1.2e+12) runDecoderTest(t, `1.2e-12`, 1.2e-12) runDecoderTest(t, `-1.2e12`, -1.2e12) runDecoderTest(t, `-1.2e+12`, -1.2e+12) runDecoderTest(t, `-1.2e-12`, -1.2e-12) runDecoderTest(t, `-1.2E-12`, -1.2e-12) runDecoderTest(t, `[]`, []interface{}{}) runDecoderTest(t, `{}`, map[string]interface{}{}) runDecoderTest(t, `["asd", "123", true, false, null, 2.4, 1.2e15]`, []interface{}{"asd", "123", true, false, nil, 2.4, 1.2e15}) runDecoderTest(t, `{"asdf": "qwer", "zxcv": true}`, map[string]interface{}{"asdf": "qwer", "zxcv": true}) runDecoderTest(t, `{"a": "123", "b": true, "c": false, "d": null, "e": 2.4, "f": 1.2e15, "g": 1}`, map[string]interface{}{"a": "123", "b": true, "c": false, "d": nil, "e": 2.4, "f": 1.2e15, "g": float64(1)}) runDecoderTestUseNumber(t, `null`, nil) runDecoderTestUseNumber(t, `true`, true) runDecoderTestUseNumber(t, `false`, false) runDecoderTestUseNumber(t, `"hello, world \\ \/ \b \f \n \r \t \u666f 测试中文"`, "hello, world \\ / \b \f \n \r \t 
\u666f 测试中文") runDecoderTestUseNumber(t, `"\ud83d\ude00"`, "😀") runDecoderTestUseNumber(t, `0`, float64(0)) runDecoderTestUseNumber(t, `-0`, float64(0)) runDecoderTestUseNumber(t, `123456`, float64(123456)) runDecoderTestUseNumber(t, `-12345`, float64(-12345)) runDecoderTestUseNumber(t, `0.2`, 0.2) runDecoderTestUseNumber(t, `1.2`, 1.2) runDecoderTestUseNumber(t, `-0.2`, -0.2) runDecoderTestUseNumber(t, `-1.2`, -1.2) runDecoderTestUseNumber(t, `0e12`, 0e12) runDecoderTestUseNumber(t, `0e+12`, 0e+12) runDecoderTestUseNumber(t, `0e-12`, 0e-12) runDecoderTestUseNumber(t, `-0e12`, -0e12) runDecoderTestUseNumber(t, `-0e+12`, -0e+12) runDecoderTestUseNumber(t, `-0e-12`, -0e-12) runDecoderTestUseNumber(t, `2e12`, 2e12) runDecoderTestUseNumber(t, `2E12`, 2e12) runDecoderTestUseNumber(t, `2e+12`, 2e+12) runDecoderTestUseNumber(t, `2e-12`, 2e-12) runDecoderTestUseNumber(t, `-2e12`, -2e12) runDecoderTestUseNumber(t, `-2e+12`, -2e+12) runDecoderTestUseNumber(t, `-2e-12`, -2e-12) runDecoderTestUseNumber(t, `0.2e12`, 0.2e12) runDecoderTestUseNumber(t, `0.2e+12`, 0.2e+12) runDecoderTestUseNumber(t, `0.2e-12`, 0.2e-12) runDecoderTestUseNumber(t, `-0.2e12`, -0.2e12) runDecoderTestUseNumber(t, `-0.2e+12`, -0.2e+12) runDecoderTestUseNumber(t, `-0.2e-12`, -0.2e-12) runDecoderTestUseNumber(t, `1.2e12`, 1.2e12) runDecoderTestUseNumber(t, `1.2e+12`, 1.2e+12) runDecoderTestUseNumber(t, `1.2e-12`, 1.2e-12) runDecoderTestUseNumber(t, `-1.2e12`, -1.2e12) runDecoderTestUseNumber(t, `-1.2e+12`, -1.2e+12) runDecoderTestUseNumber(t, `-1.2e-12`, -1.2e-12) runDecoderTestUseNumber(t, `-1.2E-12`, -1.2e-12) runDecoderTestUseNumber(t, `["asd", "123", true, false, null, 2.4, 1.2e15, 1]`, []interface{}{"asd", "123", true, false, nil, 2.4, 1.2e15, float64(1)}) runDecoderTestUseNumber(t, `{"a": "123", "b": true, "c": false, "d": null, "e": 2.4, "f": 1.2e15, "g": 1}`, map[string]interface{}{"a": "123", "b": true, "c": false, "d": nil, "e": 2.4, "f": 1.2e15, "g": float64(1)}) } func TestLoads(t *testing.T) 
{ _, i, e := Loads(`{"a": "123", "b": true, "c": false, "d": null, "e": 2.4, "f": 1.2e15, "g": 1}`) if e != nil { t.Fatal(e) } assert.Equal(t, map[string]interface{}{"a": "123", "b": true, "c": false, "d": nil, "e": 2.4, "f": 1.2e15, "g": float64(1)}, i) _, i, e = LoadsUseNumber(`{"a": "123", "b": true, "c": false, "d": null, "e": 2.4, "f": 1.2e15, "g": 1}`) if e != nil { t.Fatal(e) } assert.Equal(t, map[string]interface{}{"a": "123", "b": true, "c": false, "d": nil, "e": json.Number("2.4"), "f": json.Number("1.2e15"), "g": json.Number("1")}, i) } func TestParsehNotExist(t *testing.T) { s, err := NewParser(` { "xx" : [ 0, "" ] ,"yy" :{ "2": "" } } `).Parse() if err != 0 { t.Fatal(err) } node := s.GetByPath("xx", 2) if node.Exists() { t.Fatalf("node: %v", node) } node = s.GetByPath("xx", 1) if !node.Exists() { t.Fatalf("node: %v", nil) } node = s.GetByPath("yy", "3") if node.Exists() { t.Fatalf("node: %v", node) } node = s.GetByPath("yy", "2") if !node.Exists() { t.Fatalf("node: %v", nil) } } func BenchmarkParser_Sonic(b *testing.B) { r, err := NewParser(_TwitterJson).Parse() if err != 0 { b.Fatal(err) } if err := r.LoadAll(); err != nil { b.Fatal(err) } b.SetBytes(int64(len(_TwitterJson))) b.ResetTimer() for i := 0; i < b.N; i++ { r, _ = NewParser(_TwitterJson).Parse() _ = r.LoadAll() } } func BenchmarkParser_Parallel_Sonic(b *testing.B) { r, _ := NewParser(_TwitterJson).Parse() if err := r.LoadAll(); err != nil { b.Fatal(err) } b.SetBytes(int64(len(_TwitterJson))) b.ResetTimer() b.RunParallel(func(pb *testing.PB) { for pb.Next() { r, _ := NewParser(_TwitterJson).Parse() _ = r.LoadAll() } }) } func BenchmarkParseEmpty_Sonic(b *testing.B) { var emptySample = `{"a":[],"b":{},"c":[{},{},{},{}],"d":{"e":[],"f":[],"g":[],"h":[]}}` p := NewParserObj(emptySample) ast, _ := p.Parse() require.NoError(b, ast.LoadAll()) b.SetBytes(int64(len(_TwitterJson))) b.ResetTimer() for i := 0; i < b.N; i++ { p := NewParserObj(emptySample) ast, _ := p.Parse() _ = ast.LoadAll() } } func 
BenchmarkParseOne_Sonic(b *testing.B) { ast, _ := NewParser(_TwitterJson).Parse() node, _ := ast.Get("statuses").Index(2).Get("id").Int64() if node != 249289491129438208 { b.Fail() } b.SetBytes(int64(len(_TwitterJson))) b.ResetTimer() for i := 0; i < b.N; i++ { ast, _ := NewParser(_TwitterJson).Parse() _, _ = ast.Get("statuses").Index(2).Get("id").Int64() } } func BenchmarkParseOne_Parallel_Sonic(b *testing.B) { ast, _ := NewParser(_TwitterJson).Parse() node, _ := ast.Get("statuses").Index(2).Get("id").Int64() if node != 249289491129438208 { b.Fail() } b.SetBytes(int64(len(_TwitterJson))) b.ResetTimer() b.RunParallel(func(pb *testing.PB) { for pb.Next() { ast, _ := NewParser(_TwitterJson).Parse() _, _ = ast.Get("statuses").Index(2).Get("id").Int64() } }) } func BenchmarkParseNoLazy_Sonic(b *testing.B) { b.SetBytes(int64(len(_TwitterJson))) b.ResetTimer() ast := NewRawConcurrentRead(_TwitterJson) for i := 0; i < b.N; i++ { node := ast.GetByPath("statuses", 3) if node.Check() != nil { b.Fail() } } } func BenchmarkParseNoLazy_Parallel_Sonic(b *testing.B) { b.SetBytes(int64(len(_TwitterJson))) b.ResetTimer() ast := NewRawConcurrentRead(_TwitterJson) b.RunParallel(func(p *testing.PB) { for p.Next() { node := ast.GetByPath("statuses", 3) if node.Check() != nil { b.Fail() } } }) } func BenchmarkNodeRaw_Parallel_Sonic(b *testing.B) { b.SetBytes(int64(len(_TwitterJson))) b.ResetTimer() ast := NewRawConcurrentRead(_TwitterJson) b.RunParallel(func(p *testing.PB) { for p.Next() { node := ast.GetByPath("statuses", 3) if _, e := node.Raw(); e != nil { b.Fatal(e) } } }) } func BenchmarkParseSeven_Sonic(b *testing.B) { b.SetBytes(int64(len(_TwitterJson))) b.ResetTimer() for i := 0; i < b.N; i++ { ast, _ := NewParser(_TwitterJson).Parse() node := ast.GetByPath("statuses", 3, "id") node = ast.GetByPath("statuses", 3, "user", "entities", "description") node = ast.GetByPath("statuses", 3, "user", "entities", "url", "urls") node = ast.GetByPath("statuses", 3, "user", "entities", "url") 
node = ast.GetByPath("statuses", 3, "user", "created_at") node = ast.GetByPath("statuses", 3, "user", "name") node = ast.GetByPath("statuses", 3, "text") if node.Check() != nil { b.Fail() } } } func BenchmarkParseSeven_Parallel_Sonic(b *testing.B) { b.SetBytes(int64(len(_TwitterJson))) b.ResetTimer() b.RunParallel(func(pb *testing.PB) { for pb.Next() { ast, _ := NewParser(_TwitterJson).Parse() node := ast.GetByPath("statuses", 3, "id") node = ast.GetByPath("statuses", 3, "user", "entities", "description") node = ast.GetByPath("statuses", 3, "user", "entities", "url", "urls") node = ast.GetByPath("statuses", 3, "user", "entities", "url") node = ast.GetByPath("statuses", 3, "user", "created_at") node = ast.GetByPath("statuses", 3, "user", "name") node = ast.GetByPath("statuses", 3, "text") if node.Check() != nil { b.Fail() } } }) } ================================================ FILE: ast/search.go ================================================ /* * Copyright 2021 ByteDance Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ package ast import ( "github.com/bytedance/sonic/internal/native/types" "github.com/bytedance/sonic/internal/rt" ) // SearchOptions controls Searcher's behavior type SearchOptions struct { // ValidateJSON indicates the searcher to validate the entire JSON ValidateJSON bool // CopyReturn indicates the searcher to copy the result JSON instead of refer from the input // This can help to reduce memory usage if you cache the results CopyReturn bool // ConcurrentRead indicates the searcher to return a concurrently-READ-safe node, // including: GetByPath/Get/Index/GetOrIndex/Int64/Bool/Float64/String/Number/Interface/Array/Map/Raw/MarshalJSON ConcurrentRead bool } type Searcher struct { parser Parser SearchOptions } func NewSearcher(str string) *Searcher { return &Searcher{ parser: Parser{ s: str, noLazy: false, }, SearchOptions: SearchOptions{ ValidateJSON: true, }, } } // GetByPathCopy search in depth from top json and returns a **Copied** json node at the path location func (self *Searcher) GetByPathCopy(path ...interface{}) (Node, error) { self.CopyReturn = true return self.getByPath(path...) } // GetByPathNoCopy search in depth from top json and returns a **Referenced** json node at the path location // // WARN: this search directly refer partial json from top json, which has faster speed, // may consumes more memory. func (self *Searcher) GetByPath(path ...interface{}) (Node, error) { return self.getByPath(path...) } func (self *Searcher) getByPath(path ...interface{}) (Node, error) { var err types.ParsingError var start int self.parser.p = 0 start, err = self.parser.getByPath(self.ValidateJSON, path...) 
if err != 0 { // for compatibility with old version if err == types.ERR_NOT_FOUND { return Node{}, ErrNotExist } if err == types.ERR_UNSUPPORT_TYPE { panic("path must be either int(>=0) or string") } return Node{}, self.parser.syntaxError(err) } t := switchRawType(self.parser.s[start]) if t == _V_NONE { return Node{}, self.parser.ExportError(err) } // copy string to reducing memory usage var raw string if self.CopyReturn { raw = rt.Mem2Str([]byte(self.parser.s[start:self.parser.p])) } else { raw = self.parser.s[start:self.parser.p] } return newRawNode(raw, t, self.ConcurrentRead), nil } // GetByPath searches a path and returns relaction and types of target func _GetByPath(src string, path ...interface{}) (start int, end int, typ int, err error) { p := NewParserObj(src) s, e := p.getByPath(false, path...) if e != 0 { // for compatibility with old version if e == types.ERR_NOT_FOUND { return -1, -1, 0, ErrNotExist } if e == types.ERR_UNSUPPORT_TYPE { panic("path must be either int(>=0) or string") } return -1, -1, 0, p.syntaxError(e) } t := switchRawType(p.s[s]) if t == _V_NONE { return -1, -1, 0, ErrNotExist } if t == _V_NUMBER { p.p = 1 + backward(p.s, p.p-1) } return s, p.p, int(t), nil } // ValidSyntax check if a json has a valid JSON syntax, // while not validate UTF-8 charset func _ValidSyntax(json string) bool { p := NewParserObj(json) _, e := p.skip() if e != 0 { return false } if skipBlank(p.s, p.p) != -int(types.ERR_EOF) { return false } return true } // SkipFast skip a json value in fast-skip algs, // while not strictly validate JSON syntax and UTF-8 charset. 
func _SkipFast(src string, i int) (int, int, error) { p := NewParserObj(src) p.p = i s, e := p.skipFast() if e != 0 { return -1, -1, p.ExportError(e) } t := switchRawType(p.s[s]) if t == _V_NONE { return -1, -1, ErrNotExist } if t == _V_NUMBER { p.p = 1 + backward(p.s, p.p-1) } return s, p.p, nil } ================================================ FILE: ast/search_test.go ================================================ /* * Copyright 2021 ByteDance Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package ast import ( "encoding/json" "math" "runtime" "strconv" "strings" "sync" "testing" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) func TestGC_Search(t *testing.T) { if debugSyncGC { return } _, err := NewSearcher(_TwitterJson).GetByPath("statuses", 0, "id") if err != nil { t.Fatal(err) } wg := &sync.WaitGroup{} // A limitation of the race detecting is 8128. 
// See https://github.com/golang/go/issues/43898 N := 5000 for i := 0; i < N; i++ { wg.Add(1) go func(wg *sync.WaitGroup) { defer wg.Done() _, err := NewSearcher(_TwitterJson).GetByPath("statuses", 0, "id") if err != nil { t.Error(err) return } runtime.GC() }(wg) } wg.Wait() } func TestNodeRace(t *testing.T) { src := `{"1":1,"2": [ 1 , 1 , { "3" : 1 , "4" : [] } ] }` s := NewSearcher(src) s.ConcurrentRead = true node, _ := s.GetByPath() cases := []struct { path []interface{} exp []string scalar bool lv int }{ {[]interface{}{"1"}, []string{`1`}, true, 0}, {[]interface{}{"2"}, []string{`[ 1 , 1 , { "3" : 1 , "4" : [] } ]`, `[1,1,{ "3" : 1 , "4" : [] }]`, `[1,1,{"3":1,"4":[]}]`}, false, 3}, {[]interface{}{"2", 1}, []string{`1`}, true, 1}, {[]interface{}{"2", 2}, []string{`{ "3" : 1 , "4" : [] }`, `{"3":1,"4":[]}`}, false, 2}, {[]interface{}{"2", 2, "3"}, []string{`1`}, true, 0}, {[]interface{}{"2", 2, "4"}, []string{`[]`}, false, 0}, } wg := sync.WaitGroup{} start := sync.RWMutex{} start.Lock() P := 100 for i := range cases { // println(i) c := cases[i] for j := 0; j < P; j++ { wg.Add(1) go func() { defer wg.Done() start.RLock() n := node.GetByPath(c.path...) 
_ = n.TypeSafe() _ = n.isAny() v, err := n.Raw() iv, _ := n.Int64() lv, _ := n.Len() _, e := n.Interface() e2 := n.SortKeys(false) require.NoError(t, err) require.NoError(t, e) require.NoError(t, e2) if c.scalar { require.Equal(t, int64(1), iv) } else { require.Equal(t, c.lv, lv) } eq := false for _, exp := range c.exp { if exp == v { eq = true break } } require.True(t, eq) }() } } start.Unlock() wg.Wait() } func TestExportErrorInvalidChar(t *testing.T) { data := `{"a":]` p := NewSearcher(data) _, err := p.GetByPath("a") if err == nil { t.Fatal() } if strings.Index(err.Error(), `"Syntax error at `) != 0 { t.Fatal(err) } data = `:"b"]` p = NewSearcher(data) _, err = p.GetByPath("a") if err == nil { t.Fatal() } if err.Error() != `"Syntax error at index 0: invalid char\n\n\t:\"b\"]\n\t^....\n"` { t.Fatal(err) } data = `{:"b"]` p = NewSearcher(data) _, err = p.GetByPath("a") if err == nil { t.Fatal() } if err.Error() != `"Syntax error at index 1: invalid char\n\n\t{:\"b\"]\n\t.^....\n"` { t.Fatal(err) } data = `{` p = NewSearcher(data) _, err = p.GetByPath("he") if err == nil { t.Fatal() } if err == ErrNotExist { t.Fatal(err) } data = `[` p = NewSearcher(data) _, err = p.GetByPath(0) if err == nil { t.Fatal() } if err == ErrNotExist { t.Fatal(err) } } type testExportError struct { data string path []interface{} err error } func TestExportErrNotExist(t *testing.T) { tests := []testExportError{ // object {`{}`, []interface{}{"b"}, ErrNotExist}, {` { } `, []interface{}{"b"}, ErrNotExist}, {`{"a":null}`, []interface{}{"b"}, ErrNotExist}, // This should be invalid char errors. 
// {`{"a":null}`, []interface{}{"a", "b"}, ErrNotExist}, // {`{"a":null}`, []interface{}{"a", 0}, ErrNotExist}, // {`{"a":null}`, []interface{}{"a", "b", 0}, ErrNotExist}, {`{"":{"b":123}}`, []interface{}{"b"}, ErrNotExist}, {`{"":{"b":123}}`, []interface{}{"", ""}, ErrNotExist}, {`{"a":{"b":123}}`, []interface{}{"b"}, ErrNotExist}, {`{"a":{"b":123}}`, []interface{}{"a", "c"}, ErrNotExist}, {`{"a":{"c": null, "b":{}}}`, []interface{}{"a", "b", "c"}, ErrNotExist}, {`{"a":{"b":123}}`, []interface{}{"b", "b"}, ErrNotExist}, {`{"\"\\":{"b":123}}`, []interface{}{"\"", "b"}, ErrNotExist}, {`{"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\"\\":{"b":123}}`, []interface{}{"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\"", "b"}, ErrNotExist}, // array {`[]`, []interface{}{0}, ErrNotExist}, {`[]`, []interface{}{1}, ErrNotExist}, {` [ ] `, []interface{}{0}, ErrNotExist}, {`[null]`, []interface{}{1}, ErrNotExist}, {`[null, ["null", 123]]`, []interface{}{2}, ErrNotExist}, {`[null, true , false, 14, 2.35, -46, "hello7", "\"8"]`, []interface{}{8}, ErrNotExist}, {`[{}]`, []interface{}{1}, ErrNotExist}, {`[[]]`, []interface{}{1}, ErrNotExist}, {`[[],[{},{}, []],{}]`, []interface{}{3}, ErrNotExist}, } for _, test := range tests { f := func(t *testing.T) { p := NewSearcher(test.data) node, err := p.GetByPath(test.path...) 
if err != test.err || node.Exists() { t.Fatal(err) } } t.Run(test.data, f) } } func TestSearcher_GetByPath(t *testing.T) { s := NewSearcher(` { "xx" : [] ,"yy" :{ }, "test" : [ true , 0.1 , "abc", ["h"], {"a":"bc"} ] } `) node, e := s.GetByPath("test", 0) a, _ := node.Bool() if e != nil || a != true { t.Fatalf("node: %v, err: %v", node, e) } node, e = s.GetByPath("test", 1) b, _ := node.Float64() if e != nil || b != 0.1 { t.Fatalf("node: %v, err: %v", node, e) } node, e = s.GetByPath("test", 2) c, _ := node.String() if e != nil || c != "abc" { t.Fatalf("node: %v, err: %v", node, e) } node, e = s.GetByPath("test", 3) arr, _ := node.Array() if e != nil || arr[0] != "h" { t.Fatalf("node: %v, err: %v", node, e) } node, e = s.GetByPath("test", 4, "a") d, _ := node.String() if e != nil || d != "bc" { t.Fatalf("node: %v, err: %v", node, e) } } func TestSearch_LoadRawNumber(t *testing.T) { s := NewSearcher(`[ 1, 2.34 ]`) node, err := s.getByPath(1) require.NoError(t, err) raw, err := node.Raw() require.NoError(t, err) require.Equal(t, raw, "2.34") node, err = s.getByPath() require.NoError(t, err) elem := node.Index(1) // FIXME: raw is `2.34` in aarch64 // raw, err = elem.Raw() // require.NoError(t, err) // require.Equal(t, raw, "2.34\n\t") num, err := elem.Number() require.NoError(t, err) require.Equal(t, num, json.Number("2.34")) } type testGetByPath struct { json string path []interface{} value interface{} ok bool } func TestSearcher_GetByPathSingle(t *testing.T) { type Path = []interface{} const Ok = true const Error = false tests := []testGetByPath{ {`true`, Path{}, true, Ok}, {`false`, Path{}, false, Ok}, {`null`, Path{}, nil, Ok}, {`12345`, Path{}, 12345.0, Ok}, {`12345.6789`, Path{}, 12345.6789, Ok}, {`"abc"`, Path{}, "abc", Ok}, {`"a\"\\bc"`, Path{}, "a\"\\bc", Ok}, {`{"a":1}`, Path{"a"}, 1.0, Ok}, {`{"":1}`, Path{""}, 1.0, Ok}, {`{"":{"":1}}`, Path{"", ""}, 1.0, Ok}, {`[1,2,3]`, Path{0}, 1.0, Ok}, {`[1,2,3]`, Path{1}, 2.0, Ok}, {`[1,2,3]`, Path{2}, 3.0, Ok}, 
{`tru`, Path{}, nil, Error}, {`fal`, Path{}, nil, Error}, {`nul`, Path{}, nil, Error}, {`{"a":1`, Path{}, nil, Error}, {`x12345.6789`, Path{}, nil, Error}, {`"abc`, Path{}, nil, Error}, {`"a\"\\bc`, Path{}, nil, Error}, {`"a\"\`, Path{}, nil, Error}, {`{"a":`, Path{"a"}, nil, Error}, {`[1,2,3]`, Path{4}, nil, Error}, {`[1,2,3]`, Path{"a"}, nil, Error}, } for _, test := range tests { t.Run(test.json, func(t *testing.T) { s := NewSearcher(test.json) node, err1 := s.GetByPath(test.path...) assert.Equal(t, test.ok, err1 == nil) value, err2 := node.Interface() assert.Equal(t, test.value, value) assert.Equal(t, test.ok, err2 == nil) }) } } func TestSearcher_GetByPathErr(t *testing.T) { s := NewSearcher(` { "xx" : [] ,"yy" :{ }, "test" : [ true , 0.1 , "abc", ["h"], {"a":"bc"} ], "err1":[a, ] , "err2":{ ,"x":"xx"} } `) node, e := s.GetByPath("zz") if e == nil { t.Fatalf("node: %v, err: %v", node, e) } s.parser.p = 0 node, e = s.GetByPath("xx", 4) if e == nil { t.Fatalf("node: %v, err: %v", node, e) } s.parser.p = 0 node, e = s.GetByPath("yy", "a") if e == nil { t.Fatalf("node: %v, err: %v", node, e) } s.parser.p = 0 node, e = s.GetByPath("test", 2, "x") if e == nil { t.Fatalf("node: %v, err: %v", node, e) } s.parser.p = 0 node, e = s.GetByPath("err1", 0) if e == nil { t.Fatalf("node: %v, err: %v", node, e) } s.parser.p = 0 node, e = s.GetByPath("err2", "x") if e == nil { t.Fatalf("node: %v, err: %v", node, e) } } func TestLoadIndex(t *testing.T) { node, err := NewSearcher(`{"a":[-0, 1, -1.2, -1.2e-10]}`).GetByPath("a") if err != nil { t.Fatal(err) } a, _ := node.Index(3).Float64() assert.Equal(t, -1.2e-10, a) m, _ := node.Array() assert.Equal(t, m, []interface{}{ float64(0), float64(1), -1.2, -1.2e-10, }) } func TestSearchNotExist(t *testing.T) { s := NewSearcher(` { "xx" : [ 0, "" ] ,"yy" :{ "2": "" } } `) node, e := s.GetByPath("xx", 2) if node.Exists() { t.Fatalf("node: %v, err: %v", node, e) } node, e = s.GetByPath("xx", 1) if e != nil || !node.Exists() { 
t.Fatalf("node: %v, err: %v", node, e) } node, e = s.GetByPath("yy", "3") if node.Exists() { t.Fatalf("node: %v, err: %v", node, e) } node, e = s.GetByPath("yy", "2") if e != nil || !node.Exists() { t.Fatalf("node: %v, err: %v", node, e) } } func BenchmarkGetOne_Sonic(b *testing.B) { b.SetBytes(int64(len(_TwitterJson))) ast := NewSearcher(_TwitterJson) for i := 0; i < b.N; i++ { node, err := ast.GetByPath("statuses", 3, "id") if err != nil { b.Fatal(err) } x, _ := node.Int64() if x != 249279667666817024 { b.Fatal(node.Interface()) } } } func BenchmarkGetOneSafe_Sonic(b *testing.B) { b.SetBytes(int64(len(_TwitterJson))) ast := NewSearcher(_TwitterJson) ast.ConcurrentRead = true for i := 0; i < b.N; i++ { node, err := ast.GetByPath("statuses", 3, "id") if err != nil { b.Fatal(err) } x, _ := node.Int64() if x != 249279667666817024 { b.Fatal(node.Interface()) } } } func BenchmarkGetFull_Sonic(b *testing.B) { ast := NewSearcher(_TwitterJson) b.SetBytes(int64(len(_TwitterJson))) b.ReportAllocs() b.ResetTimer() for i := 0; i < b.N; i++ { node, err := ast.GetByPath() if err != nil || node.Type() != V_OBJECT { b.Fatal(err) } } } func BenchmarkGetWithManyCompare_Sonic(b *testing.B) { b.SetBytes(int64(len(_LotsCompare))) ast := NewSearcher(_LotsCompare) for i := 0; i < b.N; i++ { node, err := ast.GetByPath("is") if err != nil { b.Fatal(err) } x, _ := node.Int64() if x != 1 { b.Fatal(node.Interface()) } } } func BenchmarkGetOne_Parallel_Sonic(b *testing.B) { b.SetBytes(int64(len(_TwitterJson))) b.RunParallel(func(pb *testing.PB) { ast := NewSearcher(_TwitterJson) for pb.Next() { node, err := ast.GetByPath("statuses", 3, "id") if err != nil { b.Fatal(err) } x, _ := node.Int64() if x != 249279667666817024 { b.Fatal(node.Interface()) } } }) } func BenchmarkGetOneSafe_Parallel_Sonic(b *testing.B) { b.SetBytes(int64(len(_TwitterJson))) b.RunParallel(func(pb *testing.PB) { ast := NewSearcher(_TwitterJson) ast.ConcurrentRead = true for pb.Next() { node, err := 
ast.GetByPath("statuses", 3, "id") if err != nil { b.Fatal(err) } x, _ := node.Int64() if x != 249279667666817024 { b.Fatal(node.Interface()) } } }) } func BenchmarkSetOne_Sonic(b *testing.B) { node, err := NewSearcher(_TwitterJson).GetByPath("statuses", 3) if err != nil { b.Fatal(err) } n := NewNumber(strconv.Itoa(math.MaxInt32)) _, err = node.Set("id", n) if err != nil { b.Fatal(err) } b.SetBytes(int64(len(_TwitterJson))) b.ReportAllocs() b.ResetTimer() for i := 0; i < b.N; i++ { node, _ := NewSearcher(_TwitterJson).GetByPath("statuses", 3) _, _ = node.Set("id", n) } } func BenchmarkSetOne_Parallel_Sonic(b *testing.B) { node, err := NewSearcher(_TwitterJson).GetByPath("statuses", 3) if err != nil { b.Fatal(err) } n := NewNumber(strconv.Itoa(math.MaxInt32)) _, err = node.Set("id", n) if err != nil { b.Fatal(err) } b.SetBytes(int64(len(_TwitterJson))) b.ReportAllocs() b.ResetTimer() b.RunParallel(func(pb *testing.PB) { for pb.Next() { node, _ := NewSearcher(_TwitterJson).GetByPath("statuses", 3) _, _ = node.Set("id", n) } }) } ================================================ FILE: ast/stubs.go ================================================ /* * Copyright 2021 ByteDance Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ package ast import ( "unsafe" "github.com/bytedance/sonic/internal/rt" ) //go:nosplit func mem2ptr(s []byte) unsafe.Pointer { return (*rt.GoSlice)(unsafe.Pointer(&s)).Ptr } ================================================ FILE: ast/testdata_test.go ================================================ /* * Copyright 2021 ByteDance Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package ast const _TwitterJson = `{ "statuses": [ { "coordinates": null, "favorited": false, "truncated": false, "created_at": "Mon Sep 24 03:35:21 +0000 2012", "id_str": "250075927172759552", "entities": { "urls": [ ], "hashtags": [ { "text": "freebandnames", "indices": [ 20, 34 ] } ], "user_mentions": [ ] }, "in_reply_to_user_id_str": null, "contributors": null, "text": "Aggressive Ponytail #freebandnames", "metadata": { "iso_language_code": "en", "result_type": "recent" }, "retweet_count": 0, "in_reply_to_status_id_str": null, "id": 250075927172759552, "geo": null, "retweeted": false, "in_reply_to_user_id": null, "place": null, "user": { "profile_sidebar_fill_color": "DDEEF6", "profile_sidebar_border_color": "C0DEED", "profile_background_tile": false, "name": "Sean Cummings", "profile_image_url": "http://a0.twimg.com/profile_images/2359746665/1v6zfgqo8g0d3mk7ii5s_normal.jpeg", "created_at": "Mon Apr 26 06:01:55 +0000 2010", "location": "LA, CA", "follow_request_sent": null, "profile_link_color": "0084B4", "is_translator": false, "id_str": "137238150", "entities": { "url": { "urls": [ { 
"expanded_url": null, "url": "", "indices": [ 0, 0 ] } ] }, "description": { "urls": [ ] } }, "default_profile": true, "contributors_enabled": false, "favourites_count": 0, "url": null, "profile_image_url_https": "https://si0.twimg.com/profile_images/2359746665/1v6zfgqo8g0d3mk7ii5s_normal.jpeg", "utc_offset": -28800, "id": 137238150, "profile_use_background_image": true, "listed_count": 2, "profile_text_color": "333333", "lang": "en", "followers_count": 70, "protected": false, "notifications": null, "profile_background_image_url_https": "https://si0.twimg.com/images/themes/theme1/bg.png", "profile_background_color": "C0DEED", "verified": false, "geo_enabled": true, "time_zone": "Pacific Time (US & Canada)", "description": "Born 330 Live 310", "default_profile_image": false, "profile_background_image_url": "http://a0.twimg.com/images/themes/theme1/bg.png", "statuses_count": 579, "friends_count": 110, "following": null, "show_all_inline_media": false, "screen_name": "sean_cummings" }, "in_reply_to_screen_name": null, "source": "Twitter for Mac", "in_reply_to_status_id": null }, { "coordinates": null, "favorited": false, "truncated": false, "created_at": "Fri Sep 21 23:40:54 +0000 2012", "id_str": "249292149810667520", "entities": { "urls": [ ], "hashtags": [ { "text": "FreeBandNames", "indices": [ 20, 34 ] } ], "user_mentions": [ ] }, "in_reply_to_user_id_str": null, "contributors": null, "text": "Thee Namaste Nerdz. 
#FreeBandNames", "metadata": { "iso_language_code": "pl", "result_type": "recent" }, "retweet_count": 0, "in_reply_to_status_id_str": null, "id": 249292149810667520, "geo": null, "retweeted": false, "in_reply_to_user_id": null, "place": null, "user": { "profile_sidebar_fill_color": "DDFFCC", "profile_sidebar_border_color": "BDDCAD", "profile_background_tile": true, "name": "Chaz Martenstein", "profile_image_url": "http://a0.twimg.com/profile_images/447958234/Lichtenstein_normal.jpg", "created_at": "Tue Apr 07 19:05:07 +0000 2009", "location": "Durham, NC", "follow_request_sent": null, "profile_link_color": "0084B4", "is_translator": false, "id_str": "29516238", "entities": { "url": { "urls": [ { "expanded_url": null, "url": "http://bullcityrecords.com/wnng/", "indices": [ 0, 32 ] } ] }, "description": { "urls": [ ] } }, "default_profile": false, "contributors_enabled": false, "favourites_count": 8, "url": "http://bullcityrecords.com/wnng/", "profile_image_url_https": "https://si0.twimg.com/profile_images/447958234/Lichtenstein_normal.jpg", "utc_offset": -18000, "id": 29516238, "profile_use_background_image": true, "listed_count": 118, "profile_text_color": "333333", "lang": "en", "followers_count": 2052, "protected": false, "notifications": null, "profile_background_image_url_https": "https://si0.twimg.com/profile_background_images/9423277/background_tile.bmp", "profile_background_color": "9AE4E8", "verified": false, "geo_enabled": false, "time_zone": "Eastern Time (US & Canada)", "description": "You will come to Durham, North Carolina. I will sell you some records then, here in Durham, North Carolina. 
Fun will happen.", "default_profile_image": false, "profile_background_image_url": "http://a0.twimg.com/profile_background_images/9423277/background_tile.bmp", "statuses_count": 7579, "friends_count": 348, "following": null, "show_all_inline_media": true, "screen_name": "bullcityrecords" }, "in_reply_to_screen_name": null, "source": "web", "in_reply_to_status_id": null }, { "coordinates": null, "favorited": false, "truncated": false, "created_at": "Fri Sep 21 23:30:20 +0000 2012", "id_str": "249289491129438208", "entities": { "urls": [ ], "hashtags": [ { "text": "freebandnames", "indices": [ 29, 43 ] } ], "user_mentions": [ ] }, "in_reply_to_user_id_str": null, "contributors": null, "text": "Mexican Heaven, Mexican Hell #freebandnames", "metadata": { "iso_language_code": "en", "result_type": "recent" }, "retweet_count": 0, "in_reply_to_status_id_str": null, "id": 249289491129438208, "geo": null, "retweeted": false, "in_reply_to_user_id": null, "place": null, "user": { "profile_sidebar_fill_color": "99CC33", "profile_sidebar_border_color": "829D5E", "profile_background_tile": false, "name": "Thomas John Wakeman", "profile_image_url": "http://a0.twimg.com/profile_images/2219333930/Froggystyle_normal.png", "created_at": "Tue Sep 01 21:21:35 +0000 2009", "location": "Kingston New York", "follow_request_sent": null, "profile_link_color": "D02B55", "is_translator": false, "id_str": "70789458", "entities": { "url": { "urls": [ { "expanded_url": null, "url": "", "indices": [ 0, 0 ] } ] }, "description": { "urls": [ ] } }, "default_profile": false, "contributors_enabled": false, "favourites_count": 19, "url": null, "profile_image_url_https": "https://si0.twimg.com/profile_images/2219333930/Froggystyle_normal.png", "utc_offset": -18000, "id": 70789458, "profile_use_background_image": true, "listed_count": 1, "profile_text_color": "3E4415", "lang": "en", "followers_count": 63, "protected": false, "notifications": null, "profile_background_image_url_https": 
"https://si0.twimg.com/images/themes/theme5/bg.gif", "profile_background_color": "352726", "verified": false, "geo_enabled": false, "time_zone": "Eastern Time (US & Canada)", "description": "Science Fiction Writer, sort of. Likes Superheroes, Mole People, Alt. Timelines.", "default_profile_image": false, "profile_background_image_url": "http://a0.twimg.com/images/themes/theme5/bg.gif", "statuses_count": 1048, "friends_count": 63, "following": null, "show_all_inline_media": false, "screen_name": "MonkiesFist" }, "in_reply_to_screen_name": null, "source": "web", "in_reply_to_status_id": null }, { "coordinates": null, "favorited": false, "truncated": false, "created_at": "Fri Sep 21 22:51:18 +0000 2012", "id_str": "249279667666817024", "entities": { "urls": [ ], "hashtags": [ { "text": "freebandnames", "indices": [ 20, 34 ] } ], "user_mentions": [ ] }, "in_reply_to_user_id_str": null, "contributors": null, "text": "The Foolish Mortals #freebandnames", "metadata": { "iso_language_code": "en", "result_type": "recent" }, "retweet_count": 0, "in_reply_to_status_id_str": null, "id": 249279667666817024, "geo": null, "retweeted": false, "in_reply_to_user_id": null, "place": null, "user": { "profile_sidebar_fill_color": "BFAC83", "profile_sidebar_border_color": "615A44", "profile_background_tile": true, "name": "Marty Elmer", "profile_image_url": "http://a0.twimg.com/profile_images/1629790393/shrinker_2000_trans_normal.png", "created_at": "Mon May 04 00:05:00 +0000 2009", "location": "Wisconsin, USA", "follow_request_sent": null, "profile_link_color": "3B2A26", "is_translator": false, "id_str": "37539828", "entities": { "url": { "urls": [ { "expanded_url": null, "url": "http://www.omnitarian.me", "indices": [ 0, 24 ] } ] }, "description": { "urls": [ ] } }, "default_profile": false, "contributors_enabled": false, "favourites_count": 647, "url": "http://www.omnitarian.me", "profile_image_url_https": 
"https://si0.twimg.com/profile_images/1629790393/shrinker_2000_trans_normal.png", "utc_offset": -21600, "id": 37539828, "profile_use_background_image": true, "listed_count": 52, "profile_text_color": "000000", "lang": "en", "followers_count": 608, "protected": false, "notifications": null, "profile_background_image_url_https": "https://si0.twimg.com/profile_background_images/106455659/rect6056-9.png", "profile_background_color": "EEE3C4", "verified": false, "geo_enabled": false, "time_zone": "Central Time (US & Canada)", "description": "Cartoonist, Illustrator, and T-Shirt connoisseur", "default_profile_image": false, "profile_background_image_url": "http://a0.twimg.com/profile_background_images/106455659/rect6056-9.png", "statuses_count": 3575, "friends_count": 249, "following": null, "show_all_inline_media": true, "screen_name": "Omnitarian" }, "in_reply_to_screen_name": null, "source": "Twitter for iPhone", "in_reply_to_status_id": null } ], "search_metadata": { "max_id": 250126199840518145, "since_id": 24012619984051000, "refresh_url": "?since_id=250126199840518145&q=%23freebandnames&result_type=mixed&include_entities=1", "next_results": "?max_id=249279667666817023&q=%23freebandnames&count=4&include_entities=1&result_type=mixed", "count": 4, "completed_in": 0.035, "since_id_str": "24012619984051000", "query": "%23freebandnames", "max_id_str": "250126199840518145" } }` const _LotsCompare = 
`{"hi":0,"hi":0,"hi":0,"hi":0,"hi":0,"hi":0,"hi":0,"hi":0,"hi":0,"hi":0,"hi":0,"hi":0,"hi":0,"hi":0,"hi":0,"hi":0,"hi":0,"hi":0,"hi":0,"hi":0,"hi":0,"hi":0,"hi":0,"hi":0,"hi":0,"hi":0,"hi":0,"hi":0,"hi":0,"hi":0,"hi":0,"hi":0,"hi":0,"hi":0,"hi":0,"hi":0,"hi":0,"hi":0,"hi":0,"hi":0,"hi":0,"hi":0,"hi":0,"hi":0,"hi":0,"hi":0,"hi":0,"hi":0,"hi":0,"hi":0,"hi":0,"hi":0,"hi":0,"hi":0,"hi":0,"hi":0,"hi":0,"hi":0,"hi":0,"hi":0,"hi":0,"hi":0,"hi":0,"hi":0,"hi":0,"hi":0,"hi":0,"hi":0,"hi":0,"hi":0,"hi":0,"hi":0,"hi":0,"hi":0,"hi":0,"hi":0,"hi":0,"hi":0,"hi":0,"hi":0,"hi":0,"hi":0,"hi":0,"hi":0,"hi":0,"hi":0,"hi":0,"hi":0,"hi":0,"hi":0,"hi":0,"hi":0,"hi":0,"hi":0,"hi":0,"hi":0,"hi":0,"hi":0,"hi":0,"is":1}` type _TwitterStruct struct { Statuses []struct { Coordinates interface{} `json:"coordinates"` Favorited bool `json:"favorited"` Truncated bool `json:"truncated"` CreatedAt string `json:"created_at"` IDStr string `json:"id_str"` Entities struct { Urls []interface{} `json:"urls"` Hashtags []struct { Text string `json:"text"` Indices []int `json:"indices"` } `json:"hashtags"` UserMentions []interface{} `json:"user_mentions"` } `json:"entities"` InReplyToUserIDStr interface{} `json:"in_reply_to_user_id_str"` Contributors interface{} `json:"contributors"` Text string `json:"text"` Metadata struct { IsoLanguageCode string `json:"iso_language_code"` ResultType string `json:"result_type"` } `json:"metadata"` RetweetCount int `json:"retweet_count"` InReplyToStatusIDStr interface{} `json:"in_reply_to_status_id_str"` ID int64 `json:"id"` Geo interface{} `json:"geo"` Retweeted bool `json:"retweeted"` InReplyToUserID interface{} `json:"in_reply_to_user_id"` Place interface{} `json:"place"` User struct { ProfileSidebarFillColor string `json:"profile_sidebar_fill_color"` ProfileSidebarBorderColor string `json:"profile_sidebar_border_color"` ProfileBackgroundTile bool `json:"profile_background_tile"` Name string `json:"name"` ProfileImageURL string `json:"profile_image_url"` CreatedAt 
string `json:"created_at"` Location string `json:"location"` FollowRequestSent interface{} `json:"follow_request_sent"` ProfileLinkColor string `json:"profile_link_color"` IsTranslator bool `json:"is_translator"` IDStr string `json:"id_str"` Entities struct { URL struct { Urls []struct { ExpandedURL interface{} `json:"expanded_url"` URL string `json:"url"` Indices []int `json:"indices"` } `json:"urls"` } `json:"url"` Description struct { Urls []interface{} `json:"urls"` } `json:"description"` } `json:"entities"` DefaultProfile bool `json:"default_profile"` ContributorsEnabled bool `json:"contributors_enabled"` FavouritesCount int `json:"favourites_count"` URL interface{} `json:"url"` ProfileImageURLHTTPS string `json:"profile_image_url_https"` UtcOffset int `json:"utc_offset"` ID int `json:"id"` ProfileUseBackgroundImage bool `json:"profile_use_background_image"` ListedCount int `json:"listed_count"` ProfileTextColor string `json:"profile_text_color"` Lang string `json:"lang"` FollowersCount int `json:"followers_count"` Protected bool `json:"protected"` Notifications interface{} `json:"notifications"` ProfileBackgroundImageURLHTTPS string `json:"profile_background_image_url_https"` ProfileBackgroundColor string `json:"profile_background_color"` Verified bool `json:"verified"` GeoEnabled bool `json:"geo_enabled"` TimeZone string `json:"time_zone"` Description string `json:"description"` DefaultProfileImage bool `json:"default_profile_image"` ProfileBackgroundImageURL string `json:"profile_background_image_url"` StatusesCount int `json:"statuses_count"` FriendsCount int `json:"friends_count"` Following interface{} `json:"following"` ShowAllInlineMedia bool `json:"show_all_inline_media"` ScreenName string `json:"screen_name"` } `json:"user"` InReplyToScreenName interface{} `json:"in_reply_to_screen_name"` Source string `json:"source"` InReplyToStatusID interface{} `json:"in_reply_to_status_id"` } `json:"statuses"` SearchMetadata struct { MaxID int64 `json:"max_id"` 
SinceID int64 `json:"since_id"`
		RefreshURL  string  `json:"refresh_url"`
		NextResults string  `json:"next_results"`
		Count       int     `json:"count"`
		CompletedIn float64 `json:"completed_in"`
		SinceIDStr  string  `json:"since_id_str"`
		Query       string  `json:"query"`
		MaxIDStr    string  `json:"max_id_str"`
	} `json:"search_metadata"`
}

================================================
FILE: ast/visitor.go
================================================
/*
 * Copyright 2021 ByteDance Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package ast

import (
	"encoding/json"
	"errors"

	"github.com/bytedance/sonic/internal/native/types"
	"github.com/bytedance/sonic/unquote"
)

// Visitor handles the callbacks during preorder traversal of a JSON AST.
//
// According to the JSON RFC8259, a JSON AST can be defined by
// the following rules without separator / whitespace tokens.
//
//	JSON-AST = value
//	value    = false / null / true / object / array / number / string
//	object   = begin-object [ member *( member ) ] end-object
//	member   = string value
//	array    = begin-array [ value *( value ) ] end-array
//
// Any non-nil error returned by a callback stops the traversal and is
// returned from Preorder, with the single exception of VisitOPSkip returned
// from OnObjectBegin / OnArrayBegin (see those methods).
type Visitor interface {
	// OnNull handles a JSON null value.
	OnNull() error

	// OnBool handles a JSON true / false value.
	OnBool(v bool) error

	// OnString handles a JSON string value. Escape sequences have already
	// been unquoted when v is delivered.
	OnString(v string) error

	// OnInt64 handles a JSON number value with int64 type. The argument n
	// carries the number literal as it appears in the input.
	OnInt64(v int64, n json.Number) error

	// OnFloat64 handles a JSON number value with float64 type. The argument n
	// carries the number literal as it appears in the input.
	OnFloat64(v float64, n json.Number) error

	// OnObjectBegin handles the beginning of a JSON object value with a
	// suggested capacity that can be used to make your custom object container.
	//
	// After this point the visitor will receive a sequence of callbacks like
	// [string, value, string, value, ......, ObjectEnd].
	//
	// Returning VisitOPSkip makes the traverser skip the whole object without
	// reporting its members and then call OnObjectEnd.
	//
	// Note:
	// 1. This is a recursive definition which means the value can
	//    also be a JSON object / array described by a sequence of callbacks.
	// 2. The capacity is only a hint and is not derived from the input: the
	//    traverser currently passes the same fixed value for every object,
	//    including an empty one.
	// 3. Currently sonic uses a fixed capacity (kept in sync with ast.Node)
	//    which might not be very suitable. This may be improved in a future
	//    version.
	OnObjectBegin(capacity int) error

	// OnObjectKey handles a JSON object key string in member.
	OnObjectKey(key string) error

	// OnObjectEnd handles the ending of a JSON object value.
	OnObjectEnd() error

	// OnArrayBegin handles the beginning of a JSON array value with a
	// suggested capacity that can be used to make your custom array container.
	//
	// After this point the visitor will receive a sequence of callbacks like
	// [value, value, value, ......, ArrayEnd].
	//
	// Returning VisitOPSkip makes the traverser skip the whole array without
	// reporting its elements and then call OnArrayEnd.
	//
	// Note:
	// 1. This is a recursive definition which means the value can
	//    also be a JSON object / array described by a sequence of callbacks.
	// 2. The capacity is only a hint and is not derived from the input: the
	//    traverser currently passes the same fixed value for every array,
	//    including an empty one.
	// 3. Currently sonic uses a fixed capacity (kept in sync with ast.Node)
	//    which might not be very suitable. This may be improved in a future
	//    version.
	OnArrayBegin(capacity int) error

	// OnArrayEnd handles the ending of a JSON array value.
	OnArrayEnd() error
}

// VisitorOptions contains all Visitor's options. The default value is an
// empty VisitorOptions{}.
type VisitorOptions struct {
	// OnlyNumber indicates parser to directly return number value without
	// conversion, then the first argument of OnInt64 / OnFloat64 will always
	// be zero.
OnlyNumber bool
}

// defaultVisitorOptions is the option set Preorder uses when the caller
// passes nil options.
var defaultVisitorOptions = &VisitorOptions{}

// Preorder decodes the whole JSON string str and reports each AST node to
// visitor in preorder. The first error returned by any visitor method stops
// the traversal and is returned unchanged. A nil opts selects the default
// options; opts is only read, so it can be reused after every call.
func Preorder(str string, visitor Visitor, opts *VisitorOptions) error {
	if opts == nil {
		opts = defaultVisitorOptions
	}

	// Read every option once, before decoding starts, so that the options
	// are constant for the whole traversal.
	decodeNumber := !opts.OnlyNumber

	tv := &traverser{
		visitor: visitor,
		parser: Parser{
			s:         str,
			noLazy:    true,
			skipValue: false,
		},
	}

	if !decodeNumber {
		return tv.decodeValue()
	}

	// decodeNumber is defined on Parser elsewhere in the package; it is
	// switched on for the traversal and off again afterwards.
	tv.parser.decodeNumber(true)
	err := tv.decodeValue()
	tv.parser.decodeNumber(false)
	return err
}

// traverser pairs the Parser that reads the input with the Visitor that
// receives the decoded events.
type traverser struct {
	parser  Parser
	visitor Visitor
}

// NOTE: keep in sync with (*Parser).Parse method.
//
// decodeValue decodes the next value from the parser and forwards it to the
// matching visitor callback, descending into arrays and objects.
func (self *traverser) decodeValue() error {
	val := self.parser.decodeValue()

	switch val.Vt {
	case types.V_EOF:
		return types.ERR_EOF
	case types.V_NULL:
		return self.visitor.OnNull()
	case types.V_TRUE, types.V_FALSE:
		return self.visitor.OnBool(val.Vt == types.V_TRUE)
	case types.V_STRING:
		return self.decodeString(val.Iv, val.Ep)
	case types.V_DOUBLE:
		/* the literal spans from the recorded start up to the parser position */
		literal := self.parser.s[val.Ep:self.parser.p]
		return self.visitor.OnFloat64(val.Dv, json.Number(literal))
	case types.V_INTEGER:
		literal := self.parser.s[val.Ep:self.parser.p]
		return self.visitor.OnInt64(val.Iv, json.Number(literal))
	case types.V_ARRAY:
		return self.decodeArray()
	case types.V_OBJECT:
		return self.decodeObject()
	default:
		/* any other type tag is a negated parsing error code */
		return types.ParsingError(-val.Vt)
	}
}

// NOTE: keep in sync with (*Parser).decodeArray method.
func (self *traverser) decodeArray() error { sp := self.parser.p ns := len(self.parser.s) /* allocate array space and parse every element */ if err := self.visitor.OnArrayBegin(_DEFAULT_NODE_CAP); err != nil { if err == VisitOPSkip { // NOTICE: for user needs to skip entry object self.parser.p -= 1 if _, e := self.parser.skipFast(); e != 0 { return e } return self.visitor.OnArrayEnd() } return err } /* check for EOF */ self.parser.p = self.parser.lspace(sp) if self.parser.p >= ns { return types.ERR_EOF } /* check for empty array */ if self.parser.s[self.parser.p] == ']' { self.parser.p++ return self.visitor.OnArrayEnd() } for { /* decode the value */ if err := self.decodeValue(); err != nil { return err } self.parser.p = self.parser.lspace(self.parser.p) /* check for EOF */ if self.parser.p >= ns { return types.ERR_EOF } /* check for the next character */ switch self.parser.s[self.parser.p] { case ',': self.parser.p++ case ']': self.parser.p++ return self.visitor.OnArrayEnd() default: return types.ERR_INVALID_CHAR } } } // NOTE: keep in sync with (*Parser).decodeObject method. 
func (self *traverser) decodeObject() error { sp := self.parser.p ns := len(self.parser.s) /* allocate object space and decode each pair */ if err := self.visitor.OnObjectBegin(_DEFAULT_NODE_CAP); err != nil { if err == VisitOPSkip { // NOTICE: for user needs to skip entry object self.parser.p -= 1 if _, e := self.parser.skipFast(); e != 0 { return e } return self.visitor.OnObjectEnd() } return err } /* check for EOF */ self.parser.p = self.parser.lspace(sp) if self.parser.p >= ns { return types.ERR_EOF } /* check for empty object */ if self.parser.s[self.parser.p] == '}' { self.parser.p++ return self.visitor.OnObjectEnd() } for { var njs types.JsonState var err types.ParsingError /* decode the key */ if njs = self.parser.decodeValue(); njs.Vt != types.V_STRING { return types.ERR_INVALID_CHAR } /* extract the key */ idx := self.parser.p - 1 key := self.parser.s[njs.Iv:idx] /* check for escape sequence */ if njs.Ep != -1 { if key, err = unquote.String(key); err != 0 { return err } } if err := self.visitor.OnObjectKey(key); err != nil { return err } /* expect a ':' delimiter */ if err = self.parser.delim(); err != 0 { return err } /* decode the value */ if err := self.decodeValue(); err != nil { return err } self.parser.p = self.parser.lspace(self.parser.p) /* check for EOF */ if self.parser.p >= ns { return types.ERR_EOF } /* check for the next character */ switch self.parser.s[self.parser.p] { case ',': self.parser.p++ case '}': self.parser.p++ return self.visitor.OnObjectEnd() default: return types.ERR_INVALID_CHAR } } } // NOTE: keep in sync with (*Parser).decodeString method. 
func (self *traverser) decodeString(iv int64, ep int) error { p := self.parser.p - 1 s := self.parser.s[iv:p] /* fast path: no escape sequence */ if ep == -1 { return self.visitor.OnString(s) } /* unquote the string */ out, err := unquote.String(s) if err != 0 { return err } return self.visitor.OnString(out) } // If visitor return this error on `OnObjectBegin()` or `OnArrayBegin()`, // the traverser will skip entry object or array var VisitOPSkip = errors.New("") ================================================ FILE: ast/visitor_test.go ================================================ /* * Copyright 2021 ByteDance Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ package ast import ( "bufio" "encoding/json" "fmt" "io" "os" "sort" "strings" "testing" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) var visitorTestCases = []struct { name string jsonStr string }{ {"default", _TwitterJson}, {"issue_case01", "[1193.6419677734375]"}, {"issue653", `{"v0": 0, "m0": {}, "v1": 1, "a0": [], "v2": 2}`}, } type visitorNodeDiffTest struct { t *testing.T str string tracer io.Writer cursor Node stk visitorNodeStack sp uint8 } type visitorNodeStack = [256]struct { Node Node Object map[string]Node Array []Node ObjectKey string } func (self *visitorNodeDiffTest) incrSP() { self.t.Helper() self.sp++ require.NotZero(self.t, self.sp, "stack overflow") } func (self *visitorNodeDiffTest) debugStack() string { var buf strings.Builder buf.WriteString("[") for i := uint8(0); i < self.sp; i++ { if i != 0 { buf.WriteString(", ") } if self.stk[i].Array != nil { buf.WriteString("Array") } else if self.stk[i].Object != nil { buf.WriteString("Object") } else { fmt.Fprintf(&buf, "Key(%q)", self.stk[i].ObjectKey) } } buf.WriteString("]") return buf.String() } func (self *visitorNodeDiffTest) requireType(got int) { self.t.Helper() want := self.cursor.Type() require.EqualValues(self.t, want, got) } func (self *visitorNodeDiffTest) toArrayIndex(array Node, i int) { // set cursor to next Value if existed self.t.Helper() n, err := array.Len() require.NoError(self.t, err) if i < n { self.cursor = *array.Index(i) require.NoError(self.t, self.cursor.Check()) } } func (self *visitorNodeDiffTest) onValueEnd() { if self.tracer != nil { fmt.Fprintf(self.tracer, "OnValueEnd: %s\n", self.debugStack()) } // cursor should point to the Value now self.t.Helper() if self.sp == 0 { if self.tracer != nil { fmt.Fprintf(self.tracer, "EOF\n\n") } return } // [..., Array, sp] if array := self.stk[self.sp-1].Array; array != nil { array = append(array, self.cursor) self.stk[self.sp-1].Array = array self.toArrayIndex(self.stk[self.sp-1].Node, len(array)) 
return
	}
	// [..., Object, ObjectKey, sp]
	require.GreaterOrEqual(self.t, self.sp, uint8(2))
	require.NotNil(self.t, self.stk[self.sp-2].Object)
	require.Nil(self.t, self.stk[self.sp-1].Object)
	require.Nil(self.t, self.stk[self.sp-1].Array)
	self.stk[self.sp-2].Object[self.stk[self.sp-1].ObjectKey] = self.cursor
	self.cursor = self.stk[self.sp-2].Node // reset cursor to Object
	self.sp--                              // pop ObjectKey
}

// OnNull checks that the cursor Node is a JSON null.
func (self *visitorNodeDiffTest) OnNull() error {
	if w := self.tracer; w != nil {
		fmt.Fprintf(w, "OnNull\n")
	}
	self.requireType(V_NULL)
	self.onValueEnd()
	return nil
}

// OnBool checks that the cursor Node is the boolean v.
func (self *visitorNodeDiffTest) OnBool(v bool) error {
	if w := self.tracer; w != nil {
		fmt.Fprintf(w, "OnBool: %t\n", v)
	}
	want := V_FALSE
	if v {
		want = V_TRUE
	}
	self.requireType(want)
	self.onValueEnd()
	return nil
}

// OnString checks that the cursor Node is a string equal to v.
func (self *visitorNodeDiffTest) OnString(v string) error {
	if w := self.tracer; w != nil {
		fmt.Fprintf(w, "OnString: %q\n", v)
	}
	self.requireType(V_STRING)

	expected, err := self.cursor.StrictString()
	require.NoError(self.t, err)
	require.EqualValues(self.t, expected, v)

	self.onValueEnd()
	return nil
}

// OnInt64 checks that the cursor Node is a number whose int64 value matches
// both v and the literal n.
func (self *visitorNodeDiffTest) OnInt64(v int64, n json.Number) error {
	if w := self.tracer; w != nil {
		fmt.Fprintf(w, "OnInt64: %d (%q)\n", v, n)
	}
	self.requireType(V_NUMBER)

	expected, err := self.cursor.StrictInt64()
	require.NoError(self.t, err)
	require.EqualValues(self.t, expected, v)

	parsed, err := n.Int64()
	require.NoError(self.t, err)
	require.EqualValues(self.t, expected, parsed)

	self.onValueEnd()
	return nil
}

// OnFloat64 checks that the cursor Node is a number whose float64 value
// matches both v and the literal n.
func (self *visitorNodeDiffTest) OnFloat64(v float64, n json.Number) error {
	if w := self.tracer; w != nil {
		fmt.Fprintf(w, "OnFloat64: %f (%q)\n", v, n)
	}
	self.requireType(V_NUMBER)

	expected, err := self.cursor.StrictFloat64()
	require.NoError(self.t, err)
	require.EqualValues(self.t, expected, v)

	parsed, err := n.Float64()
	require.NoError(self.t, err)
	require.EqualValues(self.t, expected, parsed)

	self.onValueEnd()
	return nil
}

func (self *visitorNodeDiffTest) OnObjectBegin(capacity int) error {
	if self.tracer !=
nil {
		fmt.Fprintf(self.tracer, "OnObjectBegin: %d\n", capacity)
	}
	self.requireType(V_OBJECT)
	self.stk[self.sp].Node = self.cursor
	self.stk[self.sp].Object = make(map[string]Node, capacity)
	self.incrSP()
	return nil
}

// OnObjectKey pushes key on the stack and moves the cursor to the member
// value stored under key in the enclosing object Node.
func (self *visitorNodeDiffTest) OnObjectKey(key string) error {
	if w := self.tracer; w != nil {
		fmt.Fprintf(w, "OnObjectKey: %q %s\n", key, self.debugStack())
	}

	parent := &self.stk[self.sp-1]
	require.NotNil(self.t, parent.Object)
	owner := parent.Node

	self.stk[self.sp].ObjectKey = key
	self.incrSP()

	// set cursor to Value
	self.cursor = *owner.Get(key)
	require.NoError(self.t, self.cursor.Check())
	return nil
}

// OnObjectEnd compares the members collected for the object on top of the
// stack with the members of its Node (same key set, same value types), then
// pops the object and reports it as a finished value.
func (self *visitorNodeDiffTest) OnObjectEnd() error {
	if w := self.tracer; w != nil {
		fmt.Fprintf(w, "OnObjectEnd\n")
	}

	frame := &self.stk[self.sp-1]
	object := frame.Object
	require.NotNil(self.t, object)

	node := frame.Node
	pairs, err := node.MapUseNode()
	require.NoError(self.t, err)

	// compare the key sets in sorted order
	keysGot := make([]string, 0, len(object))
	for key := range object {
		keysGot = append(keysGot, key)
	}
	sort.Strings(keysGot)

	keysWant := make([]string, 0, len(pairs))
	for key := range pairs {
		keysWant = append(keysWant, key)
	}
	sort.Strings(keysWant)

	require.EqualValues(self.t, keysWant, keysGot)

	// compare the value types key by key
	for key, pair := range pairs {
		require.EqualValues(self.t, pair.Type(), object[key].Type())
	}

	// pop Object
	self.sp--
	frame.Node = Node{}
	frame.Object = nil

	self.cursor = node // set cursor to this Object
	self.onValueEnd()
	return nil
}

// OnArrayBegin pushes a new array frame for the cursor Node and moves the
// cursor to its first element, if there is one.
func (self *visitorNodeDiffTest) OnArrayBegin(capacity int) error {
	if w := self.tracer; w != nil {
		fmt.Fprintf(w, "OnArrayBegin: %d\n", capacity)
	}
	self.requireType(V_ARRAY)

	frame := &self.stk[self.sp]
	frame.Node = self.cursor
	frame.Array = make([]Node, 0, capacity)
	self.incrSP()

	self.toArrayIndex(frame.Node, 0)
	return nil
}

func (self *visitorNodeDiffTest) OnArrayEnd() error {
	if self.tracer != nil {
		fmt.Fprintf(self.tracer, "OnArrayEnd\n")
	}
	array := self.stk[self.sp-1].Array
require.NotNil(self.t, array) node := self.stk[self.sp-1].Node values, err := node.ArrayUseNode() require.NoError(self.t, err) require.EqualValues(self.t, len(values), len(array)) for i, n := 0, len(values); i < n; i++ { typeGot := array[i].Type() typeWant := values[i].Type() require.EqualValues(self.t, typeWant, typeGot) } // pop Array self.sp-- self.stk[self.sp].Node = Node{} self.stk[self.sp].Array = nil self.cursor = node // set cursor to this Array self.onValueEnd() return nil } func (self *visitorNodeDiffTest) Run(t *testing.T, str string, tracer io.Writer) { self.t = t self.str = str self.tracer = tracer self.t.Helper() self.cursor = NewRaw(self.str) require.NoError(self.t, self.cursor.LoadAll()) self.stk = visitorNodeStack{} self.sp = 0 require.NoError(self.t, Preorder(self.str, self, nil)) } func TestVisitor_NodeDiff(t *testing.T) { var suite visitorNodeDiffTest newTracer := func(t *testing.T) io.Writer { const EnableTracer = false if !EnableTracer { return nil } basename := strings.ReplaceAll(t.Name(), "/", "_") fp, err := os.Create(fmt.Sprintf("../output/%s.log", basename)) require.NoError(t, err) writer := bufio.NewWriter(fp) t.Cleanup(func() { _ = writer.Flush() _ = fp.Close() }) return writer } for _, c := range visitorTestCases { t.Run(c.name, func(t *testing.T) { suite.Run(t, c.jsonStr, newTracer(t)) }) } } type visitorUserNode interface { UserNode() } type ( visitorUserNull struct{} visitorUserBool struct{ Value bool } visitorUserInt64 struct{ Value int64 } visitorUserFloat64 struct{ Value float64 } visitorUserString struct{ Value string } visitorUserObject struct{ Value map[string]visitorUserNode } visitorUserArray struct{ Value []visitorUserNode } ) func (*visitorUserNull) UserNode() {} func (*visitorUserBool) UserNode() {} func (*visitorUserInt64) UserNode() {} func (*visitorUserFloat64) UserNode() {} func (*visitorUserString) UserNode() {} func (*visitorUserObject) UserNode() {} func (*visitorUserArray) UserNode() {} func compareUserNode(tb 
testing.TB, lhs, rhs visitorUserNode) bool { switch lhs := lhs.(type) { case *visitorUserNull: _, ok := rhs.(*visitorUserNull) return assert.True(tb, ok) case *visitorUserBool: rhs, ok := rhs.(*visitorUserBool) return assert.True(tb, ok) && assert.Equal(tb, lhs.Value, rhs.Value) case *visitorUserInt64: rhs, ok := rhs.(*visitorUserInt64) return assert.True(tb, ok) && assert.Equal(tb, lhs.Value, rhs.Value) case *visitorUserFloat64: rhs, ok := rhs.(*visitorUserFloat64) return assert.True(tb, ok) && assert.Equal(tb, lhs.Value, rhs.Value) case *visitorUserString: rhs, ok := rhs.(*visitorUserString) return assert.True(tb, ok) && assert.Equal(tb, lhs.Value, rhs.Value) case *visitorUserObject: rhs, ok := rhs.(*visitorUserObject) if !(assert.True(tb, ok) && assert.Equal(tb, len(lhs.Value), len(rhs.Value))) { return false } for key, lhs := range lhs.Value { rhs, ok := rhs.Value[key] if !(assert.True(tb, ok) && assert.True(tb, compareUserNode(tb, lhs, rhs))) { return false } } return true case *visitorUserArray: rhs, ok := rhs.(*visitorUserArray) if !(assert.True(tb, ok) && assert.Equal(tb, len(lhs.Value), len(rhs.Value))) { return false } for i, n := 0, len(lhs.Value); i < n; i++ { if !assert.True(tb, compareUserNode(tb, lhs.Value[i], rhs.Value[i])) { return false } } return true default: tb.Fatalf("unexpected type of UserNode: %T", lhs) return false } } type visitorUserNodeDecoder interface { Reset() Decode(str string) (visitorUserNode, error) } var _ visitorUserNodeDecoder = (*visitorUserNodeASTDecoder)(nil) type visitorUserNodeASTDecoder struct{} func (self *visitorUserNodeASTDecoder) Reset() {} func (self *visitorUserNodeASTDecoder) Decode(str string) (visitorUserNode, error) { root := NewRaw(str) if err := root.LoadAll(); err != nil { return nil, err } return self.decodeValue(&root) } func (self *visitorUserNodeASTDecoder) decodeValue(root *Node) (visitorUserNode, error) { switch typ := root.Type(); typ { // embed (*Node).Check case V_NONE: return nil, ErrNotExist case 
V_ERROR: return nil, root case V_NULL: return &visitorUserNull{}, nil case V_TRUE: return &visitorUserBool{Value: true}, nil case V_FALSE: return &visitorUserBool{Value: false}, nil case V_STRING: value, err := root.StrictString() if err != nil { return nil, err } return &visitorUserString{Value: value}, nil case V_NUMBER: value, err := root.StrictNumber() if err != nil { return nil, err } i64, ierr := value.Int64() if ierr == nil { return &visitorUserInt64{Value: i64}, nil } f64, ferr := value.Float64() if ferr == nil { return &visitorUserFloat64{Value: f64}, nil } return nil, fmt.Errorf("invalid number: %v, ierr: %v, ferr: %v", value, ierr, ferr) case V_ARRAY: nodes, err := root.ArrayUseNode() if err != nil { return nil, err } values := make([]visitorUserNode, len(nodes)) for i := 0; i < len(nodes); i++ { n := &nodes[i] value, err := self.decodeValue(n) if err != nil { return nil, err } values[i] = value } return &visitorUserArray{Value: values}, nil case V_OBJECT: pairs, err := root.MapUseNode() if err != nil { return nil, err } values := make(map[string]visitorUserNode, len(pairs)) for k, v := range pairs { value, err := self.decodeValue(&v) if err != nil { return nil, err } values[k] = value } return &visitorUserObject{Value: values}, nil case V_ANY: fallthrough default: return nil, fmt.Errorf("unexpected Node type: %v", typ) } } var _ visitorUserNodeDecoder = (*visitorUserNodeVisitorDecoder)(nil) type visitorUserNodeVisitorDecoder struct { stk visitorUserNodeStack sp uint8 } type visitorUserNodeStack = [256]struct { val visitorUserNode obj map[string]visitorUserNode arr []visitorUserNode key string } func (self *visitorUserNodeVisitorDecoder) Reset() { self.stk = visitorUserNodeStack{} self.sp = 0 } func (self *visitorUserNodeVisitorDecoder) Decode(str string) (visitorUserNode, error) { if err := Preorder(str, self, nil); err != nil { return nil, err } return self.result() } func (self *visitorUserNodeVisitorDecoder) result() (visitorUserNode, error) { if 
self.sp != 1 {
		return nil, fmt.Errorf("incorrect sp: %d", self.sp)
	}
	return self.stk[0].val, nil
}

// incrSP advances the stack pointer by one slot. It returns an error when the
// increment leaves sp at 0, which is reported as reaching the maximum depth.
func (self *visitorUserNodeVisitorDecoder) incrSP() error {
	self.sp++
	if self.sp == 0 {
		return fmt.Errorf("reached max depth: %d", len(self.stk))
	}
	return nil
}

// OnNull pushes a null node and folds it into its parent container, if any.
func (self *visitorUserNodeVisitorDecoder) OnNull() error {
	self.stk[self.sp].val = &visitorUserNull{}
	if err := self.incrSP(); err != nil {
		return err
	}
	return self.onValueEnd()
}

// OnBool pushes a bool node and folds it into its parent container, if any.
func (self *visitorUserNodeVisitorDecoder) OnBool(v bool) error {
	self.stk[self.sp].val = &visitorUserBool{Value: v}
	if err := self.incrSP(); err != nil {
		return err
	}
	return self.onValueEnd()
}

// OnString pushes a string node and folds it into its parent container, if any.
func (self *visitorUserNodeVisitorDecoder) OnString(v string) error {
	self.stk[self.sp].val = &visitorUserString{Value: v}
	if err := self.incrSP(); err != nil {
		return err
	}
	return self.onValueEnd()
}

// OnInt64 pushes an int64 node and folds it into its parent container, if any.
// The json.Number form n is not used.
func (self *visitorUserNodeVisitorDecoder) OnInt64(v int64, n json.Number) error {
	self.stk[self.sp].val = &visitorUserInt64{Value: v}
	if err := self.incrSP(); err != nil {
		return err
	}
	return self.onValueEnd()
}

// OnFloat64 pushes a float64 node and folds it into its parent container, if
// any. The json.Number form n is not used.
func (self *visitorUserNodeVisitorDecoder) OnFloat64(v float64, n json.Number) error {
	self.stk[self.sp].val = &visitorUserFloat64{Value: v}
	if err := self.incrSP(); err != nil {
		return err
	}
	return self.onValueEnd()
}

// OnObjectBegin opens a new object slot with a map sized by capacity.
func (self *visitorUserNodeVisitorDecoder) OnObjectBegin(capacity int) error {
	self.stk[self.sp].obj = make(map[string]visitorUserNode, capacity)
	return self.incrSP()
}

// OnObjectKey records the pending key in its own slot, directly above the
// object slot it belongs to.
func (self *visitorUserNodeVisitorDecoder) OnObjectKey(key string) error {
	self.stk[self.sp].key = key
	return self.incrSP()
}

// OnObjectEnd turns the open object on top of the stack into a finished value
// and folds it into its parent container, if any.
func (self *visitorUserNodeVisitorDecoder) OnObjectEnd() error {
	self.stk[self.sp-1].val = &visitorUserObject{Value: self.stk[self.sp-1].obj}
	// Clear obj so the slot now reads as a plain value.
	self.stk[self.sp-1].obj = nil
	return self.onValueEnd()
}

// OnArrayBegin opens a new array slot. The slice is non-nil even when capacity
// is 0, which is what onValueEnd uses to recognise an open array.
func (self *visitorUserNodeVisitorDecoder) OnArrayBegin(capacity int) error {
	self.stk[self.sp].arr = make([]visitorUserNode, 0, capacity)
	return self.incrSP()
}

// OnArrayEnd turns the open array on top of the stack into a finished value
// and folds it into its parent container, if any.
func (self *visitorUserNodeVisitorDecoder) OnArrayEnd() error {
self.stk[self.sp-1].val = &visitorUserArray{Value: self.stk[self.sp-1].arr} self.stk[self.sp-1].arr = nil return self.onValueEnd() } func (self *visitorUserNodeVisitorDecoder) onValueEnd() error { if self.sp == 1 { return nil } // [..., Array, Value, sp] if self.stk[self.sp-2].arr != nil { self.stk[self.sp-2].arr = append(self.stk[self.sp-2].arr, self.stk[self.sp-1].val) self.sp-- return nil } // [..., Object, ObjectKey, Value, sp] self.stk[self.sp-3].obj[self.stk[self.sp-2].key] = self.stk[self.sp-1].val self.sp -= 2 return nil } func testUserNodeDiff(t *testing.T, d1, d2 visitorUserNodeDecoder, str string) { t.Helper() d1.Reset() n1, err := d1.Decode(_TwitterJson) require.NoError(t, err) d2.Reset() n2, err := d2.Decode(_TwitterJson) require.NoError(t, err) require.True(t, compareUserNode(t, n1, n2)) } func TestVisitor_UserNodeDiff(t *testing.T) { var d1 visitorUserNodeASTDecoder var d2 visitorUserNodeVisitorDecoder for _, c := range visitorTestCases { t.Run(c.name, func(t *testing.T) { testUserNodeDiff(t, &d1, &d2, c.jsonStr) }) } } type skipVisitor struct { sp int Skip int inSkip bool CountSkip int } func (self *skipVisitor) OnNull() error { if self.sp == self.Skip+1 && self.inSkip { panic("unexpected key") } return nil } func (self *skipVisitor) OnFloat64(v float64, n json.Number) error { if self.sp == self.Skip+1 && self.inSkip { panic("unexpected key") } return nil } func (self *skipVisitor) OnInt64(v int64, n json.Number) error { if self.sp == self.Skip+1 && self.inSkip { panic("unexpected key") } return nil } func (self *skipVisitor) OnBool(v bool) error { if self.sp == self.Skip+1 && self.inSkip { panic("unexpected key") } return nil } func (self *skipVisitor) OnString(v string) error { if self.sp == self.Skip+1 && self.inSkip { panic("unexpected key") } return nil } func (self *skipVisitor) OnObjectBegin(capacity int) error { println("self.sp", self.sp) if self.sp == self.Skip { self.inSkip = true self.CountSkip++ println("op skip") self.sp++ return 
VisitOPSkip } self.sp++ return nil } func (self *skipVisitor) OnObjectKey(key string) error { if self.sp == self.Skip+1 && self.inSkip { panic("unexpected key") } return nil } func (self *skipVisitor) OnObjectEnd() error { if self.sp == self.Skip+1 { if !self.inSkip { panic("not in skip") } self.inSkip = false println("finish op skip") } self.sp-- return nil } func (self *skipVisitor) OnArrayBegin(capacity int) error { println("arr self.sp", self.sp) if self.sp == self.Skip { self.inSkip = true self.CountSkip++ println("arr op skip") self.sp++ return VisitOPSkip } self.sp++ return nil } func (self *skipVisitor) OnArrayEnd() error { println("arr self.sp", self.sp) if self.sp == self.Skip+1 { if !self.inSkip { panic("arr not in skip") } self.inSkip = false println("arr finish op skip") } self.sp-- return nil } func TestVisitor_OpSkip(t *testing.T) { var suite skipVisitor suite.Skip = 1 Preorder(`{ "a": [ null ] , "b": 1, "c": { "1" : 1 } }`, &suite, nil) if suite.CountSkip != 2 { t.Fatal(suite.CountSkip) } } func BenchmarkVisitor_UserNode(b *testing.B) { const str = _TwitterJson b.Run("AST", func(b *testing.B) { var d visitorUserNodeASTDecoder b.ResetTimer() for k := 0; k < b.N; k++ { d.Reset() _, err := d.Decode(str) require.NoError(b, err) b.SetBytes(int64(len(str))) } }) b.Run("Visitor", func(b *testing.B) { var d visitorUserNodeVisitorDecoder b.ResetTimer() for k := 0; k < b.N; k++ { d.Reset() _, err := d.Decode(str) require.NoError(b, err) b.SetBytes(int64(len(str))) } }) } ================================================ FILE: compat.go ================================================ //go:build (!amd64 && !arm64) || go1.27 || !go1.17 || (arm64 && !go1.20) // +build !amd64,!arm64 go1.27 !go1.17 arm64,!go1.20 /* * Copyright 2021 ByteDance Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package sonic

import (
	"bytes"
	"encoding/json"
	"io"
	"reflect"

	"github.com/bytedance/sonic/option"
)

// apiKind marks this build as the one whose API is served by encoding/json.
const apiKind = UseStdJSON

// frozenConfig is the API implementation used by this file. It embeds the
// Config it was created from and forwards all work to encoding/json.
type frozenConfig struct {
	Config
}

// Froze converts the Config into an API. The returned value holds its own
// copy of cfg.
func (cfg Config) Froze() API {
	api := &frozenConfig{Config: cfg}
	return api
}

// marshalOptions encodes val with a json.Encoder so that HTML escaping and
// indentation can be set explicitly, then removes the newline the encoder
// appends. The bytes written so far are returned together with the Encode
// error.
func (cfg frozenConfig) marshalOptions(val interface{}, prefix, indent string) ([]byte, error) {
	w := bytes.NewBuffer([]byte{})
	enc := json.NewEncoder(w)
	enc.SetEscapeHTML(cfg.EscapeHTML)
	enc.SetIndent(prefix, indent)
	err := enc.Encode(val)
	out := w.Bytes()

	// json.Encoder always appends '\n' after encoding, which json.Marshal()
	// does not, so strip it to keep the two paths consistent.
	if len(out) > 0 && out[len(out)-1] == '\n' {
		out = out[:len(out)-1]
	}
	return out, err
}

// Marshal encodes val with encoding/json. When EscapeHTML is off it goes
// through marshalOptions, where escaping can be disabled; otherwise it calls
// json.Marshal directly.
func (cfg frozenConfig) Marshal(val interface{}) ([]byte, error) {
	if !cfg.EscapeHTML {
		return cfg.marshalOptions(val, "", "")
	}
	return json.Marshal(val)
}

// MarshalToString is Marshal with the result converted to a string.
func (cfg frozenConfig) MarshalToString(val interface{}) (string, error) {
	out, err := cfg.Marshal(val)
	return string(out), err
}

// MarshalIndent is the indented counterpart of Marshal, with the same split
// between marshalOptions (EscapeHTML off) and json.MarshalIndent.
func (cfg frozenConfig) MarshalIndent(val interface{}, prefix, indent string) ([]byte, error) {
	if !cfg.EscapeHTML {
		return cfg.marshalOptions(val, prefix, indent)
	}
	return json.MarshalIndent(val, prefix, indent)
}

// UnmarshalFromString decodes buf into val with a json.Decoder configured
// from UseNumber and DisallowUnknownFields, and rejects input that has
// anything after the first JSON value.
func (cfg frozenConfig) UnmarshalFromString(buf string, val interface{}) error {
	r := bytes.NewBufferString(buf)
	dec := json.NewDecoder(r)
	if cfg.UseNumber {
		dec.UseNumber()
	}
	if cfg.DisallowUnknownFields {
dec.DisallowUnknownFields()
	}
	err := dec.Decode(val)
	if err != nil {
		return err
	}

	// Check for trailing characters: after the value, the next Token call must
	// yield (nil, io.EOF). Anything else is reported as a syntax error at the
	// offset where the first value ended.
	offset := dec.InputOffset()
	if t, err := dec.Token(); !(t == nil && err == io.EOF) {
		return &json.SyntaxError{Offset: offset}
	}
	return nil
}

// Unmarshal decodes buf into val by delegating to UnmarshalFromString.
func (cfg frozenConfig) Unmarshal(buf []byte, val interface{}) error {
	return cfg.UnmarshalFromString(string(buf), val)
}

// NewEncoder returns a json.Encoder writing to writer. HTML escaping is only
// touched when the config turns it off.
func (cfg frozenConfig) NewEncoder(writer io.Writer) Encoder {
	enc := json.NewEncoder(writer)
	if !cfg.EscapeHTML {
		enc.SetEscapeHTML(cfg.EscapeHTML)
	}
	return enc
}

// NewDecoder returns a json.Decoder reading from reader, configured from
// UseNumber and DisallowUnknownFields.
func (cfg frozenConfig) NewDecoder(reader io.Reader) Decoder {
	dec := json.NewDecoder(reader)
	if cfg.UseNumber {
		dec.UseNumber()
	}
	if cfg.DisallowUnknownFields {
		dec.DisallowUnknownFields()
	}
	return dec
}

// Valid reports whether data is valid JSON, as decided by json.Valid.
func (cfg frozenConfig) Valid(data []byte) bool {
	return json.Valid(data)
}

// Pretouch compiles vt ahead-of-time to avoid JIT compilation on-the-fly, in
// order to reduce the first-hit latency on **amd64**.
// Opts are the compile options; for example, "option.WithCompileRecursiveDepth"
// sets the depth of recursive compilation for nested struct types.
//
// This version is a no-op that always returns nil. It exists so that code
// calling Pretouch still builds when this file is the one compiled (for
// example when developing on a non-amd64 machine such as a Mac M1).
func Pretouch(vt reflect.Type, opts ...option.CompileOption) error {
	return nil
}

================================================
FILE: compat_test.go
================================================
/*
 * Copyright 2022 ByteDance Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package sonic

import (
	"bytes"
	"compress/gzip"
	"encoding/json"
	"io"
	"os"
	"reflect"
	"testing"

	"github.com/bytedance/sonic/internal/envs"
	"github.com/bytedance/sonic/option"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
)

// TestCompatUnmarshalStd compares ConfigStd / frozen Config unmarshaling with
// encoding/json for a plain map, for UseNumber, and for
// DisallowUnknownFields.
func TestCompatUnmarshalStd(t *testing.T) {
	var sobj = map[string]interface{}{}
	var jobj = map[string]interface{}{}
	var data = []byte(`{"a":1.00000001E-10}`)
	var str = string(data)
	serr := ConfigStd.UnmarshalFromString(str, &sobj)
	jerr := json.Unmarshal(data, &jobj)
	require.Equal(t, jerr, serr)
	require.Equal(t, jobj, sobj)
	// Mutate the input buffer and compare again: neither decoded map may
	// change when the source bytes do.
	data[2] = '0'
	require.Equal(t, jobj, sobj)

	// UseNumber: numbers must decode the same way as json.Decoder.UseNumber.
	sobj = map[string]interface{}{}
	jobj = map[string]interface{}{}
	data = []byte(`{"a":1}`)
	cfg := Config{
		UseNumber: true,
	}.Froze()
	serr = cfg.Unmarshal(data, &sobj)
	dec := json.NewDecoder(bytes.NewBuffer(data))
	dec.UseNumber()
	jerr = dec.Decode(&jobj)
	require.Equal(t, jerr, serr)
	require.Equal(t, jobj, sobj)

	// DisallowUnknownFields: the unknown field "C" must produce the same
	// error from both decoders.
	x := struct {
		A json.Number
		B json.Number
	}{}
	y := struct {
		A json.Number
		B json.Number
	}{}
	data = []byte(`{"A":"1", "C":-1, "B":1}`)
	cfg = Config{
		DisallowUnknownFields: true,
	}.Froze()
	serr = cfg.Unmarshal(data, &x)
	dec = json.NewDecoder(bytes.NewBuffer(data))
	dec.UseNumber()
	dec.DisallowUnknownFields()
	jerr = dec.Decode(&y)
	require.Equal(t, jerr, serr)
	// require.Equal(t, y, x)
}

// TestCompatMarshalStd compares ConfigStd marshaling with encoding/json for
// valid and invalid json.RawMessage values and for MarshalIndent.
func TestCompatMarshalStd(t *testing.T) {
	t.Parallel()
	var obj = map[string]interface{}{
		"c": json.RawMessage(" [ \"<&>\" ] "),
		"b": json.RawMessage(" [ ] "),
	}
	sout, serr := ConfigStd.Marshal(obj)
	jout, jerr := json.Marshal(obj)
	require.Equal(t, jerr, serr)
	require.Equal(t,
string(jout), string(sout))

	// An invalid RawMessage must make both implementations fail, with the
	// same (string-converted) output.
	obj = map[string]interface{}{
		"a": json.RawMessage(" [} "),
	}
	sout, serr = ConfigStd.Marshal(obj)
	jout, jerr = json.Marshal(obj)
	require.NotNil(t, jerr)
	require.NotNil(t, serr)
	require.Equal(t, string(jout), string(sout))

	// Indented output must match byte for byte.
	obj = map[string]interface{}{
		"a": json.RawMessage("1"),
	}
	sout, serr = ConfigStd.MarshalIndent(obj, "xxxx", " ")
	jout, jerr = json.MarshalIndent(obj, "xxxx", " ")
	require.Equal(t, jerr, serr)
	require.Equal(t, string(jout), string(sout))
}

// TestCompatEncoderStd checks that the encoder from ConfigStd.NewEncoder
// writes the same bytes as json.Encoder across changes of HTML escaping and
// indentation. Both buffers accumulate output, so each comparison covers
// everything written so far.
func TestCompatEncoderStd(t *testing.T) {
	var o = map[string]interface{}{
		"a": "<>",
		"b": json.RawMessage(" [ ] "),
	}
	var w1 = bytes.NewBuffer(nil)
	var w2 = bytes.NewBuffer(nil)
	var enc1 = json.NewEncoder(w1)
	var enc2 = ConfigStd.NewEncoder(w2)

	require.Nil(t, enc1.Encode(o))
	require.Nil(t, enc2.Encode(o))
	require.Equal(t, w1.String(), w2.String())

	enc1.SetEscapeHTML(true)
	enc2.SetEscapeHTML(true)
	enc1.SetIndent("\n", " ")
	enc2.SetIndent("\n", " ")
	require.Nil(t, enc1.Encode(o))
	require.Nil(t, enc2.Encode(o))
	require.Equal(t, w1.String(), w2.String())

	enc1.SetEscapeHTML(false)
	enc2.SetEscapeHTML(false)
	enc1.SetIndent("", "")
	enc2.SetIndent("", "")
	require.Nil(t, enc1.Encode(o))
	require.Nil(t, enc2.Encode(o))
	require.Equal(t, w1.String(), w2.String())
}

// TestCompatDecoderStd feeds the same stream (two valid objects followed by
// invalid input) to json.Decoder and to the decoder from ConfigStd.NewDecoder
// and checks that More, Decode and the unread remainder of the buffers stay
// in step.
func TestCompatDecoderStd(t *testing.T) {
	var o1 = map[string]interface{}{}
	var o2 = map[string]interface{}{}
	var s = `{"a":"b"} {"1":"2"} a {}`
	var w1 = bytes.NewBuffer([]byte(s))
	var w2 = bytes.NewBuffer([]byte(s))
	// NOTE(review): enc1/enc2 are decoders despite the names.
	var enc1 = json.NewDecoder(w1)
	var enc2 = ConfigStd.NewDecoder(w2)

	require.Equal(t, enc1.More(), enc2.More())
	require.Nil(t, enc1.Decode(&o1))
	require.Nil(t, enc2.Decode(&o2))
	require.Equal(t, w1.String(), w2.String())

	require.Equal(t, enc1.More(), enc2.More())
	require.Nil(t, enc1.Decode(&o1))
	require.Nil(t, enc2.Decode(&o2))
	require.Equal(t, w1.String(), w2.String())

	// The third value ("a") is not valid JSON: both decoders must fail.
	require.Equal(t, enc1.More(), enc2.More())
	require.NotNil(t, enc1.Decode(&o1))
	require.NotNil(t, enc2.Decode(&o2))
	require.Equal(t, w1.String(), w2.String())
}
// TestPretouch checks that Pretouch returns no error for a map type, both
// without options and with explicit compile options.
func TestPretouch(t *testing.T) {
	var v map[string]interface{}
	if err := Pretouch(reflect.TypeOf(v)); err != nil {
		t.Errorf("err:%v", err)
	}

	if err := Pretouch(reflect.TypeOf(v),
		option.WithCompileRecursiveDepth(1),
		option.WithCompileMaxInlineDepth(2),
	); err != nil {
		t.Errorf("err:%v", err)
	}
}

// TestGet looks up key "a" with GetFromString and checks its string value.
func TestGet(t *testing.T) {
	var data = `{"a":"b"}`
	r, err := GetFromString(data, "a")
	if err != nil {
		t.Fatal(err)
	}
	v, err := r.String()
	if err != nil {
		t.Fatal(err)
	}
	if v != "b" {
		t.Fatal(v)
	}
}

// TestUnmarshalWithTrailingChars checks that ConfigStd and encoding/json
// agree on whether each input is accepted. Only success or failure is
// compared, not the error values or the decoded message.
func TestUnmarshalWithTrailingChars(t *testing.T) {
	for i, str := range []string{
		"123",
		"123 ",
		"{} [] ",
		"{} [ ",
		"[],",
		"[]null",
		"false null",
	} {
		var msg1, msg2 json.RawMessage
		err1 := json.Unmarshal([]byte(str), &msg1)
		err2 := ConfigStd.UnmarshalFromString(str, &msg2)
		require.Equal(t, err1 == nil, err2 == nil, i)
		// sonic will not clear the unmarshaled value here, but encoding/json will
		// require.Equal(t, msg1, msg2)
	}
}

// TestUnmarshalJSONSuite runs every entry of the gzipped JSONTestSuite
// fixture through ConfigStd and encoding/json, decoding into both
// json.RawMessage and interface{}, and checks that the two agree on whether
// the input is an error.
func TestUnmarshalJSONSuite(t *testing.T) {
	if envs.UseOptDec {
		t.Skip("this test still fails in OPTDEC") // FIXME: fix the optdec issues
	}
	gzFile, err := os.Open("testdata/JSONTestSuite/testdata.json.gz")
	require.NoError(t, err)
	defer gzFile.Close()
	gzReader, err := gzip.NewReader(gzFile)
	require.NoError(t, err)
	defer gzReader.Close()
	data, err := io.ReadAll(gzReader)
	require.NoError(t, err)

	// The fixture is a JSON object mapping test name to test input.
	var tests map[string]string
	err = json.Unmarshal(data, &tests)
	require.NoError(t, err)

	for name, tt := range tests {
		b := []byte(tt)
		t.Run(name, func(t *testing.T) {
			serr := ConfigStd.Unmarshal(b, new(json.RawMessage))
			jerr := json.Unmarshal(b, new(json.RawMessage))
			assert.Equal(t, jerr != nil, serr != nil, "json: %v, sonic: %v", jerr, serr)

			serr = ConfigStd.Unmarshal(b, new(interface{}))
			jerr = json.Unmarshal(b, new(interface{}))
			assert.Equal(t, jerr != nil, serr != nil, "json: %v, sonic: %v", jerr, serr)
		})
	}
}

================================================
FILE: decode_test.go
================================================
//go:build (amd64 && go1.17 &&
!go1.27) || (arm64 && go1.20 && !go1.27)
// +build amd64,go1.17,!go1.27 arm64,go1.20,!go1.27

/*
 * Copyright 2021 ByteDance Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package sonic

import (
	"bytes"
	"encoding"
	"encoding/json"
	"errors"
	"fmt"
	"image"
	"math"
	"math/big"
	"math/rand"
	"net"
	"reflect"
	"runtime"
	"strconv"
	"strings"
	"testing"
	"time"
	"unsafe"

	"github.com/bytedance/sonic/decoder"
	"github.com/bytedance/sonic/internal/native/types"
	"github.com/bytedance/sonic/internal/utils"
	"github.com/davecgh/go-spew/spew"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
)

// T is a basic struct fixture; Z carries the "-" tag and is therefore never
// filled from JSON.
type T struct {
	X string
	Y int
	Z int `json:"-"`
}

// U has a single field renamed to "alpha" by its tag.
type U struct {
	Alphabet string `json:"alpha"`
}

// V mixes an interface{}, a sized integer, a json.Number and a pointer back
// to a struct that contains V.
type V struct {
	F1 interface{}
	F2 int32
	F3 json.Number
	F4 *VOuter
}

// VOuter wraps V; together with V.F4 it forms a recursive type.
type VOuter struct {
	V V
}

// W holds an SS, whose UnmarshalJSON always fails.
type W struct {
	S SS
}

type P struct {
	PP PP
}

type PP struct {
	T  T
	Ts []T
}

// SS is a string type whose UnmarshalJSON always returns an
// UnmarshalTypeError.
type SS string

func (*SS) UnmarshalJSON(_ []byte) error {
	return &json.UnmarshalTypeError{Value: "number", Type: reflect.TypeOf(SS(""))}
}

// ifaceNumAsFloat64/ifaceNumAsNumber are used to test unmarshaling with and
// without UseNumber
var ifaceNumAsFloat64 = map[string]interface{}{
	"k1": float64(1),
	"k2": "s",
	"k3": []interface{}{float64(1), 2.0, 3e-3},
	"k4": map[string]interface{}{"kk1": "s", "kk2": float64(2)},
}

var ifaceNumAsNumber = map[string]interface{}{
	"k1": json.Number("1"),
	"k2": "s",
	"k3": []interface{}{json.Number("1"), json.Number("2.0"), json.Number("3e-3")},
	"k4": map[string]interface{}{"kk1": "s", "kk2": json.Number("2")},
}

// tx has only an unexported field.
type tx struct {
	x int
}

type u8 uint8

// A type that can unmarshal itself.
type unmarshaler struct {
	T bool
}

func (u *unmarshaler) UnmarshalJSON(_ []byte) error {
	*u = unmarshaler{true} // All we need to see that UnmarshalJSON is called.
	return nil
}

type ustruct struct {
	M unmarshaler
}

// unmarshalerText round-trips through the text form "A:B".
type unmarshalerText struct {
	A, B string
}

// needed for re-marshaling tests
func (u unmarshalerText) MarshalText() ([]byte, error) {
	return []byte(u.A + ":" + u.B), nil
}

// UnmarshalText splits b at the first ':' into A and B and fails when there
// is no separator.
func (u *unmarshalerText) UnmarshalText(b []byte) error {
	pos := bytes.IndexByte(b, ':')
	if pos == -1 {
		return errors.New("missing separator")
	}
	u.A, u.B = string(b[:pos]), string(b[pos+1:])
	return nil
}

var _ encoding.TextUnmarshaler = (*unmarshalerText)(nil)

type ustructText struct {
	M unmarshalerText
}

// u8marshal is an integer type that can marshal/unmarshal itself.
type u8marshal uint8

// MarshalText renders the value as "u" followed by its decimal form.
func (u8 u8marshal) MarshalText() ([]byte, error) {
	return []byte(fmt.Sprintf("u%d", u8)), nil
}

var errMissingU8Prefix = errors.New("missing 'u' prefix")

// UnmarshalText requires the "u" prefix and parses the rest with
// strconv.Atoi. The result is converted to u8marshal without a range check.
func (u8 *u8marshal) UnmarshalText(b []byte) error {
	if !bytes.HasPrefix(b, []byte{'u'}) {
		return errMissingU8Prefix
	}
	n, err := strconv.Atoi(string(b[1:]))
	if err != nil {
		return err
	}
	*u8 = u8marshal(n)
	return nil
}

var _ encoding.TextUnmarshaler = (*u8marshal)(nil)

// Expected values for the custom-unmarshaler test cases.
var (
	umtrue   = unmarshaler{true}
	umslice  = []unmarshaler{{true}}
	umstruct = ustruct{unmarshaler{true}}

	umtrueXY   = unmarshalerText{"x", "y"}
	umsliceXY  = []unmarshalerText{{"x", "y"}}
	umstructXY = ustructText{unmarshalerText{"x", "y"}}

	ummapXY = map[unmarshalerText]bool{{"x", "y"}: true}
)

// Test data structures for anonymous fields.
// Point is a local one-field point type, distinct from image.Point (which
// Embed0p embeds).
type Point struct {
	Z int
}

// Top combines value and pointer embeddings, tagged embeddings and an
// unexported embedded struct, to exercise field resolution for anonymous
// fields.
type Top struct {
	Level0 int
	Embed0
	*Embed0a
	*Embed0b `json:"e,omitempty"` // treated as named
	Embed0c  `json:"-"`           // ignored
	Loop
	Embed0p // has Point with X, Y, used
	Embed0q // has Point with Z, used
	embed   // contains exported field
}

type Embed0 struct {
	Level1a int // overridden by Embed0a's Level1a with json tag
	Level1b int // used because Embed0a's Level1b is renamed
	Level1c int // used because Embed0a's Level1c is ignored
	Level1d int // annihilated by Embed0a's Level1d
	Level1e int `json:"x"` // annihilated by Embed0a.Level1e
}

type Embed0a struct {
	Level1a int `json:"Level1a,omitempty"`
	Level1b int `json:"LEVEL1B,omitempty"`
	Level1c int `json:"-"`
	Level1d int // annihilated by Embed0's Level1d
	Level1f int `json:"x"` // annihilated by Embed0's Level1e
}

// Embed0b and Embed0c are defined types with Embed0's fields.
type Embed0b Embed0

type Embed0c Embed0

type Embed0p struct {
	image.Point
}

type Embed0q struct {
	Point
}

// embed is unexported but carries the exported field Q.
type embed struct {
	Q int
}

// Loop embeds a pointer to its own type.
type Loop struct {
	Loop1 int `json:",omitempty"`
	Loop2 int `json:",omitempty"`
	*Loop
}

// From reflect test:
// The X in S6 and S7 annihilate, but they also block the X in S8.S9.
type S5 struct {
	S6
	S7
	S8
}

type S6 struct {
	X int
}

type S7 S6

type S8 struct {
	S9
}

type S9 struct {
	X int
	Y int
}

// From reflect test:
// The X in S11.S6 and S12.S6 annihilate, but they also block the X in S13.S8.S9.
type S10 struct {
	S11
	S12
	S13
}

type S11 struct {
	S6
}

type S12 struct {
	S6
}

type S13 struct {
	S8
}

type Ambig struct {
	// Given "hello", the first match should win.
First  int `json:"HELLO"`
	Second int `json:"Hello"`
}

// XYZ holds three untyped values.
type XYZ struct {
	X interface{}
	Y interface{}
	Z interface{}
}

// byteWithMarshalJSON is a byte whose JSON form is the quoted string "Zhh",
// where hh is the value as two hex digits. Marshaling uses a value receiver.
type byteWithMarshalJSON byte

func (b byteWithMarshalJSON) MarshalJSON() ([]byte, error) {
	return []byte(fmt.Sprintf(`"Z%.2x"`, byte(b))), nil
}

// UnmarshalJSON accepts exactly five bytes: a quote, 'Z', two hex digits and
// a closing quote.
func (b *byteWithMarshalJSON) UnmarshalJSON(data []byte) error {
	if len(data) != 5 || data[0] != '"' || data[1] != 'Z' || data[4] != '"' {
		return fmt.Errorf("bad quoted string")
	}
	// bitSize 8 makes ParseInt parse a signed 8-bit value, so hex digits above
	// 7f are rejected and reported as "bad hex".
	i, err := strconv.ParseInt(string(data[2:4]), 16, 8)
	if err != nil {
		return fmt.Errorf("bad hex")
	}
	*b = byteWithMarshalJSON(i)
	return nil
}

// byteWithPtrMarshalJSON has the same encoding as byteWithMarshalJSON but
// declares both methods on the pointer receiver.
type byteWithPtrMarshalJSON byte

func (b *byteWithPtrMarshalJSON) MarshalJSON() ([]byte, error) {
	return byteWithMarshalJSON(*b).MarshalJSON()
}

func (b *byteWithPtrMarshalJSON) UnmarshalJSON(data []byte) error {
	return (*byteWithMarshalJSON)(b).UnmarshalJSON(data)
}

// byteWithMarshalText is a byte whose text form is "Zhh" (no quotes), where
// hh is the value as two hex digits.
type byteWithMarshalText byte

func (b byteWithMarshalText) MarshalText() ([]byte, error) {
	return []byte(fmt.Sprintf(`Z%.2x`, byte(b))), nil
}

// UnmarshalText accepts exactly three bytes: 'Z' followed by two hex digits.
func (b *byteWithMarshalText) UnmarshalText(data []byte) error {
	if len(data) != 3 || data[0] != 'Z' {
		return fmt.Errorf("bad quoted string")
	}
	i, err := strconv.ParseInt(string(data[1:3]), 16, 8)
	if err != nil {
		return fmt.Errorf("bad hex")
	}
	*b = byteWithMarshalText(i)
	return nil
}

// byteWithPtrMarshalText has the same encoding as byteWithMarshalText but
// declares both methods on the pointer receiver.
type byteWithPtrMarshalText byte

func (b *byteWithPtrMarshalText) MarshalText() ([]byte, error) {
	return byteWithMarshalText(*b).MarshalText()
}

func (b *byteWithPtrMarshalText) UnmarshalText(data []byte) error {
	return (*byteWithMarshalText)(b).UnmarshalText(data)
}

// intWithMarshalJSON is the int counterpart of byteWithMarshalJSON.
type intWithMarshalJSON int

func (b intWithMarshalJSON) MarshalJSON() ([]byte, error) {
	return []byte(fmt.Sprintf(`"Z%.2x"`, int(b))), nil
}

// UnmarshalJSON accepts exactly five bytes: a quote, 'Z', two hex digits and
// a closing quote.
func (b *intWithMarshalJSON) UnmarshalJSON(data []byte) error {
	if len(data) != 5 || data[0] != '"' || data[1] != 'Z' || data[4] != '"' {
		return fmt.Errorf("bad quoted string")
	}
	i, err := strconv.ParseInt(string(data[2:4]), 16, 8)
	if err != nil {
		return fmt.Errorf("bad hex")
	}
	*b = intWithMarshalJSON(i)
	return
nil
}

// intWithPtrMarshalJSON has the same encoding as intWithMarshalJSON but
// declares both methods on the pointer receiver.
type intWithPtrMarshalJSON int

func (b *intWithPtrMarshalJSON) MarshalJSON() ([]byte, error) {
	return intWithMarshalJSON(*b).MarshalJSON()
}

func (b *intWithPtrMarshalJSON) UnmarshalJSON(data []byte) error {
	return (*intWithMarshalJSON)(b).UnmarshalJSON(data)
}

// intWithMarshalText is an int whose text form is "Zhh" (no quotes), where hh
// is the value as two hex digits.
type intWithMarshalText int

func (b intWithMarshalText) MarshalText() ([]byte, error) {
	return []byte(fmt.Sprintf(`Z%.2x`, int(b))), nil
}

// UnmarshalText accepts exactly three bytes: 'Z' followed by two hex digits.
func (b *intWithMarshalText) UnmarshalText(data []byte) error {
	if len(data) != 3 || data[0] != 'Z' {
		return fmt.Errorf("bad quoted string")
	}
	// bitSize 8 makes ParseInt parse a signed 8-bit value, so hex digits above
	// 7f are rejected and reported as "bad hex".
	i, err := strconv.ParseInt(string(data[1:3]), 16, 8)
	if err != nil {
		return fmt.Errorf("bad hex")
	}
	*b = intWithMarshalText(i)
	return nil
}

// intWithPtrMarshalText has the same encoding as intWithMarshalText but
// declares both methods on the pointer receiver.
type intWithPtrMarshalText int

func (b *intWithPtrMarshalText) MarshalText() ([]byte, error) {
	return intWithMarshalText(*b).MarshalText()
}

func (b *intWithPtrMarshalText) UnmarshalText(data []byte) error {
	return (*intWithMarshalText)(b).UnmarshalText(data)
}

type mapStringToStringData struct {
	Data map[string]string `json:"data"`
}

// unmarshalTest is one row of the unmarshalTests table.
// NOTE(review): the per-field notes below are inferred from the field names
// and the table entries; the code that consumes them is outside this view.
type unmarshalTest struct {
	in                    string      // JSON input
	ptr                   interface{} // new(type)
	out                   interface{} // expected decoded value
	err                   error       // expected error, if any
	useNumber             bool        // presumably enables UseNumber
	golden                bool
	disallowUnknownFields bool // presumably enables DisallowUnknownFields
	validateString        bool
}

// B holds a bool that uses the ",string" tag option.
type B struct {
	B bool `json:",string"`
}

type DoublePtr struct {
	I **int
	J **int
}

// JsonSyntaxError has exported Msg and Offset fields so that tests can build
// a *json.SyntaxError with a chosen message.
type JsonSyntaxError struct {
	Msg    string
	Offset int64
}

// err reinterprets the receiver as a *json.SyntaxError through unsafe.Pointer.
// This relies on JsonSyntaxError having the same memory layout as
// json.SyntaxError.
func (self *JsonSyntaxError) err() *json.SyntaxError {
	return (*json.SyntaxError)(unsafe.Pointer(self))
}

var unmarshalTests = []unmarshalTest{
	// basic types
	{in: `true`, ptr: new(bool), out: true},
	{in: `1`, ptr: new(int), out: 1},
	{in: `1.2`, ptr: new(float64), out: 1.2},
	{in: `-5`, ptr: new(int16), out: int16(-5)},
	{in: `2`, ptr: new(json.Number), out: json.Number("2"), useNumber: true},
	{in: `2`, ptr: new(json.Number), out: json.Number("2")},
	{in: `2`, ptr: new(interface{}), out: 2.0},
	{in: `2`, ptr: new(interface{}), out: json.Number("2"), useNumber: true},
	{in: `"a\u1234"`, ptr: new(string), out: "a\u1234"},
	{in: `"http:\/\/"`, ptr: new(string),
out: "http://"}, {in: `"g-clef: \uD834\uDD1E"`, ptr: new(string), out: "g-clef: \U0001D11E"}, {in: `"invalid: \uD834x\uDD1E"`, ptr: new(string), out: "invalid: \uFFFDx\uFFFD"}, {in: "null", ptr: new(interface{}), out: nil}, {in: `{"X": [1,2,3], "Y": 4}`, ptr: new(T), out: T{Y: 4}, err: &json.UnmarshalTypeError{Value: "array", Type: reflect.TypeOf(""), Offset: 7, Struct: "T", Field: "X"}}, {in: `{"X": 23}`, ptr: new(T), out: T{}, err: &json.UnmarshalTypeError{Value: "number", Type: reflect.TypeOf(""), Offset: 8, Struct: "T", Field: "X"}}, {in: `{"x": 1}`, ptr: new(tx), out: tx{}}, {in: `{"x": 1}`, ptr: new(tx), err: fmt.Errorf("json: unknown field \"x\""), disallowUnknownFields: true}, {in: `{"S": 23}`, ptr: new(W), out: W{}, err: &json.UnmarshalTypeError{Value: "number", Type: reflect.TypeOf(SS("")), Struct: "W", Field: "S"}}, {in: `{"F1":1,"F2":2,"F3":3}`, ptr: new(V), out: V{F1: float64(1), F2: int32(2), F3: json.Number("3")}}, {in: `{"F1":1,"F2":2,"F3":3}`, ptr: new(V), out: V{F1: json.Number("1"), F2: int32(2), F3: json.Number("3")}, useNumber: true}, {in: `{"k1":1,"k2":"s","k3":[1,2.0,3e-3],"k4":{"kk1":"s","kk2":2}}`, ptr: new(interface{}), out: ifaceNumAsFloat64}, {in: `{"k1":1,"k2":"s","k3":[1,2.0,3e-3],"k4":{"kk1":"s","kk2":2}}`, ptr: new(interface{}), out: ifaceNumAsNumber, useNumber: true}, {in: `{"":""}`, ptr: new(struct{}), out: struct{}{}}, {in: `{"x":""}`, ptr: new(struct{ X json.Number }), err: errors.New("empty string into json number")}, // raw values with whitespace {in: "\n true ", ptr: new(bool), out: true}, {in: "\t 1 ", ptr: new(int), out: 1}, {in: "\r 1.2 ", ptr: new(float64), out: 1.2}, {in: "\t -5 \n", ptr: new(int16), out: int16(-5)}, {in: "\t \"a\\u1234\" \n", ptr: new(string), out: "a\u1234"}, // Z has a "-" tag. 
{in: `{"Y": 1, "Z": 2}`, ptr: new(T), out: T{Y: 1}}, {in: `{"Y": 1, "Z": 2}`, ptr: new(T), err: fmt.Errorf("json: unknown field \"Z\""), disallowUnknownFields: true}, {in: `{"alpha": "abc", "alphabet": "xyz"}`, ptr: new(U), out: U{Alphabet: "abc"}}, {in: `{"alpha": "abc", "alphabet": "xyz"}`, ptr: new(U), err: fmt.Errorf("json: unknown field \"alphabet\""), disallowUnknownFields: true}, {in: `{"alpha": "abc"}`, ptr: new(U), out: U{Alphabet: "abc"}}, {in: `{"alphabet": "xyz"}`, ptr: new(U), out: U{}}, {in: `{"alphabet": "xyz"}`, ptr: new(U), err: fmt.Errorf("json: unknown field \"alphabet\""), disallowUnknownFields: true}, // syntax errors {in: `{"X": "foo", "Y"}`, err: (&JsonSyntaxError{"invalid character '}' after object key", 17}).err()}, {in: `[1, 2, 3+]`, err: (&JsonSyntaxError{"invalid character '+' after array element", 9}).err()}, {in: `{"X":12x}`, err: (&JsonSyntaxError{"invalid character 'x' after object key:value pair", 8}).err(), useNumber: true}, {in: `[2, 3`, err: (&JsonSyntaxError{Msg: "unexpected end of JSON input", Offset: 5}).err()}, {in: `{"F3": -}`, ptr: new(V), out: V{F3: json.Number("-")}, err: (&JsonSyntaxError{Msg: "invalid character '}' in numeric literal", Offset: 9}).err()}, // raw value errors {in: "\x01 42", err: (&JsonSyntaxError{"invalid character '\\x01' looking for beginning of value", 1}).err()}, {in: " 42 \x01", err: (&JsonSyntaxError{"invalid character '\\x01' after top-level value", 5}).err()}, {in: "\x01 true", err: (&JsonSyntaxError{"invalid character '\\x01' looking for beginning of value", 1}).err()}, {in: " false \x01", err: (&JsonSyntaxError{"invalid character '\\x01' after top-level value", 8}).err()}, {in: "\x01 1.2", err: (&JsonSyntaxError{"invalid character '\\x01' looking for beginning of value", 1}).err()}, {in: " 3.4 \x01", err: (&JsonSyntaxError{"invalid character '\\x01' after top-level value", 6}).err()}, {in: "\x01 \"string\"", err: (&JsonSyntaxError{"invalid character '\\x01' looking for beginning of value", 
1}).err()}, {in: " \"string\" \x01", err: (&JsonSyntaxError{"invalid character '\\x01' after top-level value", 11}).err()}, // array tests {in: `[1, 2, 3]`, ptr: new([3]int), out: [3]int{1, 2, 3}}, {in: `[1, 2, 3]`, ptr: new([1]int), out: [1]int{1}}, {in: `[1, 2, 3]`, ptr: new([5]int), out: [5]int{1, 2, 3, 0, 0}}, {in: `[1, 2, 3]`, ptr: new(MustNotUnmarshalJSON), err: errors.New("MustNotUnmarshalJSON was used")}, // empty array to interface test {in: `[]`, ptr: new([]interface{}), out: []interface{}{}}, {in: `null`, ptr: new([]interface{}), out: []interface{}(nil)}, {in: `{"T":[]}`, ptr: new(map[string]interface{}), out: map[string]interface{}{"T": []interface{}{}}}, {in: `{"T":null}`, ptr: new(map[string]interface{}), out: map[string]interface{}{"T": interface{}(nil)}}, // composite tests {in: allValueIndent, ptr: new(All), out: allValue}, {in: allValueCompact, ptr: new(All), out: allValue}, {in: allValueIndent, ptr: new(*All), out: &allValue}, {in: allValueCompact, ptr: new(*All), out: &allValue}, {in: pallValueIndent, ptr: new(All), out: pallValue}, {in: pallValueCompact, ptr: new(All), out: pallValue}, {in: pallValueIndent, ptr: new(*All), out: &pallValue}, {in: pallValueCompact, ptr: new(*All), out: &pallValue}, // unmarshal interface test {in: `{"T":false}`, ptr: new(unmarshaler), out: umtrue}, // use "false" so test will fail if custom unmarshaler is not called {in: `{"T":false}`, ptr: new(*unmarshaler), out: &umtrue}, {in: `[{"T":false}]`, ptr: new([]unmarshaler), out: umslice}, {in: `[{"T":false}]`, ptr: new(*[]unmarshaler), out: &umslice}, {in: `{"M":{"T":"x:y"}}`, ptr: new(ustruct), out: umstruct}, // UnmarshalText interface test {in: `"x:y"`, ptr: new(unmarshalerText), out: umtrueXY}, {in: `"x:y"`, ptr: new(*unmarshalerText), out: &umtrueXY}, {in: `["x:y"]`, ptr: new([]unmarshalerText), out: umsliceXY}, {in: `["x:y"]`, ptr: new(*[]unmarshalerText), out: &umsliceXY}, {in: `{"M":"x:y"}`, ptr: new(ustructText), out: umstructXY}, // integer-keyed map test { 
in: `{"-1":"a","0":"b","1":"c"}`, ptr: new(map[int]string), out: map[int]string{-1: "a", 0: "b", 1: "c"}, }, { in: `{"0":"a","10":"c","9":"b"}`, ptr: new(map[u8]string), out: map[u8]string{0: "a", 9: "b", 10: "c"}, }, { in: `{"-9223372036854775808":"min","9223372036854775807":"max"}`, ptr: new(map[int64]string), out: map[int64]string{math.MinInt64: "min", math.MaxInt64: "max"}, }, { in: `{"18446744073709551615":"max"}`, ptr: new(map[uint64]string), out: map[uint64]string{math.MaxUint64: "max"}, }, { in: `{"0":false,"10":true}`, ptr: new(map[uintptr]bool), out: map[uintptr]bool{0: false, 10: true}, }, // Check that MarshalText and UnmarshalText take precedence // over default integer handling in map keys. { in: `{"u2":4}`, ptr: new(map[u8marshal]int), out: map[u8marshal]int{2: 4}, }, { in: `{"2":4}`, ptr: new(map[u8marshal]int), err: errMissingU8Prefix, }, // integer-keyed map errors { in: `{"abc":"abc"}`, ptr: new(map[int]string), err: &json.UnmarshalTypeError{Value: "number abc", Type: reflect.TypeOf(0), Offset: 2}, }, { in: `{"256":"abc"}`, ptr: new(map[uint8]string), err: &json.UnmarshalTypeError{Value: "number 256", Type: reflect.TypeOf(uint8(0)), Offset: 2}, }, { in: `{"128":"abc"}`, ptr: new(map[int8]string), err: &json.UnmarshalTypeError{Value: "number 128", Type: reflect.TypeOf(int8(0)), Offset: 2}, }, { in: `{"-1":"abc"}`, ptr: new(map[uint8]string), err: &json.UnmarshalTypeError{Value: "number -1", Type: reflect.TypeOf(uint8(0)), Offset: 2}, }, { in: `{"F":{"a":2,"3":4}}`, ptr: new(map[string]map[int]int), err: &json.UnmarshalTypeError{Value: "number a", Type: reflect.TypeOf(0), Offset: 7}, }, { in: `{"F":{"a":2,"3":4}}`, ptr: new(map[string]map[uint]int), err: &json.UnmarshalTypeError{Value: "number a", Type: reflect.TypeOf(uint(0)), Offset: 7}, }, // Map keys can be encoding.TextUnmarshalers. {in: `{"x:y":true}`, ptr: new(map[unmarshalerText]bool), out: ummapXY}, // If multiple values for the same key exists, only the most recent value is used. 
{in: `{"x:y":false,"x:y":true}`, ptr: new(map[unmarshalerText]bool), out: ummapXY}, { in: `{ "Level0": 1, "Level1b": 2, "Level1c": 3, "x": 4, "Level1a": 5, "LEVEL1B": 6, "e": { "Level1a": 8, "Level1b": 9, "Level1c": 10, "Level1d": 11, "x": 12 }, "Loop1": 13, "Loop2": 14, "X": 15, "Y": 16, "Z": 17, "Q": 18 }`, ptr: new(Top), out: Top{ Level0: 1, Embed0: Embed0{ Level1b: 2, Level1c: 3, }, Embed0a: &Embed0a{ Level1a: 5, Level1b: 6, }, Embed0b: &Embed0b{ Level1a: 8, Level1b: 9, Level1c: 10, Level1d: 11, Level1e: 12, }, Loop: Loop{ Loop1: 13, Loop2: 14, }, Embed0p: Embed0p{ Point: image.Point{X: 15, Y: 16}, }, Embed0q: Embed0q{ Point: Point{Z: 17}, }, embed: embed{ Q: 18, }, }, }, { in: `{"hello": 1}`, ptr: new(Ambig), out: Ambig{First: 1}, }, { in: `{"X": 1,"Y":2}`, ptr: new(S5), out: S5{S8: S8{S9: S9{Y: 2}}}, }, { in: `{"X": 1,"Y":2}`, ptr: new(S5), err: fmt.Errorf("json: unknown field \"X\""), disallowUnknownFields: true, }, { in: `{"X": 1,"Y":2}`, ptr: new(S10), out: S10{S13: S13{S8: S8{S9: S9{Y: 2}}}}, }, { in: `{"X": 1,"Y":2}`, ptr: new(S10), err: fmt.Errorf("json: unknown field \"X\""), disallowUnknownFields: true, }, { in: `{"I": 0, "I": null, "J": null}`, ptr: new(DoublePtr), out: DoublePtr{I: nil, J: nil}, }, // invalid UTF-8 is coerced to valid UTF-8. { in: "\"hello\xffworld\"", ptr: new(string), out: "hello\xffworld", validateString: false, }, { in: "\"hello\xc2\xc2world\"", ptr: new(string), out: "hello\xc2\xc2world", validateString: false, }, { in: "\"hello\xc2\xffworld\"", ptr: new(string), out: "hello\xc2\xffworld", }, { in: "\"hello\\ud800world\"", ptr: new(string), out: "hello\ufffdworld", }, { in: "\"hello\\ud800\\ud800world\"", ptr: new(string), out: "hello\ufffd\ufffdworld", }, { in: "\"hello\xed\xa0\x80\xed\xb0\x80world\"", ptr: new(string), out: "hello\xed\xa0\x80\xed\xb0\x80world", }, // Used to be issue 8305, but time.Time implements encoding.TextUnmarshaler so this works now. 
{ in: `{"2009-11-10T23:00:00Z": "hello world"}`, ptr: new(map[time.Time]string), out: map[time.Time]string{time.Date(2009, 11, 10, 23, 0, 0, 0, time.UTC): "hello world"}, }, // issue 8305 { in: `{"2009-11-10T23:00:00Z": "hello world"}`, ptr: new(map[Point]string), err: &json.UnmarshalTypeError{Value: "object", Type: reflect.TypeOf(map[Point]string{}), Offset: 1}, }, { in: `{"asdf": "hello world"}`, ptr: new(map[unmarshaler]string), err: &json.UnmarshalTypeError{Value: "object", Type: reflect.TypeOf(map[unmarshaler]string{}), Offset: 1}, }, // related to issue 13783. // Go 1.7 changed marshaling a slice of typed byte to use the methods on the byte type, // similar to marshaling a slice of typed int. // These tests check that, assuming the byte type also has valid decoding methods, // either the old base64 string encoding or the new per-element encoding can be // successfully unmarshaled. The custom unmarshalers were accessible in earlier // versions of Go, even though the custom marshaler was not. 
{ in: `"AQID"`, ptr: new([]byteWithMarshalJSON), out: []byteWithMarshalJSON{1, 2, 3}, }, { in: `["Z01","Z02","Z03"]`, ptr: new([]byteWithMarshalJSON), out: []byteWithMarshalJSON{1, 2, 3}, golden: true, }, { in: `"AQID"`, ptr: new([]byteWithMarshalText), out: []byteWithMarshalText{1, 2, 3}, }, { in: `["Z01","Z02","Z03"]`, ptr: new([]byteWithMarshalText), out: []byteWithMarshalText{1, 2, 3}, golden: true, }, { in: `"AQID"`, ptr: new([]byteWithPtrMarshalJSON), out: []byteWithPtrMarshalJSON{1, 2, 3}, }, { in: `["Z01","Z02","Z03"]`, ptr: new([]byteWithPtrMarshalJSON), out: []byteWithPtrMarshalJSON{1, 2, 3}, golden: true, }, { in: `"AQID"`, ptr: new([]byteWithPtrMarshalText), out: []byteWithPtrMarshalText{1, 2, 3}, }, { in: `["Z01","Z02","Z03"]`, ptr: new([]byteWithPtrMarshalText), out: []byteWithPtrMarshalText{1, 2, 3}, golden: true, }, // ints work with the marshaler but not the base64 []byte case { in: `["Z01","Z02","Z03"]`, ptr: new([]intWithMarshalJSON), out: []intWithMarshalJSON{1, 2, 3}, golden: true, }, { in: `["Z01","Z02","Z03"]`, ptr: new([]intWithMarshalText), out: []intWithMarshalText{1, 2, 3}, golden: true, }, { in: `["Z01","Z02","Z03"]`, ptr: new([]intWithPtrMarshalJSON), out: []intWithPtrMarshalJSON{1, 2, 3}, golden: true, }, { in: `["Z01","Z02","Z03"]`, ptr: new([]intWithPtrMarshalText), out: []intWithPtrMarshalText{1, 2, 3}, golden: true, }, {in: `0.000001`, ptr: new(float64), out: 0.000001, golden: true}, {in: `1e-7`, ptr: new(float64), out: 1e-7, golden: true}, {in: `100000000000000000000`, ptr: new(float64), out: 100000000000000000000.0, golden: true}, {in: `1e+21`, ptr: new(float64), out: 1e21, golden: true}, {in: `-0.000001`, ptr: new(float64), out: -0.000001, golden: true}, {in: `-1e-7`, ptr: new(float64), out: -1e-7, golden: true}, {in: `-100000000000000000000`, ptr: new(float64), out: -100000000000000000000.0, golden: true}, {in: `-1e+21`, ptr: new(float64), out: -1e21, golden: true}, {in: `999999999999999900000`, ptr: new(float64), out: 
999999999999999900000.0, golden: true}, {in: `9007199254740992`, ptr: new(float64), out: 9007199254740992.0, golden: true}, {in: `9007199254740993`, ptr: new(float64), out: 9007199254740992.0, golden: false}, { in: `{"V": {"F2": "hello"}}`, ptr: new(VOuter), err: &json.UnmarshalTypeError{ Value: "string", Struct: "V", Field: "V.F2", Type: reflect.TypeOf(int32(0)), Offset: 20, }, }, { in: `{"V": {"F4": {}, "F2": "hello"}}`, ptr: new(VOuter), err: &json.UnmarshalTypeError{ Value: "string", Struct: "V", Field: "V.F2", Type: reflect.TypeOf(int32(0)), Offset: 30, }, }, // issue 15146. // invalid inputs in wrongStringTests below. {in: `{"B":"true"}`, ptr: new(B), out: B{true}, golden: true}, {in: `{"B":"false"}`, ptr: new(B), out: B{false}, golden: true}, {in: `{"B": "maybe"}`, ptr: new(B), err: errors.New(`json: invalid use of ,string struct tag, trying to unmarshal "maybe" into bool`)}, {in: `{"B": "tru"}`, ptr: new(B), err: errors.New(`json: invalid use of ,string struct tag, trying to unmarshal "tru" into bool`)}, {in: `{"B": "False"}`, ptr: new(B), err: errors.New(`json: invalid use of ,string struct tag, trying to unmarshal "False" into bool`)}, {in: `{"B": "null"}`, ptr: new(B), out: B{false}}, {in: `{"B": "nul"}`, ptr: new(B), err: errors.New(`json: invalid use of ,string struct tag, trying to unmarshal "nul" into bool`)}, {in: `{"B": [2, 3]}`, ptr: new(B), err: errors.New(`json: invalid use of ,string struct tag, trying to unmarshal unquoted value into bool`)}, // additional tests for disallowUnknownFields { in: `{ "Level0": 1, "Level1b": 2, "Level1c": 3, "x": 4, "Level1a": 5, "LEVEL1B": 6, "e": { "Level1a": 8, "Level1b": 9, "Level1c": 10, "Level1d": 11, "x": 12 }, "Loop1": 13, "Loop2": 14, "X": 15, "Y": 16, "Z": 17, "Q": 18, "extra": true }`, ptr: new(Top), err: fmt.Errorf("json: unknown field \"extra\""), disallowUnknownFields: true, }, { in: `{ "Level0": 1, "Level1b": 2, "Level1c": 3, "x": 4, "Level1a": 5, "LEVEL1B": 6, "e": { "Level1a": 8, "Level1b": 9, 
"Level1c": 10, "Level1d": 11, "x": 12, "extra": null }, "Loop1": 13, "Loop2": 14, "X": 15, "Y": 16, "Z": 17, "Q": 18 }`, ptr: new(Top), err: fmt.Errorf("json: unknown field \"extra\""), disallowUnknownFields: true, }, // issue 26444 // json.UnmarshalTypeError without field & struct values { in: `{"data":{"test1": "bob", "test2": 123}}`, ptr: new(mapStringToStringData), err: &json.UnmarshalTypeError{Value: "number", Type: reflect.TypeOf(""), Offset: 37, Struct: "mapStringToStringData", Field: "data"}, }, { in: `{"data":{"test1": 123, "test2": "bob"}}`, ptr: new(mapStringToStringData), err: &json.UnmarshalTypeError{Value: "number", Type: reflect.TypeOf(""), Offset: 21, Struct: "mapStringToStringData", Field: "data"}, }, // trying to decode JSON arrays or objects via TextUnmarshaler { in: `[1, 2, 3]`, ptr: new(MustNotUnmarshalText), err: &json.UnmarshalTypeError{Value: "array", Type: reflect.TypeOf(&MustNotUnmarshalText{}), Offset: 1}, }, { in: `{"foo": "bar"}`, ptr: new(MustNotUnmarshalText), err: &json.UnmarshalTypeError{Value: "object", Type: reflect.TypeOf(&MustNotUnmarshalText{}), Offset: 1}, }, // #22369 { in: `{"PP": {"T": {"Y": "bad-type"}}}`, ptr: new(P), err: &json.UnmarshalTypeError{ Value: "string", Struct: "T", Field: "PP.T.Y", Type: reflect.TypeOf(0), Offset: 29, }, }, { in: `{"Ts": [{"Y": 1}, {"Y": 2}, {"Y": "bad-type"}]}`, ptr: new(PP), err: &json.UnmarshalTypeError{ Value: "string", Struct: "T", Field: "Ts.Y", Type: reflect.TypeOf(0), Offset: 29, }, }, // #14702 { in: `invalid`, ptr: new(json.Number), err: (&JsonSyntaxError{ Msg: "invalid character 'i' looking for beginning of value", Offset: 1, }).err(), }, { in: `"invalid"`, ptr: new(json.Number), err: fmt.Errorf("json: invalid number literal, trying to unmarshal %q into Number", `"invalid"`), }, { in: `{"A":"invalid"}`, ptr: new(struct{ A json.Number }), err: fmt.Errorf("json: invalid number literal, trying to unmarshal %q into Number", `"invalid"`), }, { in: `{"A":"invalid"}`, ptr: new(struct { A 
json.Number `json:",string"` }), err: fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into json.Number", `invalid`), }, { in: `{"A":"invalid"}`, ptr: new(map[string]json.Number), err: fmt.Errorf("json: invalid number literal, trying to unmarshal %q into Number", `"invalid"`), }, // UTF-8 and string validation tests {in: `\u`, ptr: new(interface{}), err: fmt.Errorf("json: invald char"), validateString: true}, {in: `\u`, ptr: new(string), err: fmt.Errorf("json: invald char"), validateString: true}, {in: "\"\x00\"", ptr: new(interface{}), err: fmt.Errorf("json: invald char"), validateString: true}, {in: "\"\x00\"", ptr: new(string), err: fmt.Errorf("json: invald char"), validateString: true}, {in: "\"\xff\"", ptr: new(interface{}), out: interface{}("\ufffd"), validateString: true}, {in: "\"\xff\"", ptr: new(string), out: "\ufffd", validateString: true}, {in: "\"\x00\"", ptr: new(interface{}), out: interface{}("\x00"), validateString: false}, {in: "\"\x00\"", ptr: new(string), out: "\x00", validateString: false}, {in: "\"\xff\"", ptr: new(interface{}), out: interface{}("\xff"), validateString: false}, {in: "\"\xff\"", ptr: new(string), out: "\xff", validateString: false}, // cases found by fuzz { in: `{"H":{"A": {}}}`, ptr: new(struct { F0 struct { F1 json.Number "json:\"a,omitempty\"" } "json:\"H,\"" }), err: fmt.Errorf("Mismatch type json.Number with value object.."), }, } func trim(b []byte) []byte { if len(b) > 20 { return b[0:20] } return b } func diff(t *testing.T, a, b []byte) { for i := 0; ; i++ { if i >= len(a) || i >= len(b) || a[i] != b[i] { j := i - 10 if j < 0 { j = 0 } t.Errorf("diverge at %d: «%s» vs «%s»", i, trim(a[j:]), trim(b[j:])) return } } } func TestMarshal(t *testing.T) { b, err := Marshal(allValue) if err != nil { t.Fatalf("Marshal allValue: %v", err) } if string(b) != allValueCompact { t.Errorf("Marshal allValueCompact") diff(t, b, []byte(allValueCompact)) return } b, err = Marshal(pallValue) if err != nil { 
t.Fatalf("Marshal pallValue: %v", err) } if string(b) != pallValueCompact { t.Errorf("Marshal pallValueCompact") diff(t, b, []byte(pallValueCompact)) return } } func TestMarshalNumberZeroVal(t *testing.T) { var n json.Number out, err := Marshal(n) if err != nil { t.Fatal(err) } outStr := string(out) if outStr != "0" { t.Fatalf("Invalid zero val for json.Number: %q", outStr) } } func TestMarshalEmbeds(t *testing.T) { top := &Top{ Level0: 1, Embed0: Embed0{ Level1b: 2, Level1c: 3, }, Embed0a: &Embed0a{ Level1a: 5, Level1b: 6, }, Embed0b: &Embed0b{ Level1a: 8, Level1b: 9, Level1c: 10, Level1d: 11, Level1e: 12, }, Loop: Loop{ Loop1: 13, Loop2: 14, }, Embed0p: Embed0p{ Point: image.Point{X: 15, Y: 16}, }, Embed0q: Embed0q{ Point: Point{Z: 17}, }, embed: embed{ Q: 18, }, } b, err := Marshal(top) if err != nil { t.Fatal(err) } want := "{\"Level0\":1,\"Level1b\":2,\"Level1c\":3,\"Level1a\":5,\"LEVEL1B\":6,\"e\":{\"Level1a\":8,\"Level1b\":9,\"Level1c\":10,\"Level1d\":11,\"x\":12},\"Loop1\":13,\"Loop2\":14,\"X\":15,\"Y\":16,\"Z\":17,\"Q\":18}" if string(b) != want { t.Errorf("Wrong marshal result.\n got: %q\nwant: %q", b, want) } } func TestUnmarshal(t *testing.T) { for i, tt := range unmarshalTests { // construct decoder dec := decoder.NewDecoder(tt.in) if tt.useNumber { dec.UseNumber() } if tt.disallowUnknownFields { dec.DisallowUnknownFields() } if tt.validateString { dec.ValidateString() } // check decode result for invalid jsons if !json.Valid([]byte(tt.in)) || tt.ptr == nil { var sv interface{} err := dec.Decode(&sv) if err == nil && tt.err != nil { err = dec.CheckTrailings() if err == nil && tt.err != nil { t.Errorf("test json #%d: %v, %v, want %v", i, tt.in, err, tt.err) } } continue } typ := reflect.TypeOf(tt.ptr) if typ.Kind() != reflect.Ptr { t.Errorf("#%d: unmarshalTest.ptr %T is not a pointer type", i, tt.ptr) continue } typ = typ.Elem() v := reflect.New(typ) if !reflect.DeepEqual(tt.ptr, v.Interface()) { // There's no reason for ptr to point to non-zero data, 
// as we decode into new(right-type), so the data is // discarded. // This can easily mean tests that silently don't test // what they should. To test decoding into existing // data, see TestPrefilled. t.Errorf("#%d: unmarshalTest.ptr %#v is not a pointer to a zero value", i, tt.ptr) continue } if err := dec.Decode(v.Interface()); (err == nil) != (tt.err == nil) { spew.Dump(tt) t.Fatalf("#%d: %v, want %v", i, err, tt.err) continue } else if err != nil { continue } if !reflect.DeepEqual(v.Elem().Interface(), tt.out) { require.Equal(t, v.Elem().Interface(), tt.out) t.Errorf("#%d: mismatch\nhave: %#+v\nwant: %#+v", i, v.Elem().Interface(), tt.out) data, _ := Marshal(v.Elem().Interface()) println(string(data)) data, _ = Marshal(tt.out) println(string(data)) continue } // Check round trip also decodes correctly. if tt.err == nil { enc, err := Marshal(v.Interface()) if err != nil { t.Errorf("#%d: error re-marshaling: %v", i, err) continue } if tt.golden && !bytes.Equal(enc, []byte(tt.in)) { t.Errorf("#%d: remarshal mismatch:\nhave: %s\nwant: %s", i, enc, []byte(tt.in)) } vv := reflect.New(reflect.TypeOf(tt.ptr).Elem()) dec = decoder.NewDecoder(string(enc)) if tt.useNumber { dec.UseNumber() } if err := dec.Decode(vv.Interface()); err != nil { t.Errorf("#%d: error re-unmarshaling %#q: %v", i, enc, err) continue } if !reflect.DeepEqual(v.Elem().Interface(), vv.Elem().Interface()) { t.Errorf("#%d: mismatch\nhave: %#+v\nwant: %#+v", i, v.Elem().Interface(), vv.Elem().Interface()) t.Errorf(" In: %q", strings.Map(noSpace, tt.in)) t.Errorf("Marshal: %q", strings.Map(noSpace, string(enc))) continue } } } } var jsonBig []byte func initBig() { n := 10000 if testing.Short() { n = 100 } b, err := Marshal(genValue(n)) if err != nil { panic(err) } jsonBig = b } func genValue(n int) interface{} { if n > 1 { switch rand.Intn(2) { case 0: return genArray(n) case 1: return genMap(n) } } switch rand.Intn(3) { case 0: return rand.Intn(2) == 0 case 1: return float64(rand.Uint64()) / 
4294967296 case 2: return genString(30) } panic("unreachable") } func genString(stddev float64) string { n := int(math.Abs(rand.NormFloat64()*stddev + stddev/2)) c := make([]rune, n) for i := range c { f := math.Abs(rand.NormFloat64()*64 + 32) if f > 0x10ffff { f = 0x10ffff } c[i] = rune(f) } return string(c) } func genArray(n int) []interface{} { f := int(math.Abs(rand.NormFloat64()) * math.Min(10, float64(n/2))) if f > n { f = n } if f < 1 { f = 1 } x := make([]interface{}, f) for i := range x { x[i] = genValue(((i+1)*n)/f - (i*n)/f) } return x } func genMap(n int) map[string]interface{} { f := int(math.Abs(rand.NormFloat64()) * math.Min(10, float64(n/2))) if f > n { f = n } if n > 0 && f == 0 { f = 1 } x := make(map[string]interface{}) x[genString(10)] = genValue(n / f) return x } func TestUnmarshalMarshal(t *testing.T) { initBig() var v interface{} if err := Unmarshal(jsonBig, &v); err != nil { if e, ok := err.(decoder.SyntaxError); ok { println(e.Description()) } t.Fatalf("Unmarshal: %v", err) } b, err := Marshal(v) if err != nil { t.Fatalf("Marshal: %v", err) } if !bytes.Equal(jsonBig, b) { t.Errorf("Marshal jsonBig") diff(t, b, jsonBig) println(string(b)) println(string(jsonBig)) return } } var numberTests = []struct { in string i int64 intErr string f float64 floatErr string }{ {in: "-1.23e1", intErr: "strconv.ParseInt: parsing \"-1.23e1\": invalid syntax", f: -1.23e1}, {in: "-12", i: -12, f: -12.0}, {in: "1e1000", intErr: "strconv.ParseInt: parsing \"1e1000\": invalid syntax", floatErr: "strconv.ParseFloat: parsing \"1e1000\": value out of range"}, } // Independent of Decode, basic coverage of the accessors in json.Number func TestNumberAccessors(t *testing.T) { for _, tt := range numberTests { n := json.Number(tt.in) if s := n.String(); s != tt.in { t.Errorf("json.Number(%q).String() is %q", tt.in, s) } if i, err := n.Int64(); err == nil && tt.intErr == "" && i != tt.i { t.Errorf("json.Number(%q).Int64() is %d", tt.in, i) } else if (err == nil && 
tt.intErr != "") || (err != nil && err.Error() != tt.intErr) { t.Errorf("json.Number(%q).Int64() wanted error %q but got: %v", tt.in, tt.intErr, err) } if f, err := n.Float64(); err == nil && tt.floatErr == "" && f != tt.f { t.Errorf("json.Number(%q).Float64() is %g", tt.in, f) } else if (err == nil && tt.floatErr != "") || (err != nil && err.Error() != tt.floatErr) { t.Errorf("json.Number(%q).Float64() wanted error %q but got: %v", tt.in, tt.floatErr, err) } } } func TestLargeByteSlice(t *testing.T) { s0 := make([]byte, 2000) for i := range s0 { s0[i] = byte(i) } b, err := Marshal(s0) if err != nil { t.Fatalf("Marshal: %v", err) } var s1 []byte if err := Unmarshal(b, &s1); err != nil { t.Fatalf("Unmarshal: %v", err) } if !bytes.Equal(s0, s1) { t.Errorf("Marshal large byte slice") diff(t, s0, s1) } } type Xint struct { X int } func TestUnmarshalPtrPtr(t *testing.T) { var xint Xint pxint := &xint if err := Unmarshal([]byte(`{"X":1}`), &pxint); err != nil { t.Fatalf("Unmarshal: %v", err) } if xint.X != 1 { t.Fatalf("Did not write to xint") } } func noSpace(c rune) rune { if utils.IsSpace(byte(c)) { // only used for ascii return -1 } return c } type All struct { Bool bool Int int Int8 int8 Int16 int16 Int32 int32 Int64 int64 Uint uint Uint8 uint8 Uint16 uint16 Uint32 uint32 Uint64 uint64 Uintptr uintptr Float32 float32 Float64 float64 Foo string `json:"bar"` Foo2 string `json:"bar2,dummyopt"` IntStr int64 `json:",string"` UintptrStr uintptr `json:",string"` PBool *bool PInt *int PInt8 *int8 PInt16 *int16 PInt32 *int32 PInt64 *int64 PUint *uint PUint8 *uint8 PUint16 *uint16 PUint32 *uint32 PUint64 *uint64 PUintptr *uintptr PFloat32 *float32 PFloat64 *float64 String string PString *string Map map[string]Small MapP map[string]*Small MapPNil map[string]*Small PMap *map[string]Small PMapP *map[string]*Small PMapPNil *map[string]*Small EmptyMap map[string]Small NilMap map[string]Small Slice []Small SliceP []*Small PSlice *[]Small PSliceP *[]*Small EmptySlice []Small 
NilSlice []Small StringSlice []string ByteSlice []byte Small Small PSmall *Small PPSmall **Small Interface interface{} PInterface *interface{} unexported int } type Small struct { Tag string } var allValue = All{ Bool: true, Int: 2, Int8: 3, Int16: 4, Int32: 5, Int64: 6, Uint: 7, Uint8: 8, Uint16: 9, Uint32: 10, Uint64: 11, Uintptr: 12, Float32: 14.25, Float64: 15.25, Foo: "foo", Foo2: "foo2", IntStr: 42, UintptrStr: 44, String: "16", Map: map[string]Small{ "17": {Tag: "tag17"}, }, MapP: map[string]*Small{ "18": {Tag: "tag19"}, }, MapPNil: map[string]*Small{ "19": nil, }, EmptyMap: map[string]Small{}, Slice: []Small{{Tag: "tag20"}, {Tag: "tag21"}}, SliceP: []*Small{{Tag: "tag22"}, nil, {Tag: "tag23"}}, EmptySlice: []Small{}, StringSlice: []string{"str24", "str25", "str26"}, ByteSlice: []byte{27, 28, 29}, Small: Small{Tag: "tag30"}, PSmall: &Small{Tag: "tag31"}, Interface: 5.2, } var pallValue = All{ PBool: &allValue.Bool, PInt: &allValue.Int, PInt8: &allValue.Int8, PInt16: &allValue.Int16, PInt32: &allValue.Int32, PInt64: &allValue.Int64, PUint: &allValue.Uint, PUint8: &allValue.Uint8, PUint16: &allValue.Uint16, PUint32: &allValue.Uint32, PUint64: &allValue.Uint64, PUintptr: &allValue.Uintptr, PFloat32: &allValue.Float32, PFloat64: &allValue.Float64, PString: &allValue.String, PMap: &allValue.Map, PMapP: &allValue.MapP, PMapPNil: &allValue.MapPNil, PSlice: &allValue.Slice, PSliceP: &allValue.SliceP, PPSmall: &allValue.PSmall, PInterface: &allValue.Interface, } var allValueIndent = `{ "Bool": true, "Int": 2, "Int8": 3, "Int16": 4, "Int32": 5, "Int64": 6, "Uint": 7, "Uint8": 8, "Uint16": 9, "Uint32": 10, "Uint64": 11, "Uintptr": 12, "Float32": 14.25, "Float64": 15.25, "bar": "foo", "bar2": "foo2", "IntStr": "42", "UintptrStr": "44", "PBool": null, "PInt": null, "PInt8": null, "PInt16": null, "PInt32": null, "PInt64": null, "PUint": null, "PUint8": null, "PUint16": null, "PUint32": null, "PUint64": null, "PUintptr": null, "PFloat32": null, "PFloat64": null, "String": 
"16", "PString": null, "Map": { "17": { "Tag": "tag17" } }, "MapP": { "18": { "Tag": "tag19" } }, "MapPNil": { "19": null }, "PMap": null, "PMapP": null, "PMapPNil": null, "EmptyMap": {}, "NilMap": null, "Slice": [ { "Tag": "tag20" }, { "Tag": "tag21" } ], "SliceP": [ { "Tag": "tag22" }, null, { "Tag": "tag23" } ], "PSlice": null, "PSliceP": null, "EmptySlice": [], "NilSlice": null, "StringSlice": [ "str24", "str25", "str26" ], "ByteSlice": "Gxwd", "Small": { "Tag": "tag30" }, "PSmall": { "Tag": "tag31" }, "PPSmall": null, "Interface": 5.2, "PInterface": null }` var allValueCompact = strings.Map(noSpace, allValueIndent) var pallValueIndent = `{ "Bool": false, "Int": 0, "Int8": 0, "Int16": 0, "Int32": 0, "Int64": 0, "Uint": 0, "Uint8": 0, "Uint16": 0, "Uint32": 0, "Uint64": 0, "Uintptr": 0, "Float32": 0, "Float64": 0, "bar": "", "bar2": "", "IntStr": "0", "UintptrStr": "0", "PBool": true, "PInt": 2, "PInt8": 3, "PInt16": 4, "PInt32": 5, "PInt64": 6, "PUint": 7, "PUint8": 8, "PUint16": 9, "PUint32": 10, "PUint64": 11, "PUintptr": 12, "PFloat32": 14.25, "PFloat64": 15.25, "String": "", "PString": "16", "Map": null, "MapP": null, "MapPNil": null, "PMap": { "17": { "Tag": "tag17" } }, "PMapP": { "18": { "Tag": "tag19" } }, "PMapPNil": { "19": null }, "EmptyMap": null, "NilMap": null, "Slice": null, "SliceP": null, "PSlice": [ { "Tag": "tag20" }, { "Tag": "tag21" } ], "PSliceP": [ { "Tag": "tag22" }, null, { "Tag": "tag23" } ], "EmptySlice": null, "NilSlice": null, "StringSlice": null, "ByteSlice": null, "Small": { "Tag": "" }, "PSmall": null, "PPSmall": { "Tag": "tag31" }, "Interface": null, "PInterface": 5.2 }` var pallValueCompact = strings.Map(noSpace, pallValueIndent) func TestRefUnmarshal(t *testing.T) { type S struct { // Ref is defined in encode_test.go. 
R0 Ref R1 *Ref R2 RefText R3 *RefText } want := S{ R0: 12, R1: new(Ref), R2: 13, R3: new(RefText), } *want.R1 = 12 *want.R3 = 13 var got S if err := Unmarshal([]byte(`{"R0":"ref","R1":"ref","R2":"ref","R3":"ref"}`), &got); err != nil { t.Fatalf("Unmarshal: %v", err) } if !reflect.DeepEqual(got, want) { t.Errorf("got %+v, want %+v", got, want) } } // Test that the empty string doesn't panic decoding when ,string is specified // Issue 3450 func TestEmptyString(t *testing.T) { type T2 struct { Number1 int `json:",string"` Number2 string `json:",string"` Pass bool `json:",string"` } data := `{"Number1":"1", "Number2":"","Pass":"true"}` var t2, t3 T2 t2.Number2 = "a" t3.Number2 = "a" err := Unmarshal([]byte(data), &t2) if err == nil { t.Fatal("Decode: did not return error") } println(err.Error()) err2 := json.Unmarshal([]byte(data), &t3) assert.Equal(t, err == nil, err2 == nil) assert.Equal(t, t3, t2) } // Test that a null for ,string is not replaced with the previous quoted string (issue 7046). // It should also not be an error (issue 2540, issue 8587). 
func TestNullString(t *testing.T) { type T struct { A int `json:",string"` B int `json:",string"` C *int `json:",string"` } data := []byte(`{"A": "1", "B": null, "C": null}`) var s T s.B = 1 s.C = new(int) *s.C = 2 err := Unmarshal(data, &s) if err != nil { t.Fatalf("Unmarshal: %v", err) } if s.B != 1 || s.C != nil { t.Fatalf("after Unmarshal, s.B=%d, s.C=%p, want 1, nil", s.B, s.C) } } type NullTest struct { Bool bool Int int Int8 int8 Int16 int16 Int32 int32 Int64 int64 Uint uint Uint8 uint8 Uint16 uint16 Uint32 uint32 Uint64 uint64 Float32 float32 Float64 float64 String string PBool *bool Map map[string]string Slice []string Interface interface{} PRaw *json.RawMessage PTime *time.Time PBigInt *big.Int PText *MustNotUnmarshalText PBuffer *bytes.Buffer // has methods, just not relevant ones PStruct *struct{} Raw json.RawMessage Time time.Time BigInt big.Int Text MustNotUnmarshalText Buffer bytes.Buffer Struct struct{} } // JSON null values should be ignored for primitives and string values instead of resulting in an error. // Issue 2540 func TestUnmarshalNulls(t *testing.T) { // Unmarshal docs: // The JSON null value unmarshals into an interface, map, pointer, or slice // by setting that Go value to nil. Because null is often used in JSON to mean // ``not present,'' unmarshaling a JSON null into any other Go type has no effect // on the value and produces no error. 
jsonData := []byte(`{ "Bool" : null, "Int" : null, "Int8" : null, "Int16" : null, "Int32" : null, "Int64" : null, "Uint" : null, "Uint8" : null, "Uint16" : null, "Uint32" : null, "Uint64" : null, "Float32" : null, "Float64" : null, "String" : null, "PBool": null, "Map": null, "Slice": null, "Interface": null, "PRaw": null, "PTime": null, "PBigInt": null, "PText": null, "PBuffer": null, "PStruct": null, "Raw": null, "Time": null, "BigInt": null, "Text": null, "Buffer": null, "Struct": null }`) nulls := NullTest{ Bool: true, Int: 2, Int8: 3, Int16: 4, Int32: 5, Int64: 6, Uint: 7, Uint8: 8, Uint16: 9, Uint32: 10, Uint64: 11, Float32: 12.1, Float64: 13.1, String: "14", PBool: new(bool), Map: map[string]string{}, Slice: []string{}, Interface: new(MustNotUnmarshalJSON), PRaw: new(json.RawMessage), PTime: new(time.Time), PBigInt: new(big.Int), PText: new(MustNotUnmarshalText), PStruct: new(struct{}), PBuffer: new(bytes.Buffer), Raw: json.RawMessage("123"), Time: time.Unix(123456789, 0), BigInt: *big.NewInt(123), } before := nulls.Time.String() err := Unmarshal(jsonData, &nulls) if err != nil { t.Errorf("Unmarshal of null values failed: %v", err) } if !nulls.Bool || nulls.Int != 2 || nulls.Int8 != 3 || nulls.Int16 != 4 || nulls.Int32 != 5 || nulls.Int64 != 6 || nulls.Uint != 7 || nulls.Uint8 != 8 || nulls.Uint16 != 9 || nulls.Uint32 != 10 || nulls.Uint64 != 11 || nulls.Float32 != 12.1 || nulls.Float64 != 13.1 || nulls.String != "14" { t.Errorf("Unmarshal of null values affected primitives") } if nulls.PBool != nil { t.Errorf("Unmarshal of null did not clear nulls.PBool") } if nulls.Map != nil { t.Errorf("Unmarshal of null did not clear nulls.Map") } if nulls.Slice != nil { t.Errorf("Unmarshal of null did not clear nulls.Slice") } if nulls.Interface != nil { t.Errorf("Unmarshal of null did not clear nulls.Interface") } if nulls.PRaw != nil { t.Errorf("Unmarshal of null did not clear nulls.PRaw") } if nulls.PTime != nil { t.Errorf("Unmarshal of null did not clear 
nulls.PTime") } if nulls.PBigInt != nil { t.Errorf("Unmarshal of null did not clear nulls.PBigInt") } if nulls.PText != nil { t.Errorf("Unmarshal of null did not clear nulls.PText") } if nulls.PBuffer != nil { t.Errorf("Unmarshal of null did not clear nulls.PBuffer") } if nulls.PStruct != nil { t.Errorf("Unmarshal of null did not clear nulls.PStruct") } if string(nulls.Raw) != "null" { t.Errorf("Unmarshal of json.RawMessage null did not record null: %v", string(nulls.Raw)) } if nulls.Time.String() != before { t.Errorf("Unmarshal of time.Time null set time to %v", nulls.Time.String()) } if nulls.BigInt.String() != "123" { t.Errorf("Unmarshal of big.Int null set int to %v", nulls.BigInt.String()) } } type MustNotUnmarshalJSON struct{} func (x MustNotUnmarshalJSON) UnmarshalJSON(_ []byte) error { return errors.New("MustNotUnmarshalJSON was used") } type MustNotUnmarshalText struct{} func (x MustNotUnmarshalText) UnmarshalText(_ []byte) error { return errors.New("MustNotUnmarshalText was used") } func TestStringKind(t *testing.T) { type stringKind string var m1, m2 map[stringKind]int m1 = map[stringKind]int{ "foo": 42, } data, err := Marshal(m1) if err != nil { t.Errorf("Unexpected error marshaling: %v", err) } err = Unmarshal(data, &m2) if err != nil { t.Errorf("Unexpected error unmarshaling: %v", err) } if !reflect.DeepEqual(m1, m2) { t.Error("Items should be equal after encoding and then decoding") } } // Custom types with []byte as underlying type could not be marshaled // and then unmarshaled. // Issue 8962. func TestByteKind(t *testing.T) { type byteKind []byte a := byteKind("hello") data, err := Marshal(a) if err != nil { t.Error(err) } var b byteKind err = Unmarshal(data, &b) if err != nil { t.Fatal(err) } if !reflect.DeepEqual(a, b) { t.Errorf("expected %v == %v", a, b) } } // The fix for issue 8962 introduced a regression. // Issue 12921. 
func TestSliceOfCustomByte(t *testing.T) { type Uint8 uint8 a := []Uint8("hello") data, err := Marshal(a) if err != nil { t.Fatal(err) } var b []Uint8 err = Unmarshal(data, &b) if err != nil { t.Fatal(err) } if !reflect.DeepEqual(a, b) { t.Fatalf("expected %v == %v", a, b) } } var decodeTypeErrorTests = []struct { dest interface{} src string }{ {new(error), `{}`}, // issue 4222 {new(error), `[]`}, {new(error), `""`}, {new(error), `123`}, {new(error), `true`}, } func TestUnmarshalTypeError(t *testing.T) { for _, item := range decodeTypeErrorTests { err := Unmarshal([]byte(item.src), item.dest) if _, ok := err.(*json.UnmarshalTypeError); !ok { if _, ok := err.(*decoder.MismatchTypeError); !ok { if _, ok = err.(decoder.SyntaxError); !ok { t.Errorf("expected type error for Unmarshal(%q, type %T): got %T", item.src, item.dest, err) } } } } } var decodeMismatchErrorTests = []struct { dest interface{} src string }{ {new(int), `{}`}, {new(string), `{}`}, {new(bool), `{}`}, {new([]byte), `{}`}, } func TestMismatchTypeError(t *testing.T) { for _, item := range decodeMismatchErrorTests { err := Unmarshal([]byte(item.src), item.dest) if _, ok := err.(*decoder.MismatchTypeError); !ok { if _, ok = err.(decoder.SyntaxError); !ok { t.Errorf("expected mismatch error for Unmarshal(%q, type %T): got %T", item.src, item.dest, err) } } } } var unmarshalSyntaxTests = []string{ "tru", "fals", "nul", "123e", `"hello`, `[1,2,3`, `{"key":1`, `{"key":1,`, } func TestUnmarshalSyntax(t *testing.T) { var x interface{} for _, src := range unmarshalSyntaxTests { err := Unmarshal([]byte(src), &x) if _, ok := err.(decoder.SyntaxError); !ok { t.Errorf("expected syntax error for Unmarshal(%q): got %T", src, err) } } } // Test handling of unexported fields that should be ignored. 
// Issue 4660 // //goland:noinspection GoVetStructTag type unexportedFields struct { Name string m map[string]interface{} `json:"-"` m2 map[string]interface{} `json:"abcd"` s []int `json:"-"` } func TestUnmarshalUnexported(t *testing.T) { input := `{"Name": "Bob", "m": {"x": 123}, "m2": {"y": 456}, "abcd": {"z": 789}, "s": [2, 3]}` want := &unexportedFields{Name: "Bob"} out := &unexportedFields{} err := Unmarshal([]byte(input), out) if err != nil { t.Errorf("got error %v, expected nil", err) } if !reflect.DeepEqual(out, want) { t.Errorf("got %q, want %q", out, want) } } // Time3339 is a time.Time which encodes to and from JSON // as an RFC 3339 time in UTC. type Time3339 time.Time func (t *Time3339) UnmarshalJSON(b []byte) error { if len(b) < 2 || b[0] != '"' || b[len(b)-1] != '"' { return fmt.Errorf("types: failed to unmarshal non-string value %q as an RFC 3339 time", b) } tm, err := time.Parse(time.RFC3339, string(b[1:len(b)-1])) if err != nil { return err } *t = Time3339(tm) return nil } func TestUnmarshalJSONLiteralError(t *testing.T) { var t3 Time3339 err := Unmarshal([]byte(`"0000-00-00T00:00:00Z"`), &t3) if err == nil { t.Fatalf("expected error; got time %v", time.Time(t3)) } if !strings.Contains(err.Error(), "range") { t.Errorf("got err = %v; want out of range error", err) } } // Test that extra object elements in an array do not result in a // "data changing underfoot" error. // Issue 3717 func TestSkipArrayObjects(t *testing.T) { s := `[{}]` var dest [0]interface{} err := Unmarshal([]byte(s), &dest) if err != nil { t.Errorf("got error %q, want nil", err) } } // Test semantics of pre-filled data, such as struct fields, map elements, // slices, and arrays. // Issues 4900 and 8837, among others. func TestPrefilled(t *testing.T) { var one int = 1 var two int = 2 // Values here change, cannot reuse table across runs. 
var prefillTests = []struct { in string ptr interface{} out interface{} }{ { in: `{"X": 1, "Y": 2}`, ptr: &XYZ{X: float32(3), Y: int16(4), Z: 1.5}, out: &XYZ{X: float64(1), Y: float64(2), Z: 1.5}, }, { in: `{"X": 1, "Y": 2}`, ptr: &map[string]interface{}{"X": float32(3), "Y": int16(4), "Z": 1.5}, out: &map[string]interface{}{"X": float64(1), "Y": float64(2), "Z": 1.5}, }, { in: `[2]`, ptr: &[]int{1}, out: &[]int{2}, }, { in: `[2]`, ptr: &[]int{1, 3}, out: &[]int{2}, }, { in: `[2, 3]`, ptr: &[]int{1}, out: &[]int{2, 3}, }, { in: `[2, 3]`, ptr: &[...]int{1}, out: &[...]int{2}, }, { in: `[3]`, ptr: &[...]int{1, 2}, out: &[...]int{3, 0}, }, { in: `[2]`, ptr: &[]interface{}{&one, 2.0}, out: &[]interface{}{&two}, }, // TODO(rfc): different with encoding/json // { // in: `{"a": 2}`, // ptr: &map[string]interface{}{"a": &one, "b": int(2)}, // out: &map[string]interface{}{"a": 2.0, "b": int(2)}, // }, } for _, tt := range prefillTests { ptrstr := fmt.Sprintf("%v", tt.ptr) err := Unmarshal([]byte(tt.in), tt.ptr) // tt.ptr edited here if err != nil { t.Errorf("Unmarshal: %v", err) } if !reflect.DeepEqual(tt.ptr, tt.out) { t.Errorf("Unmarshal(%#q, %s): have %v, want %v", tt.in, ptrstr, tt.ptr, tt.out) } } } var invalidUnmarshalTests = []struct { v interface{} want string }{ {nil, "json: Unmarshal(nil)"}, {struct{}{}, "json: Unmarshal(non-pointer struct {})"}, {(*int)(nil), "json: Unmarshal(nil *int)"}, } func TestInvalidUnmarshal(t *testing.T) { buf := []byte(`{"a":"1"}`) for _, tt := range invalidUnmarshalTests { err := Unmarshal(buf, tt.v) if err == nil { t.Errorf("Unmarshal expecting error, got nil") continue } if got := err.Error(); got != tt.want { t.Errorf("Unmarshal = %q; want %q", got, tt.want) } } } var invalidUnmarshalTextTests = []struct { v interface{} want string }{ {nil, "json: Unmarshal(nil)"}, {struct{}{}, "json: Unmarshal(non-pointer struct {})"}, {(*int)(nil), "json: Unmarshal(nil *int)"}, {new(net.IP), "json: cannot unmarshal number into Go value of type 
*net.IP"}, } func TestInvalidUnmarshalText(t *testing.T) { buf := []byte(`123`) for _, tt := range invalidUnmarshalTextTests { err := Unmarshal(buf, tt.v) if err == nil { t.Errorf("Unmarshal expecting error, got nil") continue } } } // Test that string option is ignored for invalid types. // Issue 9812. func TestInvalidStringOption(t *testing.T) { num := 0 item := struct { T time.Time `json:",string"` M map[string]string `json:",string"` S []string `json:",string"` A [1]string `json:",string"` I interface{} `json:",string"` P *int `json:",string"` }{M: make(map[string]string), S: make([]string, 0), I: num, P: &num} data, err := Marshal(item) if err != nil { t.Fatalf("Marshal: %v", err) } err = Unmarshal(data, &item) if err != nil { t.Fatalf("Unmarshal: %v", err) } } func TestUnmarshalErrorAfterMultipleJSON(t *testing.T) { tests := []struct { in string err error }{{ in: `1 false null :`, err: (&JsonSyntaxError{"invalid character ':' looking for beginning of value", 13}).err(), }, { in: `1 [] [,]`, err: (&JsonSyntaxError{"invalid character ',' looking for beginning of value", 6}).err(), }, { in: `1 [] [true:]`, err: (&JsonSyntaxError{"invalid character ':' after array element", 10}).err(), }, { in: `1 {} {"x"=}`, err: (&JsonSyntaxError{"invalid character '=' after object key", 13}).err(), }, { in: `falsetruenul#`, err: (&JsonSyntaxError{"invalid character '#' in literal null (expecting 'l')", 12}).err(), }} for i, tt := range tests { dec := decoder.NewDecoder(tt.in) var err error for { var v interface{} if err = dec.Decode(&v); err != nil { break } } if v, ok := err.(decoder.SyntaxError); !ok { t.Errorf("#%d: got %#v, want %#v", i, err, tt.err) } else if v.Pos != int(tt.err.(*json.SyntaxError).Offset) { t.Errorf("#%d: got %#v, want %#v", i, err, tt.err) println(v.Description()) } } } type unmarshalPanic struct{} func (unmarshalPanic) UnmarshalJSON([]byte) error { panic(0xdead) } func TestUnmarshalPanic(t *testing.T) { defer func() { if got := recover(); 
!reflect.DeepEqual(got, 0xdead) { t.Errorf("panic() = (%T)(%v), want 0xdead", got, got) } }() _ = Unmarshal([]byte("{}"), &unmarshalPanic{}) t.Fatalf("Unmarshal should have panicked") } // The decoder used to hang if decoding into an interface pointing to its own address. // See golang.org/issues/31740. func TestUnmarshalRecursivePointer(t *testing.T) { var v interface{} v = &v data := []byte(`{"a": "b"}`) if err := Unmarshal(data, v); err != nil { t.Fatal(err) } } type textUnmarshalerString string func (m *textUnmarshalerString) UnmarshalText(text []byte) error { *m = textUnmarshalerString(strings.ToLower(string(text))) return nil } // Test unmarshal to a map, where the map key is a user defined type. // See golang.org/issues/34437. func TestUnmarshalMapWithTextUnmarshalerStringKey(t *testing.T) { var p map[textUnmarshalerString]string if err := Unmarshal([]byte(`{"FOO": "1"}`), &p); err != nil { t.Fatalf("Unmarshal unexpected error: %v", err) } if _, ok := p["foo"]; !ok { t.Errorf(`Key "foo" does not exist in map: %v`, p) } } func TestUnmarshalRescanLiteralMangledUnquote(t *testing.T) { // See golang.org/issues/38105. var p map[textUnmarshalerString]string if err := Unmarshal([]byte(`{"开源":"12345开源"}`), &p); err != nil { t.Fatalf("Unmarshal unexpected error: %v", err) } if _, ok := p["开源"]; !ok { t.Errorf(`Key "开源" does not exist in map: %v`, p) } // See golang.org/issues/38126. type T struct { F1 string `json:"F1,string"` } t1 := T{"aaa\tbbb"} b, err := Marshal(t1) if err != nil { t.Fatalf("Marshal unexpected error: %v", err) } var t2 T if err := Unmarshal(b, &t2); err != nil { t.Fatalf("Unmarshal unexpected error: %v", err) } if t1 != t2 { t.Errorf("Marshal and Unmarshal roundtrip mismatch: want %q got %q", t1, t2) } // See golang.org/issues/39555. 
input := map[textUnmarshalerString]string{"FOO": "", `"`: ""} encoded, err := Marshal(input) if err != nil { t.Fatalf("Marshal unexpected error: %v", err) } var got map[textUnmarshalerString]string if err := Unmarshal(encoded, &got); err != nil { t.Fatalf("Unmarshal unexpected error: %v", err) } want := map[textUnmarshalerString]string{"foo": "", `"`: ""} if !reflect.DeepEqual(want, got) { t.Fatalf("Unexpected roundtrip result:\nwant: %q\ngot: %q", want, got) } } func TestUnmarshalMaxDepth(t *testing.T) { const ( _MaxDepth = types.MAX_RECURSE _OverMaxDepth = types.MAX_RECURSE + 1 _UnderMaxDepth = types.MAX_RECURSE - 2 ) testcases := []struct { name string data string errMaxDepth bool }{ { name: "ArrayUnderMaxNestingDepth", data: `{"a":` + strings.Repeat(`[`, _UnderMaxDepth) + `0` + strings.Repeat(`]`, _UnderMaxDepth) + `}`, errMaxDepth: false, }, { name: "ArrayOverMaxNestingDepth", data: `{"a":` + strings.Repeat(`[`, _OverMaxDepth) + `0` + strings.Repeat(`]`, _OverMaxDepth) + `}`, errMaxDepth: true, }, { name: "ArrayOverStackDepth", data: `{"a":` + strings.Repeat(`[`, 3000000) + `0` + strings.Repeat(`]`, 3000000) + `}`, errMaxDepth: true, }, { name: "ObjectUnderMaxNestingDepth", data: `{"a":` + strings.Repeat(`{"a":`, _UnderMaxDepth) + `0` + strings.Repeat(`}`, _UnderMaxDepth) + `}`, errMaxDepth: false, }, { name: "ObjectOverMaxNestingDepth", data: `{"a":` + strings.Repeat(`{"a":`, _OverMaxDepth) + `0` + strings.Repeat(`}`, _OverMaxDepth) + `}`, errMaxDepth: true, }, { name: "ObjectOverStackDepth", data: `{"a":` + strings.Repeat(`{"a":`, 3000000) + `0` + strings.Repeat(`}`, 3000000) + `}`, errMaxDepth: true, }, } targets := []struct { name string newValue func() interface{} }{ { name: "unstructured", newValue: func() interface{} { var v interface{} return &v }, }, { name: "typed named field", newValue: func() interface{} { v := struct { A interface{} `json:"a"` }{} return &v }, }, { name: "typed missing field", newValue: func() interface{} { v := struct { B 
interface{} `json:"b"` }{} return &v }, }, { name: "custom unmarshaler", newValue: func() interface{} { v := unmarshaler{} return &v }, }, } for _, tc := range testcases { for _, target := range targets { t.Run(target.name+"-"+tc.name, func(t *testing.T) { err := Unmarshal([]byte(tc.data), target.newValue()) if !tc.errMaxDepth { if err != nil { t.Errorf("unexpected error: %v", err) } } else { if err == nil { t.Errorf("expected error containing 'exceeded max depth', got none") } else if !strings.Contains(err.Error(), "exceeded max depth") { t.Errorf("expected error containing 'exceeded max depth', got: %v", err) } } }) } } } // Issues: map value type larger than 128 bytes are stored by pointer type ChargeToolPacingBucketItemTcc struct { _ [128]byte T string `json:"T"` } type ChargeToolPacingParamsForDataRead struct { Bucket2Item map[int64]ChargeToolPacingBucketItemTcc `json:"bucket_to_item"` } var panicStr = ` { "bucket_to_item": { "102" : { "T": "xxxx" } } } ` func TestChangeTool(t *testing.T) { dataForRaw := ChargeToolPacingParamsForDataRead{} err := Unmarshal([]byte(panicStr), &dataForRaw) if err != nil { t.Fatalf("err %+v\n", err) } t.Logf("%#v\n", dataForRaw) t.Logf("%#v\n", &dataForRaw.Bucket2Item) a := dataForRaw.Bucket2Item[102] if a.T != "xxxx" { t.Fatalf("exp:%v, got:%v", "xxxx", a.T) } } func TestDecoder_LongestInvalidUtf8(t *testing.T) { for _, data := range []string{ "\"" + strings.Repeat("\x80", 4096) + "\"", "\"" + strings.Repeat("\x80", 4095) + "\"", "\"" + strings.Repeat("\x80", 4097) + "\"", "\"" + strings.Repeat("\x80", 12345) + "\"", } { testDecodeInvalidUtf8(t, []byte(data)) } } func testDecodeInvalidUtf8(t *testing.T, data []byte) { var sgot, jgot string serr := ConfigStd.Unmarshal(data, &sgot) jerr := json.Unmarshal(data, &jgot) assert.Equal(t, serr != nil, jerr != nil) if jerr == nil { assert.Equal(t, sgot, jgot) } } func needEscape(b byte) bool { return b == '"' || b == '\\' || b < '\x20' } func genRandJsonBytes(length int) []byte { var buf 
bytes.Buffer buf.WriteByte('"') for j := 0; j < length; j++ { r := rand.Intn(0xff + 1) if needEscape(byte(r)) { buf.WriteByte('\\') } buf.WriteByte(byte(r)) } buf.WriteByte('"') return buf.Bytes() } func genRandJsonRune(length int) []byte { var buf bytes.Buffer buf.WriteByte('"') for j := 0; j < length; j++ { r := rand.Intn(0x10FFFF + 1) if r < 0x80 && needEscape(byte(r)) { buf.WriteByte('\\') buf.WriteByte(byte(r)) } else { buf.WriteRune(rune(r)) } } buf.WriteByte('"') return buf.Bytes() } func TestDecoder_RandomInvalidUtf8(t *testing.T) { nums := 1000 maxLen := 1000 for i := 0; i < nums; i++ { length := rand.Intn(maxLen) testDecodeInvalidUtf8(t, genRandJsonBytes(length)) testDecodeInvalidUtf8(t, genRandJsonRune(length)) } } type atofTest struct { in string out string err error } // Tests from Go strconv package, https://github.com/golang/go/blob/master/src/strconv/atof_test.go // All tests are passed in Go encoding/json. var atoftests = []atofTest{ {"1.234e", "", nil}, // error {"1i", "1", nil}, // pass {"1", "1", nil}, {"1e23", "1e+23", nil}, {"1E23", "1e+23", nil}, {"100000000000000000000000", "1e+23", nil}, {"1e-100", "1e-100", nil}, {"123456700", "1.234567e+08", nil}, {"99999999999999974834176", "9.999999999999997e+22", nil}, {"100000000000000000000001", "1.0000000000000001e+23", nil}, {"100000000000000008388608", "1.0000000000000001e+23", nil}, {"100000000000000016777215", "1.0000000000000001e+23", nil}, {"100000000000000016777216", "1.0000000000000003e+23", nil}, {"-1", "-1", nil}, {"-0.1", "-0.1", nil}, {"-0", "-0", nil}, {"1e-20", "1e-20", nil}, {"625e-3", "0.625", nil}, // zeros {"0", "0", nil}, {"0e0", "0", nil}, {"-0e0", "-0", nil}, {"0e-0", "0", nil}, {"-0e-0", "-0", nil}, {"0e+0", "0", nil}, {"-0e+0", "-0", nil}, {"0e+01234567890123456789", "0", nil}, {"0.00e-01234567890123456789", "0", nil}, {"-0e+01234567890123456789", "-0", nil}, {"-0.00e-01234567890123456789", "-0", nil}, {"0e291", "0", nil}, // issue 15364 {"0e292", "0", nil}, // issue 15364 
{"0e347", "0", nil}, // issue 15364 {"0e348", "0", nil}, // issue 15364 {"-0e291", "-0", nil}, {"-0e292", "-0", nil}, {"-0e347", "-0", nil}, {"-0e348", "-0", nil}, // largest float64 {"1.7976931348623157e308", "1.7976931348623157e+308", nil}, {"-1.7976931348623157e308", "-1.7976931348623157e+308", nil}, // the border is ...158079 // borderline - okay {"1.7976931348623158e308", "1.7976931348623157e+308", nil}, {"-1.7976931348623158e308", "-1.7976931348623157e+308", nil}, // a little too large {"1e308", "1e+308", nil}, // denormalized {"1e-305", "1e-305", nil}, {"1e-306", "1e-306", nil}, {"1e-307", "1e-307", nil}, {"1e-308", "1e-308", nil}, {"1e-309", "1e-309", nil}, {"1e-310", "1e-310", nil}, {"1e-322", "1e-322", nil}, // smallest denormal {"5e-324", "5e-324", nil}, {"4e-324", "5e-324", nil}, {"3e-324", "5e-324", nil}, // too small {"2e-324", "0", nil}, // way too small {"1e-350", "0", nil}, {"1e-400000", "0", nil}, // try to overflow exponent {"1e-4294967296", "0", nil}, {"1e-18446744073709551616", "0", nil}, // https://www.exploringbinary.com/java-hangs-when-converting-2-2250738585072012e-308/ {"2.2250738585072012e-308", "2.2250738585072014e-308", nil}, // https://www.exploringbinary.com/php-hangs-on-numeric-value-2-2250738585072011e-308/ {"2.2250738585072011e-308", "2.225073858507201e-308", nil}, // A very large number (initially wrongly parsed by the fast algorithm). {"4.630813248087435e+307", "4.630813248087435e+307", nil}, // A different kind of very large number. {"22.222222222222222", "22.22222222222222", nil}, {"2." + strings.Repeat("2", 800) + "e+1", "22.22222222222222", nil}, // Exactly halfway between 1 and math.Nextafter(1, 2). // Round to even (down). {"1.00000000000000011102230246251565404236316680908203125", "1", nil}, // Slightly lower; still round down. {"1.00000000000000011102230246251565404236316680908203124", "1", nil}, // Slightly higher; round up. 
{"1.00000000000000011102230246251565404236316680908203126", "1.0000000000000002", nil}, // Slightly higher, but you have to read all the way to the end. {"1.00000000000000011102230246251565404236316680908203125" + strings.Repeat("0", 10000) + "1", "1.0000000000000002", nil}, // Halfway between x := math.Nextafter(1, 2) and math.Nextafter(x, 2) // Round to even (up). {"1.00000000000000033306690738754696212708950042724609375", "1.0000000000000004", nil}, // Halfway between 1090544144181609278303144771584 and 1090544144181609419040633126912 // (15497564393479157p+46, should round to even 15497564393479156p+46, issue 36657) {"1090544144181609348671888949248", "1.0905441441816093e+30", nil}, // Corner case between int64 and float64 for the input {"9223372036854775807", "9223372036854775807", nil}, // max int64: (1 << 63) - 1 {"9223372036854775808", "9223372036854775808", nil}, {"-9223372036854775808", "-9223372036854775808", nil}, // min int64: 1 << 63 {"-9223372036854775809", "-9223372036854775809", nil}, } func TestDecodeFloat(t *testing.T) { for i, tt := range atoftests { // default float64 var sonicout, stdout float64 sonicerr := decoder.NewDecoder(tt.in).Decode(&sonicout) stderr := json.NewDecoder(strings.NewReader(tt.in)).Decode(&stdout) if !reflect.DeepEqual(sonicout, stdout) { t.Fatalf("Test %d, %#v\ngot:\n %#v\nexp:\n %#v\n", i, tt.in, sonicout, stdout) } if !reflect.DeepEqual(sonicerr == nil, stderr == nil) { t.Fatalf("Test %d, %#v\ngot:\n %#v\nexp:\n %#v\n", i, tt.in, sonicerr, stderr) } } } type useInt64Test struct { in string out int64 } type useFloatTest struct { in string out float64 } var useinttest = []useInt64Test{ // int64 {"0", 0}, {"1", 1}, {"-1", -1}, {"100", 100}, {"-9223372036854775807", -9223372036854775807}, {"-9223372036854775808", -9223372036854775808}, //min int64 {"9223372036854775807", 9223372036854775807}, //max int64 {"9223372036854775806", 9223372036854775806}, } var usefloattest = []useFloatTest{ // float64 {"-9223372036854775809", 
-9223372036854775809}, // int64 overflow {"9223372036854775808", 9223372036854775808}, // int64 overflow {"1e2", 1e2}, {"1e-20", 1e-20}, {"1.0", 1}, } func TestUseInt64(t *testing.T) { for i, tt := range useinttest { var sout interface{} dc := decoder.NewDecoder(tt.in) dc.UseInt64() serr := dc.Decode(&sout) if !reflect.DeepEqual(sout, tt.out) { t.Errorf("Test %d, %#v\ngot:\n %#v\nexp:\n %#v\n", i, tt.in, sout, tt.in) } if serr != nil { t.Errorf("Test %d, %#v\ngot:\n %#v\nexp:\n nil\n", i, tt, serr) } } for i, tt := range usefloattest { var sout interface{} dc := decoder.NewDecoder(tt.in) dc.UseInt64() //the input string is not int64, still return float64 serr := dc.Decode(&sout) if !reflect.DeepEqual(sout, tt.out) { t.Errorf("Test %d, %#v\ngot:\n %#v\nexp:\n %#v\n", i, tt.in, sout, tt.in) } if serr != nil { t.Errorf("Test %d, %#v\ngot:\n %#v\nexp:\n nil\n", i, tt, serr) } } } func TestUseNumber(t *testing.T) { for i, tt := range useinttest { var sout interface{} dc := decoder.NewDecoder(tt.in) dc.UseNumber() serr := dc.Decode(&sout) if !reflect.DeepEqual(sout, json.Number(tt.in)) { t.Errorf("Test %d, %#v\ngot:\n %#v\nexp:\n %#v\n", i, tt.in, sout, tt.out) } if serr != nil { t.Errorf("Test %d, %#v\ngot:\n %#v\nexp:\n nil\n", i, tt, serr) } } for i, tt := range usefloattest { var sout interface{} dc := decoder.NewDecoder(tt.in) dc.UseNumber() serr := dc.Decode(&sout) if !reflect.DeepEqual(sout, json.Number(tt.in)) { t.Errorf("Test %d, %#v\ngot:\n %#v\nexp:\n %#v\n", i, tt.in, sout, tt.out) } if serr != nil { t.Errorf("Test %d, %#v\ngot:\n %#v\nexp:\n nil\n", i, tt, serr) } } } func BenchmarkDecoderRawMessage(b *testing.B) { data := ` { "coordinates": null, "favorited": false, "truncated": false, "created_at": "Mon Sep 24 03:35:21 +0000 2012", "id_str": "250075927172759552", "entities": { "urls": [ ], "hashtags": [ { "text": "freebandnames", "indices": [ 20, 34 ] } ], "user_mentions": [ ] }, "in_reply_to_user_id_str": null, "contributors": null, "text": "Aggressive 
Ponytail #freebandnames", "metadata": { "iso_language_code": "en", "result_type": "recent" }, "retweet_count": 0, "in_reply_to_status_id_str": null, "id": 250075927172759552, "geo": null, "retweeted": false, "in_reply_to_user_id": null, "place": null, "user": { "profile_sidebar_fill_color": "DDEEF6", "profile_sidebar_border_color": "C0DEED", "profile_background_tile": false, "name": "Sean Cummings", "profile_image_url": "https://a0.twimg.com/profile_images/2359746665/1v6zfgqo8g0d3mk7ii5s_normal.jpeg", "created_at": "Mon Apr 26 06:01:55 +0000 2010", "location": "LA, CA", "follow_request_sent": null, "profile_link_color": "0084B4", "is_translator": false, "id_str": "137238150", "entities": { "url": { "urls": [ { "expanded_url": null, "url": "", "indices": [ 0, 0 ] } ] }, "description": { "urls": [ ] } }, "default_profile": true, "contributors_enabled": false, "favourites_count": 0, "url": null, "profile_image_url_https": "https://si0.twimg.com/profile_images/2359746665/1v6zfgqo8g0d3mk7ii5s_normal.jpeg", "utc_offset": -28800, "id": 137238150, "profile_use_background_image": true, "listed_count": 2, "profile_text_color": "333333", "lang": "en", "followers_count": 70, "protected": false, "notifications": null, "profile_background_image_url_https": "https://si0.twimg.com/images/themes/theme1/bg.png", "profile_background_color": "C0DEED", "verified": false, "geo_enabled": true, "time_zone": "Pacific Time (US & Canada)", "description": "Born 330 Live 310", "default_profile_image": false, "profile_background_image_url": "https://a0.twimg.com/images/themes/theme1/bg.png", "statuses_count": 579, "friends_count": 110, "following": null, "show_all_inline_media": false, "screen_name": "sean_cummings" }, "in_reply_to_screen_name": null, "source": "Twitter for Mac", "in_reply_to_status_id": null }` bench := func(b *testing.B, run func(b *testing.B)) { b.ResetTimer() b.ReportAllocs() b.SetBytes(int64(len(data))) for i := 0; i < b.N; i++ { run(b) } runtime.GC() } 
b.Run("StdRawMessage", func(b *testing.B) { bench(b, func(b *testing.B) { var obj map[string]json.RawMessage dc := decoder.NewDecoder(data) dc.SetOptions(decoder.OptionUseNumber) if err := dc.Decode(&obj); err != nil { b.Fatal(err.Error()) } _ = obj }) }) b.Run("NocopyRawMessage", func(b *testing.B) { bench(b, func(b *testing.B) { var obj map[string]NoCopyRawMessage dc := decoder.NewDecoder(data) dc.SetOptions(decoder.OptionUseNumber) if err := dc.Decode(&obj); err != nil { b.Fatal(err.Error()) } _ = obj }) }) b.Run("NocopyRawMessageWithoutValidation", func(b *testing.B) { bench(b, func(b *testing.B) { var obj map[string]NoCopyRawMessage dc := decoder.NewDecoder(data) dc.SetOptions(decoder.OptionNoValidateJSON | decoder.OptionUseNumber) if err := dc.Decode(&obj); err != nil { b.Fatal(err.Error()) } _ = obj }) }) } func TestJsonNumber(t *testing.T) { api := Config{ UseNumber: true, }.Froze() type Foo struct { A json.Number `json:"a"` B json.Number `json:"b"` C json.Number `json:"c"` } data := []byte(`{"a": 1 , "b": "123", "c": "0.4e+56"}`) var foo1, foo2 Foo serr := api.Unmarshal(data, &foo1) jerr := json.Unmarshal(data, &foo2) assert.Equal(t, jerr, serr) assert.Equal(t, foo2, foo1) } ================================================ FILE: decoder/decoder_compat.go ================================================ //go:build (!amd64 && !arm64) || go1.27 || !go1.17 || (arm64 && !go1.20) // +build !amd64,!arm64 go1.27 !go1.17 arm64,!go1.20 /* * Copyright 2023 ByteDance Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
* See the License for the specific language governing permissions and * limitations under the License. */ package decoder import ( "bytes" "encoding/json" "io" "reflect" "unsafe" "github.com/bytedance/sonic/internal/compat" "github.com/bytedance/sonic/internal/decoder/consts" "github.com/bytedance/sonic/internal/native/types" "github.com/bytedance/sonic/option" ) func init() { compat.Warn("sonic/decoder") } const ( _F_use_int64 = consts.F_use_int64 _F_disable_urc = consts.F_disable_unknown _F_disable_unknown = consts.F_disable_unknown _F_copy_string = consts.F_copy_string _F_use_number = consts.F_use_number _F_validate_string = consts.F_validate_string _F_allow_control = consts.F_allow_control _F_no_validate_json = consts.F_no_validate_json _F_case_sensitive = consts.F_case_sensitive ) type Options uint64 const ( OptionUseInt64 Options = 1 << _F_use_int64 OptionUseNumber Options = 1 << _F_use_number OptionUseUnicodeErrors Options = 1 << _F_disable_urc OptionDisableUnknown Options = 1 << _F_disable_unknown OptionCopyString Options = 1 << _F_copy_string OptionValidateString Options = 1 << _F_validate_string OptionNoValidateJSON Options = 1 << _F_no_validate_json OptionCaseSensitive Options = 1 << _F_case_sensitive ) func (self *Decoder) SetOptions(opts Options) { if (opts&OptionUseNumber != 0) && (opts&OptionUseInt64 != 0) { panic("can't set OptionUseInt64 and OptionUseNumber both!") } self.f = uint64(opts) } // Decoder is the decoder context object type Decoder struct { i int f uint64 s string } // NewDecoder creates a new decoder instance. func NewDecoder(s string) *Decoder { return &Decoder{s: s} } // Pos returns the current decoding position. 
func (self *Decoder) Pos() int { return self.i } func (self *Decoder) Reset(s string) { self.s = s self.i = 0 // self.f = 0 } // NOTE: api fallback do nothing func (self *Decoder) CheckTrailings() error { pos := self.i buf := self.s /* skip all the trailing spaces */ if pos != len(buf) { for pos < len(buf) && (types.SPACE_MASK&(1< c.expTime, fmt.Sprintf("%v/%v=%v", avg1, avg2, float64(avg1)/float64(avg2))) }) } } func TestSkipMismatchTypeAmd64Error(t *testing.T) { // t.Run("struct", func(t *testing.T) { // println("TestSkipError") // type skiptype struct { // A int `json:"a"` // B string `json:"b"` // Pass *int `json:"pass"` // C struct{ // Pass4 interface{} `json:"pass4"` // D struct{ // E float32 `json:"e"` // } `json:"d"` // Pass2 int `json:"pass2"` // } `json:"c"` // E bool `json:"e"` // F []int `json:"f"` // G map[string]int `json:"g"` // H bool `json:"h,string"` // Pass3 int `json:"pass2"` // I json.Number `json:"i"` // } // var obj, obj2 = &skiptype{Pass:new(int)}, &skiptype{Pass:new(int)} // var data = `{"a":"","b":1,"c":{"d":true,"pass2":1,"pass4":true},"e":{},"f":"","g":[],"pass":null,"h":"1.0","i":true,"pass3":1}` // d := NewDecoder(data) // err := d.Decode(obj) // err2 := json.Unmarshal([]byte(data), obj2) // println(err2.Error()) // assert.Equal(t, err2 == nil, err == nil) // // assert.Equal(t, len(data), d.i) // assert.Equal(t, obj2, obj) // if te, ok := err.(*MismatchTypeError); ok { // assert.Equal(t, reflect.TypeOf(obj.I), te.Type) // assert.Equal(t, strings.Index(data, `"i":t`)+4, te.Pos) // println(err.Error()) // } else { // t.Fatal("invalid error") // } // }) t.Run("short array", func(t *testing.T) { var obj, obj2 = &[]int{}, &[]int{} var data = `[""]` d := NewDecoder(data) err := d.Decode(obj) err2 := json.Unmarshal([]byte(data), obj2) // println(err2.Error()) assert.Equal(t, err2 == nil, err == nil) // assert.Equal(t, len(data), d.i) assert.Equal(t, obj2, obj) }) t.Run("int ", func(t *testing.T) { var obj int = 123 var obj2 int = 123 var data 
= `[""]` d := NewDecoder(data) err := d.Decode(&obj) err2 := json.Unmarshal([]byte(data), &obj2) println(err.Error(), obj, obj2) assert.Equal(t, err2 == nil, err == nil) // assert.Equal(t, len(data), d.i) assert.Equal(t, obj2, obj) }) t.Run("array", func(t *testing.T) { var obj, obj2 = &[]int{}, &[]int{} var data = `["",true,true,true,true,true,true,true,true,true,true,true,true,true,true,true,true,true,true,true,true]` d := NewDecoder(data) err := d.Decode(obj) err2 := json.Unmarshal([]byte(data), obj2) // println(err2.Error()) assert.Equal(t, err2 == nil, err == nil) // assert.Equal(t, len(data), d.i) assert.Equal(t, obj2, obj) }) t.Run("map", func(t *testing.T) { var obj, obj2 = &map[int]int{}, &map[int]int{} var data = `{"true" : { },"1":1,"2" : true,"3":3}` d := NewDecoder(data) err := d.Decode(obj) err2 := json.Unmarshal([]byte(data), obj2) assert.Equal(t, err2 == nil, err == nil) // assert.Equal(t, len(data), d.i) assert.Equal(t, obj2, obj) }) t.Run("map error", func(t *testing.T) { var obj, obj2 = &map[int]int{}, &map[int]int{} var data = `{"true" : { ],"1":1,"2" : true,"3":3}` d := NewDecoder(data) err := d.Decode(obj) err2 := json.Unmarshal([]byte(data), obj2) println(err.Error()) println(err2.Error()) assert.Equal(t, err2 == nil, err == nil) // assert.Equal(t, len(data), d.i) // assert.Equal(t, obj2, obj) }) } func TestCopyString(t *testing.T) { var data []byte var dc *Decoder var err error data = []byte(`{"A":"0","B":"1"}`) dc = NewDecoder(rt.Mem2Str(data)) dc.UseNumber() dc.CopyString() var obj struct { A string B string } err = dc.Decode(&obj) if err != nil { t.Fatal(err) } data[6] = '1' if obj.A != "0" { t.Fatal(obj) } data[14] = '0' if obj.B != "1" { t.Fatal(obj) } data = []byte(`{"A":"0","B":"1"}`) dc = NewDecoder(rt.Mem2Str(data)) dc.UseNumber() err = dc.Decode(&obj) if err != nil { t.Fatal(err) } data[6] = '1' if obj.A != "1" { t.Fatal(obj) } data[14] = '0' if obj.B != "0" { t.Fatal(obj) } data = []byte(`{"A":"0","B":"1"}`) dc = 
NewDecoder(rt.Mem2Str(data)) dc.UseNumber() dc.CopyString() m := map[string]interface{}{} err = dc.Decode(&m) if err != nil { t.Fatal(err) } data[2] = 'C' data[6] = '1' if m["A"] != "0" { t.Fatal(m) } data[10] = 'D' data[14] = '0' if m["B"] != "1" { t.Fatal(m) } data = []byte(`{"A":"0","B":"1"}`) dc = NewDecoder(rt.Mem2Str(data)) dc.UseNumber() m = map[string]interface{}{} err = dc.Decode(&m) if err != nil { t.Fatal(err) } data[6] = '1' if m["A"] != "1" { t.Fatal(m) } data[14] = '0' if m["B"] != "0" { t.Fatal(m) } data = []byte(`{"A":"0","B":"1"}`) dc = NewDecoder(rt.Mem2Str(data)) dc.UseNumber() dc.CopyString() var x interface{} err = dc.Decode(&x) if err != nil { t.Fatal(err) } data[2] = 'C' data[6] = '1' m = x.(map[string]interface{}) if m["A"] != "0" { t.Fatal(m) } data[10] = 'D' data[14] = '0' if m["B"] != "1" { t.Fatal(m) } data = []byte(`{"A":"0","B":"1"}`) dc = NewDecoder(rt.Mem2Str(data)) dc.UseNumber() var y interface{} err = dc.Decode(&y) if err != nil { t.Fatal(err) } m = y.(map[string]interface{}) data[6] = '1' if m["A"] != "1" { t.Fatal(m) } data[14] = '0' if m["B"] != "0" { t.Fatal(m) } } func TestDecoder_SetOption(t *testing.T) { var v interface{} d := NewDecoder("123") d.SetOptions(OptionUseInt64) err := d.Decode(&v) assert.NoError(t, err) assert.Equal(t, v, int64(123)) } func BenchmarkSkip_Sonic(b *testing.B) { var data = rt.Str2Mem(TwitterJson) if ret, _ := Skip(data); ret < 0 { b.Fatal() } b.SetBytes(int64(len(TwitterJson))) b.ResetTimer() for i := 0; i < b.N; i++ { _, _ = Skip(data) } } ================================================ FILE: decoder/decoder_test.go ================================================ /* * Copyright 2021 ByteDance Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package decoder import ( "encoding/json" "runtime" "runtime/debug" "strings" "sync" "testing" "time" "github.com/stretchr/testify/assert" ) func TestMain(m *testing.M) { go func() { if !debugAsyncGC { return } println("Begin GC looping...") for { runtime.GC() debug.FreeOSMemory() } println("stop GC looping!") }() time.Sleep(time.Millisecond) m.Run() } func TestGC(t *testing.T) { if debugSyncGC { return } var w interface{} out, err := decode(TwitterJson, &w, true) if err != nil { t.Fatal(err) } if out != len(TwitterJson) { t.Fatal(out) } wg := &sync.WaitGroup{} N := 10000 for i := 0; i < N; i++ { wg.Add(1) go func(wg *sync.WaitGroup) { defer wg.Done() var w interface{} out, err := decode(TwitterJson, &w, true) if err != nil { t.Error(err) return } if out != len(TwitterJson) { t.Error(out) return } runtime.GC() }(wg) } wg.Wait() } var _BindingValue TwitterStruct func init() { _ = json.Unmarshal([]byte(TwitterJson), &_BindingValue) } func TestSkipMismatchTypeError(t *testing.T) { t.Run("struct", func(t *testing.T) { println("TestSkipError") type skiptype struct { A int `json:"a"` B string `json:"b"` Pass *int `json:"pass"` C struct { Pass4 interface{} `json:"pass4"` D struct { E float32 `json:"e"` } `json:"d"` Pass2 int `json:"pass2"` } `json:"c"` E bool `json:"e"` F []int `json:"f"` G map[string]int `json:"g"` H bool `json:"h,string"` Pass3 int `json:"pass2"` I json.Number `json:"i"` } var obj, obj2 = &skiptype{Pass: new(int)}, &skiptype{Pass: new(int)} var data = 
`{"a":"","b":1,"c":{"d":true,"pass2":1,"pass4":true},"e":{},"f":"","g":[],"pass":null,"h":"1.0","i":true,"pass3":1}` d := NewDecoder(data) err := d.Decode(obj) err2 := json.Unmarshal([]byte(data), obj2) println(err2.Error()) assert.Equal(t, err2 == nil, err == nil) // assert.Equal(t, len(data), d.i) assert.Equal(t, obj2, obj) if err == nil { t.Fatal("invalid error") } }) t.Run("short array", func(t *testing.T) { var obj, obj2 = &[]int{}, &[]int{} var data = `[""]` d := NewDecoder(data) err := d.Decode(obj) err2 := json.Unmarshal([]byte(data), obj2) // println(err2.Error()) assert.Equal(t, err2 == nil, err == nil) // assert.Equal(t, len(data), d.i) assert.Equal(t, obj2, obj) }) t.Run("int ", func(t *testing.T) { var obj int = 123 var obj2 int = 123 var data = `[""]` d := NewDecoder(data) err := d.Decode(&obj) err2 := json.Unmarshal([]byte(data), &obj2) println(err.Error(), obj, obj2) assert.Equal(t, err2 == nil, err == nil) // assert.Equal(t, len(data), d.i) assert.Equal(t, obj2, obj) }) t.Run("array", func(t *testing.T) { var obj, obj2 = &[]int{}, &[]int{} var data = `["",true,true,true,true,true,true,true,true,true,true,true,true,true,true,true,true,true,true,true,true]` d := NewDecoder(data) err := d.Decode(obj) err2 := json.Unmarshal([]byte(data), obj2) // println(err2.Error()) assert.Equal(t, err2 == nil, err == nil) // assert.Equal(t, len(data), d.i) assert.Equal(t, obj2, obj) }) t.Run("map", func(t *testing.T) { var obj, obj2 = &map[int]int{}, &map[int]int{} var data = `{"true" : { },"1":1,"2" : true,"3":3}` d := NewDecoder(data) err := d.Decode(obj) err2 := json.Unmarshal([]byte(data), obj2) assert.Equal(t, err2 == nil, err == nil) // assert.Equal(t, len(data), d.i) assert.Equal(t, obj2, obj) }) t.Run("map error", func(t *testing.T) { var obj, obj2 = &map[int]int{}, &map[int]int{} var data = `{"true" : { ],"1":1,"2" : true,"3":3}` d := NewDecoder(data) err := d.Decode(obj) err2 := json.Unmarshal([]byte(data), obj2) println(err.Error()) println(err2.Error()) 
assert.Equal(t, err2 == nil, err == nil)
		// assert.Equal(t, len(data), d.i)
		// assert.Equal(t, obj2, obj)
	})
}

// TestDecodeCorrupt feeds structurally broken objects and arrays to the
// decoder and requires every one of them to be rejected with an error whose
// text mentions either "Syntax error" or "invalid character".
func TestDecodeCorrupt(t *testing.T) {
	corrupt := []string{
		`{,}`,
		`{,"a"}`,
		`{"a":}`,
		`{"a":1,}`,
		`{"a":1,"b"}`,
		`{"a":1,"b":}`,
		`{,"a":1 "b":2}`,
		`{"a",:1 "b":2}`,
		`{"a":,1 "b":2}`,
		`{"a":1 "b",:2}`,
		`{"a":1 "b":,2}`,
		`{"a":1 "b":2,}`,
		`{"a":1 "b":2}`,
		`[,]`,
		`[,1]`,
		`[1,]`,
		`[,1,2]`,
		`[1,2,]`,
	}
	for _, input := range corrupt {
		var out interface{}
		_, err := decode(input, &out, false)
		if err == nil {
			t.Fatalf("%#v", input)
		}
		// Any other kind of failure means the input was rejected for the
		// wrong reason.
		if strings.Contains(err.Error(), "Syntax error") {
			continue
		}
		if strings.Contains(err.Error(), "invalid character") {
			continue
		}
		t.Fatal(err.Error())
	}
}

// TestDecodeOption checks the effect of OptionUseNumber and
// OptionDisableUnknown by decoding the same input with and without each
// option set.
func TestDecodeOption(t *testing.T) {
	var (
		out  interface{}
		out2 struct{}
	)

	// With OptionUseNumber a bare number lands in an interface{} as
	// json.Number; without it, as float64. The same target is reused.
	number := "123"
	withNumber := NewDecoder(number)
	withNumber.SetOptions(OptionUseNumber)
	assert.NoError(t, withNumber.Decode(&out))
	assert.Equal(t, out.(json.Number), json.Number("123"))

	plain := NewDecoder(number)
	assert.NoError(t, plain.Decode(&out))
	assert.Equal(t, out.(float64), float64(123))

	// With OptionDisableUnknown a key that has no matching field is an
	// error; without it the key is accepted.
	object := `{"un": 123}`
	strict := NewDecoder(object)
	strict.SetOptions(OptionDisableUnknown)
	assert.Error(t, strict.Decode(&out2))

	lenient := NewDecoder(object)
	assert.NoError(t, lenient.Decode(&out2))
}

// decode runs a fresh Decoder over s and stores the result in v. When copy is
// true, CopyString is called on the decoder first. It returns len(s) on
// success and 0 together with the decoder's error on failure.
func decode(s string, v interface{}, copy bool) (int, error) {
	dec := NewDecoder(s)
	if copy {
		dec.CopyString()
	}
	if err := dec.Decode(v); err != nil {
		return 0, err
	}
	return len(s), nil
}

// TestDecoder_Basic decodes a bare integer.
func TestDecoder_Basic(t *testing.T) {
	var got int
	n, err := decode("12345", &got, false)
	assert.NoError(t, err)
	assert.Equal(t, 5, n)
	assert.Equal(t, 12345, got)
}

// TestDecoder_Generic decodes the Twitter sample into an interface{}.
func TestDecoder_Generic(t *testing.T) {
	var got interface{}
	n, err := decode(TwitterJson, &got, false)
	assert.NoError(t, err)
	assert.Equal(t, len(TwitterJson), n)
}

// TestDecoder_Binding decodes the Twitter sample into TwitterStruct and
// compares the result with _BindingValue.
func TestDecoder_Binding(t *testing.T) {
	var got TwitterStruct
	n, err := decode(TwitterJson, &got, false)
	assert.NoError(t, err)
	assert.Equal(t, len(TwitterJson), n)
	assert.Equal(t, _BindingValue, got, 0)
}

// BenchmarkDecoder_Generic_Sonic decodes the Twitter sample into interface{}
// with CopyString enabled. One untimed decode runs before the timer is reset.
func BenchmarkDecoder_Generic_Sonic(b *testing.B) {
	var warm interface{}
	_, _ = decode(TwitterJson, &warm, true)
	b.SetBytes(int64(len(TwitterJson)))
	b.ResetTimer()
	for iter := 0; iter < b.N; iter++ {
		var out interface{}
		_, _ = decode(TwitterJson, &out, true)
	}
}

// BenchmarkDecoder_Generic_Sonic_Fast is the interface{} benchmark without
// CopyString.
func BenchmarkDecoder_Generic_Sonic_Fast(b *testing.B) {
	var warm interface{}
	_, _ = decode(TwitterJson, &warm, false)
	b.SetBytes(int64(len(TwitterJson)))
	b.ResetTimer()
	for iter := 0; iter < b.N; iter++ {
		var out interface{}
		_, _ = decode(TwitterJson, &out, false)
	}
}

// BenchmarkDecoder_Generic_StdLib is the encoding/json baseline for decoding
// into interface{}.
func BenchmarkDecoder_Generic_StdLib(b *testing.B) {
	var warm interface{}
	raw := []byte(TwitterJson)
	_ = json.Unmarshal(raw, &warm)
	b.SetBytes(int64(len(TwitterJson)))
	b.ResetTimer()
	for iter := 0; iter < b.N; iter++ {
		var out interface{}
		_ = json.Unmarshal(raw, &out)
	}
}

// BenchmarkDecoder_Binding_Sonic decodes the Twitter sample into
// TwitterStruct with CopyString enabled.
func BenchmarkDecoder_Binding_Sonic(b *testing.B) {
	var warm TwitterStruct
	_, _ = decode(TwitterJson, &warm, true)
	b.SetBytes(int64(len(TwitterJson)))
	b.ResetTimer()
	for iter := 0; iter < b.N; iter++ {
		var out TwitterStruct
		_, _ = decode(TwitterJson, &out, true)
	}
}

// BenchmarkDecoder_Binding_Sonic_Fast is the TwitterStruct benchmark without
// CopyString.
func BenchmarkDecoder_Binding_Sonic_Fast(b *testing.B) {
	var warm TwitterStruct
	_, _ = decode(TwitterJson, &warm, false)
	b.SetBytes(int64(len(TwitterJson)))
	b.ResetTimer()
	for iter := 0; iter < b.N; iter++ {
		var out TwitterStruct
		_, _ = decode(TwitterJson, &out, false)
	}
}

// BenchmarkDecoder_Binding_StdLib is the encoding/json baseline for decoding
// into TwitterStruct.
func BenchmarkDecoder_Binding_StdLib(b *testing.B) {
	var warm TwitterStruct
	raw := []byte(TwitterJson)
	_ = json.Unmarshal(raw, &warm)
	b.SetBytes(int64(len(TwitterJson)))
	b.ResetTimer()
	for iter := 0; iter < b.N; iter++ {
		var out TwitterStruct
		_ = json.Unmarshal(raw, &out)
	}
}

// BenchmarkDecoder_Parallel_Generic_Sonic runs the interface{} benchmark
// (CopyString enabled) under b.RunParallel.
func BenchmarkDecoder_Parallel_Generic_Sonic(b *testing.B) {
	var warm interface{}
	_, _ = decode(TwitterJson, &warm, true)
	b.SetBytes(int64(len(TwitterJson)))
	b.ResetTimer()
	b.RunParallel(func(pb *testing.PB) {
		for pb.Next() {
			var out interface{}
			_, _ = decode(TwitterJson, &out, true)
		}
	})
}

// BenchmarkDecoder_Parallel_Generic_Sonic_Fast runs the interface{} benchmark
// (no CopyString) under b.RunParallel.
func BenchmarkDecoder_Parallel_Generic_Sonic_Fast(b *testing.B) {
	var warm interface{}
	_, _ = decode(TwitterJson, &warm, false)
	b.SetBytes(int64(len(TwitterJson)))
	b.ResetTimer()
	b.RunParallel(func(pb *testing.PB) {
		for pb.Next() {
			var out interface{}
			_, _ = decode(TwitterJson, &out, false)
		}
	})
}
func BenchmarkDecoder_Parallel_Generic_StdLib(b *testing.B) { var w interface{} m := []byte(TwitterJson) _ = json.Unmarshal(m, &w) b.SetBytes(int64(len(TwitterJson))) b.ResetTimer() b.RunParallel(func(pb *testing.PB) { for pb.Next() { var v interface{} _ = json.Unmarshal(m, &v) } }) } func BenchmarkDecoder_Parallel_Binding_Sonic(b *testing.B) { var w TwitterStruct _, _ = decode(TwitterJson, &w, true) b.SetBytes(int64(len(TwitterJson))) b.ResetTimer() b.RunParallel(func(pb *testing.PB) { for pb.Next() { var v TwitterStruct _, _ = decode(TwitterJson, &v, true) } }) } func BenchmarkDecoder_Parallel_Binding_Sonic_Fast(b *testing.B) { var w TwitterStruct _, _ = decode(TwitterJson, &w, false) b.SetBytes(int64(len(TwitterJson))) b.ResetTimer() b.RunParallel(func(pb *testing.PB) { for pb.Next() { var v TwitterStruct _, _ = decode(TwitterJson, &v, false) } }) } func BenchmarkDecoder_Parallel_Binding_StdLib(b *testing.B) { var w TwitterStruct m := []byte(TwitterJson) _ = json.Unmarshal(m, &w) b.SetBytes(int64(len(TwitterJson))) b.ResetTimer() b.RunParallel(func(pb *testing.PB) { for pb.Next() { var v TwitterStruct _ = json.Unmarshal(m, &v) } }) } ================================================ FILE: decoder/testdata_test.go ================================================ /* * Copyright 2021 ByteDance Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ package decoder import ( "os" ) var ( debugSyncGC = os.Getenv("SONIC_SYNC_GC") != "" debugAsyncGC = os.Getenv("SONIC_NO_ASYNC_GC") == "" ) const TwitterJson = `{ "statuses": [ { "coordinates": null, "favorited": false, "truncated": false, "created_at": "Mon Sep 24 03:35:21 +0000 2012", "id_str": "250075927172759552", "entities": { "urls": [ ], "hashtags": [ { "text": "freebandnames", "indices": [ 20, 34 ] } ], "user_mentions": [ ] }, "in_reply_to_user_id_str": null, "contributors": null, "text": "Aggressive Ponytail #freebandnames", "metadata": { "iso_language_code": "en", "result_type": "recent" }, "retweet_count": 0, "in_reply_to_status_id_str": null, "id": 250075927172759552, "geo": null, "retweeted": false, "in_reply_to_user_id": null, "place": null, "user": { "profile_sidebar_fill_color": "DDEEF6", "profile_sidebar_border_color": "C0DEED", "profile_background_tile": false, "name": "Sean Cummings", "profile_image_url": "https://a0.twimg.com/profile_images/2359746665/1v6zfgqo8g0d3mk7ii5s_normal.jpeg", "created_at": "Mon Apr 26 06:01:55 +0000 2010", "location": "LA, CA", "follow_request_sent": null, "profile_link_color": "0084B4", "is_translator": false, "id_str": "137238150", "entities": { "url": { "urls": [ { "expanded_url": null, "url": "", "indices": [ 0, 0 ] } ] }, "description": { "urls": [ ] } }, "default_profile": true, "contributors_enabled": false, "favourites_count": 0, "url": null, "profile_image_url_https": "https://si0.twimg.com/profile_images/2359746665/1v6zfgqo8g0d3mk7ii5s_normal.jpeg", "utc_offset": -28800, "id": 137238150, "profile_use_background_image": true, "listed_count": 2, "profile_text_color": "333333", "lang": "en", "followers_count": 70, "protected": false, "notifications": null, "profile_background_image_url_https": "https://si0.twimg.com/images/themes/theme1/bg.png", "profile_background_color": "C0DEED", "verified": false, "geo_enabled": true, "time_zone": "Pacific Time (US & Canada)", "description": "Born 330 Live 310", 
"default_profile_image": false, "profile_background_image_url": "https://a0.twimg.com/images/themes/theme1/bg.png", "statuses_count": 579, "friends_count": 110, "following": null, "show_all_inline_media": false, "screen_name": "sean_cummings" }, "in_reply_to_screen_name": null, "source": "Twitter for Mac", "in_reply_to_status_id": null }, { "coordinates": null, "favorited": false, "truncated": false, "created_at": "Fri Sep 21 23:40:54 +0000 2012", "id_str": "249292149810667520", "entities": { "urls": [ ], "hashtags": [ { "text": "FreeBandNames", "indices": [ 20, 34 ] } ], "user_mentions": [ ] }, "in_reply_to_user_id_str": null, "contributors": null, "text": "Thee Namaste Nerdz. #FreeBandNames", "metadata": { "iso_language_code": "pl", "result_type": "recent" }, "retweet_count": 0, "in_reply_to_status_id_str": null, "id": 249292149810667520, "geo": null, "retweeted": false, "in_reply_to_user_id": null, "place": null, "user": { "profile_sidebar_fill_color": "DDFFCC", "profile_sidebar_border_color": "BDDCAD", "profile_background_tile": true, "name": "Chaz Martenstein", "profile_image_url": "https://a0.twimg.com/profile_images/447958234/Lichtenstein_normal.jpg", "created_at": "Tue Apr 07 19:05:07 +0000 2009", "location": "Durham, NC", "follow_request_sent": null, "profile_link_color": "0084B4", "is_translator": false, "id_str": "29516238", "entities": { "url": { "urls": [ { "expanded_url": null, "url": "https://bullcityrecords.com/wnng/", "indices": [ 0, 32 ] } ] }, "description": { "urls": [ ] } }, "default_profile": false, "contributors_enabled": false, "favourites_count": 8, "url": "https://bullcityrecords.com/wnng/", "profile_image_url_https": "https://si0.twimg.com/profile_images/447958234/Lichtenstein_normal.jpg", "utc_offset": -18000, "id": 29516238, "profile_use_background_image": true, "listed_count": 118, "profile_text_color": "333333", "lang": "en", "followers_count": 2052, "protected": false, "notifications": null, "profile_background_image_url_https": 
"https://si0.twimg.com/profile_background_images/9423277/background_tile.bmp", "profile_background_color": "9AE4E8", "verified": false, "geo_enabled": false, "time_zone": "Eastern Time (US & Canada)", "description": "You will come to Durham, North Carolina. I will sell you some records then, here in Durham, North Carolina. Fun will happen.", "default_profile_image": false, "profile_background_image_url": "https://a0.twimg.com/profile_background_images/9423277/background_tile.bmp", "statuses_count": 7579, "friends_count": 348, "following": null, "show_all_inline_media": true, "screen_name": "bullcityrecords" }, "in_reply_to_screen_name": null, "source": "web", "in_reply_to_status_id": null }, { "coordinates": null, "favorited": false, "truncated": false, "created_at": "Fri Sep 21 23:30:20 +0000 2012", "id_str": "249289491129438208", "entities": { "urls": [ ], "hashtags": [ { "text": "freebandnames", "indices": [ 29, 43 ] } ], "user_mentions": [ ] }, "in_reply_to_user_id_str": null, "contributors": null, "text": "Mexican Heaven, Mexican Hell #freebandnames", "metadata": { "iso_language_code": "en", "result_type": "recent" }, "retweet_count": 0, "in_reply_to_status_id_str": null, "id": 249289491129438208, "geo": null, "retweeted": false, "in_reply_to_user_id": null, "place": null, "user": { "profile_sidebar_fill_color": "99CC33", "profile_sidebar_border_color": "829D5E", "profile_background_tile": false, "name": "Thomas John Wakeman", "profile_image_url": "https://a0.twimg.com/profile_images/2219333930/Froggystyle_normal.png", "created_at": "Tue Sep 01 21:21:35 +0000 2009", "location": "Kingston New York", "follow_request_sent": null, "profile_link_color": "D02B55", "is_translator": false, "id_str": "70789458", "entities": { "url": { "urls": [ { "expanded_url": null, "url": "", "indices": [ 0, 0 ] } ] }, "description": { "urls": [ ] } }, "default_profile": false, "contributors_enabled": false, "favourites_count": 19, "url": null, "profile_image_url_https": 
"https://si0.twimg.com/profile_images/2219333930/Froggystyle_normal.png", "utc_offset": -18000, "id": 70789458, "profile_use_background_image": true, "listed_count": 1, "profile_text_color": "3E4415", "lang": "en", "followers_count": 63, "protected": false, "notifications": null, "profile_background_image_url_https": "https://si0.twimg.com/images/themes/theme5/bg.gif", "profile_background_color": "352726", "verified": false, "geo_enabled": false, "time_zone": "Eastern Time (US & Canada)", "description": "Science Fiction Writer, sort of. Likes Superheroes, Mole People, Alt. Timelines.", "default_profile_image": false, "profile_background_image_url": "https://a0.twimg.com/images/themes/theme5/bg.gif", "statuses_count": 1048, "friends_count": 63, "following": null, "show_all_inline_media": false, "screen_name": "MonkiesFist" }, "in_reply_to_screen_name": null, "source": "web", "in_reply_to_status_id": null }, { "coordinates": null, "favorited": false, "truncated": false, "created_at": "Fri Sep 21 22:51:18 +0000 2012", "id_str": "249279667666817024", "entities": { "urls": [ ], "hashtags": [ { "text": "freebandnames", "indices": [ 20, 34 ] } ], "user_mentions": [ ] }, "in_reply_to_user_id_str": null, "contributors": null, "text": "The Foolish Mortals #freebandnames", "metadata": { "iso_language_code": "en", "result_type": "recent" }, "retweet_count": 0, "in_reply_to_status_id_str": null, "id": 249279667666817024, "geo": null, "retweeted": false, "in_reply_to_user_id": null, "place": null, "user": { "profile_sidebar_fill_color": "BFAC83", "profile_sidebar_border_color": "615A44", "profile_background_tile": true, "name": "Marty Elmer", "profile_image_url": "https://a0.twimg.com/profile_images/1629790393/shrinker_2000_trans_normal.png", "created_at": "Mon May 04 00:05:00 +0000 2009", "location": "Wisconsin, USA", "follow_request_sent": null, "profile_link_color": "3B2A26", "is_translator": false, "id_str": "37539828", "entities": { "url": { "urls": [ { "expanded_url": 
null, "url": "https://www.omnitarian.me", "indices": [ 0, 24 ] } ] }, "description": { "urls": [ ] } }, "default_profile": false, "contributors_enabled": false, "favourites_count": 647, "url": "https://www.omnitarian.me", "profile_image_url_https": "https://si0.twimg.com/profile_images/1629790393/shrinker_2000_trans_normal.png", "utc_offset": -21600, "id": 37539828, "profile_use_background_image": true, "listed_count": 52, "profile_text_color": "000000", "lang": "en", "followers_count": 608, "protected": false, "notifications": null, "profile_background_image_url_https": "https://si0.twimg.com/profile_background_images/106455659/rect6056-9.png", "profile_background_color": "EEE3C4", "verified": false, "geo_enabled": false, "time_zone": "Central Time (US & Canada)", "description": "Cartoonist, Illustrator, and T-Shirt connoisseur", "default_profile_image": false, "profile_background_image_url": "https://a0.twimg.com/profile_background_images/106455659/rect6056-9.png", "statuses_count": 3575, "friends_count": 249, "following": null, "show_all_inline_media": true, "screen_name": "Omnitarian" }, "in_reply_to_screen_name": null, "source": "Twitter for iPhone", "in_reply_to_status_id": null } ], "search_metadata": { "max_id": 250126199840518145, "since_id": 24012619984051000, "refresh_url": "?since_id=250126199840518145&q=%23freebandnames&result_type=mixed&include_entities=1", "next_results": "?max_id=249279667666817023&q=%23freebandnames&count=4&include_entities=1&result_type=mixed", "count": 4, "completed_in": 0.035, "since_id_str": "24012619984051000", "query": "%23freebandnames", "max_id_str": "250126199840518145" } }`

// TwitterStruct is the typed binding target for the TwitterJson sample: the
// top-level object with its "statuses" array and "search_metadata" object.
type TwitterStruct struct {
	Statuses       []Statuses     `json:"statuses"`
	SearchMetadata SearchMetadata `json:"search_metadata"`
}

// Hashtags is one element of an "entities.hashtags" array in the sample:
// the tag text and its "indices" pair.
type Hashtags struct {
	Text    string `json:"text"`
	Indices []int  `json:"indices"`
}

// Entities binds the "entities" object of a status. "urls" and
// "user_mentions" are empty arrays wherever they appear in the sample, so
// they are kept as untyped slices.
type Entities struct {
	Urls         []interface{} `json:"urls"`
	Hashtags     []Hashtags    `json:"hashtags"`
	UserMentions []interface{} `json:"user_mentions"`
}
// Metadata binds the "metadata" object of a status.
type Metadata struct {
	IsoLanguageCode string `json:"iso_language_code"`
	ResultType      string `json:"result_type"`
}

// Urls is one element of the "urls" array under a user's "entities.url".
// ExpandedURL is untyped because "expanded_url" is null in the sample.
type Urls struct {
	ExpandedURL interface{} `json:"expanded_url"`
	URL         string      `json:"url"`
	Indices     []int       `json:"indices"`
}

// URL binds the "url" object inside a user's "entities".
type URL struct {
	Urls []Urls `json:"urls"`
}

// Description binds the "description" object inside a user's "entities";
// its "urls" array is empty in the sample, so it is kept untyped.
type Description struct {
	Urls []interface{} `json:"urls"`
}

// UserEntities binds the "entities" object of a user. It is distinct from
// Entities, which binds the "entities" object of a status.
type UserEntities struct {
	URL         URL         `json:"url"`
	Description Description `json:"description"`
}

// User binds the "user" object embedded in each status. Fields typed
// interface{} correspond to keys that are null in the sample, or, for "url",
// either null or a string.
type User struct {
	ProfileSidebarFillColor        string       `json:"profile_sidebar_fill_color"`
	ProfileSidebarBorderColor      string       `json:"profile_sidebar_border_color"`
	ProfileBackgroundTile          bool         `json:"profile_background_tile"`
	Name                           string       `json:"name"`
	ProfileImageURL                string       `json:"profile_image_url"`
	CreatedAt                      string       `json:"created_at"`
	Location                       string       `json:"location"`
	FollowRequestSent              interface{}  `json:"follow_request_sent"`
	ProfileLinkColor               string       `json:"profile_link_color"`
	IsTranslator                   bool         `json:"is_translator"`
	IDStr                          string       `json:"id_str"`
	Entities                       UserEntities `json:"entities"`
	DefaultProfile                 bool         `json:"default_profile"`
	ContributorsEnabled            bool         `json:"contributors_enabled"`
	FavouritesCount                int          `json:"favourites_count"`
	URL                            interface{}  `json:"url"`
	ProfileImageURLHTTPS           string       `json:"profile_image_url_https"`
	UtcOffset                      int          `json:"utc_offset"`
	ID                             int          `json:"id"`
	ProfileUseBackgroundImage      bool         `json:"profile_use_background_image"`
	ListedCount                    int          `json:"listed_count"`
	ProfileTextColor               string       `json:"profile_text_color"`
	Lang                           string       `json:"lang"`
	FollowersCount                 int          `json:"followers_count"`
	Protected                      bool         `json:"protected"`
	Notifications                  interface{}  `json:"notifications"`
	ProfileBackgroundImageURLHTTPS string       `json:"profile_background_image_url_https"`
	ProfileBackgroundColor         string       `json:"profile_background_color"`
	Verified                       bool         `json:"verified"`
	GeoEnabled                     bool         `json:"geo_enabled"`
	TimeZone                       string       `json:"time_zone"`
	Description                    string       `json:"description"`
	DefaultProfileImage            bool         `json:"default_profile_image"`
	ProfileBackgroundImageURL      string       `json:"profile_background_image_url"`
	StatusesCount                  int          `json:"statuses_count"`
	FriendsCount                   int          `json:"friends_count"`
	Following                      interface{}  `json:"following"`
	ShowAllInlineMedia             bool         `json:"show_all_inline_media"`
	ScreenName                     string       `json:"screen_name"`
}

// Statuses binds one element of the top-level "statuses" array. Fields typed
// interface{} correspond to keys that are null in the sample. ID is int64
// here, whereas User.ID is a plain int.
type Statuses struct {
	Coordinates          interface{} `json:"coordinates"`
	Favorited            bool        `json:"favorited"`
	Truncated            bool        `json:"truncated"`
	CreatedAt            string      `json:"created_at"`
	IDStr                string      `json:"id_str"`
	Entities             Entities    `json:"entities"`
	InReplyToUserIDStr   interface{} `json:"in_reply_to_user_id_str"`
	Contributors         interface{} `json:"contributors"`
	Text                 string      `json:"text"`
	Metadata             Metadata    `json:"metadata"`
	RetweetCount         int         `json:"retweet_count"`
	InReplyToStatusIDStr interface{} `json:"in_reply_to_status_id_str"`
	ID                   int64       `json:"id"`
	Geo                  interface{} `json:"geo"`
	Retweeted            bool        `json:"retweeted"`
	InReplyToUserID      interface{} `json:"in_reply_to_user_id"`
	Place                interface{} `json:"place"`
	User                 User        `json:"user"`
	InReplyToScreenName  interface{} `json:"in_reply_to_screen_name"`
	Source               string      `json:"source"`
	InReplyToStatusID    interface{} `json:"in_reply_to_status_id"`
}

// SearchMetadata binds the top-level "search_metadata" object. The numeric
// ids are int64; their "_str" twins carry the same ids as strings.
type SearchMetadata struct {
	MaxID       int64   `json:"max_id"`
	SinceID     int64   `json:"since_id"`
	RefreshURL  string  `json:"refresh_url"`
	NextResults string  `json:"next_results"`
	Count       int     `json:"count"`
	CompletedIn float64 `json:"completed_in"`
	SinceIDStr  string  `json:"since_id_str"`
	Query       string  `json:"query"`
	MaxIDStr    string  `json:"max_id_str"`
}
================================================ FILE: docs/INTRODUCTION.md ================================================ # Introduction to Sonic English | [中文](INTRODUCTION_ZH_CN.md) ## Background According to the overall profiling of production services in Bytedance, we found that the overhead of JSON serialization and deserialization is unexpectedly high: the total is near to 10% CPU, and the extreme one accounts for more than 40% CPU.
Therefore, **the performance of the JSON library is a key issue for improving machine utilization**. ## Research We conducted a series of surveys and benchmarks on open-source JSON libraries for Golang, but the result was disappointing: **no silver bullet**. First of all, none of them consistently ranks in the top three across various business scenarios. Even the most widely used [json-iterator](https://github.com/json-iterator/go) degrades severely in generic (no-schema) or big-volume JSON serialization and deserialization. Secondly, compared with JSON libraries written in other languages, they are generally much slower. For example, [Simdjson-go](https://github.com/minio/simdjson-go) has a 50% reduction in decoding performance compared to [simdjson](https://github.com/simdjson/simdjson). What's more, we found barely any JSON libraries that provide an API to modify the underlying values. Therefore, we decided to **develop a brand-new JSON library with high performance as well as wide applicability**. ## Thinking Before starting our design, we need to figure out some questions: ### Why is Json-iterator faster than Standard Library? First of all, the **schema-based processing mechanism** used by the standard library is commendable, in which the parser can obtain meta information in advance when scanning, thereby shortening the time of branch selection. However, its original implementation did not make good use of this mechanism; instead, **it spent a lot of time on reflection to obtain the meta info of the schema**. Meanwhile, the approach of json-iterator is to interpret a structure as field-by-field encoding and decoding functions, and then assemble and cache them, minimizing the performance loss caused by reflection. But does it work once and for all? No.
In practical tests, we found that **the deeper and larger the input JSON got, the smaller the gap between json-iterator and other libraries gradually became** - eventually it even got surpassed: ![Scalability](./imgs/introduction-1.png) The reason is that **this implementation transforms into a large number of interface encapsulations and function calls**, followed by function-call losses: 1. **Calling an interface involves dynamic addressing of the itab** 2. **Assembled functions cannot be inlined**, while Golang's function-call performance is poor (no parameter-passing-by-register) #### Is there a way to avoid the function-call overhead of dynamic assembly? The first thing we thought about was code generation like [easyjson](https://github.com/mailru/easyjson). But it comes with **schema dependency and convenience losses**. To achieve a real drop-in replacement of the standard library, we turned to another technology - **[JIT](https://en.wikipedia.org/wiki/Just-in-time_compilation) (just-in-time compiling)**. The compiled codec function is a single integrated function, which greatly reduces function calls while preserving flexibility. ### Why is Simdjson-go not fast enough? [SIMD](https://en.wikipedia.org/wiki/SIMD) (Single-Instruction-Multi-Data) is a special set of CPU instructions for the parallel processing of vectorized data. At present, it is supported by most CPUs and widely used in image processing and big data computing. Undoubtedly, SIMD is useful in JSON processing (itoa, char-search, and so on are all suitable scenarios). We can see that simdjson-go is very competitive in large JSON scenarios (>100KB). However, for some extremely small or irregular character strings, **the extra load operation required by SIMD will lead to performance degradation**. Therefore, we need to dedicate ourselves to branch predicting and decide which scenarios should use SIMD and which should not (for example, when the string length is less than 16 bytes).
The second problem comes from the Go compiler itself. In order to ensure the compilation speed, **Golang does very little optimization work during the compilation phase** and cannot directly use compiler backends such as [LLVM](https://en.wikipedia.org/wiki/LLVM) (Low-Level Virtual Machine) for optimization. So, **can some crucial calculation functions be written in another language with higher execution efficiency**? C/Clang is an ideal compilation tool (it integrates LLVM internally). But the key is how to embed the optimized assembly into Golang. ### How to use Gjson well? We also found that [gjson](https://github.com/tidwall/gjson) has a huge advantage in single-key lookup scenarios. This is because its lookup is implemented by a **lazy-load mechanism**, which subtly skips the values it passes by and effectively avoids a lot of unnecessary parsing. Practical application has proved that making good use of this feature in production can indeed bring benefits. But when it comes to multi-key lookup, gjson does even worse than std, which is a side effect of its skipping mechanism - **searching for the same path leads to repeated parsing** (skip is also a lightweight parsing). Therefore, the accurate adaptation of practical scenarios is the key. ## Design Based on the above questions, our design is easy to implement: 1. Aiming at the function-call overhead caused by the codec dynamic-assembly, **`JIT` tech is used to assemble opcodes (asm) corresponding to the schema at runtime**, which is finally cached into the off-heap memory in the form of Golang functions. 2. For practical scenarios where big data and small data coexist, we **use pre-conditional judgment** (string size, floating precision, etc.) **to combine `SIMD` with scalar instructions** to achieve the best adaptation. 3.
As for the insufficient compile-time optimization of the Go language, we decided to **use `C/Clang` to write and compile core computational functions**, and **developed a set of [asm2asm](https://github.com/chenzhuoyu/asm2asm) tools to translate the fully optimized x86 assembly into plan9** and finally load it into Golang runtime. 4. Given the big speed gap between parsing and skipping, the **`lazy-load` mechanism** is certainly used in our AST parser, but in **a more adaptive and efficient way to reduce the overhead of multiple-key queries**. ![design](./imgs/introduction-2.png) In detail, we conducted some further optimization: 1. Since the native-asm functions cannot be inlined in Golang, we found that its cost even exceeded the improvement brought by the optimization of the C compiler. So we reimplemented a set of lightweight function-calls in JIT: - `Global-function-table + static offset` for calling instruction - **Pass parameters using registers** 2. `sync.Map` was used to cache the codecs at first, but for our **quasi-static** (read far more than write), **fewer elements** (usually no more than a few dozen) scenarios, its performance is not optimal, so we reimplemented a high-performance and concurrent-safe cache with `open-addressing-hash + RCU` tech.
================================================ FILE: docs/INTRODUCTION_ZH_CN.md ================================================ # Sonic 简介 [English](INTRODUCTION.md) | 中文 ## 背景 根据字节跳动生产服务的整体分析,我们发现 JSON 序列化和反序列化的开销意外地很高:CPU 使用率接近 10%,其中极端情况下超过 40%。因此,**JSON 库的性能是提高机器利用率的关键问题**。 ## 研究 我们对开源的 Golang JSON 库进行了一系列调研和基准测试,但结果令人失望:**没有万能的解决方案**。首先,没有一个库能够在各种业务场景中至少达到前三名。即使是最广泛使用的 [json-iterator](https://github.com/json-iterator/go) ,在通用(无模式)或大量 JSON 序列化和反序列化的情况下,性能也会严重下降。其次,与其他语言编写的 JSON 库相比,它们的速度通常要慢得多。例如,[Simdjson-go](https://github.com/minio/simdjson-go)的解码性能比[simdjson](https://github.com/simdjson/simdjson)低了 50%。而且,我们几乎找不到提供修改底层值 API 的 JSON 库。 因此,我们决定**开发一个全新的高性能、适用广泛的 JSON 库**。 ## 设想 在开始设计之前,我们需要搞清楚一些问题: ### 为什么 Json-iterator 比标准库快? 首先,标准库使用的**基于模式(Schema)的处理机制**是值得称赞的,解析器可以在扫描时提前获取元信息,从而缩短分支选择的时间。然而,它的原始实现没有很好地利用这个机制,而是**花费了大量时间使用反射获取模式的元信息**。与此同时,json-iterator 的方法是:将结构解释为逐个字段的编码和解码函数,然后将它们组装和缓存起来,最小化反射带来的性能损失。但这种方法是否一劳永逸呢?实际测试中,我们发现**随着输入的 JSON 变深、变大,json-iterator 和其他库之间的差距逐渐缩小**——甚至最终被超越: ![Scalability](./imgs/introduction-1.png) 原因是**该实现转化为大量接口封装和函数调用**,导致了函数调用的性能损失: 1. **调用接口涉及到对 `itab` 的动态地址获取** 2. **组装的函数无法内联**,而 Golang 的函数调用性能较差(没有寄存器传参) #### 有没有办法避免动态组装函数的调用开销? 我们首先考虑的是类似[easyjson](https://github.com/mailru/easyjson)的代码生成。但是这会带来**模式依赖和便利性下降**。为了实现对标准库的真正插拔式替换,我们转向了另一种技术- **[JIT](https://zh.wikipedia.org/zh-cn/即时编译) (即时编译)**。因为编译后的编解码函数是一个集成的函数,它可以大大减少函数调用,同时保证灵活性。 ### 为什么 Simdjson-go 速度不够快? [SIMD](https://en.wikipedia.org/wiki/SIMD) (单指令流多数据流)是一组特殊的 CPU 指令,用于并行处理矢量化数据。目前,大多数 CPU 都支持 SIMD ,并广泛用于图像处理和大数据计算。毫无疑问,SIMD在JSON处理中很有用(整型-字符串转换,字符搜索等都是合适的场景)。我们可以看到, simdjson-go 在大型 JSON 场景 (>100KB) 下非常有竞争力。然而,对于一些很小或不规则的字符串, **SIMD 所需的额外加载操作将导致性能下降**。因此,我们需要考虑不同的场景,并决定哪些场景应该使用 SIMD ,哪些不应该使用(例如,长度小于16字节的字符串)。 第二个问题来自 Go 编译器本身。为了保证编译速度, **Golang 在编译阶段几乎不进行任何优化工作**,也无法直接使用编译器后端,如 [LLVM](https://en.wikipedia.org/wiki/LLVM) 等进行优化。 那么,**一些关键的计算函数能否用计算效率更高的其他语言编写**? C/Clang 是一种理想的编译工具(内部集成了 LLVM )。但关键是如何将优化后的汇编嵌入到 Golang 中。 ### 如何更好地使用 `Gjson` ?
我们还发现在单键查找场景中, [gjson](https://github.com/tidwall/gjson)具有巨大的优势。这是因为它的查找是通过**惰性加载机制**实现的,巧妙地跳过了传递的值,并有效的减少了许多不必要的解析。实际应用证明,在产品中充分利用这个特性确实能带来收益。但是,当涉及到多键查找时,Gjson甚至比标准库还要差,这是其跳过机制的副作用——**搜索相同路径会导致重复解析**(跳过解析也是一种轻量的解析)因此,根据实际情况准确的做出调整是关键问题。 ## 设计 基于以上问题,我们的设计很好实现: 1. 针对编解码动态汇编的函数调用开销,我们**使用 JIT 技术在运行时组装与模式对应的字节码(汇编指令)**,最终将其以 Golang 函数的形式缓存在堆外内存上。 2. 针对大数据和小数据共存的实际场景,我们**使用预处理判断**(字符串大小、浮点数精度等)**将 SIMD 与标量指令相结合**,从而实现对实际情况的最佳适应。 3. 对于 Golang 语言编译优化的不足,我们决定**使用 C/Clang 编写和编译核心计算函数**,并且**开发了一套 [asm2asm](https://github.com/chenzhuoyu/asm2asm) 工具,将经过充分优化的 x86 汇编代码转换为 Plan9 格式**,最终加载到 Golang 运行时中。 4. 考虑到解析和跳过解析之间的速度差异很大, **惰性加载机制**当然也在我们的 AST 解析器中使用了,但**以一种更具适应性和高效性的方式来降低多键查询的开销**。 ![design](./imgs/introduction-2.png) 在细节上,我们进行了一些进一步的优化: 1. 由于 Golang 中的原生汇编函数不能被内联,我们发现其成本甚至超过了 C 编译器的优化所带来的改善。所以我们在 JIT 中重新实现了一组轻量级的函数调用: - 全局函数表+静态偏移量,用于调用指令 - **使用寄存器传递参数** 2. `Sync.Map` 一开始被用来缓存编解码器,但是对于我们的**准静态**(读远多于写),**元素较少**(通常不足几十个)的场景,它的性能并不理想,所以我们使用开放寻址哈希和 RCU 技术重新实现了一个高性能且并发安全的缓存。 ================================================ FILE: encode_test.go ================================================ //go:build (amd64 && go1.17 && !go1.27) || (arm64 && go1.20 && !go1.27) // +build amd64,go1.17,!go1.27 arm64,go1.20,!go1.27 /* * Copyright 2021 ByteDance Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ package sonic import ( "bytes" "encoding" "encoding/json" "fmt" "log" "math" "os" "reflect" "regexp" "runtime" "runtime/debug" "strconv" "strings" "testing" "time" "unsafe" "github.com/bytedance/sonic/encoder" "github.com/stretchr/testify/assert" ) var ( debugAsyncGC = os.Getenv("SONIC_NO_ASYNC_GC") == "" ) func TestMain(m *testing.M) { go func() { if !debugAsyncGC { return } println("Begin GC looping...") for { runtime.GC() debug.FreeOSMemory() } }() time.Sleep(time.Millisecond) m.Run() } type Optionals struct { Sr string `json:"sr"` So string `json:"so,omitempty"` Sw string `json:"-"` Ir int `json:"omitempty"` // actually named omitempty, not an option Io int `json:"io,omitempty"` Slr []string `json:"slr,random"` Slo []string `json:"slo,omitempty"` Mr map[string]interface{} `json:"mr"` Mo map[string]interface{} `json:",omitempty"` Fr float64 `json:"fr"` Fo float64 `json:"fo,omitempty"` Br bool `json:"br"` Bo bool `json:"bo,omitempty"` Ur uint `json:"ur"` Uo uint `json:"uo,omitempty"` Str struct{} `json:"str"` Sto struct{} `json:"sto,omitempty"` } var optionalsExpected = `{ "sr": "", "omitempty": 0, "slr": null, "mr": {}, "fr": 0, "br": false, "ur": 0, "str": {}, "sto": {} }` func TestOmitEmpty(t *testing.T) { var o Optionals o.Sw = "something" o.Mr = map[string]interface{}{} o.Mo = map[string]interface{}{} got, err := encoder.EncodeIndented(&o, "", " ", 0) if err != nil { t.Fatal(err) } if got := string(got); got != optionalsExpected { t.Errorf(" got: %s\nwant: %s\n", got, optionalsExpected) } } type StringTag struct { BoolStr bool `json:",string"` IntStr int64 `json:",string"` UintptrStr uintptr `json:",string"` StrStr string `json:",string"` NumberStr json.Number `json:",string"` } func TestRoundtripStringTag(t *testing.T) { tests := []struct { name string in StringTag want string // empty to just test that we roundtrip }{ { name: "AllTypes", in: StringTag{ BoolStr: true, IntStr: 42, UintptrStr: 44, StrStr: "xzbit", NumberStr: "46", }, want: `{ "BoolStr": 
"true", "IntStr": "42", "UintptrStr": "44", "StrStr": "\"xzbit\"", "NumberStr": "46" }`, }, { // See golang.org/issues/38173. name: "StringDoubleEscapes", in: StringTag{ StrStr: "\b\f\n\r\t\"\\", NumberStr: "0", // just to satisfy the roundtrip }, want: `{ "BoolStr": "false", "IntStr": "0", "UintptrStr": "0", "StrStr": "\"\\u0008\\u000c\\n\\r\\t\\\"\\\\\"", "NumberStr": "0" }`, }, } for _, test := range tests { t.Run(test.name, func(t *testing.T) { // Indent with a tab prefix to make the multi-line string // literals in the table nicer to read. got, err := encoder.EncodeIndented(&test.in, " ", " ", 0) if err != nil { t.Fatal(err) } if got := string(got); got != test.want { t.Fatalf(" got: %s\nwant: %s\n", got, test.want) } // Verify that it round-trips. var s2 StringTag if err := Unmarshal(got, &s2); err != nil { t.Fatalf("Decode: %v", err) } if !reflect.DeepEqual(test.in, s2) { t.Fatalf("decode didn't match.\nsource: %#v\nEncoded as:\n%s\ndecode: %#v", test.in, string(got), s2) } }) } } // byte slices are special even if they're renamed types. 
type renamedByte byte type renamedByteSlice []byte type renamedRenamedByteSlice []renamedByte func TestEncodeRenamedByteSlice(t *testing.T) { s := renamedByteSlice("abc") result, err := Marshal(s) if err != nil { t.Fatal(err) } expect := `"YWJj"` if string(result) != expect { t.Errorf(" got %s want %s", result, expect) } r := renamedRenamedByteSlice("abc") result, err = Marshal(r) if err != nil { t.Fatal(err) } if string(result) != expect { t.Errorf(" got %s want %s", result, expect) } } type SamePointerNoCycle struct { Ptr1, Ptr2 *SamePointerNoCycle } var samePointerNoCycle = &SamePointerNoCycle{} type PointerCycle struct { Ptr *PointerCycle } var pointerCycle = &PointerCycle{} type PointerCycleIndirect struct { Ptrs []interface{} } type RecursiveSlice []RecursiveSlice var ( pointerCycleIndirect = &PointerCycleIndirect{} mapCycle = make(map[string]interface{}) sliceCycle = []interface{}{nil} sliceNoCycle = []interface{}{nil, nil} recursiveSliceCycle = []RecursiveSlice{nil} ) func init() { ptr := &SamePointerNoCycle{} samePointerNoCycle.Ptr1 = ptr samePointerNoCycle.Ptr2 = ptr pointerCycle.Ptr = pointerCycle pointerCycleIndirect.Ptrs = []interface{}{pointerCycleIndirect} mapCycle["x"] = mapCycle sliceCycle[0] = sliceCycle sliceNoCycle[1] = sliceNoCycle[:1] for i := 3; i > 0; i-- { sliceNoCycle = []interface{}{sliceNoCycle} } recursiveSliceCycle[0] = recursiveSliceCycle } func TestSamePointerNoCycle(t *testing.T) { if _, err := Marshal(samePointerNoCycle); err != nil { t.Fatalf("unexpected error: %v", err) } } func TestSliceNoCycle(t *testing.T) { if _, err := Marshal(sliceNoCycle); err != nil { t.Fatalf("unexpected error: %v", err) } } var unsupportedValues = []interface{}{ math.NaN(), math.Inf(-1), math.Inf(1), pointerCycle, pointerCycleIndirect, mapCycle, sliceCycle, recursiveSliceCycle, } func TestUnsupportedValues(t *testing.T) { for _, v := range unsupportedValues { if _, err := Marshal(v); err != nil { if _, ok := err.(*json.UnsupportedValueError); !ok { 
t.Errorf("for %v, got %T want UnsupportedValueError", v, err) } } else { t.Errorf("for %v, expected error", v) } } } // Ref has Marshaler and Unmarshaler methods with pointer receiver. type Ref int func (*Ref) MarshalJSON() ([]byte, error) { return []byte(`"ref"`), nil } func (r *Ref) UnmarshalJSON([]byte) error { *r = 12 return nil } // Val has Marshaler methods with value receiver. type Val int func (Val) MarshalJSON() ([]byte, error) { return []byte(`"val"`), nil } // RefText has Marshaler and Unmarshaler methods with pointer receiver. type RefText int func (*RefText) MarshalText() ([]byte, error) { return []byte(`"ref"`), nil } func (r *RefText) UnmarshalText([]byte) error { *r = 13 return nil } // ValText has Marshaler methods with value receiver. type ValText int func (ValText) MarshalText() ([]byte, error) { return []byte(`"val"`), nil } func TestRefValMarshal(t *testing.T) { var s = struct { R0 Ref R1 *Ref R2 RefText R3 *RefText V0 Val V1 *Val V2 ValText V3 *ValText }{ R0: 12, R1: new(Ref), R2: 14, R3: new(RefText), V0: 13, V1: new(Val), V2: 15, V3: new(ValText), } const want = `{"R0":"ref","R1":"ref","R2":"\"ref\"","R3":"\"ref\"","V0":"val","V1":"val","V2":"\"val\"","V3":"\"val\""}` b, err := Marshal(&s) if err != nil { t.Fatalf("Marshal: %v", err) } if got := string(b); got != want { t.Errorf("got %q, want %q", got, want) } } /* FIXME: disabling these test cases for now, because Sonic does not implement HTML escape I don't think there are real usages of the `HTMLEscape` feature in real code // C implements Marshaler and returns unescaped JSON. type C int func (C) MarshalJSON() ([]byte, error) { return []byte(`"<&>"`), nil } // CText implements Marshaler and returns unescaped text. 
type CText int func (CText) MarshalText() ([]byte, error) { return []byte(`"<&>"`), nil } func TestMarshalerEscaping(t *testing.T) { var c C want := `"\u003c\u0026\u003e"` b, err := Marshal(c) if err != nil { t.Fatalf("Marshal(c): %v", err) } if got := string(b); got != want { t.Errorf("Marshal(c) = %#q, want %#q", got, want) } var ct CText want = `"\"\u003c\u0026\u003e\""` b, err = Marshal(ct) if err != nil { t.Fatalf("Marshal(ct): %v", err) } if got := string(b); got != want { t.Errorf("Marshal(ct) = %#q, want %#q", got, want) } } */ func TestAnonymousFields(t *testing.T) { tests := []struct { label string // Test name makeInput func() interface{} // Function to create input value want string // Expected JSON output }{{ // Both S1 and S2 have a field named X. From the perspective of S, // it is ambiguous which one X refers to. // This should not serialize either field. label: "AmbiguousField", makeInput: func() interface{} { type ( S1 struct{ x, X int } S2 struct{ x, X int } S struct { S1 S2 } ) return S{S1{1, 2}, S2{3, 4}} }, want: `{}`, }, { label: "DominantField", // Both S1 and S2 have a field named X, but since S has an X field as // well, it takes precedence over S1.X and S2.X. makeInput: func() interface{} { type ( S1 struct{ x, X int } S2 struct{ x, X int } S struct { S1 S2 x, X int } ) return S{S1{1, 2}, S2{3, 4}, 5, 6} }, want: `{"X":6}`, }, { // Unexported embedded field of non-struct type should not be serialized. label: "UnexportedEmbeddedInt", makeInput: func() interface{} { type ( myInt int S struct{ myInt } ) return S{5} }, want: `{}`, }, { // Exported embedded field of non-struct type should be serialized. label: "ExportedEmbeddedInt", makeInput: func() interface{} { type ( MyInt int S struct{ MyInt } ) return S{5} }, want: `{"MyInt":5}`, }, { // Unexported embedded field of pointer to non-struct type // should not be serialized. 
label: "UnexportedEmbeddedIntPointer", makeInput: func() interface{} { type ( myInt int S struct{ *myInt } ) s := S{new(myInt)} *s.myInt = 5 return s }, want: `{}`, }, { // Exported embedded field of pointer to non-struct type // should be serialized. label: "ExportedEmbeddedIntPointer", makeInput: func() interface{} { type ( MyInt int S struct{ *MyInt } ) s := S{new(MyInt)} *s.MyInt = 5 return s }, want: `{"MyInt":5}`, }, { // Exported fields of embedded structs should have their // exported fields be serialized regardless of whether the struct types // themselves are exported. label: "EmbeddedStruct", makeInput: func() interface{} { type ( s1 struct{ x, X int } S2 struct{ y, Y int } S struct { s1 S2 } ) return S{s1{1, 2}, S2{3, 4}} }, want: `{"X":2,"Y":4}`, }, { // Exported fields of pointers to embedded structs should have their // exported fields be serialized regardless of whether the struct types // themselves are exported. label: "EmbeddedStructPointer", makeInput: func() interface{} { type ( s1 struct{ x, X int } S2 struct{ y, Y int } S struct { *s1 *S2 } ) return S{&s1{1, 2}, &S2{3, 4}} }, want: `{"X":2,"Y":4}`, }, { // Exported fields on embedded unexported structs at multiple levels // of nesting should still be serialized. label: "NestedStructAndInts", makeInput: func() interface{} { type ( MyInt1 int MyInt2 int myInt int s2 struct { MyInt2 myInt } s1 struct { MyInt1 myInt s2 } S struct { s1 myInt } ) return S{s1{1, 2, s2{3, 4}}, 6} }, want: `{"MyInt1":1,"MyInt2":3}`, }, { // If an anonymous struct pointer field is nil, we should ignore // the embedded fields behind it. Not properly doing so may // result in the wrong output or reflect panics. 
label: "EmbeddedFieldBehindNilPointer", makeInput: func() interface{} { type ( S2 struct{ Field string } S struct{ *S2 } ) return S{} }, want: `{}`, }} for _, tt := range tests { t.Run(tt.label, func(t *testing.T) { b, err := Marshal(tt.makeInput()) if err != nil { t.Fatalf("Marshal() = %v, want nil error", err) } if string(b) != tt.want { t.Fatalf("Marshal() = %q, want %q", b, tt.want) } }) } } type BugA struct { S string } type BugB struct { BugA S string } type BugC struct { S string } // Legal Go: We never use the repeated embedded field (S). type BugX struct { A int BugA BugB } // golang.org/issue/16042. // Even if a nil interface value is passed in, as long as // it implements Marshaler, it should be marshaled. type nilJSONMarshaler string func (nm *nilJSONMarshaler) MarshalJSON() ([]byte, error) { if nm == nil { return Marshal("0zenil0") } return Marshal("zenil:" + string(*nm)) } // golang.org/issue/34235. // Even if a nil interface value is passed in, as long as // it implements encoding.TextMarshaler, it should be marshaled. type nilTextMarshaler string func (nm *nilTextMarshaler) MarshalText() ([]byte, error) { if nm == nil { return []byte("0zenil0"), nil } return []byte("zenil:" + string(*nm)), nil } // See golang.org/issue/16042 and golang.org/issue/34235. 
func TestNilMarshal(t *testing.T) { testCases := []struct { v interface{} want string }{ {v: nil, want: `null`}, {v: new(float64), want: `0`}, {v: []interface{}(nil), want: `null`}, {v: []string(nil), want: `null`}, {v: map[string]string(nil), want: `null`}, {v: []byte(nil), want: `null`}, {v: struct{ M string }{"gopher"}, want: `{"M":"gopher"}`}, {v: struct{ M json.Marshaler }{}, want: `{"M":null}`}, {v: struct{ M json.Marshaler }{(*nilJSONMarshaler)(nil)}, want: `{"M":"0zenil0"}`}, {v: struct{ M interface{} }{(*nilJSONMarshaler)(nil)}, want: `{"M":null}`}, {v: struct{ M encoding.TextMarshaler }{}, want: `{"M":null}`}, {v: struct{ M encoding.TextMarshaler }{(*nilTextMarshaler)(nil)}, want: `{"M":"0zenil0"}`}, {v: struct{ M interface{} }{(*nilTextMarshaler)(nil)}, want: `{"M":null}`}, } for _, tt := range testCases { out, err := Marshal(tt.v) if err != nil || string(out) != tt.want { t.Errorf("Marshal(%#v) = %#q, %#v, want %#q, nil", tt.v, out, err, tt.want) continue } } } // Issue 5245. func TestEmbeddedBug(t *testing.T) { v := BugB{ BugA{"A"}, "B", } b, err := Marshal(v) if err != nil { t.Fatal("Marshal:", err) } want := `{"S":"B"}` got := string(b) if got != want { t.Fatalf("Marshal: got %s want %s", got, want) } // Now check that the duplicate field, S, does not appear. x := BugX{ A: 23, } b, err = Marshal(x) if err != nil { t.Fatal("Marshal:", err) } want = `{"A":23}` got = string(b) if got != want { t.Fatalf("Marshal: got %s want %s", got, want) } } type BugD struct { // Same as BugA after tagging. XXX string `json:"S"` } // BugD's tagged S field should dominate BugA's. type BugY struct { BugA BugD } // Test that a field with a tag dominates untagged fields. 
func TestTaggedFieldDominates(t *testing.T) { v := BugY{ BugA{"BugA"}, BugD{"BugD"}, } b, err := Marshal(v) if err != nil { t.Fatal("Marshal:", err) } want := `{"S":"BugD"}` got := string(b) if got != want { t.Fatalf("Marshal: got %s want %s", got, want) } } // There are no tags here, so S should not appear. type BugZ struct { BugA BugC BugY // Contains a tagged S field through BugD; should not dominate. } func TestDuplicatedFieldDisappears(t *testing.T) { v := BugZ{ BugA{"BugA"}, BugC{"BugC"}, BugY{ BugA{"nested BugA"}, BugD{"nested BugD"}, }, } b, err := Marshal(v) if err != nil { t.Fatal("Marshal:", err) } want := `{}` got := string(b) if got != want { t.Fatalf("Marshal: got %s want %s", got, want) } } func TestStdLibIssue10281(t *testing.T) { type Foo struct { N json.Number } x := Foo{json.Number(`invalid`)} b, err := Marshal(&x) if err == nil { t.Errorf("Marshal(&x) = %#q; want error", b) } } // golang.org/issue/8582 func TestEncodePointerString(t *testing.T) { type stringPointer struct { N *int64 `json:"n,string"` } var n int64 = 42 b, err := Marshal(stringPointer{N: &n}) if err != nil { t.Fatalf("Marshal: %v", err) } if got, want := string(b), `{"n":"42"}`; got != want { t.Errorf("Marshal = %s, want %s", got, want) } var back stringPointer err = Unmarshal(b, &back) if err != nil { t.Fatalf("Unmarshal: %v", err) } if back.N == nil { t.Fatalf("Unmarshaled nil N field") } if *back.N != 42 { t.Fatalf("*N = %d; want 42", *back.N) } } var encodeStringTests = []struct { in string out string }{ {"\x00", `"\u0000"`}, {"\x01", `"\u0001"`}, {"\x02", `"\u0002"`}, {"\x03", `"\u0003"`}, {"\x04", `"\u0004"`}, {"\x05", `"\u0005"`}, {"\x06", `"\u0006"`}, {"\x07", `"\u0007"`}, {"\x08", `"\u0008"`}, {"\x09", `"\t"`}, {"\x0a", `"\n"`}, {"\x0b", `"\u000b"`}, {"\x0c", `"\u000c"`}, {"\x0d", `"\r"`}, {"\x0e", `"\u000e"`}, {"\x0f", `"\u000f"`}, {"\x10", `"\u0010"`}, {"\x11", `"\u0011"`}, {"\x12", `"\u0012"`}, {"\x13", `"\u0013"`}, {"\x14", `"\u0014"`}, {"\x15", `"\u0015"`}, {"\x16", 
`"\u0016"`}, {"\x17", `"\u0017"`}, {"\x18", `"\u0018"`}, {"\x19", `"\u0019"`}, {"\x1a", `"\u001a"`}, {"\x1b", `"\u001b"`}, {"\x1c", `"\u001c"`}, {"\x1d", `"\u001d"`}, {"\x1e", `"\u001e"`}, {"\x1f", `"\u001f"`}, } func TestEncodeString(t *testing.T) { for _, tt := range encodeStringTests { b, err := Marshal(tt.in) if err != nil { t.Errorf("Marshal(%q): %v", tt.in, err) continue } out := string(b) if out != tt.out { t.Errorf("Marshal(%q) = %#q, want %#q", tt.in, out, tt.out) } } } type jsonbyte byte func (b jsonbyte) MarshalJSON() ([]byte, error) { return tenc(`{"JB":%d}`, b) } type textbyte byte func (b textbyte) MarshalText() ([]byte, error) { return tenc(`TB:%d`, b) } type jsonint int func (i jsonint) MarshalJSON() ([]byte, error) { return tenc(`{"JI":%d}`, i) } type textint int func (i textint) MarshalText() ([]byte, error) { return tenc(`TI:%d`, i) } func tenc(format string, a ...interface{}) ([]byte, error) { var buf bytes.Buffer _, _ = fmt.Fprintf(&buf, format, a...) return buf.Bytes(), nil } // Issue 13783 func TestEncodeBytekind(t *testing.T) { testdata := []struct { data interface{} want string }{ {byte(7), "7"}, {jsonbyte(7), `{"JB":7}`}, {textbyte(4), `"TB:4"`}, {jsonint(5), `{"JI":5}`}, {textint(1), `"TI:1"`}, {[]byte{0, 1}, `"AAE="`}, {[]jsonbyte{0, 1}, `[{"JB":0},{"JB":1}]`}, {[][]jsonbyte{{0, 1}, {3}}, `[[{"JB":0},{"JB":1}],[{"JB":3}]]`}, {[]textbyte{2, 3}, `["TB:2","TB:3"]`}, {[]jsonint{5, 4}, `[{"JI":5},{"JI":4}]`}, {[]textint{9, 3}, `["TI:9","TI:3"]`}, {[]int{9, 3}, `[9,3]`}, } for _, d := range testdata { js, err := Marshal(d.data) if err != nil { t.Error(err) continue } got, want := string(js), d.want if got != want { t.Errorf("got %s, want %s", got, want) } } } // https://golang.org/issue/33675 func TestNilMarshalerTextMapKey(t *testing.T) { b, err := Marshal(map[*unmarshalerText]int{ (*unmarshalerText)(nil): 1, }) if err != nil { t.Fatalf("Failed to Marshal *text.Marshaler: %v", err) } const want = `{"":1}` if string(b) != want { 
t.Errorf("Marshal map with *text.Marshaler keys: got %#q, want %#q", b, want) } } var re = regexp.MustCompile // syntactic checks on form of marshaled floating point numbers. var badFloatREs = []*regexp.Regexp{ re(`p`), // no binary exponential notation re(`^\+`), // no leading + sign re(`^-?0[^.]`), // no unnecessary leading zeros re(`^-?\.`), // leading zero required before decimal point re(`\.(e|$)`), // no trailing decimal re(`\.[0-9]+0(e|$)`), // no trailing zero in fraction re(`^-?(0|[0-9]{2,})\..*e`), // exponential notation must have normalized mantissa re(`e[+-]0`), // exponent must not have leading zeros re(`e-[1-6]$`), // not tiny enough for exponential notation re(`e+(.|1.|20)$`), // not big enough for exponential notation re(`^-?0\.0000000`), // too tiny, should use exponential notation re(`^-?[0-9]{22}`), // too big, should use exponential notation re(`[1-9][0-9]{16}[1-9]`), // too many significant digits in integer re(`[1-9][0-9.]{17}[1-9]`), // too many significant digits in decimal } func TestMarshalFloat(t *testing.T) { t.Parallel() nfail := 0 test := func(f float64, bits int) { vf := interface{}(f) if bits == 32 { f = float64(float32(f)) // round vf = float32(f) } bout, err := Marshal(vf) if err != nil { t.Errorf("Marshal(%T(%g)): %v", vf, vf, err) nfail++ return } out := string(bout) // result must convert back to the same float g, err := strconv.ParseFloat(out, bits) if err != nil { t.Errorf("Marshal(%T(%g)) = %q, cannot parse back: %v", vf, vf, out, err) nfail++ return } if f != g { t.Errorf("Marshal(%T(%g)) = %q (is %g, not %g)", vf, vf, out, float32(g), vf) nfail++ return } for _, re := range badFloatREs { if re.MatchString(out) { t.Errorf("Marshal(%T(%g)) = %q, must not match /%s/", vf, vf, out, re) nfail++ return } } } var ( bigger = math.Inf(+1) smaller = math.Inf(-1) ) var digits = "1.2345678901234567890123" for i := len(digits); i >= 2; i-- { if testing.Short() && i < len(digits)-4 { break } for exp := -30; exp <= 30; exp++ { for _, 
sign := range "+-" { for bits := 32; bits <= 64; bits += 32 { s := fmt.Sprintf("%c%se%d", sign, digits[:i], exp) f, err := strconv.ParseFloat(s, bits) if err != nil { log.Fatal(err) } next := math.Nextafter if bits == 32 { next = func(g, h float64) float64 { return float64(math.Nextafter32(float32(g), float32(h))) } } test(f, bits) test(next(f, bigger), bits) test(next(f, smaller), bits) if nfail > 50 { t.Fatalf("stopping test early") } } } } } test(0, 64) test(math.Copysign(0, -1), 64) test(0, 32) test(math.Copysign(0, -1), 32) } func TestMarshalRawMessageValue(t *testing.T) { type ( T1 struct { M json.RawMessage `json:",omitempty"` } T2 struct { M *json.RawMessage `json:",omitempty"` } ) var ( rawNil = json.RawMessage(nil) rawEmpty = json.RawMessage([]byte{}) rawText = json.RawMessage(`"foo"`) ) tests := []struct { in interface{} want string ok bool }{ // Test with nil RawMessage. {rawNil, "null", true}, {&rawNil, "null", true}, {[]interface{}{rawNil}, "[null]", true}, {&[]interface{}{rawNil}, "[null]", true}, {[]interface{}{&rawNil}, "[null]", true}, {&[]interface{}{&rawNil}, "[null]", true}, {struct{ M json.RawMessage }{rawNil}, `{"M":null}`, true}, {&struct{ M json.RawMessage }{rawNil}, `{"M":null}`, true}, {struct{ M *json.RawMessage }{&rawNil}, `{"M":null}`, true}, {&struct{ M *json.RawMessage }{&rawNil}, `{"M":null}`, true}, {map[string]interface{}{"M": rawNil}, `{"M":null}`, true}, {&map[string]interface{}{"M": rawNil}, `{"M":null}`, true}, {map[string]interface{}{"M": &rawNil}, `{"M":null}`, true}, {&map[string]interface{}{"M": &rawNil}, `{"M":null}`, true}, {T1{rawNil}, "{}", true}, {T2{&rawNil}, `{"M":null}`, true}, {&T1{rawNil}, "{}", true}, {&T2{&rawNil}, `{"M":null}`, true}, // Test with empty, but non-nil, RawMessage. 
{rawEmpty, "", false}, {&rawEmpty, "", false}, {[]interface{}{rawEmpty}, "", false}, {&[]interface{}{rawEmpty}, "", false}, {[]interface{}{&rawEmpty}, "", false}, {&[]interface{}{&rawEmpty}, "", false}, {struct{ X json.RawMessage }{rawEmpty}, "", false}, {&struct{ X json.RawMessage }{rawEmpty}, "", false}, {struct{ X *json.RawMessage }{&rawEmpty}, "", false}, {&struct{ X *json.RawMessage }{&rawEmpty}, "", false}, {map[string]interface{}{"nil": rawEmpty}, "", false}, {&map[string]interface{}{"nil": rawEmpty}, "", false}, {map[string]interface{}{"nil": &rawEmpty}, "", false}, {&map[string]interface{}{"nil": &rawEmpty}, "", false}, {T1{rawEmpty}, "{}", true}, {T2{&rawEmpty}, "", false}, {&T1{rawEmpty}, "{}", true}, {&T2{&rawEmpty}, "", false}, // Test with RawMessage with some text. // // The tests below marked with Issue6458 used to generate "ImZvbyI=" instead "foo". // This behavior was intentionally changed in Go 1.8. // See https://golang.org/issues/14493#issuecomment-255857318 {rawText, `"foo"`, true}, // Issue6458 {&rawText, `"foo"`, true}, {[]interface{}{rawText}, `["foo"]`, true}, // Issue6458 {&[]interface{}{rawText}, `["foo"]`, true}, // Issue6458 {[]interface{}{&rawText}, `["foo"]`, true}, {&[]interface{}{&rawText}, `["foo"]`, true}, {struct{ M json.RawMessage }{rawText}, `{"M":"foo"}`, true}, // Issue6458 {&struct{ M json.RawMessage }{rawText}, `{"M":"foo"}`, true}, {struct{ M *json.RawMessage }{&rawText}, `{"M":"foo"}`, true}, {&struct{ M *json.RawMessage }{&rawText}, `{"M":"foo"}`, true}, {map[string]interface{}{"M": rawText}, `{"M":"foo"}`, true}, // Issue6458 {&map[string]interface{}{"M": rawText}, `{"M":"foo"}`, true}, // Issue6458 {map[string]interface{}{"M": &rawText}, `{"M":"foo"}`, true}, {&map[string]interface{}{"M": &rawText}, `{"M":"foo"}`, true}, {T1{rawText}, `{"M":"foo"}`, true}, // Issue6458 {T2{&rawText}, `{"M":"foo"}`, true}, {&T1{rawText}, `{"M":"foo"}`, true}, {&T2{&rawText}, `{"M":"foo"}`, true}, } for i, tt := range tests { b, err := 
Marshal(tt.in) if ok := err == nil; ok != tt.ok { if err != nil { t.Errorf("test %d, unexpected failure: %v", i, err) } else { t.Errorf("test %d, unexpected success", i) } } if got := string(b); got != tt.want { t.Errorf("test %d, Marshal(%#v) = %q, want %q", i, tt.in, got, tt.want) } } } type marshalPanic struct{} func (marshalPanic) MarshalJSON() ([]byte, error) { panic(0xdead) } func TestMarshalPanic(t *testing.T) { defer func() { if got := recover(); !reflect.DeepEqual(got, 0xdead) { t.Errorf("panic() = (%T)(%v), want 0xdead", got, got) } }() _, _ = Marshal(&marshalPanic{}) t.Error("Marshal should have panicked") } //goland:noinspection NonAsciiCharacters func TestMarshalUncommonFieldNames(t *testing.T) { v := struct { A0, À, Aβ int }{} b, err := Marshal(v) if err != nil { t.Fatal("Marshal:", err) } want := `{"A0":0,"À":0,"Aβ":0}` got := string(b) if got != want { t.Fatalf("Marshal: got %s want %s", got, want) } } type DummyMarshalerError struct { Type reflect.Type Err error SourceFunc string } func (self *DummyMarshalerError) err() *json.MarshalerError { return (*json.MarshalerError)(unsafe.Pointer(self)) } func TestMarshalerError(t *testing.T) { s := "test variable" st := reflect.TypeOf(s) errText := "json: test error" tests := []struct { err *json.MarshalerError want string }{ { (&DummyMarshalerError{st, fmt.Errorf(errText), ""}).err(), "json: error calling MarshalJSON for type " + st.String() + ": " + errText, }, { (&DummyMarshalerError{st, fmt.Errorf(errText), "TestMarshalerError"}).err(), "json: error calling TestMarshalerError for type " + st.String() + ": " + errText, }, } for i, tt := range tests { got := tt.err.Error() if got != tt.want { t.Errorf("MarshalerError test %d, got: %s, want: %s", i, got, tt.want) } } } func TestMarshalNullNil(t *testing.T) { var v = struct { A []int B map[string]int }{} o, e := Marshal(v) assert.Nil(t, e) assert.Equal(t, `{"A":null,"B":null}`, string(o)) o, e = Config{ NoNullSliceOrMap: true, }.Froze().Marshal(v) 
assert.Nil(t, e) assert.Equal(t, `{"A":[],"B":{}}`, string(o)) } func TestEncoder_LongestInvalidUtf8(t *testing.T) { for _, data := range []string{ "\"" + strings.Repeat("\x80", 4096) + "\"", "\"" + strings.Repeat("\x80", 4095) + "\"", "\"" + strings.Repeat("\x80", 4097) + "\"", "\"" + strings.Repeat("\x80", 12345) + "\"", } { testEncodeInvalidUtf8(t, []byte(data)) } } func testEncodeInvalidUtf8(t *testing.T, data []byte) { jgot, jerr := json.Marshal(data) sgot, serr := ConfigStd.Marshal(data) assert.Equal(t, serr != nil, jerr != nil) if jerr == nil { assert.Equal(t, sgot, jgot) } } func TestEncoder_RandomInvalidUtf8(t *testing.T) { nums := 1000 maxLen := 1000 for i := 0; i < nums; i++ { testEncodeInvalidUtf8(t, genRandJsonBytes(maxLen)) testEncodeInvalidUtf8(t, genRandJsonRune(maxLen)) } } func TestMarshalInfOrNan(t *testing.T) { tests := []interface{}{ math.Inf(1), math.Inf(-1), math.NaN(), float32(math.Inf(1)), float32(math.Inf(-1)), float32(math.NaN()), []interface{}{math.Inf(1), math.Inf(-1), math.NaN(), float32(math.Inf(1)), float32(math.Inf(-1)), float32(math.NaN())}, []float64{math.Inf(1), math.Inf(-1), math.NaN()}, []float32{float32(math.Inf(1)), float32(math.Inf(-1)), float32(math.NaN())}, } allowNanInf := Config{ EncodeNullForInfOrNan: true, }.Froze() for _, tt := range tests { b, err := allowNanInf.Marshal(tt) assert.Nil(t, err) if len(b) == 4 { assert.Equal(t, string(b), "null") } else { println(string(b)) } b, err = Marshal(tt) assert.NotNil(t, err) assert.True(t, strings.Contains(err.Error(), "json: unsupported value: NaN or ±Infinite")) } } ================================================ FILE: encoder/encoder_compat.go ================================================ //go:build (!amd64 && !arm64) || go1.27 || !go1.17 || (arm64 && !go1.20) // +build !amd64,!arm64 go1.27 !go1.17 arm64,!go1.20 /* * Copyright 2023 ByteDance Inc. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package encoder import ( "bytes" "encoding/json" "io" "reflect" "github.com/bytedance/sonic/internal/compat" "github.com/bytedance/sonic/option" ) func init() { compat.Warn("sonic/encoder") } // EnableFallback indicates if encoder use fallback const EnableFallback = true // Options is a set of encoding options. type Options uint64 const ( bitSortMapKeys = iota bitEscapeHTML bitCompactMarshaler bitNoQuoteTextMarshaler bitNoNullSliceOrMap bitValidateString bitNoValidateJSONMarshaler bitNoEncoderNewline // used for recursive compile bitPointerValue = 63 ) const ( // SortMapKeys indicates that the keys of a map needs to be sorted // before serializing into JSON. // WARNING: This hurts performance A LOT, USE WITH CARE. SortMapKeys Options = 1 << bitSortMapKeys // EscapeHTML indicates encoder to escape all HTML characters // after serializing into JSON (see https://pkg.go.dev/encoding/json#HTMLEscape). // WARNING: This hurts performance A LOT, USE WITH CARE. 
EscapeHTML Options = 1 << bitEscapeHTML // CompactMarshaler indicates that the output JSON from json.Marshaler // is always compact and needs no validation CompactMarshaler Options = 1 << bitCompactMarshaler // NoQuoteTextMarshaler indicates that the output text from encoding.TextMarshaler // is always escaped string and needs no quoting NoQuoteTextMarshaler Options = 1 << bitNoQuoteTextMarshaler // NoNullSliceOrMap indicates all empty Array or Object are encoded as '[]' or '{}', // instead of 'null' NoNullSliceOrMap Options = 1 << bitNoNullSliceOrMap // ValidateString indicates that encoder should validate the input string // before encoding it into JSON. ValidateString Options = 1 << bitValidateString // NoValidateJSONMarshaler indicates that the encoder should not validate the output string // after encoding the JSONMarshaler to JSON. NoValidateJSONMarshaler Options = 1 << bitNoValidateJSONMarshaler // NoEncoderNewline indicates that the encoder should not add a newline after every message NoEncoderNewline Options = 1 << bitNoEncoderNewline // CompatibleWithStd is used to be compatible with std encoder. CompatibleWithStd Options = SortMapKeys | EscapeHTML | CompactMarshaler ) // Encoder represents a specific set of encoder configurations. type Encoder struct { Opts Options prefix string indent string } // Encode returns the JSON encoding of v. func (self *Encoder) Encode(v interface{}) ([]byte, error) { if self.indent != "" || self.prefix != "" { return EncodeIndented(v, self.prefix, self.indent, self.Opts) } return Encode(v, self.Opts) } // SortKeys enables the SortMapKeys option. 
func (self *Encoder) SortKeys() *Encoder { self.Opts |= SortMapKeys return self } // SetEscapeHTML specifies if option EscapeHTML opens func (self *Encoder) SetEscapeHTML(f bool) { if f { self.Opts |= EscapeHTML } else { self.Opts &= ^EscapeHTML } } // SetValidateString specifies if option ValidateString opens func (self *Encoder) SetValidateString(f bool) { if f { self.Opts |= ValidateString } else { self.Opts &= ^ValidateString } } // SetNoValidateJSONMarshaler specifies if option NoValidateJSONMarshaler opens func (self *Encoder) SetNoValidateJSONMarshaler(f bool) { if f { self.Opts |= NoValidateJSONMarshaler } else { self.Opts &= ^NoValidateJSONMarshaler } } // SetNoEncoderNewline specifies if option NoEncoderNewline opens func (self *Encoder) SetNoEncoderNewline(f bool) { if f { self.Opts |= NoEncoderNewline } else { self.Opts &= ^NoEncoderNewline } } // SetCompactMarshaler specifies if option CompactMarshaler opens func (self *Encoder) SetCompactMarshaler(f bool) { if f { self.Opts |= CompactMarshaler } else { self.Opts &= ^CompactMarshaler } } // SetNoQuoteTextMarshaler specifies if option NoQuoteTextMarshaler opens func (self *Encoder) SetNoQuoteTextMarshaler(f bool) { if f { self.Opts |= NoQuoteTextMarshaler } else { self.Opts &= ^NoQuoteTextMarshaler } } // SetIndent instructs the encoder to format each subsequent encoded // value as if indented by the package-level function EncodeIndent(). // Calling SetIndent("", "") disables indentation. func (enc *Encoder) SetIndent(prefix, indent string) { enc.prefix = prefix enc.indent = indent } // Quote returns the JSON-quoted version of s. func Quote(s string) string { /* check for empty string */ if s == "" { return `""` } out, _ := json.Marshal(s) return string(out) } // Encode returns the JSON encoding of val, encoded with opts. func Encode(val interface{}, opts Options) ([]byte, error) { return json.Marshal(val) } // EncodeInto is like Encode but uses a user-supplied buffer instead of allocating // a new one. 
func EncodeInto(buf *[]byte, val interface{}, opts Options) error { if buf == nil { panic("user-supplied buffer buf is nil") } w := bytes.NewBuffer(*buf) enc := json.NewEncoder(w) enc.SetEscapeHTML((opts & EscapeHTML) != 0) err := enc.Encode(val) *buf = w.Bytes() l := len(*buf) if l > 0 && (opts&NoEncoderNewline != 0) && (*buf)[l-1] == '\n' { *buf = (*buf)[:l-1] } return err } // HTMLEscape appends to dst the JSON-encoded src with <, >, &, U+2028 and U+2029 // characters inside string literals changed to \u003c, \u003e, \u0026, \u2028, \u2029 // so that the JSON will be safe to embed inside HTML