常见的序列化方式性能测试

常见的序列化方式性能测试

json

  • 测试
import json
def gen_json_inst():
    """Build the sample record used by the JSON benchmark.

    Returns:
        A new dict with keys ``f01`` .. ``f10`` (in that order): two floats,
        four ints, a bool, two strings and a nested dict holding a
        one-element string list.
    """
    record = dict.fromkeys(("f01", "f02"), 0.1)
    record.update(dict.fromkeys(("f03", "f04", "f05", "f06"), 1))
    record["f07"] = True
    record.update(f08="1", f09="1")
    # The nested containers are created per call, so callers never share them.
    record["f10"] = {"f11": ["1"]}
    return record
def test_json():
    """Benchmark JSON encode/decode round trips of the sample record.

    Runs 100000 ``json.dumps`` + ``json.loads`` round trips, then prints the
    encoded length, the elapsed time in whole milliseconds, and the encoded
    text itself.
    """
    inst = gen_json_inst()
    data = json.dumps(inst)
    # perf_counter() is monotonic and high-resolution; time.time() is a
    # wall clock that can jump if the system clock is adjusted mid-run.
    start = time.perf_counter()
    for _ in range(100000):
        json.loads(json.dumps(inst))
    elapsed_ms = int((time.perf_counter() - start) * 1000)
    print("data len:%d, time elapsed:%dms" % (len(data), elapsed_ms))
    print(data)
  • 结果
data len:124, time elapsed:1118ms
{"f01": 0.1, "f02": 0.1, "f03": 1, "f04": 1, "f05": 1, "f06": 1, "f07": true, "f08": "1", "f09": "1", "f10": {"f11": ["1"]}}

protobuf

  • 数据模板
// test.proto
syntax = "proto3";
// Nested message used as the type of SearchRequest.f10; it holds a list of
// strings.
message Other {
    repeated string f11 = 1;
}
// Benchmark record: one field per scalar kind (float, double, signed and
// unsigned 32/64-bit integers, bool, string, bytes) plus a nested message.
message SearchRequest {
  float     f01 = 1;
  double    f02 = 2;
  int32     f03 = 3;
  int64     f04 = 4;
  uint32    f05 = 5;
  uint64    f06 = 6;
  // Field numbers jump from 6 to 13 here; numbers 7-12 are unused.
  bool      f07 = 13;
  string    f08 = 14;
  bytes     f09 = 15;
  // Field number 16 is encoded with a two-byte tag (the "8201" in the hex
  // dump of the serialized result).
  Other     f10 = 16;
}
protoc test.proto --python_out=. # 编译生成test_pb2.py
  • 测试
import time
from google.protobuf.json_format import Parse
import test_pb2
def gen_pb_inst():
    """Build the sample ``SearchRequest`` protobuf message for the benchmark.

    Returns:
        A ``test_pb2.SearchRequest`` with the scalar fields ``f01`` .. ``f09``
        populated and one string appended to the nested ``f10.f11`` list.
    """
    request = test_pb2.SearchRequest()
    scalar_fields = (
        ("f01", 0.1),
        ("f02", 0.1),
        ("f03", 1),
        ("f04", 1),
        ("f05", 1),
        ("f06", 1),
        ("f07", True),
        ("f08", "1"),
        ("f09", b"1"),
    )
    for field_name, value in scalar_fields:
        setattr(request, field_name, value)
    # f11 is a repeated field on the nested message, so it is appended to
    # rather than assigned.
    request.f10.f11.append("1")
    return request
def test_pb():
    """Benchmark protobuf serialize/parse round trips of the sample message.

    Runs 100000 ``SerializeToString`` + ``ParseFromString`` round trips (each
    parse goes into a fresh ``SearchRequest``), then prints the encoded
    length, the elapsed time in whole milliseconds, and the encoded bytes as
    hex.
    """
    inst = gen_pb_inst()
    data = inst.SerializeToString()
    # perf_counter() is monotonic and high-resolution; time.time() is a
    # wall clock that can jump if the system clock is adjusted mid-run.
    start = time.perf_counter()
    for _ in range(100000):
        encoded = inst.SerializeToString()
        decoded = test_pb2.SearchRequest()
        decoded.ParseFromString(encoded)
    elapsed_ms = int((time.perf_counter() - start) * 1000)
    print("data len:%d, time elapsed:%dms" % (len(data), elapsed_ms))
    print(data.hex())
  • 结果
data len:36, time elapsed:498ms
0dcdcccc3d119a9999999999b93f180120012801300168017201317a01318201030a0131

capnproto

  • 数据模板
# test.capnp
@0x9e391fc8f3af545d;
# Nested struct used as the type of SearchRequest.f10; it holds a list of
# text strings.
struct Other {
    f11 @0  :List(Text);
}
# Benchmark record: one field per scalar kind (32/64-bit floats, signed and
# unsigned 32/64-bit integers, bool, text, raw data) plus a nested struct.
struct SearchRequest {
    f01 @0  :Float32;
    f02 @1  :Float64;
    f03 @2  :Int32;
    f04 @3  :Int64;
    f05 @4  :UInt32;
    f06 @5  :UInt64;
    f07 @6  :Bool;
    f08 @7  :Text;
    # Data is the byte-string type; the Python test assigns b"1" to it.
    f09 @8  :Data;
    f10 @9  :Other;
}
  • 测试
import capnp
capnp.remove_import_hook()
def gen_capnp_inst():
    """Build the sample ``SearchRequest`` Cap'n Proto message for the benchmark.

    Loads the schema from ``test.capnp`` and populates every field.

    Returns:
        A new ``SearchRequest`` message builder with ``f01`` .. ``f10`` set.
    """
    schema = capnp.load("test.capnp")
    request = schema.SearchRequest.new_message()
    # Fields are assigned in declaration order, matching the original code.
    scalar_fields = (
        ("f01", 0.1),
        ("f02", 0.1),
        ("f03", 1),
        ("f04", 1),
        ("f05", 1),
        ("f06", 1),
        ("f07", True),
        ("f08", "1"),
        ("f09", b"1"),
    )
    for field_name, value in scalar_fields:
        setattr(request, field_name, value)
    request.f10.f11 = ["1"]
    return request
def test_capnp():
    """Benchmark Cap'n Proto packed encode/decode round trips.

    Runs 100000 ``to_bytes_packed`` + ``from_bytes_packed`` round trips, then
    prints the packed length, the elapsed time in whole milliseconds, and the
    packed bytes as hex.
    """
    # Named ``schema`` rather than ``test_capnp`` so the local does not
    # shadow this function's own name.
    schema = capnp.load("test.capnp")
    inst = gen_capnp_inst()
    data = inst.to_bytes_packed()
    # perf_counter() is monotonic and high-resolution; time.time() is a
    # wall clock that can jump if the system clock is adjusted mid-run.
    start = time.perf_counter()
    for _ in range(100000):
        encoded = inst.to_bytes_packed()
        schema.SearchRequest.from_bytes_packed(encoded)
    elapsed_ms = int((time.perf_counter() - start) * 1000)
    print("data len:%d, time elapsed:%dms" % (len(data), elapsed_ms))
    print(data.hex())
  • 结果
data len:49, time elapsed:250ms
100e5005031fcdcccc3d01ff9a9999999999b93f000101110101010111091211090a4108010131013111010e1101120131

msgpack

  • 测试
import msgpack
def gen_msgpack_inst():
    """Build the sample record used by the msgpack benchmark.

    Returns:
        A new dict with keys ``f01`` .. ``f10`` (in that order): two floats,
        four ints, a bool, two strings and a nested dict holding a
        one-element string list.
    """
    # Built per call so every returned record owns its nested containers.
    nested = {"f11": ["1"]}
    return dict(
        f01=0.1,
        f02=0.1,
        f03=1,
        f04=1,
        f05=1,
        f06=1,
        f07=True,
        f08="1",
        f09="1",
        f10=nested,
    )
def test_msgpack():
    """Benchmark msgpack encode/decode round trips of the sample record.

    Runs 100000 ``msgpack.dumps`` + ``msgpack.loads`` round trips, then prints
    the encoded length, the elapsed time in whole milliseconds, and the
    encoded bytes as hex.
    """
    inst = gen_msgpack_inst()
    data = msgpack.dumps(inst)
    # perf_counter() is monotonic and high-resolution; time.time() is a
    # wall clock that can jump if the system clock is adjusted mid-run.
    start = time.perf_counter()
    for _ in range(100000):
        msgpack.loads(msgpack.dumps(inst))
    elapsed_ms = int((time.perf_counter() - start) * 1000)
    print("data len:%d, time elapsed:%dms" % (len(data), elapsed_ms))
    print(data.hex())
  • 结果
data len:76, time elapsed:354ms
8aa3663031cb3fb999999999999aa3663032cb3fb999999999999aa366303301a366303401a366303501a366303601a3663037c3a3663038a131a3663039a131a366313081a366313191a131

flatbuffers

  • 数据模板
// test.fbs
// Nested table used as the type of SearchRequest.f10; it holds a vector of
// strings.
table Other {
  f11:[string];
}
// Benchmark record: one field per scalar kind (float, double, signed and
// unsigned 32/64-bit integers, bool, string, byte vector) plus a nested
// table.
table SearchRequest {
  f01:float;
  f02:double;
  f03:int;
  f04:long;
  f05:uint;
  f06:ulong;
  f07:bool;
  f08:string;
  // Byte payload, declared as a vector of unsigned bytes.
  f09:[ubyte];
  f10:Other;
}
flatc -p test.fbs # 生成Other.py SearchRequest.py
  • 开发
import flatbuffers
import SearchRequest, Other
def dump_flatbuf_inst(inst):
    """Serialize the sample ``SearchRequest`` into a FlatBuffers builder.

    Args:
        inst: The ``flatbuffers.Builder`` to write the message into.

    Returns:
        A ``(buffer, head)`` tuple: the builder's backing byte buffer
        (``inst.Bytes``) and the offset (``inst.Head()``) at which the
        finished message starts inside it.
    """
    # Strings, vectors and the nested table are all created before the
    # SearchRequest table is started; the statement order below is kept
    # exactly as it was because the builder writes them in sequence.
    f08_val = inst.CreateString("1")
    f11_0_val = inst.CreateString("1")

    # f09: a one-element byte vector holding the character "1".
    SearchRequest.StartF09Vector(inst, 1)
    inst.PrependByte(ord('1'))
    f09_val = inst.EndVector()

    # f10.f11: a one-element vector referencing the string created above.
    Other.StartF11Vector(inst, 1)
    inst.PrependUOffsetTRelative(f11_0_val)
    f11_val = inst.EndVector()

    Other.Start(inst)
    Other.AddF11(inst, f11_val)
    f10_val = Other.End(inst)

    # The previous version allocated a second, never-used
    # flatbuffers.Builder(0) here on every call; it has been removed.
    SearchRequest.Start(inst)
    SearchRequest.AddF01(inst, 0.1)
    SearchRequest.AddF02(inst, 0.1)
    SearchRequest.AddF03(inst, 1)
    SearchRequest.AddF04(inst, 1)
    SearchRequest.AddF05(inst, 1)
    SearchRequest.AddF06(inst, 1)
    SearchRequest.AddF07(inst, True)
    SearchRequest.AddF08(inst, f08_val)
    SearchRequest.AddF09(inst, f09_val)
    SearchRequest.AddF10(inst, f10_val)
    end = SearchRequest.End(inst)
    inst.Finish(end)
    return inst.Bytes, inst.Head()
def test_flatbuf():
    """Benchmark FlatBuffers build/read round trips of the sample message.

    Runs 10000 build + ``GetRootAs`` round trips, then prints the size of the
    serialized message, the elapsed time in whole milliseconds, and the
    message bytes as hex.
    """
    inst = flatbuffers.Builder(0)
    data, head = dump_flatbuf_inst(inst)
    # ``data`` is the builder's whole backing buffer; the finished message
    # only occupies the bytes from ``head`` onward. Slice (and copy) it now so
    # the reported size and hex dump describe the message itself rather than
    # the buffer capacity, and are unaffected by the builder reuse below.
    payload = bytes(data[head:])
    # perf_counter() is monotonic and high-resolution; time.time() is a
    # wall clock that can jump if the system clock is adjusted mid-run.
    start = time.perf_counter()
    # NOTE(review): the same builder is reused for every iteration without
    # being reset, so each pass appears to add another message to it --
    # confirm whether a fresh builder per iteration was intended.
    for _ in range(10000):
        encode, head = dump_flatbuf_inst(inst)
        SearchRequest.SearchRequest.GetRootAs(encode, head)
    elapsed_ms = int((time.perf_counter() - start) * 1000)
    print("data len:%d, time elapsed:%dms" % (len(payload), elapsed_ms))
    print(payload.hex())
  • 结果
    由于flatbuf目前没有优秀的python wrapper,现有实现都是用python模拟底层操作,因此在python下效率极低,这里不展示耗时结果。理论上flatbuf(原生实现)的效率应该高于上述几种方式,但是操作起来较复杂。

性能对比

prop json protobuf flatbuf capnproto msgpack
speed(ms) 1118 498 - 250 354
size 124 36 256 49 76
注: size单位为字节; flatbuf的256是Builder整个底层缓冲区(inst.Bytes)的长度,并非序列化后有效数据的实际长度。