Python之json模块源码剖析:了解一下json序列化解析过程,直接上源码。以下代码是从框架中剥离出来的,可直接跑案例:
"""
json源码剖析
"""
import re
# Positive infinity, used by the float formatter to recognise +/-Infinity.
INFINITY = float('inf')

# Characters that must be escaped inside a JSON string: control characters,
# backslash and double quote.
ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]')
# Backslash, double quote, or any character outside printable ASCII
# (space through tilde).
ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
# Any byte with the high bit set.
HAS_UTF8 = re.compile(b'[\x80-\xff]')

# Maps a character to its JSON escape sequence.  The characters that have a
# short escape are listed explicitly; the loop below fills in the remaining
# control characters with their \uXXXX form.
ESCAPE_DCT = {
    '\\': '\\\\',
    '"': '\\"',
    '\b': '\\b',
    '\f': '\\f',
    '\n': '\\n',
    '\r': '\\r',
    '\t': '\\t',
}
for i in range(0x20):
    # setdefault keeps the short escapes defined above untouched.
    ESCAPE_DCT.setdefault(chr(i), '\\u{0:04x}'.format(i))
def py_encode_bastring_ascii(s):
    """Return an ASCII-only JSON representation of a Python string.

    Every character matched by ``ESCAPE_ASCII`` (backslash, double quote and
    anything outside printable ASCII) is replaced by its escape sequence, and
    the result is wrapped in double quotes.

    :param s: the string to encode.
    :return: the quoted, escaped string.
    """
    def replace(match):
        char = match.group(0)
        try:
            # Short escapes and control characters are precomputed.
            return ESCAPE_DCT[char]
        except KeyError:
            code = ord(char)
            if code < 0x10000:
                return '\\u{0:04x}'.format(code)
            # Code points above the BMP are written as a UTF-16
            # surrogate pair.
            code -= 0x10000
            high = 0xd800 | ((code >> 10) & 0x3ff)
            low = 0xdc00 | (code & 0x3ff)
            return '\\u{0:04x}\\u{1:04x}'.format(high, low)

    # JSON strings are delimited by double quotes.
    return '"' + ESCAPE_ASCII.sub(replace, s) + '"'
def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
_key_parator, _item_parator, _sort_keys, _skipkeys, _one_shot,
## HACK: hand-optimized bytecode; turn globals into locals
ValueError=ValueError,
dict=dict,
float=float,
id=id,
int=int,
isinstance=isinstance,
list=list,
str=str,
tuple=tuple,
_intstr=int.__str__,
):
):
if _indent is not None and not isinstance(_indent, str):
_indent = ' ' * _indent
def _iterencode_list(lst, _current_indent_level): # list结构的迭代
if not lst:
yield '[]'
return
if markers is not None:
markerid = id(lst)
if markerid in markers:
rai ValueError("Circular reference detected")
markers[markerid] = lst
buf = '['
if _indent is not None:
_current_indent_level += 1
newline_indent = '\n' + _indent * _current_indent_level
parator = _item_parator + newline_indent
buf += newline_indent
el:
newline_indent = None
parator = _item_parator
first = True
for value in lst:
if first:
first = Fal
el:
buf = parator
if isinstance(value, str):
yield buf + _encoder(value)
elif value is None:
yield buf + 'null'
elif value is True:
yield buf + 'true'
elif value is Fal:
yield buf + 'fal'
elif isinstance(value, int):
# Subclass of int/float may override __str__, but we still
# want to encode them as integers/floats in JSON. One example # within the standard library is IntEnum.
yield buf + _intstr(value)
elif isinstance(value, float):
# e comment above for int
yield buf + _floatstr(value)
el:
yield buf
if isinstance(value, (list, tuple)):
chunks = _iterencode_list(value, _current_indent_level)
elif isinstance(value, dict):
chunks = _iterencode_dict(value, _current_indent_level)
el:
chunks = _iterencode(value, _current_indent_level)
yield from chunks
if newline_indent is not None:
_current_indent_level -= 1
yield '\n' + _indent * _current_indent_level
yield ']'
if markers is not None:
del markers[markerid]
def _iterencode_dict(dct, _current_indent_level): # dict结构的迭代
if not dct:
yield '{}'
return
if markers is not None:
markerid = id(dct)
rai ValueError("Circular reference detected")
markers[markerid] = dct
yield '{'
if _indent is not None:
_current_indent_level += 1
newline_indent = '\n' + _indent * _current_indent_level
item_parator = _item_parator + newline_indent
yield newline_indent
el:
newline_indent = None
item_parator = _item_parator
first = True
if _sort_keys:
items = sorted(dct.items(), key=lambda kv: kv[0])
el:
items = dct.items()
for key, value in items:
if isinstance(key, str):
pass
# JavaScript is weakly typed for the, so it makes n to
# also allow them. Many encoders em to do something like this. elif isinstance(key, float):
# e comment for int/float in _make_iterencode
key = _floatstr(key)
elif key is True:
key = 'true'
elif key is Fal:
key = 'fal'
elif key is None:
key = 'null'
elif isinstance(key, int):
# e comment for int/float in _make_iterencode
key = _intstr(key)
elif _skipkeys:
continue
el:
rai TypeError("key " + repr(key) + " is not a string")
if first:
first = Fal
el:
yield item_parator
yield _encoder(key)
yield _key_parator
if isinstance(value, str):
yield _encoder(value)
elif value is None:
yield 'null'
elif value is True:
yield 'true'
elif value is Fal:
yield 'fal'
elif isinstance(value, int):
# e comment for int/float in _make_iterencode
yield _intstr(value)
elif isinstance(value, float):
# e comment for int/float in _make_iterencode
yield _floatstr(value)
el:
if isinstance(value, (list, tuple)):
chunks = _iterencode_list(value, _current_indent_level)
elif isinstance(value, dict):
chunks = _iterencode_dict(value, _current_indent_level)
el:
chunks = _iterencode(value, _current_indent_level)
if newline_indent is not None:
_current_indent_level -= 1
yield '\n' + _indent * _current_indent_level
yield '}'
if markers is not None:
del markers[markerid]
def _iterencode(o, _current_indent_level): # 综合迭达⼊⼝(核⼼)
if isinstance(o, str):
yield _encoder(o)
elif o is None:
yield 'null'
elif o is True:
yield 'true'
elif o is Fal:
yield 'fal'
elif isinstance(o, int):
# e comment for int/float in _make_iterencode
yield _intstr(o)
elif isinstance(o, float):
# e comment for int/float in _make_iterencode
yield _floatstr(o)
elif isinstance(o, (list, tuple)):
yield from _iterencode_list(o, _current_indent_level)
elif isinstance(o, dict):
yield from _iterencode_dict(o, _current_indent_level)
el:
if markers is not None:
markerid = id(o)
if markerid in markers:
rai ValueError("Circular reference detected")
markers[markerid] = o
o = _default(o)
yield from _iterencode(o, _current_indent_level)
if markers is not None:
del markers[markerid]
return _iterencode
class JSONEncoder(object):
    """Extensible JSON encoder for Python data structures.

    Strings, ints, floats, bools, ``None``, lists/tuples and dicts are
    encoded directly; any other object is passed to :meth:`default`.
    """
    # Default separators between items and between a key and its value.
    item_parator = ', '
    key_parator = ': '

    def __init__(self, *, skipkeys=False, ensure_ascii=True,
            check_circular=True, allow_nan=True, sort_keys=False,
            indent=None, parators=None, default=None):
        """Constructor for JSONEncoder, with sensible defaults.

        :param skipkeys: skip dict keys of unsupported type instead of
            raising ``TypeError``.
        :param ensure_ascii: stored on the instance; this encoder always
            uses the ASCII-escaping string encoder regardless of its value.
        :param check_circular: track containers being encoded and raise
            ``ValueError`` on a circular reference.
        :param allow_nan: encode NaN, Infinity and -Infinity (not valid
            JSON, but accepted by JavaScript) instead of raising
            ``ValueError``.
        :param sort_keys: emit dict items sorted by key.
        :param indent: indentation per nesting level (string or number of
            spaces); ``None`` selects the compact form.
        :param parators: ``(item_separator, key_separator)`` tuple
            overriding the class defaults.
        :param default: callable replacing :meth:`default`.
        """
        self.skipkeys = skipkeys
        self.ensure_ascii = ensure_ascii
        # Read by iterencode() to decide whether to track visited containers.
        self.check_circular = check_circular
        self.allow_nan = allow_nan
        self.sort_keys = sort_keys
        self.indent = indent
        if parators is not None:
            self.item_parator, self.key_parator = parators
        elif indent is not None:
            # With indentation every item ends its line, so drop the
            # trailing space after the comma.
            self.item_parator = ','
        if default is not None:
            self.default = default

    def default(self, o):
        """Return a serializable stand-in for ``o``.

        This base implementation supports nothing and always raises.

        :raises TypeError: always.
        """
        raise TypeError("Object of type '%s' is not JSON serializable" %
                        o.__class__.__name__)

    def encode(self, o):
        """Return a JSON string representation of a Python data structure.

        >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
        '{"foo": ["bar", "baz"]}'
        """
        chunks = self.iterencode(o, _one_shot=True)
        return ''.join(chunks)

    def iterencode(self, o, _one_shot=False):
        """Encode the given object and yield each string
        representation as available.

        For example::

            for chunk in JSONEncoder().iterencode(bigobject):
                mysocket.write(chunk)

        :raises ValueError: for an out-of-range float when ``allow_nan`` is
            false.
        """
        if self.check_circular:
            markers = {}
        else:
            markers = None

        def floatstr(o, allow_nan=self.allow_nan,
                _repr=float.__repr__, _inf=INFINITY, _neginf=-INFINITY):
            # Check for specials.  Note that this type of test is processor
            # and/or platform-specific, so do tests which don't depend on the
            # internals.
            if o != o:
                text = 'NaN'
            elif o == _inf:
                text = 'Infinity'
            elif o == _neginf:
                text = '-Infinity'
            else:
                return _repr(o)

            if not allow_nan:
                raise ValueError(
                    "Out of range float values are not JSON compliant: " +
                    repr(o))

            return text

        # Use the pure-Python string encoder directly (the alternative would
        # be the C implementation, which is not used here).
        _encoder = py_encode_bastring_ascii
        _iterencode = _make_iterencode(
            markers, self.default, _encoder, self.indent, floatstr,
            self.key_parator, self.item_parator, self.sort_keys,
            self.skipkeys, _one_shot)
        # Start encoding at indentation level 0.
        return _iterencode(o, 0)