更改不可JSON序列化的对象(例如datetime.datetime)的序列化格式很容易.
出于调试目的,我的需求是改变一些继承自dict和list等基本类型的自定义对象序列化为JSON格式的方式.代码:
import datetime
import json
def json_debug_handler(obj):
    """Debugging ``default=`` hook for ``json.dumps``.

    Prints the type of every object it receives, then returns a
    replacement value:

    * ``datetime.datetime`` -> ISO 8601 string
    * ``mDict`` / ``mList`` -> ``{'orig': obj, 'attrs': vars(obj)}``
    * anything else         -> ``None``
    """
    print("object received:")
    # The call form with a single argument prints the same thing on
    # Python 2 and Python 3; the bare ``print type(obj)`` statement only
    # parsed on Python 2.
    print(type(obj))
    print("\n\n")
    if isinstance(obj, datetime.datetime):
        return obj.isoformat()
    elif isinstance(obj, (mDict, mList)):
        return {'orig': obj, 'attrs': vars(obj)}
    else:
        return None
class mDict(dict):
    """A ``dict`` subclass; unlike plain dicts, instances accept attributes."""


class mList(list):
    """A ``list`` subclass; unlike plain lists, instances accept attributes."""
def test_debug_json():
    """Dump an ``mList``, an ``mDict`` and a datetime with
    ``json_debug_handler`` as the ``default`` hook and print the result."""
    games = mList(['mario', 'contra', 'tetris'])
    games.src = 'console'
    scores = mDict({'dp': 10, 'pk': 45})
    scores.processed = "unprocessed"
    test_json = {'games': games, 'scores': scores,
                 'date': datetime.datetime.now()}
    print(json.dumps(test_json, default=json_debug_handler))


if __name__ == '__main__':
    test_debug_json()
Run Code Online (Sandbox Code Playgroud)
输出:
{"date": "2013-05-07T01:03:13.098727", "games": ["mario", "contra", "tetris"], "scores": {"pk": 45, "dp": 10}}
期望的输出:
{"date": "2013-05-07T01:03:13.098727", "games": {"orig": ["mario", "contra", "tetris"], "attrs": {"src": "console"}}, "scores": {"orig": {"pk": 45, "dp": 10}, "attrs": {"processed": "unprocessed"}}}
default处理程序对可序列化的对象不起作用吗?如果不起作用,如何在不向所扩展的类添加toJSON方法的情况下覆盖这一行为?
此外,有这个版本的JSON编码器不起作用:
class JsonDebugEncoder(json.JSONEncoder):
    """JSONEncoder that customises serialization through ``default``.

    NOTE: the encoder only calls ``default`` for objects it cannot
    serialize natively, so the ``mDict``/``mList`` branch is not reached
    for those dict/list subclasses.
    """

    def default(self, obj):
        """Return a serializable stand-in for *obj*, or defer to the base
        class (which raises ``TypeError``) for unsupported types."""
        if isinstance(obj, datetime.datetime):
            return obj.isoformat()
        if isinstance(obj, (mDict, mList)):
            return {'orig': obj, 'attrs': vars(obj)}
        return json.JSONEncoder.default(self, obj)
Run Code Online (Sandbox Code Playgroud)
如果有借助pickle,__getstate__,__setstate__的变通办法,再对pickle.loads得到的对象使用json.dumps,我也愿意接受;我试过了,但没有成功.
Fas*_*tle 22
看来,要在给定的限制下实现你想要的行为,你需要稍微深入研究一下JSONEncoder类.下面我写了一个自定义的JSONEncoder,它覆盖了iterencode方法,以便把自定义的isinstance方法传给_make_iterencode.这不是世界上最干净的做法,但在现有选项中似乎是最好的,而且它把定制保持在最低限度.
# customencoder.py
from json.encoder import (
    INFINITY,
    JSONEncoder,
    _make_iterencode,
    c_make_encoder,
    encode_basestring,
    encode_basestring_ascii,
)

try:
    from json.encoder import FLOAT_REPR
except ImportError:  # not exported by newer versions of json.encoder
    FLOAT_REPR = float.__repr__
class CustomObjectEncoder(JSONEncoder):
    """JSONEncoder whose type checks can be overridden by subclasses.

    The pure-Python encoder built by ``_make_iterencode`` decides how to
    emit a value through ``isinstance`` checks.  This class hands it
    ``self.isinstance`` instead of the builtin, so a subclass can make
    selected objects (for example ``dict``/``list`` subclasses) fail those
    checks and have them routed to ``default``.

    NOTE(review): ``_make_iterencode`` and its ``isinstance`` keyword are
    private details of the standard library's ``json.encoder``; written
    against the Python 3 version of that function -- re-check when
    upgrading Python.
    """

    def isinstance(self, obj, cls):
        """Type check used by the encoder; the builtin unless overridden.

        Without this fallback, using the class directly (rather than a
        subclass that defines ``isinstance``) failed with AttributeError.
        """
        return isinstance(obj, cls)

    def iterencode(self, o, _one_shot=False):
        """Encode *o* and yield the JSON text in chunks.

        ``_one_shot`` is accepted for interface compatibility but ignored:
        the C encoder (``c_make_encoder``) cannot take a custom
        ``isinstance``, so the pure-Python encoder is always used.

        Raises:
            ValueError: for NaN/Infinity when ``allow_nan`` is false, or
                when a circular reference is detected.
        """
        _one_shot = False

        markers = {} if self.check_circular else None
        if self.ensure_ascii:
            _encoder = encode_basestring_ascii
        else:
            _encoder = encode_basestring

        # float.__repr__ is used directly so the block does not depend on
        # json.encoder.FLOAT_REPR.
        def floatstr(o, allow_nan=self.allow_nan,
                     _repr=float.__repr__, _inf=INFINITY,
                     _neginf=-INFINITY):
            # NaN is the only value that is not equal to itself.
            if o != o:
                text = 'NaN'
            elif o == _inf:
                text = 'Infinity'
            elif o == _neginf:
                text = '-Infinity'
            else:
                return _repr(o)
            if not allow_nan:
                raise ValueError(
                    "Out of range float values are not JSON compliant: " +
                    repr(o))
            return text

        # Hand the indent over as a string so the call does not rely on
        # _make_iterencode converting an integer indent itself.
        indent = self.indent
        if indent is not None and not isinstance(indent, str):
            indent = ' ' * indent

        _iterencode = _make_iterencode(
            markers, self.default, _encoder, indent, floatstr,
            self.key_separator, self.item_separator, self.sort_keys,
            self.skipkeys, _one_shot, isinstance=self.isinstance)
        return _iterencode(o, 0)
Run Code Online (Sandbox Code Playgroud)
您现在可以子类化CustomObjectEncoder,使其正确序列化您的自定义对象.CustomObjectEncoder还能做一些很酷的事情,比如处理嵌套对象.
# test.py
import json
import datetime
from customencoder import CustomObjectEncoder
class MyEncoder(CustomObjectEncoder):
    """Serialize ``mList``/``mDict`` as ``{"orig": ..., "attrs": ...}``."""

    def isinstance(self, obj, cls):
        """Report ``mList``/``mDict`` instances as matching no type.

        Failing every check keeps the encoder from treating them as plain
        lists/dicts, so they are passed to ``default`` instead.
        """
        if isinstance(obj, (mList, mDict)):
            return False
        return isinstance(obj, cls)

    def default(self, obj):
        """
        Defines custom serialization.

        To avoid circular references, any object that will always fail
        self.isinstance must be converted to something that is
        serializable here.
        """
        if isinstance(obj, datetime.datetime):
            return obj.isoformat()
        if isinstance(obj, mDict):
            # dict(obj)/list(obj) produce plain containers that pass
            # self.isinstance; returning obj itself would come straight
            # back to this method.
            return {"orig": dict(obj), "attrs": vars(obj)}
        if isinstance(obj, mList):
            return {"orig": list(obj), "attrs": vars(obj)}
        # Unsupported type: let the base class raise TypeError instead of
        # silently emitting null.
        return super(MyEncoder, self).default(obj)
class mList(list):
    """A ``list`` subclass; unlike plain lists, instances accept attributes."""


class mDict(dict):
    """A ``dict`` subclass; unlike plain dicts, instances accept attributes."""
def main():
    """Serialize nested ``mList``/``mDict`` objects with ``MyEncoder`` and
    print the resulting JSON."""
    zelda = mList(['zelda'])
    zelda.src = "oldschool"
    # An mList nested inside another mList.
    games = mList(['mario', 'contra', 'tetris', zelda])
    games.src = 'console'
    scores = mDict({'dp': 10, 'pk': 45})
    scores.processed = "unprocessed"
    test_json = {'games': games, 'scores': scores,
                 'date': datetime.datetime.now()}
    print(json.dumps(test_json, cls=MyEncoder))


if __name__ == '__main__':
    main()
Run Code Online (Sandbox Code Playgroud)
Roy*_*rau 10
FastTurtle的答案可能是一个更清洁的解决方案.
根据我的问题/答案中解释的技术,这里有你想要的东西:覆盖继承的默认支持对象的嵌套JSON编码,如dict,list
import json
import datetime
class mDict(dict):
    """A ``dict`` subclass; unlike plain dicts, instances accept attributes."""


class mList(list):
    """A ``list`` subclass; unlike plain lists, instances accept attributes."""


class JsonDebugEncoder(json.JSONEncoder):
    """Encode ``mDict``/``mList`` together with their instance attributes.

    Output shapes::

        mDict -> {"__mDict__": {"orig": {...}, "attr": {...}}}
        mList -> {"__mList__": {"orig": [...], "attr": {...}}}

    The input is rewritten into plain, tagged containers before the
    regular encoder runs.  This replaces the former ``_iterencode``
    override, which current ``JSONEncoder`` implementations never call,
    and the hand-assembled JSON text, which was invalid for objects with
    zero or several attributes and did not escape attribute names.
    Objects returned by ``default`` are not rewritten.
    """

    def iterencode(self, o, _one_shot=False):
        """Tag every ``mDict``/``mList`` in *o*, then encode as usual."""
        return super(JsonDebugEncoder, self).iterencode(
            self._tag(o), _one_shot)

    def _tag(self, o, _active=None):
        """Return a copy of *o* in which each ``mDict``/``mList`` is
        replaced by its tagged plain-dict form.

        Raises:
            ValueError: if a container (directly or indirectly) contains
                itself.
        """
        if not isinstance(o, (dict, list, tuple)):
            return o
        if _active is None:
            _active = set()
        marker = id(o)
        # _active holds the containers currently being walked; meeting
        # one again means the structure is cyclic.
        if marker in _active:
            raise ValueError("Circular reference detected")
        _active.add(marker)
        try:
            if isinstance(o, dict):
                plain = dict((key, self._tag(value, _active))
                             for key, value in o.items())
            else:
                plain = [self._tag(value, _active) for value in o]
            if isinstance(o, mDict):
                tag = "__mDict__"
            elif isinstance(o, mList):
                tag = "__mList__"
            else:
                return plain
            return {tag: {"orig": plain,
                          "attr": self._tag(vars(o), _active)}}
        finally:
            _active.discard(marker)

    def default(self, obj):
        """Encode datetimes as ISO 8601 strings; reject anything else."""
        if isinstance(obj, datetime.datetime):
            return obj.isoformat()
        # Raise TypeError for unsupported types instead of returning None.
        return super(JsonDebugEncoder, self).default(obj)
class JsonDebugDecoder(json.JSONDecoder):
    """Decode JSON and rebuild ``mList``/``mDict`` objects from the
    ``{"__mList__": ...}`` / ``{"__mDict__": ...}`` placeholders."""

    def decode(self, s):
        """Parse *s* normally, then convert placeholders in the result."""
        obj = super(JsonDebugDecoder, self).decode(s)
        return self.recursiveObjectDecode(obj)

    def recursiveObjectDecode(self, obj):
        """Walk *obj*, replacing placeholder dicts by the objects they
        describe.  Plain dicts and lists are updated in place."""
        if isinstance(obj, dict):
            decoders = (("__mList__", self.mListDecode),
                        ("__mDict__", self.mDictDecode))
            for placeholder, decoder in decoders:
                # A dict carrying a placeholder key is assumed to be an
                # encoded object.
                if placeholder in obj:
                    return decoder(obj[placeholder])
            for key in obj:
                obj[key] = self.recursiveObjectDecode(obj[key])
        elif isinstance(obj, list):
            for index, item in enumerate(obj):
                obj[index] = self.recursiveObjectDecode(item)
        return obj

    def _restoreAttrs(self, res, attrs):
        """Copy the decoded *attrs* mapping into ``res.__dict__``."""
        for key, value in attrs.items():
            res.__dict__[key] = self.recursiveObjectDecode(value)
        return res

    def mDictDecode(self, o):
        """Build an ``mDict`` from ``{"orig": {...}, "attr": {...}}``."""
        res = mDict()
        # .items() instead of the Python 2-only .iteritems().
        for key, value in o['orig'].items():
            res[key] = self.recursiveObjectDecode(value)
        return self._restoreAttrs(res, o['attr'])

    def mListDecode(self, o):
        """Build an ``mList`` from ``{"orig": [...], "attr": {...}}``."""
        res = mList()
        for value in o['orig']:
            res.append(self.recursiveObjectDecode(value))
        return self._restoreAttrs(res, o['attr'])
def test_debug_json():
    """Encode sample data with ``JsonDebugEncoder``, decode it again with
    ``JsonDebugDecoder`` and print both results."""
    games = mList(['mario', 'contra', 'tetris'])
    games.src = 'console'
    scores = mDict({'dp': 10, 'pk': 45})
    scores.processed = "unprocessed"
    test_json = {'games': games, 'scores': scores,
                 'date': datetime.datetime.now()}
    jsonDump = json.dumps(test_json, cls=JsonDebugEncoder)
    # print(x) with one argument is valid on both Python 2 and Python 3.
    print(jsonDump)
    test_pyObject = json.loads(jsonDump, cls=JsonDebugDecoder)
    print(test_pyObject)


if __name__ == '__main__':
    test_debug_json()
Run Code Online (Sandbox Code Playgroud)
这导致:
{"date": "2013-05-06T22:28:08.967000", "games": {"__mList__": {"orig": ["mario", "contra", "tetris"], "attr": {"src": "console"}}}, "scores": {"__mDict__": {"orig": {"pk": 45, "dp": 10}, "attr": {"processed": "unprocessed"}}}}
Run Code Online (Sandbox Code Playgroud)
这样你就可以对它进行编码并将其解码回它来自的python对象.
编辑:
这是一个实际上将其编码为您想要的输出的版本,也可以对其进行解码.每当字典包含'orig'和'attr'时,它将检查'orig'是否包含字典或列表,如果是,它将分别将对象转换回mDict或mList.
import json
import datetime
class mDict(dict):
    """A ``dict`` subclass; unlike plain dicts, instances accept attributes."""


class mList(list):
    """A ``list`` subclass; unlike plain lists, instances accept attributes."""


class JsonDebugEncoder(json.JSONEncoder):
    """Encode ``mDict``/``mList`` as ``{"orig": ..., "attr": {...}}``.

    ``orig`` holds the dict/list contents and ``attr`` the instance
    attributes.  The input is rewritten into plain containers before the
    regular encoder runs.  This replaces the former ``_iterencode``
    override, which current ``JSONEncoder`` implementations never call,
    and the hand-assembled JSON text, which was invalid for objects with
    zero or several attributes and did not escape attribute names.
    Objects returned by ``default`` are not rewritten.
    """

    def iterencode(self, o, _one_shot=False):
        """Expand every ``mDict``/``mList`` in *o*, then encode as usual."""
        return super(JsonDebugEncoder, self).iterencode(
            self._expand(o), _one_shot)

    def _expand(self, o, _active=None):
        """Return a copy of *o* in which each ``mDict``/``mList`` is
        replaced by its ``{"orig": ..., "attr": ...}`` form.

        Raises:
            ValueError: if a container (directly or indirectly) contains
                itself.
        """
        if not isinstance(o, (dict, list, tuple)):
            return o
        if _active is None:
            _active = set()
        marker = id(o)
        # _active holds the containers currently being walked; meeting
        # one again means the structure is cyclic.
        if marker in _active:
            raise ValueError("Circular reference detected")
        _active.add(marker)
        try:
            if isinstance(o, dict):
                plain = dict((key, self._expand(value, _active))
                             for key, value in o.items())
            else:
                plain = [self._expand(value, _active) for value in o]
            if isinstance(o, (mDict, mList)):
                return {"orig": plain,
                        "attr": self._expand(vars(o), _active)}
            return plain
        finally:
            _active.discard(marker)

    def default(self, obj):
        """Encode datetimes as ISO 8601 strings; reject anything else."""
        if isinstance(obj, datetime.datetime):
            return obj.isoformat()
        # Raise TypeError for unsupported types instead of returning None.
        return super(JsonDebugEncoder, self).default(obj)
class JsonDebugDecoder(json.JSONDecoder):
    """Decode JSON and rebuild ``mList``/``mDict`` objects from dicts of
    the form ``{"orig": <list or dict>, "attr": {...}}``."""

    def decode(self, s):
        """Parse *s* normally, then convert encoded objects in the result."""
        obj = super(JsonDebugDecoder, self).decode(s)
        return self.recursiveObjectDecode(obj)

    def recursiveObjectDecode(self, obj):
        """Walk *obj*, replacing encoded dicts by ``mList``/``mDict``
        instances.  Plain dicts and lists are updated in place."""
        if isinstance(obj, dict):
            # Only a dict-valued "attr" can be restored as attributes;
            # anything else is left as an ordinary dict rather than
            # failing inside the decode helpers.
            encoded = ("orig" in obj and "attr" in obj
                       and isinstance(obj["attr"], dict))
            if encoded and isinstance(obj["orig"], list):
                return self.mListDecode(obj)
            if encoded and isinstance(obj["orig"], dict):
                return self.mDictDecode(obj)
            for key in obj:
                obj[key] = self.recursiveObjectDecode(obj[key])
        elif isinstance(obj, list):
            for index, item in enumerate(obj):
                obj[index] = self.recursiveObjectDecode(item)
        return obj

    def _restoreAttrs(self, res, attrs):
        """Copy the decoded *attrs* mapping into ``res.__dict__``."""
        for key, value in attrs.items():
            res.__dict__[key] = self.recursiveObjectDecode(value)
        return res

    def mDictDecode(self, o):
        """Build an ``mDict`` from ``{"orig": {...}, "attr": {...}}``."""
        res = mDict()
        # .items() instead of the Python 2-only .iteritems().
        for key, value in o['orig'].items():
            res[key] = self.recursiveObjectDecode(value)
        return self._restoreAttrs(res, o['attr'])

    def mListDecode(self, o):
        """Build an ``mList`` from ``{"orig": [...], "attr": {...}}``."""
        res = mList()
        for value in o['orig']:
            res.append(self.recursiveObjectDecode(value))
        return self._restoreAttrs(res, o['attr'])
def test_debug_json():
    """Encode sample data with ``JsonDebugEncoder``, decode it again with
    ``JsonDebugDecoder`` and print the results, including a restored
    attribute."""
    games = mList(['mario', 'contra', 'tetris'])
    games.src = 'console'
    scores = mDict({'dp': 10, 'pk': 45})
    scores.processed = "unprocessed"
    test_json = {'games': games, 'scores': scores,
                 'date': datetime.datetime.now()}
    jsonDump = json.dumps(test_json, cls=JsonDebugEncoder)
    # print(x) with one argument is valid on both Python 2 and Python 3.
    print(jsonDump)
    test_pyObject = json.loads(jsonDump, cls=JsonDebugDecoder)
    print(test_pyObject)
    # Printing the containers does not show their attributes, so show one
    # explicitly.
    print(test_pyObject['games'].src)


if __name__ == '__main__':
    test_debug_json()
Run Code Online (Sandbox Code Playgroud)
以下是有关输出的更多信息:
# Encoded
{"date": "2013-05-06T22:41:35.498000", "games": {"orig": ["mario", "contra", "tetris"], "attr": {"src": "console"}}, "scores": {"orig": {"pk": 45, "dp": 10}, "attr": {"processed": "unprocessed"}}}
# Decoded ('games' contains the mList with the src attribute and 'scores' contains the mDict processed attribute)
# Note that printing the python objects doesn't directly show the processed and src attributes, as seen below.
{u'date': u'2013-05-06T22:41:35.498000', u'games': [u'mario', u'contra', u'tetris'], u'scores': {u'pk': 45, u'dp': 10}}
Run Code Online (Sandbox Code Playgroud)
对不起任何错误的命名约定,这是一个快速设置.;)
注意:日期时间不会被解码回python表示.实现可以通过检查任何名为'date'的dict键并包含日期时间的有效字符串表示来完成.
正如其他人已经指出的那样,默认处理程序仅针对不是已识别类型之一的值进行调用.我对此问题的建议解决方案是预处理要序列化的对象,对列表,元组和字典进行递归,但将所有其他值包装在自定义类中.
像这样的东西:
class Debug(object):
    """Wrapper that hides a value's real type from the JSON encoder."""

    def __init__(self, obj):
        self.originalObject = obj


def debug(obj):
    """Wrap every value inside *obj* in a ``Debug`` instance.

    Exact ``list``, ``tuple`` and ``dict`` objects are rebuilt with their
    items processed recursively.  Everything else -- including instances
    of subclasses of those types -- is wrapped in ``Debug`` with the
    original kept in ``originalObject``.
    """
    kind = type(obj)
    # Exact type matches on purpose: subclasses must be wrapped, not
    # descended into.
    if kind is list:
        return [debug(item) for item in obj]
    if kind is tuple:
        # tuple(...) is required here: a bare generator expression is not
        # a tuple and cannot be serialized by json.
        return tuple(debug(item) for item in obj)
    if kind is dict:
        return dict((key, debug(value)) for key, value in obj.items())
    return Debug(obj)
Run Code Online (Sandbox Code Playgroud)
在将对象传递给json.dumps之前,您可以调用此函数,如下所示:
test_json = debug(test_json)
print(json.dumps(test_json,default=json_debug_handler))
Run Code Online (Sandbox Code Playgroud)
请注意,此代码检查其类与列表,元组或字典完全匹配的对象,因此从这些类型扩展的任何自定义对象都将被包装而不是解析.因此,常规列表,元组和字典将照常序列化,但所有其他值将传递给默认处理程序.
所有这一切的最终结果是,每个到达默认处理程序的值都保证包含在其中一个Debug类中.所以你要做的第一件事是提取原始对象,如下所示:
obj = obj.originalObject
Run Code Online (Sandbox Code Playgroud)
然后,您可以检查原始对象的类型并处理需要特殊处理的类型.对于其他所有内容,您应该直接返回原始对象(因此处理程序最后的返回语句应该是return obj,而不是return None).
def json_debug_handler(obj):
    """Debugging ``default=`` hook for values wrapped by ``debug()``.

    Unwraps ``obj.originalObject``, prints its type and returns a
    replacement: an ISO 8601 string for datetimes, an
    ``{'orig': ..., 'attrs': ...}`` dict for ``mDict``/``mList``, and the
    unwrapped object itself for everything else.
    """
    obj = obj.originalObject  # Add this line
    print("object received:")
    # Call form instead of the Python 2-only ``print type(obj)``.
    print(type(obj))
    print("\n\n")
    if isinstance(obj, datetime.datetime):
        return obj.isoformat()
    elif isinstance(obj, (mDict, mList)):
        return {'orig': obj, 'attrs': vars(obj)}
    else:
        return obj  # Change this line
Run Code Online (Sandbox Code Playgroud)
请注意,此代码不检查不可序列化的值.这些值会落到最后的return obj,随后被序列化程序拒绝,并再次传回默认处理程序 - 只是这一次没有Debug包装器.
如果您需要处理该场景,可以在处理程序的顶部添加一个检查,如下所示:
if not hasattr(obj, 'originalObject'):
return None
Run Code Online (Sandbox Code Playgroud)
Ideone演示:http://ideone.com/tOloNq
仅当被转储的节点不是本机可序列化的时,才会调用默认函数,并且您的mDict类按原样序列化.这是一个小的演示,显示何时调用默认值,何时不调用:
import json
def serializer(obj):
    """``default=`` hook: announce the call and fall back to ``str(obj)``."""
    print('serializer called')
    return str(obj)


class mDict(dict):
    """A ``dict`` subclass with no added behaviour."""


class mSet(set):
    """A ``set`` subclass with no added behaviour."""


# A dict subclass is serialized natively, so serializer is not called.
d = mDict(dict(a=1))
print(json.dumps(d, default=serializer))

# A set is not natively serializable, so serializer is called.
s = mSet({1, 2, 3})
print(json.dumps(s, default=serializer))
Run Code Online (Sandbox Code Playgroud)
并输出:
{"a": 1}
serializer called
"mSet([1, 2, 3])"
Run Code Online (Sandbox Code Playgroud)
请注意,集合不是本机可序列化的,但是dicts是.
由于您的m___类是可序列化的,因此永远不会调用您的处理程序.
更新#1 -----
您可以更改JSON编码器代码.有关如何执行此操作的详细信息取决于您正在使用的JSON实现.例如在simplejson中,相关代码如下,位于encoder.py中:
def _iterencode(o, _current_indent_level):
...
for_json = _for_json and getattr(o, 'for_json', None)
if for_json and callable(for_json):
...
elif isinstance(o, list):
...
else:
_asdict = _namedtuple_as_object and getattr(o, '_asdict', None)
if _asdict and callable(_asdict):
for chunk in _iterencode_dict(_asdict(),
_current_indent_level):
yield chunk
elif (_tuple_as_array and isinstance(o, tuple)):
...
elif isinstance(o, dict):
...
elif _use_decimal and isinstance(o, Decimal):
...
else:
...
o = _default(o)
for chunk in _iterencode(o, _current_indent_level):
yield chunk
...
Run Code Online (Sandbox Code Playgroud)
换句话说,这里存在一种硬编码的行为:只有当被编码的节点不是已识别的基本类型之一时,才会调用default.您可以通过以下几种方式之一覆盖它:
1 - 像你之前那样子类化JSONEncoder,但给它的初始化方法添加一个参数,用来指定替代标准_make_iterencode的函数;在该函数中添加一个测试,对符合条件的类调用default.这是一种干净的方法,因为您没有更改JSON模块,但您会重复原始_make_iterencode中的大量代码.(此方法的其他变体包括对_make_iterencode或其子函数_iterencode_dict打猴子补丁.)
2 - 更改JSON模块源,并使用__debug__
常量更改行为:
def _iterencode(o, _current_indent_level):
...
for_json = _for_json and getattr(o, 'for_json', None)
if for_json and callable(for_json):
...
elif isinstance(o, list):
...
## added code below
elif __debug__:
o = _default(o)
for chunk in _iterencode(o, _current_indent_level):
yield chunk
## added code above
else:
...
Run Code Online (Sandbox Code Playgroud)
理想情况下,JSONEncoder类将提供一个参数来指定"使用所有类型的默认值",但事实并非如此.以上是一个简单的一次性更改,可以满足您的需求.
归档时间: |
|
查看次数: |
11449 次 |
最近记录: |