我有以下数组(只有一个dict):
[{
'RuntimeInMinutes': '21',
'EpisodeNumber': '21',
'Genres': ['Animation'],
'ReleaseDate': '2005-02-05',
'LanguageOfMetadata': 'EN',
'Languages': [{
'_Key': 'CC',
'Value': ['en']
}, {
'_Key': 'Primary',
'Value': ['EN']
}],
'Products': [{
'URL': 'http://www.hulu.com/watch/217566',
'Rating': 'TV-Y',
'Currency': 'USD',
'SUBSCRIPTION': '0.00',
'_Key': 'US'
}, {
'URL': 'http://www.hulu.com/d/217566',
'Rating': 'TV-Y',
'Currency': 'USD',
'SUBSCRIPTION': '0.00',
'_Key': 'DE'
}],
'ReleaseYear': '2005',
'TVSeriesID': '5638#TVSeries',
'Type': 'TVEpisode',
'Studio': '4K Media'
}]
Run Code Online (Sandbox Code Playgroud)
我想把这个字典(dict)展平成如下形式:
[{
'RuntimeInMinutes': '21',
'EpisodeNumber': '21',
'Genres': ['Animation'],
'ReleaseDate': '2005-02-05',
'LanguageOfMetadata': 'EN',
'Languages._Key': ['CC', 'Primary'],
'Languages.Value': ['en', 'EN'],
'Products.URL': ['http://www.hulu.com/watch/217566', 'http://www.hulu.com/d/217566'],
'Products.Rating': ['TV-Y', 'TV-Y'],
'Products.Currency': ['USD', 'USD'],
'Products.SUBSCRIPTION': ['0.00', '0.00'],
'Products._Key': ['US', 'DE'],
'ReleaseYear': '2005',
'TVSeriesID': '5638#TVSeries',
'Type': 'TVEpisode',
'Studio': '4K Media'
}]
Run Code Online (Sandbox Code Playgroud)
换句话说,每当遇到嵌套的dict时,都需要把它展开,使结果中的每个值都是字符串、数字或列表.
我目前所拥有的是下面的内容,它使用while循环遍历json的所有子路径.
# Iteratively expand slash-separated key paths until only leaf values remain.
# This snippet relies on names defined outside of it: ``obj``, ``keys``, ``n``,
# ``FLAT_OBJ``, ``copy``, ``os`` and ``get_sub_object_from_path``.
while True:
    # Walk a snapshot so that ``keys`` can be edited during the pass.
    for key in copy(keys):
        val = get_sub_object_from_path(obj, key)
        if isinstance(val, dict):
            # A dict is not a leaf: queue each child path for a later pass.
            # (The original had these two branches swapped, which stored
            # dicts unflattened and called ``.keys()`` on non-dict values.)
            # NOTE(review): os.path.join uses the platform separator while
            # the replace below assumes '/'; confirm behavior on Windows.
            keys.extend(os.path.join(key, _nextkey) for _nextkey in val)
        else:
            # Leaf value: record it under the dotted form of its path.
            FLAT_OBJ[key.replace('/', '.')] = val
        keys.remove(key)
    # Stop when nothing is left to expand, or once the pass cap is exceeded.
    if (not keys) or (n > 5):
        break
    n += 1
Run Code Online (Sandbox Code Playgroud)
您可以使用递归配合生成器来实现:
from collections import defaultdict
# Sample input: a one-element list holding a single nested record.
_d = [{'RuntimeInMinutes': '21', 'EpisodeNumber': '21', 'Genres': ['Animation'], 'ReleaseDate': '2005-02-05', 'LanguageOfMetadata': 'EN', 'Languages': [{'_Key': 'CC', 'Value': ['en']}, {'_Key': 'Primary', 'Value': ['EN']}], 'Products': [{'URL': 'http://www.hulu.com/watch/217566', 'Rating': 'TV-Y', 'Currency': 'USD', 'SUBSCRIPTION': '0.00', '_Key': 'US'}, {'URL': 'http://www.hulu.com/d/217566', 'Rating': 'TV-Y', 'Currency': 'USD', 'SUBSCRIPTION': '0.00', '_Key': 'DE'}], 'ReleaseYear': '2005', 'TVSeriesID': '5638#TVSeries', 'Type': 'TVEpisode', 'Studio': '4K Media'}]
def get_vals(d, _path=None):
    """Yield ``[dotted_path, value]`` pairs for every leaf found under *d*.

    Nested dicts are descended into, extending the path with each key.
    A non-empty list whose items are all dicts or lists is expanded item by
    item, so the same dotted path may be yielded several times. Any other
    value, including an empty list or a list containing at least one scalar,
    is yielded unchanged as a leaf.

    Args:
        d: The mapping to walk. An object without an ``items`` method
            produces no pairs.
        _path: Keys already traversed, outermost first. Intended for the
            recursive calls; callers normally omit it.

    Yields:
        Two-element lists ``[path, value]`` where ``path`` is the traversed
        keys joined with ``'.'``. Keys must be strings for the join to work.

    Note:
        An empty dict has no leaves and therefore contributes no pairs.
    """
    # Copy instead of sharing a mutable default between calls.
    path = [] if _path is None else list(_path)
    for key, value in getattr(d, 'items', lambda: {})():
        # ``all()`` is true for an empty list, so require at least one item;
        # otherwise an empty list would be expanded into nothing and lost.
        expandable = (
            isinstance(value, list)
            and len(value) > 0
            and all(isinstance(item, (dict, list)) for item in value)
        )
        if expandable:
            for item in value:
                if isinstance(item, list):
                    # A list has no ``items``; re-dispatch it under the same
                    # key so its contents are classified instead of dropped.
                    yield from get_vals({key: item}, path)
                else:
                    yield from get_vals(item, path + [key])
        elif isinstance(value, dict):
            yield from get_vals(value, path + [key])
        else:
            yield ['.'.join(path + [key]), value]
import json

# Gather every [dotted path, leaf value] pair from each record of the input.
results = []
for record in _d:
    results.extend(get_vals(record))

# Bucket the leaf values by their dotted path.
_c = defaultdict(list)
for path, value in results:
    _c[path].append(value)

# A path seen once keeps its bare value; a repeated path becomes a list.
flat = {}
for path, values in _c.items():
    if len(values) > 1:
        flat[path] = list(values)
    else:
        flat[path] = values[0]
result = [flat]

print(json.dumps(result, indent=4))
Run Code Online (Sandbox Code Playgroud)
输出:
[
{
"RuntimeInMinutes": "21",
"EpisodeNumber": "21",
"Genres": [
"Animation"
],
"ReleaseDate": "2005-02-05",
"LanguageOfMetadata": "EN",
"Languages._Key": [
"CC",
"Primary"
],
"Languages.Value": [
[
"en"
],
[
"EN"
]
],
"Products.URL": [
"http://www.hulu.com/watch/217566",
"http://www.hulu.com/d/217566"
],
"Products.Rating": [
"TV-Y",
"TV-Y"
],
"Products.Currency": [
"USD",
"USD"
],
"Products.SUBSCRIPTION": [
"0.00",
"0.00"
],
"Products._Key": [
"US",
"DE"
],
"ReleaseYear": "2005",
"TVSeriesID": "5638#TVSeries",
"Type": "TVEpisode",
"Studio": "4K Media"
}
]
Run Code Online (Sandbox Code Playgroud)
编辑:将上述解决方案封装在一个外部函数中:
def flatten_obj(data):
    """Flatten a list of nested records into one dict keyed by dotted paths.

    Every leaf value found in any record of *data* is collected under the
    ``'.'``-joined keys leading to it. Values from all records are merged
    into a single dict, which is returned wrapped in a one-element list.

    Args:
        data: An iterable of nested dicts. Items without an ``items`` method
            contribute nothing.

    Returns:
        ``[flat]`` where ``flat`` maps each dotted path to its value. A path
        seen once maps to the bare value; a path seen several times maps to
        a list of the values in encounter order. An empty *data* gives
        ``[{}]``.

    Note:
        Because a single occurrence is unwrapped, a lone list-valued leaf
        cannot be told apart from several scalar leaves sharing one path.
    """
    def get_vals(d, _path=None):
        """Yield ``[dotted_path, value]`` for every leaf under mapping *d*."""
        # Copy instead of sharing a mutable default between calls.
        path = [] if _path is None else list(_path)
        for key, value in getattr(d, 'items', lambda: {})():
            # ``all()`` is true for an empty list, so require an item;
            # otherwise an empty list would be expanded into nothing.
            expandable = (
                isinstance(value, list)
                and len(value) > 0
                and all(isinstance(item, (dict, list)) for item in value)
            )
            if expandable:
                for item in value:
                    if isinstance(item, list):
                        # A list has no ``items``; re-dispatch it under the
                        # same key so its contents are not dropped.
                        yield from get_vals({key: item}, path)
                    else:
                        yield from get_vals(item, path + [key])
            elif isinstance(value, dict):
                yield from get_vals(value, path + [key])
            else:
                yield ['.'.join(path + [key]), value]

    # Group leaf values by dotted path across all records.
    grouped = defaultdict(list)
    for record in data:
        for path, value in get_vals(record):
            grouped[path].append(value)

    return [
        {
            path: list(values) if len(values) > 1 else values[0]
            for path, values in grouped.items()
        }
    ]
Run Code Online (Sandbox Code Playgroud)
| 归档时间: |
|
| 查看次数: |
205 次 |
| 最近记录: |