VBo*_*Cat 5 python django json deserialization json-deserialization
我需要使用一个服务来发送包含 JSON 序列化嵌套结构的 JSON 响应,我想将其反序列化并存储在我的数据库中 - 我的应用程序使用 Django。
业务规则如下:
查询返回的对象总是具有一个 id 属性(唯一的整数),通常还有一个 createdAt 属性和一个 updatedAt 属性(二者均为日期时间数据),然后是其他几个基本类型的属性(int、float、str、datetime 等),以及若干值可以是另一个对象或对象数组的属性。
如果属性值是一个对象,则父级通过“外键”与它相关联。如果它是一个对象数组,那么有两种情况:要么数组中的对象通过“外键”与父对象相关联,要么父对象通过“多对多”关系与数组的每个成员相关联。
我需要在我的数据库中镜像每个对象,所以每个模型都有一个id作为主键的字段,但它不是自动生成的,因为真正的 id 将与导入的数据一起提供。
所有这些实体之间的关系已经反映在我的模型模式中。我采用这种方法(镜像数据结构)是因为如果我将接收到的数据展平以将其全部保存到单个表中,则会出现可怕的复制,无视所有数据规范化规则。
对于每个根对象,我需要这样做:
如果数据库中具有相同 id 的记录的 updatedAt 与传入数据的值相同,则可以跳过更新。
下面我将复制我从服务接收到的数据的一个非常简化的示例,以及我想在其中存储它的模型。真正的东西比那要庞大得多,而且复杂得多,这就是为什么我非常想学习一种让 ORM 解决问题的方法,如果它能够的话。硬编码整个事情会耗费大量时间,而且很容易出错,如果将来数据模式发生变化,还会造成维护地狱。
{
"id": 37125965,
"number": "029073432019403",
"idCommunication": "1843768",
"docReceivedAt": {
"date": "2019-12-20 08:46:42"
},
"createdAt": {
"date": "2019-12-20 09:01:14"
},
"updatedAt": {
"date": "2019-12-20 09:01:32"
},
"branch": {
"id": 20,
"name": "REGIONAL OFFICE #3",
"address": "457 Beau St., S\u00e3o Paulo, SP, 08547-003",
"active": true,
"createdAt": {
"date": "2013-02-14 23:12:30"
},
"updatedAt": {
"date": "2019-05-09 13:40:47"
}
},
"modality": {
"id": 1,
"valor": "CITA\u00c7\u00c3O",
"descricao": "CITA\u00c7\u00c3O",
"active": true,
"createdAt": {
"date": "2014-08-29 20:47:56"
},
"updatedAt": {
"date": "2014-08-29 20:47:56"
}
},
"operation": {
"id": 12397740,
"number": "029073432019403",
"startedAt": {
"date": "2019-11-07 22:28:25"
},
"managementType": 27,
"assessmentValue": 5000000,
"createdAt": {
"date": "2019-12-20 09:01:30"
},
"updatedAt": {
"date": "2019-12-20 09:01:30"
},
"operationClass": {
"id": 22,
"name": "A\u00c7\u00c3O RESCIS\u00d3RIA",
"createdAt": {
"date": "2014-02-28 20:24:55"
},
"updatedAt": {
"date": "2014-02-28 20:24:55"
}
},
"evaluator": {
"id": 26798,
"name": "JANE DOE",
"level": 1,
"active": true,
"createdAt": {
"date": "2017-02-22 22:54:04"
},
"updatedAt": {
"date": "2017-03-15 18:03:20"
},
"evaluatorsOffice": {
"id": 7,
"name": "ACME",
"area": 4,
"active": true,
"createdAt": {
"date": "2014-02-28 20:25:16"
},
"updatedAt": {
"date": "2014-02-28 20:25:16"
}
},
"evaluatorsOffice_id": 7
},
"operationClass_id": 22,
"evaluator_id": 26798
},
"folder": {
"id": 16901241,
"singleDocument": false,
"state": 0,
"IFN": "00409504174201972",
"closed": false,
"dataHoraAbertura": {
"date": "2019-12-20 09:01:31"
},
"dataHoraTransicao": {
"date": "2024-12-20 09:01:31"
},
"titulo": "CONTROL FOLDER REF. OP. N. 029073432019403",
"createdAt": {
"date": "2019-12-20 09:01:32"
},
"updatedAt": {
"date": "2019-12-20 09:01:32"
},
"subjects": [
{
"id": 22255645,
"main": true,
"createdAt": {
"date": "2019-12-20 09:01:32"
},
"updatedAt": {
"date": "2019-12-20 09:01:32"
},
"subjectClass": {
"id": 20872,
"name": "SPECIAL RETIREMENT PROCESS",
"active": true,
"regulation": "8.213/91, 53.831/64, 83.080/79, 2.172/97, 1.663/98, 9.711/98, 9.528/97 AND 9.032/95",
"glossary": "SPECIAL RETIREMENT APPLICATION DUE TO HAZARDOUS LABOR CONDITION FOR 15+/20+/25+ YEARS",
"createdAt": {
"date": "2013-10-18 16:22:44"
},
"updatedAt": {
"date": "2013-10-18 16:22:44"
},
"parent": {
"id": 20866,
"name": "RETIREMENT BENEFITS",
"active": true,
"createdAt": {
"date": "2013-10-18 16:22:44"
},
"updatedAt": {
"date": "2013-10-18 16:22:44"
},
"parent": {
"id": 20126,
"name": "SOCIAL SECURITY",
"active": true,
"createdAt": {
"date": "2013-10-18 16:22:42"
},
"updatedAt": {
"date": "2013-10-18 16:22:42"
}
},
"parent_id": 20126
},
"parent_id": 20866
},
"subjectClass_id": 20872
}
],
"person": {
"id": 7318,
"isClient": true,
"isRelated": false,
"name": "SOCSEC CO.",
"createdAt": {
"date": "2013-02-14 23:11:43"
},
"updatedAt": {
"date": "2019-11-18 16:05:07"
}
},
"operation": {
"id": 12397740,
"number": "029073432019403",
"startedAt": {
"date": "2019-11-07 22:28:25"
},
"managementType": 27,
"assessmentValue": 5000000,
"createdAt": {
"date": "2019-12-20 09:01:30"
},
"updatedAt": {
"date": "2019-12-20 09:01:30"
}
},
"section": {
"id": 311,
"name": "PROTOCOL",
"address": "457 Beau St., ground floor, S\u00e3o Paulo, SP, 08547-003",
"active": true,
"management": false,
"onlyDistribution": true,
"createdAt": {
"date": "2013-02-14 23:12:31"
},
"updatedAt": {
"date": "2019-07-05 16:40:34"
},
"branch": {
"id": 20,
"name": "REGIONAL OFFICE #3",
"address": "457 Beau St., S\u00e3o Paulo, SP, 08547-003",
"active": true,
"createdAt": {
"date": "2013-02-14 23:12:30"
},
"updatedAt": {
"date": "2019-05-09 13:40:47"
}
},
"branch_id": 20
},
"person_id": 7318,
"operation_id": 12397740,
"section_id": 311
},
"branch_id": 20,
"modality_id": 1,
"operation_id": 12397740,
"folder_id": 16901241
}
Run Code Online (Sandbox Code Playgroud)
from django.db import models
class Section(models.Model):
    """Local mirror of a remote ``section`` object."""

    # Explicit, non-auto primary key: the value is taken from the imported data.
    id = models.PositiveIntegerField(primary_key=True)
    name = models.CharField(max_length=255, null=True)
    address = models.CharField(max_length=255, null=True)
    active = models.BooleanField(default=True)
    management = models.BooleanField(default=False)
    onlyDistribution = models.BooleanField(default=False)
    createdAt = models.DateTimeField()
    updatedAt = models.DateTimeField()
    # Referenced by name because ``Branch`` is declared further down.
    branch = models.ForeignKey('Branch', null=True, on_delete=models.SET_NULL)
class Person(models.Model):
    """Local mirror of a remote ``person`` object."""

    # Explicit, non-auto primary key: the value is taken from the imported data.
    id = models.PositiveIntegerField(primary_key=True)
    name = models.CharField(max_length=255, null=True)
    isClient = models.BooleanField(default=True)
    isRelated = models.BooleanField(default=True)
    createdAt = models.DateTimeField()
    updatedAt = models.DateTimeField()
class SubjectClass(models.Model):
    """Local mirror of a remote ``subjectClass`` object; classes nest via ``parent``."""

    # Explicit, non-auto primary key: the value is taken from the imported data.
    id = models.PositiveIntegerField(primary_key=True)
    name = models.CharField(max_length=255, null=True)
    active = models.BooleanField(default=True)
    regulation = models.CharField(max_length=255, null=True)
    glossary = models.CharField(max_length=255, null=True)
    createdAt = models.DateTimeField()
    updatedAt = models.DateTimeField()
    # Self-referencing foreign key; top-level classes have no parent.
    parent = models.ForeignKey('SubjectClass', null=True, on_delete=models.SET_NULL)
class Subject(models.Model):
    """Link row between a ``Folder`` and a ``SubjectClass`` (used as a ``through`` model)."""

    # Explicit, non-auto primary key: the value is taken from the imported data.
    id = models.PositiveIntegerField(primary_key=True)
    main = models.BooleanField(default=False)
    createdAt = models.DateTimeField()
    updatedAt = models.DateTimeField()
    # Referenced by name because ``Folder`` is declared further down.
    folder = models.ForeignKey('Folder', null=True, on_delete=models.SET_NULL)
    subjectClass = models.ForeignKey(SubjectClass, null=True, on_delete=models.SET_NULL)
class Folder(models.Model):
    """Local mirror of a remote ``folder`` object."""

    # Explicit, non-auto primary key: the value is taken from the imported data.
    id = models.PositiveIntegerField(primary_key=True)
    singleDocument = models.BooleanField(default=False)
    state = models.PositiveSmallIntegerField(null=True)
    IFN = models.CharField(max_length=31, null=True)
    closed = models.BooleanField(default=False)
    # NOTE(review): the sample payload spells this key "titulo" -- confirm how
    # it is meant to map onto this field.
    title = models.CharField(max_length=255, null=True)
    createdAt = models.DateTimeField()
    updatedAt = models.DateTimeField()
    # Many-to-many to SubjectClass whose link rows are ``Subject`` instances.
    subjects = models.ManyToManyField(SubjectClass, through=Subject, through_fields=('folder', 'subjectClass'))
    # NOTE(review): the sample payload carries this relation under "person" /
    # "person_id" -- confirm the intended mapping.
    interestedEntity = models.ForeignKey(Person, null=True, on_delete=models.SET_NULL)
class EvaluatorsOffice(models.Model):
    """Local mirror of a remote ``evaluatorsOffice`` object."""

    # Explicit, non-auto primary key: the value is taken from the imported data.
    id = models.PositiveIntegerField(primary_key=True)
    name = models.CharField(max_length=255, null=True)
    area = models.PositiveSmallIntegerField(null=True)
    active = models.BooleanField(default=True)
    createdAt = models.DateTimeField()
    updatedAt = models.DateTimeField()
class Evaluator(models.Model):
    """Local mirror of a remote ``evaluator`` object."""

    # Explicit, non-auto primary key: the value is taken from the imported data.
    id = models.PositiveIntegerField(primary_key=True)
    name = models.CharField(max_length=255, null=True)
    level = models.PositiveSmallIntegerField(null=True)
    active = models.BooleanField(default=True)
    createdAt = models.DateTimeField()
    updatedAt = models.DateTimeField()
    evaluatorsOffice = models.ForeignKey(EvaluatorsOffice, null=True, on_delete=models.SET_NULL)
class OperationClass(models.Model):
    """Local mirror of a remote ``operationClass`` object."""

    # Explicit, non-auto primary key: the value is taken from the imported data.
    id = models.PositiveIntegerField(primary_key=True)
    name = models.CharField(max_length=255, null=True)
    active = models.BooleanField(default=True)
    createdAt = models.DateTimeField()
    updatedAt = models.DateTimeField()
class Operation(models.Model):
    """Local mirror of a remote ``operation`` object."""

    # Explicit, non-auto primary key: the value is taken from the imported data.
    id = models.PositiveIntegerField(primary_key=True)
    number = models.CharField(max_length=31, null=True)
    startedAt = models.DateTimeField(null=True)
    managementType = models.PositiveIntegerField(null=True)
    assessmentValue = models.PositiveIntegerField(null=True)
    createdAt = models.DateTimeField()
    updatedAt = models.DateTimeField()
    operationClass = models.ForeignKey(OperationClass, null=True, on_delete=models.SET_NULL)
    evaluator = models.ForeignKey(Evaluator, null=True, on_delete=models.SET_NULL)
class Branch(models.Model):
    """Local mirror of a remote ``branch`` object."""

    # Explicit, non-auto primary key: the value is taken from the imported data.
    id = models.PositiveIntegerField(primary_key=True)
    name = models.CharField(max_length=255, null=True)
    address = models.CharField(max_length=255, null=True)
    active = models.BooleanField(default=True)
    createdAt = models.DateTimeField()
    updatedAt = models.DateTimeField()
class Modality(models.Model):
    """Local mirror of a remote ``modality`` object."""

    # Explicit, non-auto primary key: the value is taken from the imported data.
    id = models.PositiveIntegerField(primary_key=True)
    # NOTE(review): the sample payload uses the keys "valor", "descricao" and
    # "active" for this object -- confirm how they map onto this model.
    value = models.CharField(max_length=255, null=True)
    createdAt = models.DateTimeField()
    updatedAt = models.DateTimeField()
class CommunicationRecord(models.Model):
    """Root object of the imported payload; points at branch, modality, operation and folder."""

    # Explicit, non-auto primary key: the value is taken from the imported data.
    id = models.PositiveIntegerField(primary_key=True)
    number = models.CharField(max_length=31, null=True)
    idCommunication = models.CharField(max_length=31, null=True)
    docReceivedAt = models.DateTimeField(null=True)
    createdAt = models.DateTimeField()
    updatedAt = models.DateTimeField()
    branch = models.ForeignKey(Branch, null=True, on_delete=models.SET_NULL)
    modality = models.ForeignKey(Modality, null=True, on_delete=models.SET_NULL)
    operation = models.ForeignKey(Operation, null=True, on_delete=models.SET_NULL)
    folder = models.ForeignKey(Folder, null=True, on_delete=models.SET_NULL)
Run Code Online (Sandbox Code Playgroud)
我正在尝试遵循Max Malysh I Reinstate Monica的建议,并且我开始研究递归序列化程序:
from django.db.models import Manager, Model, Field, DateTimeField, ForeignKey
from rest_framework.serializers import ModelSerializer
class RecursiveSerializer(ModelSerializer):
    """Model serializer whose target model is chosen per instance from a manager.

    Incoming data is pre-processed before it reaches the base serializer:
    ``{'date': ...}`` wrappers on ``DateTimeField`` columns are unwrapped, and
    nested dicts on ``ForeignKey`` columns are wrapped in a serializer of the
    same class bound to the related model.
    """

    manager: Manager
    field_dict: dict

    def __init__(self, target_manager: Manager, data: dict, **kwargs):
        """Bind the serializer to ``target_manager.model`` and to ``data``.

        If ``data`` carries the model's primary key and a matching row exists,
        that row is passed to the base class as the instance to update.
        """
        self.manager = target_manager
        # Give this instance its own Meta. Writing to the class-level
        # ``Meta.model`` shares one value between every serializer, so each
        # nested serializer built in ``process_data`` would overwrite the
        # model of the serializers that enclose it.
        self.Meta = type('Meta', (type(self).Meta,), {'model': target_manager.model})
        self.field_dict = {f.name: f for f in self.manager.model._meta.fields}
        instance = None
        data = self.process_data(data)
        pk_name = self.manager.model._meta.pk.name
        if pk_name in data:
            try:
                instance = target_manager.get(pk=data[pk_name])
            except target_manager.model.DoesNotExist:
                # No stored row yet: leave ``instance`` as None.
                pass
        super().__init__(instance, data, **kwargs)

    def process_data(self, data: dict):
        """Return a copy of ``data`` with nested values adapted to the model's fields.

        Keys that do not match a concrete model field are copied unchanged.
        """
        processed_data = {}
        for name, value in data.items():
            field: Field = self.field_dict.get(name)
            if isinstance(value, dict):
                if isinstance(field, ForeignKey):
                    # NOTE(review): the value stored here is a serializer
                    # object, not a primary key -- confirm that the base
                    # serializer's relation field accepts it.
                    processed_data[name] = self.__class__(field.related_model.objects, data=value)
                    continue
                elif len(value) == 1 and 'date' in value and isinstance(field, DateTimeField):
                    # Unwrap {'date': '...'} into the bare timestamp string.
                    processed_data[name] = value['date']
                    continue
            processed_data[name] = value
        return processed_data

    class Meta:
        # Placeholder; every instance replaces ``model`` in ``__init__``.
        model: Model = None
        fields = '__all__'
Run Code Online (Sandbox Code Playgroud)
然而,它做了一件奇怪的事情:当第一次运行时,针对一个空数据库,它只创建最后一个也是最深嵌套的对象。在第二次运行中,它什么都不做,并返回一个code='unique'验证错误,指出这样的对象已经存在。
现在我必须说我对 Python 和 Django 还是很陌生(我来自 .NET 开发),我在这项任务中面临的困难对我来说开始显得很尴尬。我一直在阅读有关 Django 和 DRF 的文档,这对我的帮助比我预期的要小。然而,我拒绝相信上述语言和框架缺乏执行这种微不足道的操作的资源。所以,如果我因为缺乏我的知识而遗漏了一些非常明显的东西,如果有人教我我在这里似乎不知道的东西,我将不胜感激。
一般来说,为什么我同意DRF 对于这种情况没有用:DRF 定义了一个 API,并且在许多方面类似于视图而不是模型:它定义应该导出数据的哪一部分。它能够支持同一数据结构上的所有 CRUD 操作。相同的数据可能有更多的 API。因此,序列化器与模型分离是正常的。如果第三方包应该成为新 API 的一部分,那么通常也不需要更改模型中的任何内容。您只需要创建和更新(无需读取或删除)。您确认不需要任何复杂的安全限制。
编辑我的更新后的 JSON 和模型代码的主要功能将与您的更加相似。这没有道理。我会写更多的注释并减少对代码的更改,因为这可能会导致模型和 JSON 的无限增长,以解释为什么您会忽略一些错误。
您的任务的重要信息是:1. 所有多对多关系中“通过”(through)实体的数据都出现在 JSON 中(以前并没有);2. 根实体的 changedAt 时间戳会随着 JSON 中任何嵌套实体的每次更改而更新,包括所有中间实体,甚至包括“通过”实体。
from datetime import datetime
from django.db import models
from django.utils import timezone
class UpdateableModel(models.Model):
    """Abstract base adding ``creupdate``: create-or-update from a nested dict."""

    class Meta:
        abstract = True

    # Names under which the "last modified" timestamp may be stored, tried in
    # order. 'atualizadoEm' is kept for models that still use that name.
    _TIMESTAMP_NAMES = ('updatedAt', 'atualizadoEm')

    @staticmethod
    def _is_date_wrapper(value):
        """Tell whether ``value`` has the shape ``{'date': ...}``."""
        return isinstance(value, dict) and len(value) == 1 and 'date' in value

    @staticmethod
    def _parse_date(value):
        """Turn a ``{'date': 'YYYY-MM-DD HH:MM:SS'}`` wrapper into an aware datetime."""
        date_value = datetime.strptime(value['date'], '%Y-%m-%d %H:%M:%S')
        if timezone.is_naive(date_value):
            date_value = timezone.make_aware(date_value)
        return date_value

    @classmethod
    def _is_up_to_date(cls, instance, data):
        """Tell whether the stored timestamp is at least as recent as the incoming one."""
        for name in cls._TIMESTAMP_NAMES:
            stored = getattr(instance, name, None)
            incoming = data.get(name)
            # Raw payloads wrap the timestamp; parse it so both sides are datetimes.
            if cls._is_date_wrapper(incoming):
                incoming = cls._parse_date(incoming)
            if stored and incoming:
                return stored >= incoming
        # No timestamp on either side: never treat the row as up to date.
        return False

    @classmethod
    def creupdate(cls, data: dict, save_main_instance: bool = True, no_optimization=False):
        """Create or update the row described by ``data`` and return it.

        Nested dicts are treated as foreign-key targets and processed
        recursively; lists are treated as related objects that are attached
        through the attribute's manager after the main instance is handled.

        Args:
            data: Mapping of attribute names to values; must contain the
                model's primary key.
            save_main_instance: When false, the instance is populated but
                ``save()`` is not called on it.
            no_optimization: When true, an unchanged row does not cause an
                early return, so nested data is still visited.

        Raises:
            ValueError: If ``data`` lacks the primary key.
        """
        primary_key_name = cls._meta.pk.name
        if primary_key_name not in data:
            raise ValueError(f'parameter \'data\' must contain \'{primary_key_name}\' key (model\'s primary key).')
        try:
            instance = cls.objects.get(pk=data[primary_key_name])
        except cls.DoesNotExist:
            instance = cls()
            operation = 'created'
        else:
            operation = 'unchanged' if cls._is_up_to_date(instance, data) else 'updated'
            if operation == 'unchanged' and not no_optimization:
                print(f'{operation} instance {primary_key_name} {instance.pk} from {instance._meta.model}')
                return instance
        many_to_many_instances = []
        for name, value in data.items():
            if isinstance(value, list):
                remote_field = getattr(instance, name)
                obj_ids = []
                for remote_data in value:
                    assert isinstance(remote_data, dict) and remote_field.model._meta.pk.name in remote_data
                    obj_ids.append(remote_field.model.creupdate(remote_data, False).pk)
                many_to_many_instances.append((remote_field, obj_ids))
                # Related sets are attached via the manager below; there is
                # no plain value to assign for this key.
                continue
            if cls._is_date_wrapper(value):
                new_value = cls._parse_date(value)
            elif isinstance(value, dict):
                # Any other dict is a nested object behind a relation field.
                foreign_model = cls._meta.get_field(name).related_model
                new_value = foreign_model.creupdate(value)
            else:
                new_value = value
            if operation != 'unchanged':
                setattr(instance, name, new_value)
        if save_main_instance and operation != 'unchanged':
            instance.save()
            print(f'{operation} instance {primary_key_name} {instance.pk} from {instance._meta.model}')
        for remote_field, obj_ids in many_to_many_instances:
            remote_field.add(*obj_ids)
        return instance
Run Code Online (Sandbox Code Playgroud)
笔记:
多对多关系经过优化,可以通过一个请求添加所有对象,以在没有任何更改的情况下最大限度地减少保存计数。(它是针对之前的 JSON 结构编写的,没有任何显式的“直通”数据)
添加了断言而不是 try ... except ValueError: pass (or FieldDoesNotExist)。
“错误永远不应该悄无声息地过去。” Python 之禅- 特别是在开发中。(未知through名称与未知正常属性是类似的错误。)
添加了参数“no_optimization”,并让我的逻辑仅对同一实体使用“modifiedAt”,而不是跳过对相关实体的检查。如果发生错误或者 FieldDoesNotExist 错误地忽略更新,则可以稍后通过使用 no_optimization=True 重放数据来更新数据库的状态。如果所有实体都使用时间戳,那么它甚至是幂等的,并且可以以任何随机顺序处理数据,例如通过重复一段时间内存在一些错误的数据。它对于检查您的优化也很有用,您可以通过使用和不使用优化的处理来获得相同的数据库状态 - 例如,通过比较导出的 sql 转储。我的经验是,如果没有准备好替代方法,那么过于依赖时间戳的优化会在很久以后出现问题。
| 归档时间: |
|
| 查看次数: |
2475 次 |
| 最近记录: |