Han*_*non 5 sql-server azure-sql-database cross-apply json sql-server-2019
我正在尝试优化从 REST API 获得的值的提取,该 API 在数组中返回 json 值。
这是一个最小、完整且可验证的示例,它准确地反映了我在做什么。
USE tempdb;
DROP TABLE IF EXISTS dbo.json_test;
CREATE TABLE dbo.json_test
(
json_test_id int NOT NULL
IDENTITY(1,1)
, some_uniqueidentifier uniqueidentifier NULL
, some_varchar varchar(100) NULL
, the_json nvarchar(max) NULL
);
INSERT INTO dbo.json_test (some_uniqueidentifier, some_varchar, the_json)
SELECT
some_uniqueidentifier = NEWID()
, some_varchar = CONVERT(varchar(100), CRYPT_GEN_RANDOM(64), 1)
, the_json = (
SELECT st.*
FROM sys.tables st
CROSS JOIN sys.tables st2
WHERE st.object_id = t.object_id FOR JSON AUTO
)
FROM sys.tables t;
;WITH src AS
(
SELECT jt.some_uniqueidentifier
, jt.some_varchar
, top_array.[key]
, top_array.[value]
FROM dbo.json_test jt
CROSS APPLY OPENJSON(jt.the_json, N'lax $') top_array
),
src2 AS
(
SELECT src.some_uniqueidentifier
, src.some_varchar
, src.[key]
, src.[value]
, inner_key = inner_array.[key]
, inner_value = inner_array.[value]
FROM src
CROSS APPLY OPENJSON(src.[value], N'lax $') inner_array
)
SELECT src2.some_uniqueidentifier
, src2.some_varchar
, src2.[key]
, [name] = MAX(CASE WHEN src2.[inner_key] = 'name' THEN src2.[inner_value] ELSE NULL END)
, [object_id] = MAX(CASE WHEN src2.[inner_key] = 'object_id' THEN src2.[inner_value] ELSE NULL END)
, [principal_id] = MAX(CASE WHEN src2.[inner_key] = 'principal_id' THEN src2.[inner_value] ELSE NULL END)
, [schema_id] = MAX(CASE WHEN src2.[inner_key] = 'schema_id' THEN src2.[inner_value] ELSE NULL END)
, [parent_object_id] = MAX(CASE WHEN src2.[inner_key] = 'parent_object_id' THEN src2.[inner_value] ELSE NULL END)
, [type] = MAX(CASE WHEN src2.[inner_key] = 'type' THEN src2.[inner_value] ELSE NULL END)
, [type_desc] = MAX(CASE WHEN src2.[inner_key] = 'type_desc' THEN src2.[inner_value] ELSE NULL END)
, [create_date] = MAX(CASE WHEN src2.[inner_key] = 'create_date' THEN src2.[inner_value] ELSE NULL END)
, [modify_date] = MAX(CASE WHEN src2.[inner_key] = 'modify_date' THEN src2.[inner_value] ELSE NULL END)
, [is_ms_shipped] = MAX(CASE WHEN src2.[inner_key] = 'is_ms_shipped' THEN src2.[inner_value] ELSE NULL END)
, [is_published] = MAX(CASE WHEN src2.[inner_key] = 'is_published' THEN src2.[inner_value] ELSE NULL END)
, [is_schema_published] = MAX(CASE WHEN src2.[inner_key] = 'is_schema_published' THEN src2.[inner_value] ELSE NULL END)
, [lob_data_space_id] = MAX(CASE WHEN src2.[inner_key] = 'lob_data_space_id' THEN src2.[inner_value] ELSE NULL END)
, [filestream_data_space_id] = MAX(CASE WHEN src2.[inner_key] = 'filestream_data_space_id' THEN src2.[inner_value] ELSE NULL END)
, [max_column_id_used] = MAX(CASE WHEN src2.[inner_key] = 'max_column_id_used' THEN src2.[inner_value] ELSE NULL END)
, [lock_on_bulk_load] = MAX(CASE WHEN src2.[inner_key] = 'lock_on_bulk_load' THEN src2.[inner_value] ELSE NULL END)
, [uses_ansi_nulls] = MAX(CASE WHEN src2.[inner_key] = 'uses_ansi_nulls' THEN src2.[inner_value] ELSE NULL END)
, [is_replicated] = MAX(CASE WHEN src2.[inner_key] = 'is_replicated' THEN src2.[inner_value] ELSE NULL END)
, [has_replication_filter] = MAX(CASE WHEN src2.[inner_key] = 'has_replication_filter' THEN src2.[inner_value] ELSE NULL END)
, [is_merge_published] = MAX(CASE WHEN src2.[inner_key] = 'is_merge_published' THEN src2.[inner_value] ELSE NULL END)
, [is_sync_tran_subscribed] = MAX(CASE WHEN src2.[inner_key] = 'is_sync_tran_subscribed' THEN src2.[inner_value] ELSE NULL END)
, [has_unchecked_assembly_data] = MAX(CASE WHEN src2.[inner_key] = 'has_unchecked_assembly_data' THEN src2.[inner_value] ELSE NULL END)
, [text_in_row_limit] = MAX(CASE WHEN src2.[inner_key] = 'text_in_row_limit' THEN src2.[inner_value] ELSE NULL END)
, [large_value_types_out_of_row] = MAX(CASE WHEN src2.[inner_key] = 'large_value_types_out_of_row' THEN src2.[inner_value] ELSE NULL END)
, [is_tracked_by_cdc] = MAX(CASE WHEN src2.[inner_key] = 'is_tracked_by_cdc' THEN src2.[inner_value] ELSE NULL END)
, [lock_escalation] = MAX(CASE WHEN src2.[inner_key] = 'lock_escalation' THEN src2.[inner_value] ELSE NULL END)
, [lock_escalation_desc] = MAX(CASE WHEN src2.[inner_key] = 'lock_escalation_desc' THEN src2.[inner_value] ELSE NULL END)
, [is_filetable] = MAX(CASE WHEN src2.[inner_key] = 'is_filetable' THEN src2.[inner_value] ELSE NULL END)
, [is_memory_optimized] = MAX(CASE WHEN src2.[inner_key] = 'is_memory_optimized' THEN src2.[inner_value] ELSE NULL END)
, [durability] = MAX(CASE WHEN src2.[inner_key] = 'durability' THEN src2.[inner_value] ELSE NULL END)
, [durability_desc] = MAX(CASE WHEN src2.[inner_key] = 'durability_desc' THEN src2.[inner_value] ELSE NULL END)
, [temporal_type] = MAX(CASE WHEN src2.[inner_key] = 'temporal_type' THEN src2.[inner_value] ELSE NULL END)
, [temporal_type_desc] = MAX(CASE WHEN src2.[inner_key] = 'temporal_type_desc' THEN src2.[inner_value] ELSE NULL END)
, [history_table_id] = MAX(CASE WHEN src2.[inner_key] = 'history_table_id' THEN src2.[inner_value] ELSE NULL END)
, [is_remote_data_archive_enabled] = MAX(CASE WHEN src2.[inner_key] = 'is_remote_data_archive_enabled' THEN src2.[inner_value] ELSE NULL END)
, [is_external] = MAX(CASE WHEN src2.[inner_key] = 'is_external' THEN src2.[inner_value] ELSE NULL END)
, [history_retention_period] = MAX(CASE WHEN src2.[inner_key] = 'history_retention_period' THEN src2.[inner_value] ELSE NULL END)
, [history_retention_period_unit] = MAX(CASE WHEN src2.[inner_key] = 'history_retention_period_unit' THEN src2.[inner_value] ELSE NULL END)
, [history_retention_period_unit_desc] = MAX(CASE WHEN src2.[inner_key] = 'history_retention_period_unit_desc' THEN src2.[inner_value] ELSE NULL END)
, [is_node] = MAX(CASE WHEN src2.[inner_key] = 'is_node' THEN src2.[inner_value] ELSE NULL END)
, [is_edge] = MAX(CASE WHEN src2.[inner_key] = 'is_edge' THEN src2.[inner_value] ELSE NULL END)
FROM src2
GROUP BY src2.some_uniqueidentifier
, src2.some_varchar
, src2.[key]
ORDER BY src2.some_uniqueidentifier
, src2.some_varchar
, src2.[key];
Run Code Online (Sandbox Code Playgroud)
无论输入表中包含多少行,查询计划都使用几个嵌套循环连接。据推测,这是使用CROSS APPLY运算符的人工制品。为了您的乐趣,我已经设置了一个DB Fiddle。
是否有更有效的方法将数据从 json 格式转换为“真实”的列集?
我使用本地 SQL Server 2019 实例创建了上面的代码,但目标将是 Azure SQL 数据库,因此所有最新和最好的选项都可用。
您应该使用WITH子句直接从 json 数组中提取值,如下所示:
SELECT src.*
, tt.*
FROM json_test src
CROSS APPLY OPENJSON(src.the_json , 'lax $')
WITH (
[name] sysname '$.name'
, [object_id] int '$.object_id'
, [principal_id] int '$.principal_id'
, [schema_id] smallint '$.schema_id'
, [parent_object_id] int '$.parent_object_id'
, [type] char(2) '$.type'
, [type_desc] nvarchar(60) '$.type_desc'
, [create_date] datetime '$.create_date'
, [modify_date] datetime '$.modify_date'
, [is_ms_shipped] bit '$.is_ms_shipped'
, [is_published] bit '$.is_published'
, [is_schema_published] bit '$.is_schema_published'
, [lob_data_space_id] int '$.lob_data_space_id'
, [filestream_data_space_id] int '$.filestream_data_space_id'
, [max_column_id_used] int '$.max_column_id_used'
, [lock_on_bulk_load] bit '$.lock_on_bulk_load'
, [uses_ansi_nulls] bit '$.uses_ansi_nulls'
, [is_replicated] bit '$.is_replicated'
, [has_replication_filter] bit '$.has_replication_filter'
, [is_merge_published] bit '$.is_merge_published'
, [is_sync_tran_subscribed] bit '$.is_sync_tran_subscribed'
, [has_unchecked_assembly_data] bit '$.has_unchecked_assembly_data'
, [text_in_row_limit] int '$.text_in_row_limit'
, [large_value_types_out_of_row] bit '$.large_value_types_out_of_row'
, [is_tracked_by_cdc] bit '$.is_tracked_by_cdc'
, [lock_escalation] tinyint '$.lock_escalation'
, [lock_escalation_desc] nvarchar(60) '$.lock_escalation_desc'
, [is_filetable] bit '$.is_filetable'
, [is_memory_optimized] bit '$.is_memory_optimized'
, [durability] tinyint '$.durability'
, [durability_desc] nvarchar(60) '$.durability_desc'
, [temporal_type] tinyint '$.temporal_type'
, [temporal_type_desc] nvarchar(60) '$.temporal_type_desc'
, [history_table_id] int '$.history_table_id'
, [is_remote_data_archive_enabled] bit '$.is_remote_data_archive_enabled'
, [is_external] bit '$.is_external'
, [history_retention_period] int '$.history_retention_period'
, [history_retention_period_unit] int '$.history_retention_period_unit'
, [history_retention_period_unit_desc] nvarchar(10) '$.history_retention_period_unit_desc'
, [is_node] bit '$.is_node'
, [is_edge] bit '$.is_edge'
) AS tt
Run Code Online (Sandbox Code Playgroud)
这个 Fiddle比较了两种方法的输出,证明结果是相同的。
输出中唯一缺少的列是表值函数key生成的值,该OPENJSON值仅在未WITH指定子句时返回。我的变体的计划只有一个嵌套循环,而且似乎效率更高。
OPENJSON CROSS APPLY 的 Microsoft Docs 示例