我们在 BigQuery 表的 STRING 字段中加载了 JSON blob。我需要在该表上创建一个视图(使用标准 SQL),把其中的数组字段提取为 BigQuery 的数组/重复字段(类型为 "RECORD",且它本身还包含一个重复字段)。
这是一个示例记录(json_blob):
{"order_id":"123456","customer_id":"2abcd", "items":[{"line":"1","ref_ids":["66b56e60","9e7ca2b7"],"sku":"1111","amount":40 },{"line":"2","ref_ids":["7777h0","8888j0"],"sku":"2222","amount":10 }]}
Run Code Online (Sandbox Code Playgroud)
我希望最终得到一个具有以下布局的视图:
[
{
"name": "order_id",
"type": "STRING",
"mode": "NULLABLE"
},
{
"mode": "NULLABLE",
"name": "customer_id",
"type": "STRING"
},
{
"mode": "REPEATED",
"name": "items",
"type": "RECORD",
"fields": [
{
"mode": "NULLABLE",
"name": "line",
"type": "STRING"
},
{
"mode": "REPEATED",
"name": "ref_ids",
"type": "STRING"
},
{
"mode": "NULLABLE",
"name": "sku",
"type": "STRING"
},
{
"mode": "NULLABLE",
"name": "amount",
"type": "INTEGER"
}
]
}
]
Run Code Online (Sandbox Code Playgroud)
JSON_EXTRACT(json_blob, '$.items') 可以提取 items 部分,但是如何将其转换为类型为 "RECORD" 的 BigQuery 数组,从而可以像普通的 BigQuery 数组/重复 STRUCT 一样处理?
感谢任何帮助.
Mik*_*ant 10
一个更简单粗暴的版本——我认为它更易于阅读,需要时也更方便修改/调整
#standardSQL
-- Builds (order_id, customer_id, items ARRAY<STRUCT<...>>) from a JSON string
-- using only string functions (no JSON_EXTRACT_ARRAY).
--
-- Caveats of this brute-force approach:
--   * The items array is cut on the literal text '},{', so it breaks when an
--     item contains a nested object or when that text occurs inside a value.
--   * ref_ids is produced by stripping every '[', ']' and '"' and splitting on
--     ',', so values containing those characters are mangled.
WITH `yourTable` AS (
    SELECT '{"order_id":"123456","customer_id":"2abcd", "items":[{"line":"1","ref_ids":["66b56e60","9e7ca2b7"],"sku":"1111","amount":40 },{"line":"2","ref_ids":["7777h0","8888j0"],"sku":"2222","amount":10 }]}' AS json_blob
)
SELECT
    JSON_EXTRACT_SCALAR(json_blob, '$.order_id') AS order_id,
    JSON_EXTRACT_SCALAR(json_blob, '$.customer_id') AS customer_id,
    ARRAY(
        SELECT
            STRUCT(
                JSON_EXTRACT_SCALAR(item_json, '$.line') AS line,
                SPLIT(REGEXP_REPLACE(JSON_EXTRACT(item_json, '$.ref_ids'), r'[\[\]\"]', '')) AS ref_ids,
                JSON_EXTRACT_SCALAR(item_json, '$.sku') AS sku,
                -- JSON_EXTRACT_SCALAR always yields STRING; cast so the column
                -- is INT64 (INTEGER), not STRING.
                CAST(JSON_EXTRACT_SCALAR(item_json, '$.amount') AS INT64) AS amount
            )
        FROM (
            SELECT
                item_pos,
                -- Strip the leading '[{' / trailing '}]' left over from the
                -- split, then re-wrap each fragment in braces so it is a
                -- standalone JSON object again.
                CONCAT('{', REGEXP_REPLACE(item_fragment, r'^\[{|}\]$', ''), '}') AS item_json
            FROM UNNEST(SPLIT(JSON_EXTRACT(json_blob, '$.items'), '},{')) AS item_fragment WITH OFFSET AS item_pos
        )
        -- UNNEST does not guarantee element order; restore the JSON order.
        ORDER BY item_pos
    ) AS items
FROM `yourTable`;
Run Code Online (Sandbox Code Playgroud)
截至 2020 年 5 月 1 日,已添加JSON_EXTRACT_ARRAY函数,可用于从 json 检索数组。
#standardSQL
-- Builds (order_id, customer_id, items ARRAY<STRUCT<...>>) from a JSON string
-- using JSON_EXTRACT_ARRAY to walk both the items array and each item's
-- nested ref_ids array.
WITH `yourTable` AS (
    SELECT '{"order_id":"123456","customer_id":"2abcd", "items":[{"line":"1","ref_ids":["66b56e60","9e7ca2b7"],"sku":"1111","amount":40 },{"line":"2","ref_ids":["7777h0","8888j0"],"sku":"2222","amount":10 }]}' AS json_blob
)
SELECT
    JSON_EXTRACT_SCALAR(json_blob, '$.order_id') AS order_id,
    JSON_EXTRACT_SCALAR(json_blob, '$.customer_id') AS customer_id,
    ARRAY(
        SELECT
            STRUCT(
                JSON_EXTRACT_SCALAR(item_json, '$.line') AS line,
                ARRAY(
                    -- Each element is still JSON text (e.g. "\"66b56e60\"");
                    -- extracting '$' as a scalar removes the quotes.
                    SELECT JSON_EXTRACT_SCALAR(ref_json, '$')
                    FROM UNNEST(JSON_EXTRACT_ARRAY(item_json, '$.ref_ids')) AS ref_json WITH OFFSET AS ref_pos
                    ORDER BY ref_pos
                ) AS ref_ids,
                JSON_EXTRACT_SCALAR(item_json, '$.sku') AS sku,
                -- JSON_EXTRACT_SCALAR always yields STRING; cast so the column
                -- is INT64 (INTEGER), not STRING.
                CAST(JSON_EXTRACT_SCALAR(item_json, '$.amount') AS INT64) AS amount
            )
        FROM UNNEST(JSON_EXTRACT_ARRAY(json_blob, '$.items')) AS item_json WITH OFFSET AS item_pos
        -- UNNEST does not guarantee element order; restore the JSON order.
        ORDER BY item_pos
    ) AS items
FROM `yourTable`;
Run Code Online (Sandbox Code Playgroud)
返回:
如果只想获取电话号码的类型(type),查询如下:
#standardSQL
-- Returns one row per entry of the JSON array `$.phoneNumbers`, exposing only
-- that entry's `type` value.
WITH `yourTable` AS (
    SELECT '{ "firstName": "John", "lastName" : "doe", "age" : 26, "address" : { "streetAddress": "naist street", "city" : "Nara", "postalCode" : "630-0192" }, "phoneNumbers": [ { "type" : "iPhone", "number": "0123-4567-8888" }, { "type" : "home", "number": "0123-4567-8910" } ]}' AS json_blob
)
SELECT
    JSON_EXTRACT_SCALAR(phone_json, '$.type') AS type
FROM `yourTable`
-- Correlated cross join: expands each row's phoneNumbers array into rows.
CROSS JOIN UNNEST(JSON_EXTRACT_ARRAY(json_blob, '$.phoneNumbers')) AS phone_json
Run Code Online (Sandbox Code Playgroud)
返回:
在撰写本文时,除非对JSON数组的元素个数施加硬性上限,否则无法在BigQuery中仅使用SQL函数来完成此操作。请参阅相关的问题跟踪器条目。您的选择是:
这是使用JavaScript UDF的方法:
#standardSQL
-- JavaScript UDF that parses the whole JSON document and hands the resulting
-- object back to BigQuery, which maps it onto the declared STRUCT by field name.
-- NOTE(review): order_id is declared INT64 although the sample JSON carries it
-- as a string ("123456") — confirm this is the intended column type.
CREATE TEMP FUNCTION JsonToItems(input STRING)
RETURNS STRUCT<
    order_id INT64,
    customer_id STRING,
    items ARRAY<STRUCT<
        line STRING,
        ref_ids ARRAY<STRING>,
        sku STRING,
        amount INT64
    >>
>
LANGUAGE js AS """
  // The parsed object's keys line up with the STRUCT field names above.
  var order = JSON.parse(input);
  return order;
""";

-- Sample input: a single order with two line items.
WITH sample_orders AS (
    SELECT '{"order_id":"123456","customer_id":"2abcd", "items":[{"line":"1","ref_ids":["66b56e60","9e7ca2b7"],"sku":"1111","amount":40 },{"line":"2","ref_ids":["7777h0","8888j0"],"sku":"2222","amount":10 }]}' AS order_json
)
SELECT
    -- Expand the returned STRUCT into top-level columns.
    JsonToItems(order_json).*
FROM sample_orders;
Run Code Online (Sandbox Code Playgroud)
如果您确实想尝试不使用JavaScript、完全基于SQL的方法,那么在上述功能请求得到解决之前,这里有一个取巧(hack)的做法,它要求数组元素的数量不超过10:
#standardSQL
-- Collects the first ten entries of the JSON array `$.ref_ids` into an
-- ARRAY<STRING>. Positions that do not exist in the JSON come back as NULL
-- from JSON_EXTRACT_SCALAR and are dropped by IGNORE NULLS. Entries beyond
-- index 9 are silently ignored.
CREATE TEMP FUNCTION JsonExtractRefIds(json STRING) AS (
  (
    -- ARRAY_AGG gives no ordering guarantee on its own, so aggregate by the
    -- probe position to keep the ids in their JSON order.
    SELECT ARRAY_AGG(ref_id IGNORE NULLS ORDER BY ref_pos)
    FROM UNNEST([
      JSON_EXTRACT_SCALAR(json, '$.ref_ids[0]'),
      JSON_EXTRACT_SCALAR(json, '$.ref_ids[1]'),
      JSON_EXTRACT_SCALAR(json, '$.ref_ids[2]'),
      JSON_EXTRACT_SCALAR(json, '$.ref_ids[3]'),
      JSON_EXTRACT_SCALAR(json, '$.ref_ids[4]'),
      JSON_EXTRACT_SCALAR(json, '$.ref_ids[5]'),
      JSON_EXTRACT_SCALAR(json, '$.ref_ids[6]'),
      JSON_EXTRACT_SCALAR(json, '$.ref_ids[7]'),
      JSON_EXTRACT_SCALAR(json, '$.ref_ids[8]'),
      JSON_EXTRACT_SCALAR(json, '$.ref_ids[9]')
    ]) AS ref_id WITH OFFSET AS ref_pos
  )
);
-- Converts the JSON text of a single order item into a typed STRUCT.
-- A NULL input (e.g. an items[] index that does not exist) yields a NULL
-- struct rather than a struct of NULL fields.
CREATE TEMP FUNCTION JsonToItem(json STRING)
RETURNS STRUCT<line STRING, ref_ids ARRAY<STRING>, sku STRING, amount INT64>
AS (
  IF(
    json IS NOT NULL,
    STRUCT(
      JSON_EXTRACT_SCALAR(json, '$.line') AS line,
      JsonExtractRefIds(json) AS ref_ids,
      JSON_EXTRACT_SCALAR(json, '$.sku') AS sku,
      CAST(JSON_EXTRACT_SCALAR(json, '$.amount') AS INT64) AS amount
    ),
    NULL
  )
);
-- Converts a whole order JSON document into
-- STRUCT<order_id INT64, customer_id STRING, items ARRAY<STRUCT<...>>>.
-- Only the first ten entries of `$.items` are probed; indexes that do not
-- exist produce NULL structs, which IGNORE NULLS drops.
CREATE TEMP FUNCTION JsonToItems(json STRING) AS (
  (
    SELECT AS STRUCT
      CAST(JSON_EXTRACT_SCALAR(json, '$.order_id') AS INT64) AS order_id,
      JSON_EXTRACT_SCALAR(json, '$.customer_id') AS customer_id,
      (
        -- ARRAY_AGG gives no ordering guarantee on its own, so aggregate by
        -- the probe position to keep the items in their JSON order.
        SELECT ARRAY_AGG(item IGNORE NULLS ORDER BY item_pos)
        FROM UNNEST([
          JsonToItem(JSON_EXTRACT(json, '$.items[0]')),
          JsonToItem(JSON_EXTRACT(json, '$.items[1]')),
          JsonToItem(JSON_EXTRACT(json, '$.items[2]')),
          JsonToItem(JSON_EXTRACT(json, '$.items[3]')),
          JsonToItem(JSON_EXTRACT(json, '$.items[4]')),
          JsonToItem(JSON_EXTRACT(json, '$.items[5]')),
          JsonToItem(JSON_EXTRACT(json, '$.items[6]')),
          JsonToItem(JSON_EXTRACT(json, '$.items[7]')),
          JsonToItem(JSON_EXTRACT(json, '$.items[8]')),
          JsonToItem(JSON_EXTRACT(json, '$.items[9]'))
        ]) AS item WITH OFFSET AS item_pos
      ) AS items
  )
);
-- Sample input: a single order with two line items.
WITH sample_orders AS (
    SELECT '{"order_id":"123456","customer_id":"2abcd", "items":[{"line":"1","ref_ids":["66b56e60","9e7ca2b7"],"sku":"1111","amount":40 },{"line":"2","ref_ids":["7777h0","8888j0"],"sku":"2222","amount":10 }]}' AS order_json
)
SELECT
    -- Expand the STRUCT returned by JsonToItems into top-level columns.
    JsonToItems(order_json).*
FROM sample_orders;
Run Code Online (Sandbox Code Playgroud)
| 归档时间: |
|
| 查看次数: |
6779 次 |
| 最近记录: |