Gav*_*vin 6 postgresql postgresql-9.4 postgresql-9.5
我一直试图找到一种可靠且经济的方法来计算 7 天的行数,到目前为止,这是我能想到的最好方法:
架构
CREATE TABLE IF NOT EXISTS "data" (
"id" SERIAL,
"hash" CHARACTER VARYING(255) NOT NULL,
"source" CHARACTER VARYING(255) NOT NULL,
"isFiltered" BOOLEAN NOT NULL,
"campaignId" INTEGER NOT NULL,
"data" JSON NOT NULL,
"meta" JSON NOT NULL,
"modifiedReason" CHARACTER VARYING(255) NULL DEFAULT NULL,
"createdAt" TIMESTAMP WITH TIME ZONE NOT NULL,
"updatedAt" TIMESTAMP WITH TIME ZONE NOT NULL,
PRIMARY KEY ("id")
);
CREATE TABLE IF NOT EXISTS "campaign" (
"id" SERIAL,
"userId" INTEGER NOT NULL,
"userCompanyId" INTEGER NOT NULL,
"type" CHARACTER VARYING(255) NULL DEFAULT NULL,
"title" CHARACTER VARYING(255) NULL DEFAULT NULL,
"description" CHARACTER VARYING(255) NULL DEFAULT NULL,
"sources" CHARACTER VARYING(255)[] NULL DEFAULT NULL,
"configuration" JSON NOT NULL,
"active" BOOLEAN NULL DEFAULT true,
"excludedUserNames" CHARACTER VARYING(255)[] NULL DEFAULT NULL,
"limit" INTEGER NULL DEFAULT 0,
"startAt" TIMESTAMP WITH TIME ZONE NOT NULL,
"endAt" TIMESTAMP WITH TIME ZONE NOT NULL,
"removedAt" TIMESTAMP WITH TIME ZONE NULL DEFAULT NULL,
"createdAt" TIMESTAMP WITH TIME ZONE NOT NULL,
"updatedAt" TIMESTAMP WITH TIME ZONE NOT NULL,
PRIMARY KEY ("id")
);
Run Code Online (Sandbox Code Playgroud)
要测试的数据
INSERT INTO "campaign" ("id", "userId", "userCompanyId", "type", "title", "description", "sources", "configuration", "active", "excludedUserNames", "limit", "startAt", "endAt", "removedAt", "createdAt", "updatedAt") VALUES
(1, 1, 1, E'test', E'Test', E'Test', E'{a}', E'{"query":{"accounts":[],"hashtags":["GavinTest1234","XYZ"]}}', E'true', NULL, 0, E'2016-01-25 15:06:00+00', E'2016-01-27 23:59:59+00', NULL, E'2016-01-25 15:06:27.474+00', E'2016-01-26 16:48:19.693+00');
INSERT INTO "data" ("id", "hash", "source", "isFiltered", "campaignId", "data", "meta", "modifiedReason", "createdAt", "updatedAt") VALUES
(1, E'dHdpdHRlci02OTE5MTQ3ODcwNjA1ODQ0NDg=', E'a', E'false', 1, E'{}', E'{"profile":{"url":"xxx","image":"xxx","username":"xxx","name":"xxx","createdAt":"2015-10-05T10:30:11.000Z"},"posts":{"total":32,"perDay":0},"friends":0,"favourites":0,"createdAt":"2016-01-26T09:25:15.000Z","matchedOn":{"hashtags":["GavinTest123"],"accounts":[]}}', NULL, E'2016-01-26 09:25:15.539+00', E'2016-01-26 09:25:15.539+00'),
(2, E'dHdpdHRlci02OTE5MjAwNDAwNTcyNzAyNzI=', E'a', E'false', 1, E'{}', E'{"profile":{"url":"xxx","image":"xxx","username":"xxx","name":"xxx","createdAt":"2015-10-05T10:30:11.000Z"},"posts":{"total":34,"perDay":0},"friends":0,"favourites":0,"createdAt":"2016-01-26T09:46:07.000Z","matchedOn":{"hashtags":["GavinTest123"],"accounts":[]}}', NULL, E'2016-01-26 09:46:07.942+00', E'2016-01-26 09:46:07.942+00'),
(3, E'dHdpdHRlci02OTE5NjI4NjM5OTc1NTg3ODQ=', E'a', E'false', 1, E'{}', E'{"profile":{"url":"xxx","image":"xxx","username":"xxx","name":"xxx","createdAt":"2015-10-05T10:30:11.000Z"},"posts":{"total":36,"perDay":0},"friends":0,"favourites":0,"createdAt":"2016-01-26T12:36:17.000Z","matchedOn":{"hashtags":["GavinTest1234"],"accounts":[]}}', NULL, E'2016-01-26 12:36:17.724+00', E'2016-01-26 12:36:17.724+00');
Run Code Online (Sandbox Code Playgroud)
查询
SELECT q."date",
q."hashtag",
Max(q."count") AS "count"
FROM (
SELECT To_char(d::date, 'DD/MM/YYYY') AS "date",
0 AS "count",
c_h.hashtag::text AS "hashtag"
FROM generate_series('2016-01-20', '2016-01-26', '1 day'::interval) d
INNER JOIN "campaign" c
ON (
c."id" = 1)
INNER JOIN Json_array_elements(c.configuration->'query'->'hashtags') c_h(hashtag)
ON true
UNION ALL
SELECT To_char(cdi."createdAt"::date, 'DD/MM/YYYY') AS "date",
Count(cdi_h.hashtag::text) AS "count",
cdi_h.hashtag::text AS "hashtag"
FROM "data" cdi
INNER JOIN "campaign" c
ON (
c."id" = cdi."campaignId")
INNER JOIN json_array_elements(c.configuration->'query'->'hashtags') c_h(hashtag)
ON true
INNER JOIN json_array_elements(cdi.meta->'matchedOn'->'hashtags') cdi_h(hashtag)
ON (
c_h.hashtag::text = cdi_h.hashtag::text)
WHERE c."id" = 1
AND (
cdi."createdAt"::date >= '2016-01-20'
AND cdi."createdAt"::date <= '2016-01-26')
GROUP BY to_char(cdi."createdAt":: date, 'DD/MM/YYYY'),
cdi_h.hashtag::text
ORDER BY "date" ASC,
"hashtag" ASC,
"count" ASC ) q
GROUP BY q."date",
q."hashtag";
Run Code Online (Sandbox Code Playgroud)
结果
因此,正如您从查询中看到的那样,我首先返回一个结果集,其中包含每个日期和主题标签的行。
然后,我想查询我的数据表并返回每天有多少匹配项的计数,覆盖默认值。
查询有效,而且成本并不太高,但是它太丑了,我相信有更好的方法让我看不见。
除了这个,还有没有人有更好的建议,或者我的替代方法是只返回我们在 7 天内拥有的结果(分组和计数)并在服务器端做一些处理。
- 编辑 -
抱歉,SQL Fiddle 只有 9.3 并且在我尝试为大家创建一个 fiddle 时崩溃了。
您的查询可以在多个方面得到简化:
SELECT to_char(day, 'DD/MM/YYYY') AS date
, hashtag
, count(d.*)::int AS count
FROM (
campaign c
CROSS JOIN json_array_elements_text(c.configuration#>'{query,hashtags}') ch(hashtag)
CROSS JOIN (SELECT g::date AS day
FROM generate_series(timestamp '2016-01-20', '2016-01-26', interval '1 day') g) day
)
NATURAL LEFT JOIN (
SELECT "createdAt"::date AS day, dh.hashtag
FROM data, json_array_elements_text(meta#>'{matchedOn,hashtags}') dh(hashtag)
WHERE "campaignId" = 1
AND "createdAt" >= '2016-01-20'
AND "createdAt" < '2016-01-27'
) d
WHERE c.id = 1
GROUP BY day, hashtag
ORDER BY day, hashtag, count;
Run Code Online (Sandbox Code Playgroud)
由于多种原因,这应该更快。尤其是它可以使用多列索引data("campaignId", "createdAt")
- 除非您拥有它,否则您应该创建它。
LEFT JOIN
是您需要的核心功能。然后对列进行计数,仅非空值计数...
顺便说一句,这是 a 有用的极少数情况之一NATURAL JOIN
。不过你不需要它。这只是一个小语法快捷方式。
而是使用timestamp
输入generate_series()
:
归档时间: |
|
查看次数: |
109 次 |
最近记录: |