Der*_*ney 7 postgresql partitioning
我正在考虑将我所有服务器的日志记录设置到一个集中的 postgresql 数据库中。能够按日期或主机删除日志是有意义的,所以我想用partitioning来设置它,但是一个多层分区:host-service inherits service, host-service-yyyymm inherits host-service.
我已经为特定服务(在这种情况下为 php 错误)制定了一个示例方案,并且正在寻找一些 PostgreSQL 专家来批评它的明显性能瓶颈。
-- Master PHP log table. The per-host and per-month tables inherit its columns;
-- it is intended to act as the parent of the hierarchy.
CREATE TABLE php (
    log_id  int  NOT NULL,
    -- text instead of char(5): char(5) blank-pads short names and rejects any
    -- host name longer than five characters (e.g. 'host10'). NOT NULL because a
    -- NULL host would satisfy every child's CHECK (host = '...') constraint.
    host    text NOT NULL,
    logdate date NOT NULL,
    message text
);
-- Host-level "partitions": one child table of php per host. The CHECK
-- constraint states which host's rows the table (and its children) may hold.
CREATE TABLE host1_php (
    CHECK (host = 'host1')
) INHERITS (php);

CREATE TABLE host2_php (
    CHECK (host = 'host2')
) INHERITS (php);
-- Host-specific monthly "partitions": children of each host table, one per
-- calendar month. Each CHECK is a half-open range [first day of month,
-- first day of next month), so adjacent months never overlap.
CREATE TABLE host1_php_2011m12 (
    CHECK (logdate >= DATE '2011-12-01' AND logdate < DATE '2012-01-01')
) INHERITS (host1_php);

CREATE TABLE host1_php_2012m01 (
    CHECK (logdate >= DATE '2012-01-01' AND logdate < DATE '2012-02-01')
) INHERITS (host1_php);

CREATE TABLE host2_php_2011m12 (
    CHECK (logdate >= DATE '2011-12-01' AND logdate < DATE '2012-01-01')
) INHERITS (host2_php);

CREATE TABLE host2_php_2012m01 (
    CHECK (logdate >= DATE '2012-01-01' AND logdate < DATE '2012-02-01')
) INHERITS (host2_php);
-- One logdate index per monthly leaf table; each index is created
-- explicitly on its own leaf.
CREATE INDEX host1_php_2011m12_logdate
    ON host1_php_2011m12 (logdate);
CREATE INDEX host1_php_2012m01_logdate
    ON host1_php_2012m01 (logdate);
CREATE INDEX host2_php_2011m12_logdate
    ON host2_php_2011m12 (logdate);
CREATE INDEX host2_php_2012m01_logdate
    ON host2_php_2012m01 (logdate);
Run Code Online (Sandbox Code Playgroud)
我还将添加诸如 apache 访问/错误之类的服务。
我想我需要先用触发器把插入转发到对应的 host_service 表(利用 host 的 CHECK 约束),然后每个 host_service 表再用触发器把插入转发到对应的 host_service_yyyymm 表。
对于这种触发器/分区方案,我可以期待什么?
聊天中讨论的一些额外信息:
对于这种触发器/分区方案,我可以期待什么?
上下文切换意味着使用触发器总是比简单的 insert 慢。下面的脚本可用于量化由此产生的影响——它还演示了如何使用触发器自动创建分区,并比较了两种方式的性能。
请注意,我没有包含任何索引,也没有考虑任何 update 语句。
begin;
set role dba;
create role stack;
grant stack to dba;
create schema authorization stack;
set role stack;
--
-- Everything above builds a fresh role plus a schema owned by it, used as a
-- scratch area; the whole script runs inside one transaction.
--
set client_min_messages = warning; -- hides the many "NOTICE: merging column "xyz" with inherited definition" messages
--
-- Plain, unpartitioned table: the baseline for comparing 'insert' timings
-- against the partitioned hierarchy.
create table phpheap (
  log_id  serial not null,
  host    text   not null,
  logdate date   not null,
  message text   not null
);
-- Parent table of the partitioned hierarchy.
create table php (
  log_id  serial not null,
  host    text   not null,
  logdate date   not null,
  message text   not null
);
--
-- Row-level BEFORE INSERT trigger function for the per-host tables
-- (php_<host>). It redirects each row into the monthly leaf table
-- php_<host>_<YYYY>m<MM>, creating that leaf on first use, and returns NULL
-- so that no row is stored in the per-host table itself.
--
-- search_path is pinned with a function-level SET clause rather than a SET
-- statement in the body: the setting is restored when the function returns,
-- so it does not leak into the caller's session.
create function php_host_insert() returns trigger language plpgsql security definer
set search_path = stack as $$
declare
  -- Table names are derived from row data, so they are passed through
  -- quote_ident() before being spliced into dynamic SQL. lower() reproduces
  -- the case folding PostgreSQL applies to unquoted identifiers, so tables
  -- created earlier from unquoted names are still found.
  host_tbl   constant text := quote_ident(lower('php_'||new.host));
  -- The literal "m" is double-quoted in the template so to_char() can never
  -- interpret it as (part of) a format pattern.
  month_tbl  constant text := quote_ident(lower('php_'||new.host||'_'||to_char(new.logdate, 'YYYY"m"MM')));
  -- Bounds of the row's calendar month as ISO strings: [month_lo, month_hi).
  month_lo   constant text := to_char(date_trunc('month', new.logdate::timestamp), 'YYYY-MM-DD');
  month_hi   constant text := to_char(date_trunc('month', new.logdate::timestamp) + interval '1 month', 'YYYY-MM-DD');
  insert_sql constant text := 'insert into '||month_tbl||'(log_id, host, logdate, message) values($1, $2, $3, $4)';
begin
  execute insert_sql using new.log_id, new.host, new.logdate, new.message;
  return null;
exception when undefined_table then
  -- First row for this host/month: create the leaf, then retry the insert.
  -- The month constraint is a plain half-open range on logdate (instead of a
  -- to_char() comparison) so the planner can use it for constraint exclusion.
  execute 'create table '||month_tbl
        ||'(log_id int not null, host text not null check(host='||quote_literal(new.host)
        ||'), logdate date not null check(logdate >= date '||quote_literal(month_lo)
        ||' and logdate < date '||quote_literal(month_hi)
        ||'), message text not null) inherits ('||host_tbl||')';
  execute insert_sql using new.log_id, new.host, new.logdate, new.message;
  return null;
end;$$;
--
-- Row-level BEFORE INSERT trigger function for the parent table php.
-- It redirects each row into the per-host table php_<host>; on first use of a
-- host it creates that table together with its own redirecting trigger
-- (php_host_insert). Returns NULL so that no row is stored in php itself.
--
-- search_path is pinned with a function-level SET clause rather than a SET
-- statement in the body: the setting is restored when the function returns,
-- so it does not leak into the caller's session.
create function php_insert() returns trigger language plpgsql security definer
set search_path = stack as $$
declare
  -- Object names are derived from row data, so they are passed through
  -- quote_ident() before being spliced into dynamic SQL. lower() reproduces
  -- the case folding PostgreSQL applies to unquoted identifiers, so tables
  -- created earlier from unquoted names are still found.
  host_tbl   constant text := quote_ident(lower('php_'||new.host));
  host_trig  constant text := quote_ident(lower('trig_insert_php_'||new.host));
  insert_sql constant text := 'insert into '||host_tbl||'(log_id, host, logdate, message) values($1, $2, $3, $4)';
begin
  execute insert_sql using new.log_id, new.host, new.logdate, new.message;
  return null;
exception when undefined_table then
  -- First row for this host: create the per-host table and its trigger,
  -- then retry the insert.
  execute 'create table '||host_tbl
        ||'(log_id int not null, host text not null check(host='||quote_literal(new.host)
        ||'), logdate date not null, message text not null) inherits(php)';
  execute 'create trigger '||host_trig||' before insert on '||host_tbl
        ||' for each row execute procedure php_host_insert()';
  execute insert_sql using new.log_id, new.host, new.logdate, new.message;
  return null;
end;$$;
--
-- Pass every row inserted into the parent table through php_insert().
create trigger trig_insert_php
  before insert on php
  for each row
  execute procedure php_insert();
--
\timing on
-- Each statement below inserts 100000 rows for 'host1': 1000 rows for each of
-- 100 distinct dates (integer division of the series by 1000), running from
-- current_date to 99 days after it.
--
-- Baseline: plain table, no triggers.
insert into phpheap(host, logdate, message)
select 'host1',
       current_date-(generate_series(-99999, 0, 1)/1000)::integer,
       repeat('hello',20);
--******** recorded output
--INSERT 0 100000
--Time: 1102.140 ms
--
-- Partitioned hierarchy, first run.
insert into php(host, logdate, message)
select 'host1',
       current_date-(generate_series(-99999, 0, 1)/1000)::integer,
       repeat('hello',20);
--******** recorded output
--INSERT 0 0
--Time: 35615.498 ms
--
-- Partitioned hierarchy, second run of the same statement.
insert into php(host, logdate, message)
select 'host1',
       current_date-(generate_series(-99999, 0, 1)/1000)::integer,
       repeat('hello',20);
--******** recorded output
--INSERT 0 0
--Time: 34074.579 ms
\timing off
--
--******** Now we replace the trigger functions with the 'normal' kind that don't auto-create partitions
--
-- Replacement for php_host_insert() without partition auto-creation.
-- Row-level BEFORE INSERT trigger function for the per-host tables: redirects
-- each row into php_<host>_<YYYY>m<MM> and returns NULL so the row is not
-- stored in the per-host table. There is no exception handler, so an insert
-- for a month whose leaf table does not exist fails with an error.
--
-- search_path is pinned with a function-level SET clause rather than a SET
-- statement in the body, so the setting does not leak into the caller's session.
create or replace function php_host_insert() returns trigger language plpgsql security definer
set search_path = stack as $$
declare
  -- The table name is derived from row data, so it is passed through
  -- quote_ident(); lower() reproduces the case folding of unquoted
  -- identifiers. The literal "m" is double-quoted in the template so
  -- to_char() can never interpret it as a format pattern.
  month_tbl constant text := quote_ident(lower('php_'||new.host||'_'||to_char(new.logdate, 'YYYY"m"MM')));
begin
  execute 'insert into '||month_tbl||'(log_id, host, logdate, message) values($1, $2, $3, $4)'
    using new.log_id, new.host, new.logdate, new.message;
  return null;
end;$$;
--
-- Replacement for php_insert() without partition auto-creation.
-- Row-level BEFORE INSERT trigger function for the parent table php: redirects
-- each row into php_<host> and returns NULL so the row is not stored in php.
-- There is no exception handler, so an insert for a host whose table does not
-- exist fails with an error.
--
-- search_path is pinned with a function-level SET clause rather than a SET
-- statement in the body, so the setting does not leak into the caller's session.
create or replace function php_insert() returns trigger language plpgsql security definer
set search_path = stack as $$
declare
  -- The table name is derived from row data, so it is passed through
  -- quote_ident(); lower() reproduces the case folding of unquoted identifiers.
  host_tbl constant text := quote_ident(lower('php_'||new.host));
begin
  execute 'insert into '||host_tbl||'(log_id, host, logdate, message) values($1, $2, $3, $4)'
    using new.log_id, new.host, new.logdate, new.message;
  return null;
end;$$;
--
\timing on
-- Same 100000-row load into the partitioned hierarchy, now routed by the
-- trigger functions that do not auto-create partitions.
insert into php(host, logdate, message)
select 'host1',
       current_date-(generate_series(-99999, 0, 1)/1000)::integer,
       repeat('hello',20);
--******** recorded output
--INSERT 0 0
--Time: 28457.146 ms
\timing off
--
-- Undo everything done since "begin".
rollback;
Run Code Online (Sandbox Code Playgroud)