检测内联、内联压缩和 TOAST 存储

Jam*_*aly 8 postgresql

想象一下,我在 Postgres 13 中有一个像这样的表:

-- Demo table for comparing how a variable-length column (bio) ends up stored:
-- inline, inline-compressed, or out of line in the TOAST table.
CREATE TABLE public.people (
    id integer PRIMARY KEY,
    full_name character varying(255),
    bio text  -- the column whose per-row storage strategy is being investigated
);
Run Code Online (Sandbox Code Playgroud)

然后,我插入一行,其 bio 列包含足够多的字符,因此会被写入 TOAST 表(4000 个随机字节,压缩后应仍大于 2 kB):

# insert into people values (1, 'joe toast', (SELECT array_to_string(ARRAY(SELECT chr((65 + round(random() * 25)) :: integer) FROM generate_series(1,4000)), '')));
INSERT 0 1
Run Code Online (Sandbox Code Playgroud)

然后插入一行,其 bio 经压缩后可以内联存储(3000 个重复字节,压缩后应小于 2 kB):

# insert into people values (2, 'joe compressed', (SELECT array_to_string(ARRAY(SELECT chr(65) FROM generate_series(1,3000)), '')));
INSERT 0 1
Run Code Online (Sandbox Code Playgroud)

最后插入一行,其 bio 只包含几个字符,因此会直接以内联方式存储(10 个重复字节):

# insert into people values (3, 'joe inline', 'aaaaaaaaaa');
INSERT 0 1
Run Code Online (Sandbox Code Playgroud)

有什么方法可以让我检测每个元组中 bio 的存储策略吗?我能否统计内联存储与 TOAST 存储的行各占多少百分比(例如“22% 的元组的 bio 以内联方式存储,78% 存储在 TOAST 中”)?

一个相关的问题:我是否可以知道磁盘上按内联、内联压缩和 TOAST 存储分解的元组的字节数?

上下文:我正在使用一个总计超过 10 亿行的分区表,并且我有兴趣了解特定列内联存储与 TOAST 中存储的频率。

研究

我可以获取每一行的 bio 在磁盘上的大小;其中有一行显示的显然是内联压缩后的大小:

# select id, full_name, pg_column_size(bio) from people order by id;
 id |   full_name    | pg_column_size 
----+----------------+----------------
  1 | joe toast      |           4000
  2 | joe compressed |             44
  3 | joe inline     |             11
(3 rows)
Run Code Online (Sandbox Code Playgroud)

将该大小与未压缩数据的大小进行比较可以告诉我们有关压缩的一些信息,但是它可以告诉我们有关 TOAST 状态的任何信息吗?

# select id, full_name, pg_column_size(bio), length(bio) from people order by id;
 id |   full_name    | pg_column_size | length 
----+----------------+----------------+--------
  1 | joe toast      |           4000 |   4000
  2 | joe compressed |             44 |   3000
  3 | joe inline     |             11 |     10
Run Code Online (Sandbox Code Playgroud)

我可以手动检查 TOAST 表中的一些行:

# select relname from pg_class where oid = (select reltoastrelid from pg_class where relname='people');
    relname     
----------------
 pg_toast_20138

# select chunk_id, sum(length(chunk_data)) from pg_toast.pg_toast_20138 group by chunk_id;
 chunk_id | sum  
----------+------
    20149 | 4000
Run Code Online (Sandbox Code Playgroud)

在一般情况下,以下说法正确吗?

# select id, full_name, pg_column_size(bio), length(bio),
case
  when pg_column_size(bio) < length(bio) then 'inline-compressed'
  when pg_column_size(bio) = length(bio) then 'toast'
  else 
    'inline'
end as storage_strategy
from people order by id;

 id |   full_name    | pg_column_size | length | storage_strategy  
----+----------------+----------------+--------+-------------------
  1 | joe toast      |           4000 |   4000 | toast
  2 | joe compressed |             44 |   3000 | inline-compressed
  3 | joe inline     |             11 |     10 | inline
Run Code Online (Sandbox Code Playgroud)

Sta*_*sev 6

关于方法

  • 该方法目前只适用于小端(Little Endian)字节序。以后需要让它也适用于大端(Big Endian)系统(如果您的系统是大端,请告诉我)
  • out_of_line表示数据存储在TOAST中
  • bytes_on_disk 和 uncompressed_bytes 可能包含元数据(头部)的长度(1 或 4 字节),这一点有待日后完善。
  • 它使用inner join people,如果您想查看不可见的行(例如已删除但尚未清理),请使用left join people
+--+--------------+------------------+----------+-----------+-------------+
|id|full_name     |uncompressed_bytes|compressed|out_of_line|bytes_on_disk|
+--+--------------+------------------+----------+-----------+-------------+
|1 |joe toast     |4004              |false     |true       |4000         |
|2 |joe compressed|3000              |true      |false      |44           |
|3 |joe inline    |10                |false     |false      |11           |
Run Code Online (Sandbox Code Playgroud)

执行

首先启用 pageinspect 扩展,并创建用于从列元数据(varlena 头部)中提取信息的函数:

create extension pageinspect;

-- Tells whether a raw attribute value is an out-of-line (TOAST) pointer.
-- The check is simply "first byte equals 1"; per the accompanying notes this
-- byte layout is only valid on little-endian systems.
create or replace function is_toasted(datum_header bytea) returns bool as $$
declare
  first_byte constant int := get_byte(datum_header, 0);
begin
  return first_byte = 1;
end; $$ language plpgsql;

-- Tells whether the value carries a 1-byte header: the lowest bit of the first
-- byte is set and the value is not a TOAST pointer (which also has that bit set).
create or replace function is_1b_meta(datum_header bytea) returns bool as $$
declare
  low_bit_set constant bool := (get_byte(datum_header, 0) & 1) > 0;
begin
  return not is_toasted(datum_header) and low_bit_set;
end; $$ LANGUAGE plpgsql;

-- Tells whether the attribute value is stored compressed.
--   * TOAST pointer: compressed when the externally stored size plus 4 differs
--     from the original length recorded in the pointer.
--   * 1-byte header: never compressed.
--   * 4-byte header: compressed when bit 1 (value 2) of the first byte is set.
create or replace function is_compressed(datum_header bytea) returns bool as $$ begin
  if is_toasted(datum_header) then
    return bytes_on_disk(datum_header) + 4 <> toasted_original_len(datum_header);
  end if;

  if is_1b_meta(datum_header) then
    return false;
  end if;

  return (get_byte(datum_header, 0) & 2) > 0;
end; $$ LANGUAGE plpgsql;

-- Number of header bytes that meta_bits should dump: 1 for a 1-byte header,
-- 4 for everything else (including TOAST pointers, which are not 1-byte headers
-- according to is_1b_meta).
create or replace function meta_len(datum_header bytea) returns int as $$begin
  return case when is_1b_meta(datum_header) then 1 else 4 end;
end;$$ language plpgsql;

-- Returns the size in bytes recorded in a raw attribute's varlena header
-- (little-endian layout only).
--   * TOAST pointer (first byte = 1): the size of the externally stored data,
--     read from bytes 6..9 of the pointer.
--   * 1-byte header (lowest bit set): the upper 7 bits of the first byte.
--   * 4-byte header: the upper 30 bits of the little-endian 32-bit header word.
-- Returns NULL for NULL input.
create or replace function bytes_on_disk(datum_header bytea) returns int language plpgsql as $$
declare
  -- Read once; the header kind is fully determined by this byte.
  first_byte constant int := get_byte(datum_header, 0);
begin
  if first_byte = 1 then
    -- Since PostgreSQL 14 the two highest bits of this 32-bit word carry the
    -- compression method (e.g. lz4), so only the low 30 bits are the size.
    -- On older versions those bits are always zero, so the mask is a no-op.
    return get_byte(datum_header, 6)
         | (get_byte(datum_header, 7) << 8)
         | (get_byte(datum_header, 8) << 16)
         | ((get_byte(datum_header, 9) & 63) << 24);
  elsif (first_byte & 1) = 1 then
    return first_byte >> 1;
  else
    -- The two lowest bits of the first byte are flags, hence the shift by 2.
    return (first_byte >> 2)
         | (get_byte(datum_header, 1) << 6)
         | (get_byte(datum_header, 2) << 14)
         | (get_byte(datum_header, 3) << 22);
  end if;
end;$$;

-- Original length recorded in a TOAST pointer, read as a little-endian 32-bit
-- value from bytes 2..5. For non-toasted values the function falls back to
-- "first byte >> 1"; the original author marked that branch as no longer needed.
create or replace function toasted_original_len(datum_header bytea) returns integer language plpgsql as $$ begin
  if is_toasted(datum_header) then
    return get_byte(datum_header, 2)
         | (get_byte(datum_header, 3) << 8)
         | (get_byte(datum_header, 4) << 16)
         | (get_byte(datum_header, 5) << 24);
  end if;

  -- Legacy fallback for non-toasted input, kept for compatibility.
  return get_byte(datum_header, 0) >> 1;
end;$$;

-- Debug helper: renders the header bytes (1 or 4 of them, as reported by
-- meta_len) as one bit string, 8 bits per byte, in storage order.
create or replace function meta_bits(datum_header bytea) returns bit as $$
declare
  header_bits bit varying(32) := '';
begin
  for byte_pos in 0 .. meta_len(datum_header) - 1 loop
    header_bits := header_bits || get_byte(datum_header, byte_pos)::bit(8);
  end loop;
  return header_bits;
end; $$ language plpgsql;
Run Code Online (Sandbox Code Playgroud)

现在您可以选取某一列([3] 表示第 3 列),获取其二进制数据并解析头部:

-- Reports, for every visible row of people, how its third attribute (bio) is
-- physically stored. Requires the pageinspect extension and the helper
-- functions is_toasted / toasted_original_len / is_compressed / bytes_on_disk.
with item_bits as (
  select
    -- Physical address of the item itself, built from (page, line pointer).
    -- t_ctid must not be used for this: on an updated tuple it points to the
    -- newer version, so a dead old version would also match the live row and
    -- report the wrong header.
    format('(%s,%s)', page, item.lp)::tid as ctid,
    -- [3] is the attribute position of bio in the tuple.
    (tuple_data_split('people'::regclass, item.t_data, item.t_infomask, item.t_infomask2, item.t_bits))[3] as bits
  from generate_series(0, (select max((ctid::text::point)[0]::int) from people)) as page
  cross join lateral heap_page_items(get_raw_page('people', page)) as item
  -- Unused, dead and redirect line pointers carry no tuple data.
  where item.t_data is not null
)
select p.id,
       p.full_name,
       -- For toasted values this is the length recorded in the TOAST pointer
       -- (the sample output shows 4004 for 4000 bytes of data, i.e. it appears
       -- to include a 4-byte header); otherwise the byte length of the value.
       case when is_toasted(ib.bits) then toasted_original_len(ib.bits)
            else                          octet_length(p.bio)
       end as uncompressed_bytes,
       -- add meta_bits(ib.bits) here to inspect the raw header bits
       is_compressed(ib.bits) as compressed,
       is_toasted(ib.bits) as out_of_line,
       bytes_on_disk(ib.bits) as bytes_on_disk
from item_bits as ib
inner join people as p on p.ctid = ib.ctid
order by p.id;
Run Code Online (Sandbox Code Playgroud)

Postgres 内部

该信息取决于 Postgres 内部实际存储的内容。varlena(可变长度字段)的元数据有 3 种形式(参见:代码、文档、演示文稿):

  • 1 字节。数据本身是内联的,最大 126 字节。从不压缩。
  • 4字节。数据是内联的,可以压缩也可以不压缩。
  • 18 个字节(第一个字节仅设置了 1 位),数据存储在 TOAST 中。可以压缩也可以不压缩。