Соответствие SQL-запросов записи и PromQL-запросов чтения#

Kintsugi использует Pods сервиса collector для выполнения SQL-запросов на получение метрик. Далее для визуального представления метрик ui обращается к kmetrics с PromQL-запросом, возвращающим необходимые серии данных.

В соответствии с данным поведением устанавливается следующее соответствие между SQL-запросом сбора метрик Pods сервиса collector и PromQL-запросом получения значений из timeseries-хранилища.

Сопоставление запросов метрик#

SQL

Название метрики

PromQL

Доступные метки для хранения

Query 1

pg_settings_autovacuum_freeze_max_age

PromQL 1

name datname server

Query 1

pg_settings_autovacuum_max_workers

PromQL 2

name datname server

Query 2

pg_autovacuum_limit

PromQL 4

name datname server

Query 3

pg_vacuum_for_each_table_vacuum_count

PromQL 5

name datname relname schemaname server

Query 3

pg_vacuum_for_each_table_autovacuum_count

PromQL 6

name datname relname schemaname server

Query 4

pg_vacuum_regular

PromQL 7

name datname server

Query 5

pg_vacuum_user

PromQL 8

name datname server

Query 6

pg_vacuum_wraparound

PromQL 9

name datname server

Query 7

pg_stat_bgwriter_buffers_backend

PromQL 10

name datname server

Query 7

pg_stat_bgwriter_buffers_clean

PromQL 11

name datname server

Query 7

pg_stat_bgwriter_buffers_checkpoint

PromQL 12

name datname server

Query 7

pg_stat_bgwriter_checkpoints_req

PromQL 13

name datname server

Query 7

pg_stat_bgwriter_checkpoints_timed

PromQL 14

name datname server

Query 7

pg_stat_bgwriter_checkpoint_write_time

PromQL 15

name datname server

Query 7

pg_stat_bgwriter_checkpoint_sync_time

PromQL 16

name datname server

Query 8

pg_stat_database_temp_bytes

PromQL 17

name datname server

Query 8

pg_stat_database_hit_ratio

PromQL 18

name datname server

Query 8

pg_stat_database_deadlocks

PromQL 19

name datname server

Query 9

pg_dirtypages_dirty

PromQL 20

name datname queryid rolname server

Query 10

pg_stat_archiver_archived_count

PromQL 21

name datname server

Query 10

pg_stat_archiver_failed_count

PromQL 22

name datname server

Query 11

pg_statio_user_tables_heap_hit_ratio

PromQL 23

name datname relname server

Query 11

pg_statio_user_tables_idx_hit_ratio

PromQL 24

name datname relname server

Query 12

pg_stat_statements_total_time_sec

PromQL 25

name datname queryid rolname server

Query 13

pg_stat_statements_rows

PromQL 26

name datname queryid rolname server

Query 14

pg_status_available

PromQL 27

name server

Query 15

pg_tablespace_size

PromQL 28

name spcname server

Query 16

pg_activity_count

PromQL 29

name state server

Query 8

pg_stat_database_checksum_failures

PromQL 30

name datname server

Query 8

pg_stat_database_checksum_conflicts

PromQL 31

name datname server

Query 8

pg_stat_database_tup_returned

PromQL 32

name datname server

Query 8

pg_stat_database_tup_fetched

PromQL 33

name datname server

Query 8

pg_stat_database_tup_inserted

PromQL 34

name datname server

Query 8

pg_stat_database_tup_updated

PromQL 35

name datname server

Query 8

pg_stat_database_tup_deleted

PromQL 36

name datname server

Query 17

pg_stat_replication_total_lag_bytes

PromQL 37

name client_addr server

Query 8

pg_stat_database_xact_commit

PromQL 38

name datname server

Query 8

pg_stat_database_xact_rollback

PromQL 39

name datname server

Место хранения#

Kintsugi хранит все метрики в timeseries-хранилище (системы VictoriaMetrics или TimescaleDB).

В случае применения TimescaleDB сервис collector использует маршрут /write сервиса tsdbmon для сохранения метрик, в ином случае, при работе с VictoriaMetrics collector использует маршрут /import сервиса prometheus.

SQL-запросы получения значений метрик#

SQL Query 1#

SELECT (
        SELECT setting::int
            FROM    pg_settings
            WHERE   name = 'autovacuum_freeze_max_age') autovacuum_freeze_max_age,
        (
        SELECT setting::int
            FROM    pg_settings
            WHERE   name = 'autovacuum_max_workers') autovacuum_max_workers;

SQL Query 2#

SELECT datname, 2147483647 - age(datfrozenxid) AS limit FROM pg_database ORDER BY 2 ASC LIMIT 25;

SQL Query 3#

SELECT schemaname, relname, vacuum_count, autovacuum_count FROM pg_stat_user_tables;

SQL Query 4#

SELECT datname, COUNT(query) AS regular FROM pg_stat_activity WHERE query ~* '^autovacuum*(?:(?!.*\bwraparound\b))' GROUP BY datname ORDER BY 2 DESC LIMIT 25;

SQL Query 5#

SELECT datname, COUNT(query) AS user FROM pg_stat_activity WHERE query ~* '^vacuum' GROUP BY datname ORDER BY 2 DESC LIMIT 25;

SQL Query 6#

SELECT datname, COUNT(query) AS wraparound FROM pg_stat_activity WHERE query ~* '^autovacuum.*to prevent wraparound' GROUP BY datname ORDER BY 2 DESC LIMIT 25;

SQL Query 7#

SELECT checkpoints_timed, checkpoints_req, checkpoint_write_time, checkpoint_sync_time, buffers_checkpoint, buffers_clean, buffers_backend FROM pg_stat_bgwriter;

SQL Query 8#

SELECT datname, deadlocks, round(100*blks_hit/(blks_hit + blks_read),2) AS hit_ratio, temp_bytes, conflicts, xact_commit, xact_rollback, tup_returned, tup_fetched, tup_inserted, tup_updated, tup_deleted FROM pg_stat_database WHERE datname NOT IN ('template0', 'template1') ORDER BY deadlocks DESC, hit_ratio DESC, temp_bytes DESC, xact_commit+xact_rollback DESC, tup_returned+tup_fetched+tup_inserted+tup_updated+tup_deleted DESC LIMIT 25;

Запрос для PostgreSQL 12+ версии

SELECT datname, deadlocks, round(100*blks_hit/(blks_hit + blks_read),2) AS hit_ratio, temp_bytes, checksum_failures, conflicts, xact_commit, xact_rollback, tup_returned, tup_fetched, tup_inserted, tup_updated, tup_deleted FROM pg_stat_database WHERE datname NOT IN ('template0', 'template1') ORDER BY deadlocks DESC, hit_ratio DESC, temp_bytes DESC, checksum_failures DESC, conflicts DESC, xact_commit+xact_rollback DESC, tup_returned+tup_fetched+tup_inserted+tup_updated+tup_deleted DESC LIMIT 25;

SQL Query 9#

SELECT t2.rolname,t3.datname,queryid::text,(t1.shared_blks_dirtied+t1.local_blks_dirtied) AS dirty FROM pg_stat_statements t1 JOIN pg_roles t2
                 ON t1.userid=t2.oid JOIN pg_database t3 ON t1.dbid=t3.oid ORDER by dirty DESC LIMIT 5;

SQL Query 10#

SELECT archived_count, failed_count FROM pg_stat_archiver;

SQL Query 11#

SELECT relname, COALESCE(ROUND(100*heap_blks_hit/NULLIF((heap_blks_hit + heap_blks_read),0),2),0) AS heap_hit_ratio, COALESCE(ROUND(100*idx_blks_hit/NULLIF((idx_blks_hit + idx_blks_read),0),2),0) AS idx_hit_ratio FROM pg_statio_user_tables ORDER BY heap_hit_ratio DESC, idx_hit_ratio DESC LIMIT 25;

SQL Query 12#

SELECT t2.rolname,t3.datname,total_time/1000 AS total_time_sec,left(query,32) AS query,queryid
          FROM pg_stat_statements t1 JOIN pg_roles t2 ON t1.userid=t2.oid JOIN pg_database t3 ON
            t1.dbid = t3.oid ORDER by total_time DESC LIMIT 5;

Запрос для PostgreSQL 13+ версии:

SELECT t2.rolname,t3.datname,(total_plan_time + total_exec_time)/1000 AS total_time_sec,left(query,32) AS
query,queryid FROM pg_stat_statements t1 JOIN pg_roles t2 ON t1.userid=t2.oid JOIN pg_database t3 ON
t1.dbid = t3.oid ORDER by total_exec_time DESC LIMIT 5;

SQL Query 13#

SELECT t2.rolname,t3.datname,rows,left(query,32) AS query,queryid::text FROM pg_stat_statements t1 JOIN pg_roles t2 ON t1.userid=t2.oid JOIN pg_database t3 ON t1.dbid=t3.oid ORDER by rows DESC LIMIT 5;

SQL Query 14#

SELECT 1 as available;

SQL Query 15#

SELECT spcname, pg_tablespace_size(spcname) AS size FROM pg_tablespace ORDER BY size DESC LIMIT 25;

SQL Query 16#

SELECT state,count(*) FROM pg_stat_activity GROUP BY state;

SQL Query 17#

SELECT client_addr, pg_current_wal_lsn() - replay_lsn AS total_lag_bytes FROM pg_stat_replication;

PromQL-запросы получения метрик#

PromQL-запросы используются сервисом kmetrics для извлечения значений метрик из БД timeseries-хранилища.

PromQL Query 1#

pg_settings_autovacuum_freeze_max_age{$LABELS}

PromQL Query 2#

pg_settings_autovacuum_max_workers{$LABELS}

PromQL Query 3#

pg_settings_block_size{$LABELS}

PromQL Query 4#

"pg_autovacuum_limit_to_limit{$LABELS}

PromQL Query 5#

floor(increase(pg_vacuum_for_each_table_vacuum_count{$LABELS}[30s]))

PromQL Query 6#

floor(increase(pg_vacuum_for_each_table_autovacuum_count{$LABELS}[30s]))

PromQL Query 7#

pg_vacuum_regular{$LABELS}

PromQL Query 8#

pg_vacuum_user{$LABELS}

PromQL Query 9#

pg_vacuum_wraparound{$LABELS}

PromQL Query 10#

rate(pg_stat_bgwriter_buffers_backend{$LABELS}[30s])*pg_settings_block_size

PromQL Query 11#

rate(pg_stat_bgwriter_buffers_clean{$LABELS}[30s])*pg_settings_block_size

PromQL Query 12#

rate(pg_stat_bgwriter_buffers_checkpoint{$LABELS}[30s])*pg_settings_block_size

PromQL Query 13#

ceil(rate(pg_stat_bgwriter_checkpoints_req{$LABELS}[1m]))

PromQL Query 14#

ceil(rate(pg_stat_bgwriter_checkpoints_timed{$LABELS}[1m]))

PromQL Query 15#

rate(pg_stat_bgwriter_checkpoint_write_time{$LABELS}[1m])

PromQL Query 16#

rate(pg_stat_bgwriter_checkpoint_sync_time{$LABELS}[1m])

PromQL Query 17#

sum(rate(pg_stat_database_temp_bytes{$LABELS}[30s]))

PromQL Query 18#

pg_stat_database_blks_hit_ratio{$LABELS}

PromQL Query 19#

irate(pg_stat_database_deadlocks{$LABELS}[30s])

PromQL Query 20#

rate(pg_dirtypages_dirty{$LABELS}[30s])*8192

PromQL Query 21#

ceil(rate(pg_stat_archiver_archived_count{$LABELS}[30s]))

PromQL Query 22#

ceil(rate(pg_stat_archiver_failed_count{$LABELS}[30s]))

PromQL Query 23#

pg_statio_user_tables_heap_hit_ratio{$LABELS}

PromQL Query 24#

pg_statio_user_tables_idx_hit_ratio{$LABELS}

PromQL Query 25#

pg_stat_statements_total_time_sec{datname!~\"template.*\",$LABELS}

PromQL Query 26#

pg_stat_statements_rows{datname!~\"template.*\",$LABELS}

PromQL Query 27#

pg_status_available{$LABELS}

PromQL Query 28#

pg_tablespace_size{$LABELS}

PromQL Query 29#

pg_activity_count{$LABELS}

PromQL Query 30#

Примечание:

Только для PostgreSQL 12+.

irate(pg_stat_database_checksum_failures{$LABELS}[30s])

PromQL Query 31#

irate(pg_stat_database_conflicts{$LABELS}[30s])

PromQL Query 32#

irate(pg_stat_database_tup_returned{$LABELS}[30s])

PromQL Query 33#

irate(pg_stat_database_tup_fetched{$LABELS}[30s])

PromQL Query 34#

irate(pg_stat_database_tup_inserted{$LABELS}[30s])

PromQL Query 35#

irate(pg_stat_database_tup_updated{$LABELS}[30s])

PromQL Query 36#

irate(pg_stat_database_tup_deleted{$LABELS}[30s])

PromQL Query 37#

pg_stat_replication_total_lag_bytes{$LABELS}

PromQL Query 38#

irate(pg_stat_database_xact_commit[30s])

PromQL Query 39#

irate(pg_stat_database_xact_rollback[30s])