PostgreSQL物化视图Materialized View
·
PostgreSQL 物化视图(Materialized View)完整指南
一、什么是物化视图
物化视图:把视图的查询结果真实存储到磁盘,查询时直接读取结果,不需要重新执行底层查询。
普通视图 vs 物化视图
普通视图 (VIEW)
↓
仅保存 SQL 定义,每次查询都重新执行底层 SQL
↓
实时性强,但慢
物化视图 (MATERIALIZED VIEW)
↓
保存 SQL 定义 + 实际数据
↓
查询快(直接读取),需要手动/定时刷新数据
| 特性 | 普通视图 | 物化视图 |
|---|---|---|
| 存储数据 | ❌ 不存 | ✅ 存储 |
| 查询性能 | 慢(每次重算) | 快(读已有数据) |
| 数据实时性 | ✅ 实时 | ❌ 取决于刷新频率 |
| 占用空间 | 几乎 0 | 与数据量正相关 |
| 支持索引 | ❌(依赖底层表) | ✅ 可建独立索引 |
| 适用场景 | 简单封装、权限隔离 | 报表、复杂聚合、汇总 |
二、基础语法
创建
CREATE MATERIALIZED VIEW [IF NOT EXISTS] 视图名
AS <SELECT 查询>
[WITH [NO] DATA];
-- WITH DATA(默认):创建时立即填充数据
-- WITH NO DATA:仅创建结构,后续手动刷新
刷新
-- 全量刷新(默认,会锁表)
REFRESH MATERIALIZED VIEW 视图名;
-- 并发刷新(不锁表,但要求有唯一索引)
REFRESH MATERIALIZED VIEW CONCURRENTLY 视图名;
-- 仅刷新结构,清空数据
REFRESH MATERIALIZED VIEW 视图名 WITH NO DATA;
删除与修改
DROP MATERIALIZED VIEW [IF EXISTS] 视图名 [CASCADE];
-- 改名
ALTER MATERIALIZED VIEW 旧名 RENAME TO 新名;
-- 修改属主
ALTER MATERIALIZED VIEW 视图名 OWNER TO 新用户;
三、完整入门示例
准备表
CREATE TABLE sales (
id BIGSERIAL PRIMARY KEY,
product_id INT,
customer_id INT,
amount NUMERIC(10,2),
sale_date DATE,
region VARCHAR(20)
);
-- 插入 100 万条测试数据
INSERT INTO sales (product_id, customer_id, amount, sale_date, region)
SELECT
(random()*1000)::INT,
(random()*10000)::INT,
(random()*1000)::NUMERIC(10,2),
NOW() - (random() * interval '730 days'),
(ARRAY['华北','华东','华南','西南','西北'])[1+(random()*4)::INT]
FROM generate_series(1, 1000000);
创建物化视图
-- 按日期 + 地区聚合的销售汇总
CREATE MATERIALIZED VIEW mv_daily_region_sales AS
SELECT
sale_date,
region,
COUNT(*) AS order_count,
SUM(amount) AS total_amount,
AVG(amount) AS avg_amount,
MIN(amount) AS min_amount,
MAX(amount) AS max_amount
FROM sales
GROUP BY sale_date, region;
-- 查询变得非常快
SELECT * FROM mv_daily_region_sales
WHERE sale_date = '2026-04-01';
性能对比
-- 直接查源表(每次都聚合 100 万行)
EXPLAIN ANALYZE
SELECT region, SUM(amount)
FROM sales
WHERE sale_date = '2026-04-01'
GROUP BY region;
-- 耗时: ~80 ms
-- 查物化视图(直接读已聚合的结果)
EXPLAIN ANALYZE
SELECT region, total_amount
FROM mv_daily_region_sales
WHERE sale_date = '2026-04-01';
-- 耗时: ~0.5 ms,提升 150 倍
四、刷新策略
1. 全量刷新(默认)
REFRESH MATERIALIZED VIEW mv_daily_region_sales;
特点:
- ✅ 简单可靠
- ❌ 加 ACCESS EXCLUSIVE 锁,刷新期间无法查询
- ❌ 数据量大时耗时长
适用:夜间任务、查询量小的时段。
2. 并发刷新(CONCURRENTLY)⭐
-- 必须先有唯一索引
CREATE UNIQUE INDEX idx_mv_daily_region
ON mv_daily_region_sales (sale_date, region);
-- 并发刷新
REFRESH MATERIALIZED VIEW CONCURRENTLY mv_daily_region_sales;
特点:
- ✅ 刷新期间不阻塞查询
- ✅ 用户体验好
- ❌ 必须有 UNIQUE 索引
- ❌ 速度比 REFRESH 慢一些(要做 DIFF)
- ❌ 临时占用约 2 倍空间
适用:生产环境,需要 7x24 可查询。
3. 增量刷新(PG 原生不支持,需扩展或自己实现)
PostgreSQL 官方版本不支持原生的增量刷新。常见替代方案:
方案 A:用扩展 pg_ivm(推荐)
-- 安装扩展(PG 13+)
CREATE EXTENSION pg_ivm;
-- 创建增量维护物化视图
SELECT create_immv('mv_daily_sales', $$
SELECT sale_date, region, SUM(amount) AS total
FROM sales
GROUP BY sale_date, region
$$);
-- 之后源表的 INSERT/UPDATE/DELETE 会自动维护视图
INSERT INTO sales (...) VALUES (...);
SELECT * FROM mv_daily_sales; -- 数据已自动更新
方案 B:触发器手动实现
-- 创建汇总表(手动维护)
CREATE TABLE summary_sales (
sale_date DATE,
region VARCHAR(20),
total NUMERIC,
PRIMARY KEY (sale_date, region)
);
-- INSERT 触发器
CREATE OR REPLACE FUNCTION trg_summary_insert()
RETURNS TRIGGER AS $$
BEGIN
INSERT INTO summary_sales (sale_date, region, total)
VALUES (NEW.sale_date, NEW.region, NEW.amount)
ON CONFLICT (sale_date, region)
DO UPDATE SET total = summary_sales.total + NEW.amount;
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
CREATE TRIGGER tr_sales_insert
AFTER INSERT ON sales
FOR EACH ROW EXECUTE FUNCTION trg_summary_insert();
五、定时刷新方案
方案 1:操作系统 crontab
# 每天凌晨 2:00 刷新
0 2 * * * psql -U postgres -d mydb -c \
"REFRESH MATERIALIZED VIEW CONCURRENTLY mv_daily_region_sales;"
# 每 10 分钟刷新
*/10 * * * * psql -U postgres -d mydb -c \
"REFRESH MATERIALIZED VIEW CONCURRENTLY mv_realtime_dashboard;"
方案 2:pg_cron 扩展(数据库内调度)
-- 安装
CREATE EXTENSION pg_cron;
-- 每小时刷新一次
SELECT cron.schedule(
'refresh-sales-mv',
'0 * * * *',
$$REFRESH MATERIALIZED VIEW CONCURRENTLY mv_daily_region_sales$$
);
-- 查看任务
SELECT * FROM cron.job;
-- 删除任务
SELECT cron.unschedule('refresh-sales-mv');
方案 3:触发器异步通知 + 后台进程
-- 源表变更通知队列
CREATE TABLE mv_refresh_queue (
id BIGSERIAL PRIMARY KEY,
mv_name TEXT,
queued_at TIMESTAMP DEFAULT NOW(),
processed_at TIMESTAMP
);
-- 触发器在源表写入时入队
CREATE OR REPLACE FUNCTION enqueue_mv_refresh()
RETURNS TRIGGER AS $$
BEGIN
INSERT INTO mv_refresh_queue (mv_name) VALUES ('mv_daily_region_sales')
ON CONFLICT DO NOTHING;
RETURN NULL;
END;
$$ LANGUAGE plpgsql;
CREATE TRIGGER tr_sales_change
AFTER INSERT OR UPDATE OR DELETE ON sales
FOR EACH STATEMENT EXECUTE FUNCTION enqueue_mv_refresh();
-- 后台进程消费队列(应用层)
六、索引与查询优化
1. 物化视图可独立建索引(核心优势)
-- 在物化视图上建索引
CREATE INDEX idx_mv_date ON mv_daily_region_sales (sale_date);
CREATE INDEX idx_mv_region ON mv_daily_region_sales (region);
CREATE INDEX idx_mv_amount ON mv_daily_region_sales (total_amount DESC);
-- 还可以建 GIN/GiST/部分索引
CREATE INDEX idx_mv_recent ON mv_daily_region_sales (sale_date)
WHERE sale_date >= '2026-01-01';
2. 物化视图也可被查询计划器优化
EXPLAIN SELECT * FROM mv_daily_region_sales
WHERE sale_date >= '2026-04-01' AND region = '华东';
-- 会用上 idx_mv_date 和 idx_mv_region
七、查询元数据
-- 查询数据库中所有物化视图
SELECT schemaname, matviewname, matviewowner,
ispopulated, definition
FROM pg_matviews;
-- 查询大小
SELECT
matviewname,
pg_size_pretty(pg_relation_size(schemaname||'.'||matviewname)) AS size
FROM pg_matviews;
-- 查询最后刷新时间(需要自己记录,PG 不自动维护)
-- 创建元数据表
CREATE TABLE mv_metadata (
mv_name TEXT PRIMARY KEY,
last_refresh TIMESTAMP
);
-- 包装刷新过程
CREATE OR REPLACE PROCEDURE refresh_mv(mv TEXT)
LANGUAGE plpgsql AS $$
BEGIN
EXECUTE 'REFRESH MATERIALIZED VIEW CONCURRENTLY ' || mv;
INSERT INTO mv_metadata (mv_name, last_refresh)
VALUES (mv, NOW())
ON CONFLICT (mv_name)
DO UPDATE SET last_refresh = NOW();
END;
$$;
-- 使用
CALL refresh_mv('mv_daily_region_sales');
八、实战场景
场景 1:BI 报表加速
-- 销售月报
CREATE MATERIALIZED VIEW mv_monthly_report AS
SELECT
date_trunc('month', sale_date) AS month,
region,
product_category,
COUNT(DISTINCT customer_id) AS customer_count,
COUNT(*) AS order_count,
SUM(amount) AS revenue,
AVG(amount) AS avg_order_value
FROM sales s
JOIN products p ON s.product_id = p.id
GROUP BY 1, 2, 3;
CREATE UNIQUE INDEX idx_mv_monthly
ON mv_monthly_report (month, region, product_category);
-- 每天凌晨 1 点刷新
SELECT cron.schedule('mv_monthly', '0 1 * * *',
$$REFRESH MATERIALIZED VIEW CONCURRENTLY mv_monthly_report$$);
场景 2:实时 Dashboard(5 分钟级)
CREATE MATERIALIZED VIEW mv_realtime_dashboard AS
SELECT
date_trunc('hour', created_at) AS hour,
COUNT(*) AS orders,
SUM(amount) AS revenue,
COUNT(DISTINCT user_id) AS active_users
FROM orders
WHERE created_at >= NOW() - INTERVAL '24 hours'
GROUP BY 1;
CREATE UNIQUE INDEX idx_mv_realtime ON mv_realtime_dashboard (hour);
-- 每 5 分钟刷新
SELECT cron.schedule('mv_realtime', '*/5 * * * *',
$$REFRESH MATERIALIZED VIEW CONCURRENTLY mv_realtime_dashboard$$);
场景 3:缓存复杂 JOIN 结果
-- 6 张表 JOIN 的复杂查询,每次执行 10 秒
CREATE MATERIALIZED VIEW mv_customer_360 AS
SELECT
c.id AS customer_id,
c.name,
c.email,
a.address,
SUM(o.amount) AS total_spent,
COUNT(o.id) AS order_count,
MAX(o.created_at) AS last_order_date,
AVG(r.score) AS avg_rating,
array_agg(DISTINCT p.category) AS preferred_categories
FROM customers c
LEFT JOIN addresses a ON a.customer_id = c.id
LEFT JOIN orders o ON o.customer_id = c.id
LEFT JOIN reviews r ON r.customer_id = c.id
LEFT JOIN order_items oi ON oi.order_id = o.id
LEFT JOIN products p ON p.id = oi.product_id
GROUP BY c.id, c.name, c.email, a.address;
CREATE UNIQUE INDEX idx_mv_c360 ON mv_customer_360 (customer_id);
CREATE INDEX idx_mv_c360_spent ON mv_customer_360 (total_spent DESC);
场景 4:缓存外部数据查询
-- 通过 postgres_fdw 查询远程数据库
CREATE MATERIALIZED VIEW mv_remote_user_stats AS
SELECT user_id, COUNT(*) AS activities
FROM remote_db.user_activities -- 远程表,跨网络慢
WHERE created_at >= NOW() - INTERVAL '30 days'
GROUP BY user_id;
-- 本地查询变快,且降低远程库压力
九、何时该用,何时不该用
✅ 推荐使用
- 复杂聚合:COUNT/SUM/AVG over millions of rows
- 多表 JOIN:复杂关联查询(5+ 表)
- 报表/BI 系统:可接受一定延迟
- 重复查询:相同的复杂查询被高频执行
- 跨库查询:FDW 查询外部数据源
❌ 不推荐使用
- OLTP 实时查询:要求秒级一致性
- 数据变更极频繁:刷新成本 > 查询收益
- 简单查询:底层查询本身就快
- 空间敏感场景:会增加存储成本
十、监控与运维
监控刷新性能
-- 创建监控表
CREATE TABLE mv_refresh_log (
id BIGSERIAL PRIMARY KEY,
mv_name TEXT,
started_at TIMESTAMP,
finished_at TIMESTAMP,
duration_sec NUMERIC,
error_msg TEXT
);
-- 包装刷新过程
CREATE OR REPLACE PROCEDURE safe_refresh_mv(mv TEXT)
LANGUAGE plpgsql AS $$
DECLARE
v_start TIMESTAMP := NOW();
v_err TEXT;
BEGIN
BEGIN
EXECUTE format('REFRESH MATERIALIZED VIEW CONCURRENTLY %I', mv);
INSERT INTO mv_refresh_log (mv_name, started_at, finished_at, duration_sec)
VALUES (mv, v_start, NOW(), EXTRACT(EPOCH FROM (NOW() - v_start)));
EXCEPTION WHEN OTHERS THEN
GET STACKED DIAGNOSTICS v_err = MESSAGE_TEXT;
INSERT INTO mv_refresh_log (mv_name, started_at, finished_at, error_msg)
VALUES (mv, v_start, NOW(), v_err);
RAISE;
END;
END;
$$;
-- 查看最近刷新情况
SELECT mv_name, MAX(finished_at) AS last_refresh,
AVG(duration_sec) AS avg_duration
FROM mv_refresh_log
WHERE finished_at >= NOW() - INTERVAL '7 days'
GROUP BY mv_name;
找出值得物化的查询
-- 启用 pg_stat_statements 后查询
SELECT
LEFT(query, 100) AS sql,
calls,
ROUND(mean_exec_time::numeric, 2) AS avg_ms,
ROUND(total_exec_time::numeric, 2) AS total_ms
FROM pg_stat_statements
WHERE mean_exec_time > 1000 -- 平均 > 1 秒
AND calls > 100 -- 调用 > 100 次
ORDER BY total_exec_time DESC
LIMIT 20;
-- 这些 SQL 是物化视图的最佳候选
十一、常见陷阱
1. 忘记创建唯一索引
-- ❌ 报错: cannot refresh materialized view "..." concurrently
REFRESH MATERIALIZED VIEW CONCURRENTLY mv_name;
-- ✅ 必须先建唯一索引
CREATE UNIQUE INDEX idx_unique ON mv_name (col1, col2);
2. WITH NO DATA 后忘记刷新
CREATE MATERIALIZED VIEW mv ... WITH NO DATA;
SELECT * FROM mv;
-- ❌ 报错: materialized view "mv" has not been populated
-- ✅ 先刷新
REFRESH MATERIALIZED VIEW mv;
3. 并发刷新性能下降
-- CONCURRENTLY 内部做 DIFF,数据量大时慢
-- 如果允许短暂锁表,业务低峰期可考虑普通 REFRESH
REFRESH MATERIALIZED VIEW mv_name; -- 快但锁表
REFRESH MATERIALIZED VIEW CONCURRENTLY mv_name; -- 慢但不锁
4. 物化视图嵌套刷新顺序
-- 如果 mv_b 依赖 mv_a
-- 必须先刷 mv_a 再刷 mv_b
REFRESH MATERIALIZED VIEW CONCURRENTLY mv_a;
REFRESH MATERIALIZED VIEW CONCURRENTLY mv_b;
5. 长事务阻塞刷新
-- 监控阻塞刷新的事务
SELECT pid, usename, query_start, state, query
FROM pg_stat_activity
WHERE wait_event_type = 'Lock'
OR state = 'idle in transaction';
十二、物化视图 vs 其他方案对比
| 方案 | 实时性 | 查询性能 | 复杂度 | 维护成本 |
|---|---|---|---|---|
| 普通视图 | ✅ 实时 | ❌ 慢 | ⭐ | ⭐ |
| 物化视图 | ❌ 延迟 | ✅ 快 | ⭐⭐ | ⭐⭐ |
| 触发器维护汇总表 | ✅ 准实时 | ✅ 快 | ⭐⭐⭐⭐ | ⭐⭐⭐⭐ |
| 应用层缓存(Redis) | ❌ 延迟 | ✅ 极快 | ⭐⭐⭐ | ⭐⭐⭐ |
| 数据仓库(ClickHouse) | ❌ 延迟 | ✅ 极快 | ⭐⭐⭐⭐⭐ | ⭐⭐⭐⭐⭐ |
十三、PostgreSQL 17 新特性预告
PG 17 在物化视图方面有几个改进点(截至我的知识范围):
pg_dump选项优化:支持仅导出物化视图定义- MAINTAIN 权限:可单独授权刷新权限,无需全部 OWNER
-- PG 17+
GRANT MAINTAIN ON mv_name TO some_user;
具体新版本请以官方 Release Notes 为准。
一句话总结
物化视图 = 视图 + 持久化存储,是 PostgreSQL 加速复杂查询和报表的核心工具。
用法套路:
- 创建:
CREATE MATERIALIZED VIEW ... AS SELECT ...- 加唯一索引(必备,为了 CONCURRENTLY 刷新)
- 定时刷新:
pg_cron+REFRESH MATERIALIZED VIEW CONCURRENTLY- 建查询索引(加速访问)
适用场景:复杂聚合、多表 JOIN、BI 报表、跨库查询。不适合实时性要求高的 OLTP。如果需要原生增量刷新,可以用 pg_ivm 扩展。
openEuler 是由开放原子开源基金会孵化的全场景开源操作系统项目,面向数字基础设施四大核心场景(服务器、云计算、边缘计算、嵌入式),全面支持 ARM、x86、RISC-V、loongArch、PowerPC、SW-64 等多样性计算架构
更多推荐


所有评论(0)