跳到主要内容

合并数据

学习如何使用 JOIN、公用表表达式(CTE)和高级查询结构来合并多个数据源的数据。

核心概念

JOIN

连接多个表的数据

-- 内连接(Inner Join)(最常用)
SELECT e.name, d.department_name
FROM employees e
JOIN departments d ON e.dept_id = d.id;

涵盖:Inner、Left、Right、Full Outer、Semi、Anti 和 AsOf 连接

公用表表达式(Common Table Expressions, CTE)

使用 WITH 子句构建复杂查询

-- 将复杂逻辑拆分为多个步骤
WITH high_performers AS (
SELECT * FROM employees WHERE rating > 4.0
)
SELECT department, COUNT(*)
FROM high_performers
GROUP BY department;

涵盖:基本 CTE、递归 CTE、物化 CTE

高级组合

子查询

-- 相关子查询
SELECT name, salary,
(SELECT AVG(salary) FROM employees e2
WHERE e2.department = e1.department) as dept_avg
FROM employees e1;

-- EXISTS 子句
SELECT * FROM customers c
WHERE EXISTS (
SELECT 1 FROM orders o
WHERE o.customer_id = c.id
);

集合操作

-- 合并多个查询的结果
SELECT name FROM employees WHERE department = 'Sales'
UNION
SELECT name FROM contractors WHERE active = true;

-- 其他集合操作
INTERSECT -- 仅共同行
EXCEPT -- 第一个查询有而第二个查询没有的行

实践模式

数据增强

-- 向主表添加查找数据
WITH region_lookup AS (
SELECT zip_code, region_name
FROM zip_regions
)
SELECT
c.customer_name,
c.zip_code,
r.region_name,
SUM(o.amount) as total_sales
FROM customers c
LEFT JOIN region_lookup r ON c.zip_code = r.zip_code
LEFT JOIN orders o ON c.id = o.customer_id
GROUP BY c.customer_name, c.zip_code, r.region_name;

层级数据

-- 用于组织结构的递归 CTE
WITH RECURSIVE org_chart AS (
-- 基本情况:顶级管理者
SELECT id, name, manager_id, 1 as level
FROM employees WHERE manager_id IS NULL

UNION ALL

-- 递归情况:添加直接下属
SELECT e.id, e.name, e.manager_id, o.level + 1
FROM employees e
JOIN org_chart o ON e.manager_id = o.id
)
SELECT * FROM org_chart ORDER BY level, name;