道格拉斯-普克算法 - 把一堆复杂的线条变得简单,同时尽量保持原来的样子
flyfish
道格拉斯-普克算法(Douglas-Peucker Algorithm解决的问题其实很日常:把一堆复杂的线条(比如地图上的道路、河流,或者GPS记录的轨迹)变得简单,同时尽量保持原来的样子。
举个例子,假设用GPS记录了一条徒步路线,每走1米就记一个点,最后生成了1000个点的折线。但其实很多相邻的点几乎在一条直线上,完全没必要都保留——存起来占空间,画出来也累赘。这时候这个算法就派上用场了:它能自动删掉那些“多余”的点,比如直线段中间的点,只留下关键的拐点,让线条变简单,但看起来还是你走的那条路。
大概是上世纪70年代初。1972年有个叫乌尔斯·拉默的人先提出了类似思路,1973年道格拉斯和普克两个人又完善了这个方法,所以后来就用他们的名字命名了。
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import io
# 设置中文字体,确保中文正常显示
plt.rcParams["font.family"] = ["SimHei", "sans-serif"]
plt.rcParams['axes.unicode_minus'] = False # 解决负号显示问题
def point_to_segment_dist(point, start, end):
"""计算点到线段的垂直距离"""
if np.allclose(start, end):
return np.linalg.norm(point - start)
# 计算线段的单位向量
line_vec = end - start
line_len = np.linalg.norm(line_vec)
unit_line_vec = line_vec / line_len
# 计算从起点到点的向量
point_vec = point - start
# 计算投影长度
projection_length = np.dot(point_vec, unit_line_vec)
# 如果投影长度超出线段范围,则计算到端点的距离
if projection_length < 0:
return np.linalg.norm(point - start)
elif projection_length > line_len:
return np.linalg.norm(point - end)
# 计算投影点
projection = start + projection_length * unit_line_vec
# 计算点到投影点的距离
return np.linalg.norm(point - projection)
def douglas_peucker(points, epsilon, frames=None):
"""
道格拉斯-普克算法实现,并记录每一步的处理过程用于可视化
参数:
points: 待简化的点集
epsilon: 距离阈值
frames: 存储每一步处理结果的列表
"""
if len(points) < 3:
if frames is not None:
frames.append({
'points': points.copy(),
'start_idx': 0,
'end_idx': len(points) - 1,
'max_dist_idx': None,
'is_terminal': True
})
return points
start_point = points[0]
end_point = points[-1]
# 计算所有中间点到线段的距离
distances = []
for i in range(1, len(points) - 1):
dist = point_to_segment_dist(points[i], start_point, end_point)
distances.append((dist, i))
if not distances:
if frames is not None:
frames.append({
'points': points.copy(),
'start_idx': 0,
'end_idx': len(points) - 1,
'max_dist_idx': None,
'is_terminal': True
})
return np.array([start_point, end_point])
# 找到最大距离的点
max_dist, max_dist_idx = max(distances, key=lambda x: x[0])
# 记录当前步骤
if frames is not None:
frames.append({
'points': points.copy(),
'start_idx': 0,
'end_idx': len(points) - 1,
'max_dist_idx': max_dist_idx if max_dist > epsilon else None,
'is_terminal': max_dist <= epsilon
})
# 如果最大距离大于阈值,则递归处理
if max_dist > epsilon:
left_points = points[:max_dist_idx + 1]
right_points = points[max_dist_idx:]
left_simplified = douglas_peucker(left_points, epsilon, frames)
right_simplified = douglas_peucker(right_points, epsilon, frames)
# 合并结果(去掉重复的分割点)
return np.vstack([left_simplified[:-1], right_simplified])
else:
# 所有点都足够接近线段,直接返回起点和终点
return np.array([start_point, end_point])
def create_frames(points, epsilon):
"""创建算法执行过程的帧列表"""
frames = []
douglas_peucker(points, epsilon, frames)
return frames
def draw_frame(frame, frames, points, epsilon, step_num, total_steps, figsize=(10, 6)):
"""绘制单个帧"""
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=figsize)
# 左侧子图:当前处理的线段
ax1.set_title("当前处理的线段")
ax1.set_xlabel("X")
ax1.set_ylabel("Y")
ax1.set_xlim(points[:, 0].min() - 1, points[:, 0].max() + 1)
ax1.set_ylim(points[:, 1].min() - 1, points[:, 1].max() + 1)
# 绘制原始曲线(半透明)
ax1.plot(points[:, 0], points[:, 1], 'b-', alpha=0.3, label='原始曲线')
# 绘制当前处理的线段
current_points = frame['points']
start_idx = frame['start_idx']
end_idx = frame['end_idx']
start_point = current_points[start_idx]
end_point = current_points[end_idx]
ax1.plot([start_point[0], end_point[0]], [start_point[1], end_point[1]],
'r-', linewidth=2, label='当前线段')
# 绘制最大距离点
if frame['max_dist_idx'] is not None:
max_dist_idx = frame['max_dist_idx']
max_point = current_points[max_dist_idx]
# 绘制最大距离点
ax1.plot(max_point[0], max_point[1], 'go', markersize=8, label='最远点')
# 计算并绘制垂直线
projection = compute_projection(max_point, start_point, end_point)
ax1.plot([max_point[0], projection[0]], [max_point[1], projection[1]],
'g--', linewidth=1, label='垂直距离')
# 显示距离值
dist = point_to_segment_dist(max_point, start_point, end_point)
mid_x = (max_point[0] + projection[0]) / 2
mid_y = (max_point[1] + projection[1]) / 2
ax1.text(mid_x, mid_y, f'd={dist:.2f}', ha='center', va='bottom',
bbox=dict(facecolor='white', alpha=0.8))
ax1.legend()
# 右侧子图:累积简化结果
ax2.set_title("累积简化结果")
ax2.set_xlabel("X")
ax2.set_ylabel("Y")
ax2.set_xlim(points[:, 0].min() - 1, points[:, 0].max() + 1)
ax2.set_ylim(points[:, 1].min() - 1, points[:, 1].max() + 1)
# 绘制原始曲线(半透明)
ax2.plot(points[:, 0], points[:, 1], 'b-', alpha=0.3, label='原始曲线')
# 收集所有已处理的线段
simplified_points = []
# 从第一步到当前步,收集所有终端节点(即已处理完的线段)
for f in frames[:step_num + 1]:
if f['is_terminal']:
p = f['points']
simplified_points.append(p[0])
simplified_points.append(p[-1])
# 去重并排序(按X坐标)
if simplified_points:
simplified_points = np.array(simplified_points)
_, idx = np.unique(simplified_points[:, 0], return_index=True)
simplified_points = simplified_points[np.sort(idx)]
# 绘制简化后的曲线
ax2.plot(simplified_points[:, 0], simplified_points[:, 1],
'r-', linewidth=2, label='简化曲线')
ax2.plot(simplified_points[:, 0], simplified_points[:, 1],
'ro', markersize=5)
ax2.legend()
# 添加标题和步骤信息
if frame['is_terminal']:
step_text = f"步骤 {step_num + 1}/{total_steps}: 所有点距离 ≤ ε,保留首尾点"
else:
step_text = f"步骤 {step_num + 1}/{total_steps}: 保留最远点,分割曲线"
fig.suptitle(f"道格拉斯-普克算法 (阈值 ε = {epsilon})", fontsize=14)
plt.figtext(0.5, 0.01, step_text, ha="center", fontsize=12)
# 保存当前帧为图像
buf = io.BytesIO()
plt.savefig(buf, format='png', bbox_inches='tight')
buf.seek(0)
image = Image.open(buf)
plt.close(fig)
return image
def compute_projection(point, start, end):
"""计算点在直线上的投影"""
if np.allclose(start, end):
return start
line_vec = end - start
point_vec = point - start
line_len_sq = np.sum(line_vec ** 2)
# 计算投影系数
t = np.dot(point_vec, line_vec) / line_len_sq
# 限制投影在端点之间
t = max(0, min(1, t))
return start + t * line_vec
def create_gif(points, epsilon, output_path='douglas_peucker.gif', duration=1000):
"""创建道格拉斯-普克算法执行过程的GIF动画"""
# 生成所有帧
frames = create_frames(points, epsilon)
# 绘制每一帧并保存为GIF
images = []
for i, frame in enumerate(frames):
image = draw_frame(frame, frames, points, epsilon, i, len(frames))
images.append(image)
# 保存为GIF
images[0].save(
output_path,
save_all=True,
append_images=images[1:],
duration=duration,
loop=0 # 0表示无限循环
)
print(f"GIF动画已保存至: {output_path}")
return output_path
# 示例:创建一个带噪声的正弦曲线并生成GIF
if __name__ == "__main__":
# 生成示例数据
np.random.seed(42) # 设置随机种子,确保结果可重现
x = np.linspace(0, 10, 100)
y = np.sin(x) + np.random.normal(0, 0.3, size=len(x)) # 添加随机噪声
points = np.column_stack([x, y])
# 设置阈值
epsilon = 0.5
# 创建GIF动画
create_gif(points, epsilon, output_path='douglas_peucker.gif', duration=1000)