图像扭曲增强处理流程
├── 1. 初始化与加载
│ ├── 读取输入图像
│ ├── 检查图像有效性
│ ├── 提取文件名和扩展名
│ └── 获取图像尺寸信息
│
├── 2. 扭曲变换方法
│ ├── 弹性变换(Elastic Transform)
│ │ ├── 生成随机仿射变换矩阵
│ │ ├── 创建复杂位移场(添加随机和周期性扰动)
│ │ └── 应用非线性扭曲
│ │
│ ├── 透视变换(Perspective Transform)
│ │ ├── 以原图四个角点为源点,随机生成向内偏移的目标角点
│ │ ├── 计算透视变换矩阵(增强极端变换概率)
│ │ └── 应用透视扭曲
│ │
│ └── 仿射变换(Affine Transform)
│ ├── 随机缩放(0.4-2.2倍)
│ ├── 随机旋转(-70°至70°)
│ └── 随机剪切(剪切系数-0.5至0.5,即参数值-50至50除以100)
│
├── 3. 裁剪与缩放
│ ├── 随机选择缩放因子(1.0-2.5倍原始尺寸)
│ ├── 若图像尺寸不足则直接缩放
│ └── 随机裁剪到目标尺寸
│
├── 4. 随机变换组合
│ ├── 高概率选择弹性变换(50%)
│ ├── 中概率选择透视变换(30%)
│ └── 低概率选择仿射变换(20%)
│
└── 5. 批量生成与保存
├── 创建输出目录
├── 循环生成指定数量的扭曲图像
├── 保存图像到指定路径
└── 每100张输出一次进度信息
安装好依赖包(opencv-python、numpy)并修改好文件路径后,可直接运行以下代码:
import cv2
import numpy as np
import os
import random
from typing import Tuple, List, Callable, Dict
#扭曲更强,输出图片尺寸为原图的1-2.5倍
class ImageDistortion:
    """Generate distorted variants of a single image for data augmentation.

    The source image is loaded once at construction time. Every distortion
    method warps ``self.image`` into a new array (the stored image is never
    modified), and ``generate_and_save`` writes a batch of randomly distorted
    copies into ``output_dir``.
    """

    def __init__(self, input_path: str, output_dir: str):
        """Load the source image and remember where results are written.

        Args:
            input_path: Path of the image to distort.
            output_dir: Directory that receives the generated images.

        Raises:
            FileNotFoundError: If the image cannot be read.
        """
        self.input_path = input_path
        self.output_dir = output_dir
        self.image = self._load_image()
        # Generated files reuse the source file's base name and extension.
        self.filename, self.ext = os.path.splitext(os.path.basename(input_path))
        self.height, self.width = self.image.shape[:2]

    def _load_image(self) -> np.ndarray:
        """Read ``self.input_path`` with OpenCV.

        Returns:
            The decoded image array.

        Raises:
            FileNotFoundError: If ``cv2.imread`` returns ``None`` (missing or
                undecodable file).
        """
        image = cv2.imread(self.input_path)
        if image is None:
            raise FileNotFoundError(f"无法加载图像: {self.input_path}")
        return image

    def elastic_transform(self, alpha: float = 200, sigma: float = 8,
                          alpha_affine: float = 20,
                          random_state: np.random.RandomState = None) -> np.ndarray:
        """Apply a random affine jitter followed by an elastic warp.

        The displacement field is smoothed uniform noise scaled by ``alpha``
        plus one (and, half of the time, two) sinusoidal waves.

        Args:
            alpha: Multiplier applied to the smoothed random displacement.
            sigma: Gaussian sigma used to smooth the random displacement.
            alpha_affine: Maximum absolute offset, in pixels, applied to each
                coordinate of the affine reference triangle.
            random_state: Source of all randomness in this method. A fresh,
                unseeded ``RandomState`` is created when omitted.

        Returns:
            The warped image, same size as the source image.
        """
        if random_state is None:
            random_state = np.random.RandomState(None)

        height, width = self.image.shape[:2]

        # OpenCV points are (x, y), so the reference triangle is built around
        # (width / 2, height / 2) to stay centred on non-square images.
        center = np.float32([width, height]) // 2
        half_side = min(height, width) // 3
        pts1 = np.float32([
            center + half_side,
            [center[0] + half_side, center[1] - half_side],
            center - half_side,
        ])
        jitter = random_state.uniform(-alpha_affine, alpha_affine, size=pts1.shape)
        pts2 = pts1 + jitter.astype(np.float32)
        affine_matrix = cv2.getAffineTransform(pts1, pts2)
        image = cv2.warpAffine(self.image, affine_matrix, (width, height),
                               borderMode=cv2.BORDER_REFLECT_101)

        # Smoothed uniform noise in [-1, 1), scaled to the requested strength.
        dx = cv2.GaussianBlur(random_state.rand(height, width) * 2 - 1, (0, 0), sigma) * alpha
        dy = cv2.GaussianBlur(random_state.rand(height, width) * 2 - 1, (0, 0), sigma) * alpha

        x, y = np.meshgrid(np.arange(width), np.arange(height))

        # Primary sinusoidal perturbation.
        wave_strength = random_state.uniform(10, 30)
        wave_freq = random_state.uniform(0.01, 0.07)
        dx += np.sin(x * wave_freq) * wave_strength
        dy += np.cos(y * wave_freq) * wave_strength

        # Optional second wave along the other axis. The coin flip is drawn
        # from random_state so a seeded state reproduces the whole result.
        if random_state.rand() < 0.5:
            wave_strength2 = random_state.uniform(5, 15)
            wave_freq2 = random_state.uniform(0.03, 0.1)
            dx += np.sin(y * wave_freq2) * wave_strength2
            dy += np.cos(x * wave_freq2) * wave_strength2

        map_x = np.float32(x + dx)
        map_y = np.float32(y + dy)
        return cv2.remap(image, map_x, map_y, interpolation=cv2.INTER_LINEAR,
                         borderMode=cv2.BORDER_REFLECT_101)

    def perspective_transform(self, scale: float = 0.3, tilt_prob: float = 0.7) -> np.ndarray:
        """Apply a random perspective warp.

        The four source corners are fixed; each destination corner is moved
        inward by a random amount of at most ``scale`` times the image
        width/height.

        Args:
            scale: Maximum inward corner offset as a fraction of the image
                size.
            tilt_prob: Probability of using the full ``scale``; otherwise the
                offsets are limited to 70% of it.

        Returns:
            The warped image, same size as the source image.
        """
        height, width = self.image.shape[:2]
        pts1 = np.float32([[0, 0], [width, 0], [0, height], [width, height]])

        # Strong tilt uses the full scale, the regular variant 70% of it.
        factor = 1.0 if random.random() < tilt_prob else 0.7
        max_dx = width * scale * factor
        max_dy = height * scale * factor

        pts2 = np.float32([
            [random.uniform(0, max_dx), random.uniform(0, max_dy)],
            [width - random.uniform(0, max_dx), random.uniform(0, max_dy)],
            [random.uniform(0, max_dx), height - random.uniform(0, max_dy)],
            [width - random.uniform(0, max_dx), height - random.uniform(0, max_dy)],
        ])

        matrix = cv2.getPerspectiveTransform(pts1, pts2)
        return cv2.warpPerspective(self.image, matrix, (width, height),
                                   borderMode=cv2.BORDER_REFLECT_101,
                                   flags=cv2.INTER_CUBIC)

    def affine_transform(self, scale_range: Tuple[float, float] = (0.5, 2.0),
                         rotation_range: Tuple[float, float] = (-60, 60),
                         shear_range: Tuple[float, float] = (-45, 45)) -> np.ndarray:
        """Apply a random scale/rotation followed by a random shear.

        Args:
            scale_range: ``(min, max)`` of the uniform scale factor.
            rotation_range: ``(min, max)`` of the rotation angle in degrees.
            shear_range: ``(min, max)`` of the shear value. The drawn value is
                divided by 100 and used directly as the shear coefficient of
                the matrix, so it is a percentage rather than an angle.

        Returns:
            The warped image, same size as the source image.
        """
        height, width = self.image.shape[:2]

        scale = random.uniform(*scale_range)
        rotation = random.uniform(*rotation_range)
        shear_x = random.uniform(*shear_range)
        shear_y = random.uniform(*shear_range)

        # Rotate and scale about the image centre.
        rotation_matrix = cv2.getRotationMatrix2D((width / 2, height / 2), rotation, scale)
        rotated = cv2.warpAffine(self.image, rotation_matrix, (width, height),
                                 borderMode=cv2.BORDER_REFLECT_101,
                                 flags=cv2.INTER_CUBIC)

        shear_matrix = np.float32([
            [1, shear_x / 100, 0],
            [shear_y / 100, 1, 0],
        ])
        return cv2.warpAffine(rotated, shear_matrix, (width, height),
                              borderMode=cv2.BORDER_REFLECT_101,
                              flags=cv2.INTER_CUBIC)

    def crop_and_resize(self, image: np.ndarray, scale_factor: float = None) -> np.ndarray:
        """Bring ``image`` to ``scale_factor`` times the source image size.

        If ``image`` is smaller than the target in either dimension it is
        resized to the target; otherwise a random window of the target size is
        cropped out of it.

        Args:
            image: Image to crop or resize.
            scale_factor: Target size relative to the source image. Drawn
                uniformly from [1.0, 2.5] when ``None``.

        Returns:
            An image of the target size.

        Raises:
            ValueError: If ``scale_factor`` is not positive.
        """
        if scale_factor is None:
            scale_factor = random.uniform(1.0, 2.5)
        if scale_factor <= 0:
            raise ValueError(f"scale_factor must be positive, got {scale_factor}")

        h, w = image.shape[:2]
        # Never let truncation produce a zero-sized target.
        target_h = max(1, int(self.height * scale_factor))
        target_w = max(1, int(self.width * scale_factor))

        if h < target_h or w < target_w:
            return cv2.resize(image, (target_w, target_h), interpolation=cv2.INTER_CUBIC)

        top = random.randint(0, h - target_h)
        left = random.randint(0, w - target_w)
        return image[top:top + target_h, left:left + target_w]

    def apply_random_distortion(self) -> np.ndarray:
        """Distort the source image with one randomly chosen method.

        Elastic is picked with probability 0.5, perspective with 0.3 and
        affine with 0.2; the parameters of the chosen method are randomised
        as well. The result is passed through ``crop_and_resize``.

        Returns:
            The distorted, cropped/resized image.
        """
        # Repetition in the list encodes the 5:3:2 selection weights.
        distortion_methods = ['elastic'] * 5 + ['perspective'] * 3 + ['affine'] * 2
        distortion_method = random.choice(distortion_methods)

        if distortion_method == 'elastic':
            alpha = random.uniform(150, 250)
            sigma = random.uniform(5, 10)
            alpha_affine = random.uniform(15, 30)
            distorted = self.elastic_transform(alpha, sigma, alpha_affine)
        elif distortion_method == 'perspective':
            scale = random.uniform(0.2, 0.4)
            tilt_prob = random.uniform(0.6, 0.9)
            distorted = self.perspective_transform(scale, tilt_prob)
        else:  # affine
            scale_range = (random.uniform(0.4, 0.8), random.uniform(1.5, 2.2))
            rotation_range = (random.uniform(-70, -30), random.uniform(30, 70))
            shear_range = (random.uniform(-50, -20), random.uniform(20, 50))
            distorted = self.affine_transform(scale_range, rotation_range, shear_range)

        return self.crop_and_resize(distorted)

    def generate_and_save(self, count: int) -> None:
        """Generate ``count`` distorted images and write them to disk.

        Files are named ``<source name>_distorted_<index><source extension>``
        inside ``self.output_dir``; progress is printed every 100 images.

        Args:
            count: Number of images to generate.

        Raises:
            OSError: If an image cannot be written.
        """
        # An empty output_dir means the current directory; makedirs('') fails.
        if self.output_dir:
            os.makedirs(self.output_dir, exist_ok=True)
        print(f"开始生成 {count} 张扭曲图像...")

        for i in range(count):
            distorted_image = self.apply_random_distortion()
            output_filename = f"{self.filename}_distorted_{i:04d}{self.ext}"
            output_path = os.path.join(self.output_dir, output_filename)

            # cv2.imwrite signals failure through its return value, not an
            # exception, so check it instead of silently losing images.
            if not cv2.imwrite(output_path, distorted_image):
                raise OSError(f"无法保存图像: {output_path}")

            if (i + 1) % 100 == 0:
                print(f"已生成 {i + 1}/{count} 张图像")

        print(f"所有 {count} 张扭曲图像已保存到: {self.output_dir}")
def main(input_image_path: str = r"E:\project1\photo01\0_defect_1.bmp",
         count: int = 3000) -> None:
    """Generate distorted copies of one image next to the original file.

    Args:
        input_image_path: Path of the image to distort. Backslashes are
            converted to forward slashes before use.
        count: Number of distorted images to generate.
    """
    # Normalise Windows-style separators.
    input_image_path = input_image_path.replace('\\', '/')
    # Write next to the input image; a bare filename has an empty dirname,
    # which would make directory creation fail, so fall back to ".".
    output_directory = os.path.dirname(input_image_path) or "."
    enhancer = ImageDistortion(input_image_path, output_directory)
    enhancer.generate_and_save(count)


if __name__ == "__main__":
    main()