常用算法概念
1. 传统机器学习算法
SIFT (Scale-Invariant Feature Transform)
- 概念:尺度不变特征变换,提取图像中的关键点和描述符
- 特点:对尺度、旋转、光照变化具有不变性
HOG (Histogram of Oriented Gradients)
- 概念:方向梯度直方图,统计图像局部区域的梯度方向分布
- 特点:对光照变化和阴影有良好鲁棒性,常用于行人检测
SVM (Support Vector Machine)
- 概念:支持向量机,用于分类和回归分析
- 特点:在高维空间中有效,内存使用效率高
2. 深度学习算法
CNN (Convolutional Neural Networks)
- 概念:卷积神经网络,专为图像处理设计的深度学习模型
- 特点:自动学习图像特征,层次化特征提取
ResNet (Residual Networks)
- 概念:残差网络,通过跳跃连接解决深层网络训练问题
- 特点:可训练非常深的网络(超过100层)
MobileNet
- 概念:轻量级CNN架构,使用深度可分离卷积
- 特点:适合移动和嵌入式设备,计算效率高
简单示例
使用OpenCV和SVM进行简单图像分类
import cv2
import numpy as np
from sklearn import svm
from sklearn.model_selection import train_test_split
def extract_hog_features(image):
    """Extract a fixed-length HOG descriptor from an image.

    Args:
        image: 8-bit image (numpy array), e.g. as loaded by ``cv2.imread``.

    Returns:
        1-D numpy float array of HOG features.

    The image is resized to the detector window size so that every sample
    produces a descriptor of identical length -- required later when the
    features are stacked into a single matrix (``np.vstack``) for the SVM.
    Without the resize, differently sized inputs yield differently sized
    descriptors (or ``compute`` fails outright on images smaller than the
    window).
    """
    win_size = (64, 64)
    block_size = (16, 16)
    block_stride = (8, 8)
    cell_size = (8, 8)
    nbins = 9
    hog = cv2.HOGDescriptor(win_size, block_size, block_stride, cell_size, nbins)
    # Normalize input size to the HOG window so the feature length is constant.
    resized = cv2.resize(image, win_size)
    features = hog.compute(resized)
    return features.flatten()
# Demo data: populate these lists with training images (e.g. via cv2.imread).
positive_samples = []
negative_samples = []

positive_features = [extract_hog_features(img) for img in positive_samples]
negative_features = [extract_hog_features(img) for img in negative_samples]

# RBF-kernel SVM classifier; created unconditionally so predict_image and
# downstream code can always reference it.
classifier = svm.SVC(kernel='rbf')

# Guard: np.vstack, train_test_split and fit all raise on empty inputs,
# so only train when both classes actually contain samples.
if positive_features and negative_features:
    X = np.vstack([positive_features, negative_features])
    # Labels: 1.0 for positive samples, 0.0 for negative samples.
    y = np.hstack([np.ones(len(positive_features)), np.zeros(len(negative_features))])
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    classifier.fit(X_train, y_train)


def predict_image(image):
    """Classify a single image with the trained HOG + SVM pipeline.

    Args:
        image: 8-bit image (numpy array).

    Returns:
        1.0 for the positive class, 0.0 for the negative class.

    Requires that ``classifier`` has been fitted above.
    """
    features = extract_hog_features(image)
    prediction = classifier.predict([features])
    return prediction[0]
使用PyTorch实现简单CNN图像分类
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
class SimpleCNN(nn.Module):
    """A small three-stage convolutional image classifier.

    Three 3x3 conv + ReLU stages (32, 64, 128 channels); the first two are
    followed by 2x2 max pooling, the last by adaptive average pooling down
    to a fixed 4x4 map, which feeds a two-layer fully connected head with
    dropout. The adaptive pool makes the head independent of input size.
    """

    def __init__(self, num_classes=10):
        super(SimpleCNN, self).__init__()

        # Build the conv trunk: two downsampling conv stages, then a final
        # conv stage whose output is pooled to a fixed spatial size.
        trunk = []
        for in_ch, out_ch in ((3, 32), (32, 64)):
            trunk.append(nn.Conv2d(in_ch, out_ch, kernel_size=3, padding=1))
            trunk.append(nn.ReLU())
            trunk.append(nn.MaxPool2d(2, 2))
        trunk.append(nn.Conv2d(64, 128, kernel_size=3, padding=1))
        trunk.append(nn.ReLU())
        trunk.append(nn.AdaptiveAvgPool2d((4, 4)))
        self.features = nn.Sequential(*trunk)

        # Fully connected head: 128*4*4 pooled features -> hidden -> classes.
        self.classifier = nn.Sequential(
            nn.Linear(128 * 4 * 4, 512),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(512, num_classes),
        )

    def forward(self, x):
        feature_maps = self.features(x)
        # Flatten every dimension except the batch axis for the linear head.
        flat = feature_maps.view(feature_maps.size(0), -1)
        return self.classifier(flat)
def train_model(model, train_loader, criterion, optimizer, num_epochs=10):
    """Train ``model`` on ``train_loader`` for ``num_epochs`` epochs.

    Args:
        model: the network to optimize (switched to train mode here).
        train_loader: iterable of ``(inputs, labels)`` mini-batches.
        criterion: loss function, e.g. ``nn.CrossEntropyLoss()``.
        optimizer: optimizer already bound to ``model.parameters()``.
        num_epochs: number of full passes over the loader.

    Prints the average per-batch loss after each epoch; returns nothing
    (the model is updated in place).
    """
    model.train()  # enable dropout / batch-norm training behavior
    for epoch in range(num_epochs):
        running_loss = 0.0
        # Fix: the original enumerated the loader but never used the index.
        for inputs, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        avg_loss = running_loss / len(train_loader)
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}')
def predict(model, image):
    """Return the predicted class index for a single, unbatched image tensor."""
    model.eval()  # disable dropout, use running batch-norm statistics
    with torch.no_grad():
        batch = image.unsqueeze(0)  # the model expects a leading batch axis
        scores = model(batch)
        _, best = torch.max(scores, 1)
        return best.item()
使用预训练模型进行图像分类
import torch
from torchvision import models, transforms
from PIL import Image
# Load an ImageNet-pretrained ResNet-18 and switch it to inference mode.
model = models.resnet18(pretrained=True)
model.eval()

# Standard ImageNet preprocessing: resize the short side to 256, center-crop
# to the 224x224 input the network was trained on, convert to a tensor, and
# normalize with the ImageNet per-channel statistics.
preprocess = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Bug fix: force RGB so grayscale, RGBA, or palette images do not break the
# 3-channel Normalize step (its mean/std lists assume exactly 3 channels).
image = Image.open('example.jpg').convert('RGB')
input_tensor = preprocess(image)
input_batch = input_tensor.unsqueeze(0)  # the model expects a batch dimension

with torch.no_grad():
    output = model(input_batch)

# Class probabilities over the 1000 ImageNet categories.
probabilities = torch.nn.functional.softmax(output[0], dim=0)
_, predicted_class = torch.max(output, 1)
print(f"Predicted class index: {predicted_class.item()}")