Pydantic 完整指南
目录
概述
Pydantic 是一个现代的 Python 数据验证库,它使用 Python 类型注解来验证数据。它主要解决以下问题:
- ✅ 数据验证:确保数据符合预期格式
- ✅ 类型转换:自动将数据转换为正确的类型
- ✅ 错误处理:提供清晰的错误信息
- ✅ 序列化:将对象转换为 JSON、字典等格式
- ✅ 文档生成:自动生成 JSON Schema
核心优势
特性 | 描述 |
---|---|
🚀 高性能 | 核心验证逻辑(pydantic-core)使用 Rust 实现,Pydantic V2 比 V1 快约 5-50 倍 |
🔧 类型安全 | 基于 Python 类型注解,IDE 友好 |
📝 简洁 API | 易学易用的接口设计 |
🔌 可扩展 | 支持自定义验证器和序列化器 |
安装
# Basic install
pip install pydantic
# With e-mail validation support (quoted so shells such as zsh do not expand the brackets)
pip install "pydantic[email]"
# With every optional extra Pydantic defines (there is no "all" extra)
pip install "pydantic[email,timezone]"
基础用法
创建基本模型
from pydantic import BaseModel, Field
from typing import Optional
from datetime import datetime


class User(BaseModel):
    """A user record whose fields are validated and coerced by Pydantic."""

    id: int
    name: str
    email: str
    age: Optional[int] = None
    # default_factory is called for each new instance. A plain
    # ``datetime.now()`` default would be evaluated only once, when the class
    # is defined, so every instance would share that same stale timestamp.
    created_at: datetime = Field(default_factory=datetime.now)
# Build a User from raw input data
user_data = {
    "id": "123",  # numeric string, converted to int
    "name": "张三",
    "email": "zhangsan@example.com",
    "age": "25",  # numeric string, converted to int
}
user = User(**user_data)
print(user.id)  # 123 (an int)
print(user.name)  # 张三
数据序列化
# Dump the model to a plain dict
user_dict = user.model_dump()
print(user_dict)
# Dump the model to a JSON string
user_json = user.model_dump_json()
print(user_json)
# Dump only the selected fields
user_partial = user.model_dump(include={'name', 'email'})
数据验证
内置验证器
from pydantic import BaseModel, Field, field_validator
from typing import List


class Product(BaseModel):
    """A product combining declarative Field constraints with custom validators."""

    name: str = Field(..., min_length=1, max_length=100)
    price: float = Field(..., gt=0, description="价格必须大于0")
    tags: List[str] = Field(default_factory=list)

    # field_validator is the Pydantic V2 replacement for the V1 ``validator``
    # decorator; V2 validators are declared as classmethods.
    @field_validator('name')
    @classmethod
    def validate_name(cls, v: str) -> str:
        """Reject whitespace-only names and return the name stripped."""
        stripped = v.strip()
        if not stripped:
            raise ValueError('产品名称不能为空')
        return stripped

    @field_validator('price')
    @classmethod
    def validate_price(cls, v: float) -> float:
        """Reject prices above 10000 and round to two decimal places."""
        if v > 10000:
            raise ValueError('价格不能超过10000')
        return round(v, 2)
自定义验证器
from pydantic import BaseModel, field_validator
import re


class UserRegistration(BaseModel):
    """Registration form with per-field format checks."""

    username: str
    password: str
    email: str
    phone: str

    @field_validator('username')
    @classmethod
    def validate_username(cls, v: str) -> str:
        """Require at least 3 characters drawn from letters, digits and ``_``."""
        if len(v) < 3:
            raise ValueError('用户名至少3个字符')
        # fullmatch instead of match + ``$``: ``$`` also matches just before a
        # trailing newline, so "abc\n" would otherwise be accepted.
        if not re.fullmatch(r'[a-zA-Z0-9_]+', v):
            raise ValueError('用户名只能包含字母、数字和下划线')
        return v

    @field_validator('password')
    @classmethod
    def validate_password(cls, v: str) -> str:
        """Require at least 8 characters, one uppercase letter and one digit."""
        if len(v) < 8:
            raise ValueError('密码至少8个字符')
        if not re.search(r'[A-Z]', v):
            raise ValueError('密码必须包含大写字母')
        if not re.search(r'[0-9]', v):
            raise ValueError('密码必须包含数字')
        return v

    @field_validator('phone')
    @classmethod
    def validate_phone(cls, v: str) -> str:
        """Require an 11-digit number starting with 1 followed by 3-9."""
        # [0-9] rather than \d so non-ASCII Unicode digits are rejected;
        # fullmatch so a trailing newline is rejected as well.
        phone_pattern = r'1[3-9][0-9]{9}'
        if not re.fullmatch(phone_pattern, v):
            raise ValueError('手机号格式不正确')
        return v
数据类型
基础类型
from pydantic import BaseModel
from typing import List, Dict, Union, Optional
from datetime import datetime, date
from decimal import Decimal
from uuid import UUID
# Showcase of commonly used field types.
class DataTypes(BaseModel):
    # Scalars
    integer: int
    floating: float
    string: str
    boolean: bool

    # Dates and times
    datetime_field: datetime
    date_field: date

    # Containers
    list_field: List[str]
    dict_field: Dict[str, int]

    # Other types
    uuid_field: UUID
    decimal_field: Decimal
    union_field: Union[str, int]
    optional_field: Optional[str] = None
嵌套模型
from pydantic import BaseModel
from typing import List
# Postal address made up of four required string fields.
class Address(BaseModel):
    street: str
    city: str
    country: str
    postal_code: str
# One way of reaching a person: an e-mail address plus a phone number.
class Contact(BaseModel):
    email: str
    phone: str
# Person composed of scalar fields, one nested Address and a list of Contacts.
class Person(BaseModel):
    name: str
    age: int
    address: Address
    contacts: List[Contact]
# Using nested models: nested dicts and lists of dicts are passed in as-is
person_data = {
    "name": "李四",
    "age": 30,
    "address": {
        "street": "中山路123号",
        "city": "北京",
        "country": "中国",
        "postal_code": "100001",
    },
    "contacts": [
        {"email": "lisi@example.com", "phone": "13800138000"},
        {"email": "lisi.work@example.com", "phone": "13900139000"},
    ],
}
person = Person(**person_data)
高级功能
数据转换器
from pydantic import BaseModel, Field, field_validator
from typing import List


class TagModel(BaseModel):
    """Model whose ``tags`` field also accepts a comma-separated string."""

    tags: List[str]

    # mode='before' is the Pydantic V2 spelling of V1's ``pre=True``: the hook
    # sees the raw input before the List[str] validation runs.
    @field_validator('tags', mode='before')
    @classmethod
    def split_tags(cls, v):
        """Split a comma-separated string into a list; pass other input through."""
        if isinstance(v, str):
            # Skip empty pieces so "a,,b" or a trailing comma yields no '' tags.
            return [tag.strip() for tag in v.split(',') if tag.strip()]
        return v
# A comma-separated string is converted to a list automatically
tag_data = {"tags": "python, pydantic, 验证, 数据"}
model = TagModel(**tag_data)
print(model.tags)  # ['python', 'pydantic', '验证', '数据']
根验证器
from pydantic import BaseModel, model_validator


class UserAuth(BaseModel):
    """Credentials form that checks the two password fields agree."""

    username: str
    password: str
    confirm_password: str

    # model_validator(mode='after') is the Pydantic V2 replacement for V1's
    # ``root_validator``. It runs on the constructed instance once all fields
    # have passed validation, so both attributes are guaranteed to exist.
    @model_validator(mode='after')
    def validate_passwords(self) -> 'UserAuth':
        """Raise ValueError when password and confirm_password differ."""
        if self.password != self.confirm_password:
            raise ValueError('密码和确认密码不匹配')
        return self
配置类
from datetime import datetime

from pydantic import BaseModel, ConfigDict


class User(BaseModel):
    """Model demonstrating common configuration options."""

    name: str
    age: int

    # Pydantic V2 configures models through ``model_config``; the V1 inner
    # ``class Config`` is deprecated.
    model_config = ConfigDict(
        # Validate values again when attributes are assigned
        validate_assignment=True,
        # Store enum values instead of enum members
        use_enum_values=True,
        # Allow aliased fields to be populated by field name
        # (named allow_population_by_field_name in V1)
        populate_by_name=True,
        # Reject extra fields
        extra='forbid',
        # Custom JSON encoders; still accepted in V2 but deprecated
        json_encoders={
            datetime: lambda v: v.isoformat(),
        },
    )
实际应用案例
API 请求验证
from datetime import datetime
from enum import Enum
from typing import Optional

from pydantic import BaseModel, ConfigDict, Field


class OrderStatus(str, Enum):
    """Lifecycle states of an order; members are also plain strings."""

    PENDING = "pending"
    PROCESSING = "processing"
    SHIPPED = "shipped"
    DELIVERED = "delivered"
    CANCELLED = "cancelled"


class CreateOrderRequest(BaseModel):
    """Payload accepted when creating an order."""

    product_id: int = Field(..., gt=0)
    quantity: int = Field(..., ge=1, le=100)
    # ``pattern`` is the Pydantic V2 name of the V1 ``regex`` argument, which
    # V2 rejects.
    customer_email: str = Field(..., pattern=r'^[^@]+@[^@]+\.[^@]+$')
    shipping_address: str = Field(..., min_length=10)
    notes: Optional[str] = Field(None, max_length=500)


class OrderResponse(BaseModel):
    """Payload returned after an order has been created."""

    order_id: int
    status: OrderStatus
    total_amount: float
    created_at: datetime

    # V2 style configuration; json_encoders is still accepted but deprecated.
    model_config = ConfigDict(
        json_encoders={
            datetime: lambda v: v.isoformat(),
        },
    )
配置文件验证
from pydantic import BaseModel, Field, field_validator
from typing import Optional


class DatabaseConfig(BaseModel):
    """Database connection settings."""

    host: str = Field(default="localhost")
    port: int = Field(default=5432, ge=1, le=65535)
    username: str
    password: str
    database: str


class RedisConfig(BaseModel):
    """Redis connection settings."""

    host: str = Field(default="localhost")
    port: int = Field(default=6379, ge=1, le=65535)
    password: Optional[str] = None
    db: int = Field(default=0, ge=0, le=15)


class AppConfig(BaseModel):
    """Top-level application settings, including nested service configs."""

    app_name: str
    debug: bool = Field(default=False)
    secret_key: str = Field(..., min_length=32)
    database: DatabaseConfig
    redis: RedisConfig

    # The original snippet used ``validator`` without importing it;
    # field_validator is its Pydantic V2 replacement.
    @field_validator('secret_key')
    @classmethod
    def validate_secret_key(cls, v: str) -> str:
        """Reject keys made up only of whitespace."""
        if not v.strip():
            raise ValueError('密钥不能为空')
        return v
# Load the configuration from a dict
config_data = {
    "app_name": "我的应用",
    "debug": True,
    "secret_key": "your-super-secret-key-here-32-chars-long",
    "database": {
        "host": "localhost",
        "port": 5432,
        "username": "user",
        "password": "password",
        "database": "myapp",
    },
    "redis": {
        "host": "localhost",
        "port": 6379,
        "db": 0,
    },
}
config = AppConfig(**config_data)
最佳实践
1. 使用类型注解
# ✅ 好的做法
from typing import List, Optional, Dict
from pydantic import BaseModel
# Fully parameterized annotations for both the list and the optional dict.
class GoodModel(BaseModel):
    items: List[str]
    metadata: Optional[Dict[str, str]] = None
# ❌ 避免的做法
class BadModel(BaseModel):
    items: list  # not specific enough
    metadata: dict = {}  # mutable default value
2. 合理使用 Field
from pydantic import BaseModel, Field
class ProductModel(BaseModel):
    """Product whose fields carry descriptions and example values."""

    # ``examples`` (a list) is the Pydantic V2 Field argument; passing the V1
    # style ``example=`` as an extra keyword is deprecated.
    name: str = Field(..., description="产品名称", examples=["iPhone 14"])
    price: float = Field(..., gt=0, description="价格(元)", examples=[5999.0])
    stock: int = Field(0, ge=0, description="库存数量", examples=[100])
3. 错误处理
from pydantic import ValidationError
try:
    user = User(**invalid_data)
except ValidationError as e:
    print("验证错误:")
    for error in e.errors():
        # ``loc`` is a tuple path. It is empty for model-level errors and has
        # several parts for nested fields, so indexing [0] can raise
        # IndexError or hide where the error really is. Join the whole path.
        location = ".".join(str(part) for part in error['loc']) or "(model)"
        print(f"字段: {location}")
        print(f"错误: {error['msg']}")
        print(f"输入值: {error['input']}")
4. 性能优化
from pydantic import BaseModel, ConfigDict


class OptimizedModel(BaseModel):
    """Model configured to avoid optional validation work."""

    name: str
    age: int

    # Pydantic V2 parses and serializes JSON inside pydantic-core, so the V1
    # ``json_loads`` / ``json_dumps`` hooks were removed (the original snippet
    # also referenced ``orjson`` without importing it). Use
    # ``model_validate_json()`` and ``model_dump_json()`` for the fast path.
    model_config = ConfigDict(
        # Do not re-validate model instances passed in as values; this is the
        # closest V2 setting to V1's copy_on_model_validation (removed in V2).
        revalidate_instances='never',
        # Skip validation on attribute assignment
        validate_assignment=False,
    )
总结
Pydantic 是一个强大而灵活的数据验证库,它能够:
- 🔍 自动验证数据类型和格式
- 🔄 智能转换数据类型
- 📋 生成文档和 JSON Schema
- 🚀 借助 Rust 实现提升性能
- 🛡️ 通过类型检查保证安全
无论是构建 API、处理配置文件还是进行数据清洗,Pydantic 都是 Python 开发者的优秀选择。