Pydantic

发布于:2025-06-17 ⋅ 阅读:(13) ⋅ 点赞:(0)

Pydantic 完整指南

目录

概述

Pydantic 是一个现代的 Python 数据验证库,它使用 Python 类型注解来验证数据。它主要解决以下问题:

  • 数据验证:确保数据符合预期格式
  • 类型转换:自动将数据转换为正确的类型
  • 错误处理:提供清晰的错误信息
  • 序列化:将对象转换为 JSON、字典等格式
  • 文档生成:自动生成 JSON Schema

核心优势

特性 描述
🚀 高性能 底层使用 Rust 实现,比纯 Python 快 50-200 倍
🔧 类型安全 基于 Python 类型注解,IDE 友好
📝 简洁 API 易学易用的接口设计
🔌 可扩展 支持自定义验证器和序列化器

安装

# Basic install
pip install pydantic

# With e-mail validation support
pip install "pydantic[email]"

# With every optional extra. pydantic defines no "all" extra, so the extras
# are listed by name; the quotes stop shells such as zsh from treating the
# square brackets as a glob pattern.
pip install "pydantic[email,timezone]"

基础用法

创建基本模型

from datetime import datetime
from typing import Optional

from pydantic import BaseModel, Field

# Basic user model. `id` and `age` are declared as int, so numeric strings
# passed for them are converted during validation.
class User(BaseModel):
    id: int
    name: str
    email: str
    age: Optional[int] = None
    # A plain `datetime.now()` default would be evaluated only once, when the
    # class body runs, so every instance would share that single timestamp.
    # A default_factory is called again for each new instance.
    created_at: datetime = Field(default_factory=datetime.now)

# Build a user from raw input; the numeric strings are converted to int.
user_data = dict(
    id="123",  # str -> int
    name="张三",
    email="zhangsan@example.com",
    age="25",  # str -> int
)

user = User(**user_data)
print(user.id)  # 123 (an int)
print(user.name)  # 张三

数据序列化

# Serialize the model to a plain dict.
user_dict = user.model_dump()
print(user_dict)

# Serialize the model to a JSON string.
user_json = user.model_dump_json()
print(user_json)

# Dump only a chosen subset of the fields.
user_partial = user.model_dump(include={"email", "name"})

数据验证

内置验证器

from pydantic import BaseModel, Field, validator
from typing import List

# Product model combining declarative Field constraints with two custom
# validators.
# NOTE(review): `validator` is the Pydantic v1-style decorator; Pydantic v2
# deprecates it in favour of `field_validator` - confirm the targeted version.
class Product(BaseModel):
    name: str = Field(..., min_length=1, max_length=100)
    price: float = Field(..., gt=0, description="价格必须大于0")
    tags: List[str] = Field(default_factory=list)

    @validator('name')
    def validate_name(cls, v):
        """Reject whitespace-only names and return the trimmed name."""
        trimmed = v.strip()
        if not trimmed:
            raise ValueError('产品名称不能为空')
        return trimmed

    @validator('price')
    def validate_price(cls, v):
        """Reject prices above 10000 and round the rest to two decimals."""
        too_expensive = v > 10000
        if too_expensive:
            raise ValueError('价格不能超过10000')
        return round(v, 2)

自定义验证器

from pydantic import BaseModel, validator
import re

# Registration payload with format rules for username, password and phone.
# NOTE(review): `validator` is the Pydantic v1-style decorator; Pydantic v2
# deprecates it in favour of `field_validator` - confirm the targeted version.
class UserRegistration(BaseModel):
    username: str
    password: str
    email: str
    phone: str

    @validator('username')
    def validate_username(cls, v):
        """Require at least 3 characters drawn from ASCII letters, digits, `_`."""
        if len(v) < 3:
            raise ValueError('用户名至少3个字符')
        # fullmatch instead of match + `$`: `$` also matches just before a
        # trailing newline, which would let "name\n" through.
        if not re.fullmatch(r'[a-zA-Z0-9_]+', v):
            raise ValueError('用户名只能包含字母、数字和下划线')
        return v

    @validator('password')
    def validate_password(cls, v):
        """Require at least 8 characters, one uppercase letter and one digit."""
        if len(v) < 8:
            raise ValueError('密码至少8个字符')
        if not re.search(r'[A-Z]', v):
            raise ValueError('密码必须包含大写字母')
        if not re.search(r'[0-9]', v):
            raise ValueError('密码必须包含数字')
        return v

    @validator('phone')
    def validate_phone(cls, v):
        """Require an 11-digit number starting with 1 followed by 3-9."""
        # fullmatch rejects a trailing newline that `$` would tolerate, and
        # [0-9] (unlike \d on str patterns) only accepts ASCII digits.
        phone_pattern = r'1[3-9][0-9]{9}'
        if not re.fullmatch(phone_pattern, v):
            raise ValueError('手机号格式不正确')
        return v

数据类型

基础类型

from pydantic import BaseModel
from typing import List, Dict, Union, Optional
from datetime import datetime, date
from decimal import Decimal
from uuid import UUID

# Example model declaring one field for each of several commonly used types.
class DataTypes(BaseModel):
    # Primitive types
    integer: int
    floating: float
    string: str
    boolean: bool

    # Date and time types
    datetime_field: datetime
    date_field: date

    # Collection types
    list_field: List[str]
    dict_field: Dict[str, int]

    # Special types
    uuid_field: UUID
    decimal_field: Decimal
    union_field: Union[str, int]  # either a str or an int
    optional_field: Optional[str] = None  # may be omitted; defaults to None

嵌套模型

from pydantic import BaseModel
from typing import List

# Postal address; used as the type of Person.address below.
class Address(BaseModel):
    street: str
    city: str
    country: str
    postal_code: str

# One contact entry (e-mail plus phone); Person holds a list of these.
class Contact(BaseModel):
    email: str
    phone: str

# Person composed of nested models: a single Address and a list of Contact.
class Person(BaseModel):
    name: str
    age: int
    address: Address  # nested model
    contacts: List[Contact]  # list of nested models
    
# Use the nested models: the inner dicts are validated against the Address
# and Contact types declared on Person.
person_data = dict(
    name="李四",
    age=30,
    address=dict(
        street="中山路123号",
        city="北京",
        country="中国",
        postal_code="100001",
    ),
    contacts=[
        dict(email="lisi@example.com", phone="13800138000"),
        dict(email="lisi.work@example.com", phone="13900139000"),
    ],
)

person = Person(**person_data)

高级功能

数据转换器

from pydantic import BaseModel, Field, validator
from typing import List

# Model whose `tags` field also accepts a single comma-separated string.
# NOTE(review): `validator` is the Pydantic v1-style decorator; Pydantic v2
# deprecates it in favour of `field_validator` - confirm the targeted version.
class TagModel(BaseModel):
    tags: List[str]

    @validator('tags', pre=True)
    def split_tags(cls, v):
        """Turn a comma-separated string into a list of trimmed tags.

        Registered with pre=True, so it receives the raw input. Non-string
        input is returned unchanged. Empty segments (from an empty string,
        a trailing comma or a doubled comma) are dropped so they do not
        become empty-string tags.
        """
        if isinstance(v, str):
            pieces = (piece.strip() for piece in v.split(','))
            return [piece for piece in pieces if piece]
        return v

# A comma-separated string is converted into a list automatically.
tag_data = dict(tags="python, pydantic, 验证, 数据")
model = TagModel(**tag_data)
print(model.tags)  # ['python', 'pydantic', '验证', '数据']

根验证器

from pydantic import BaseModel, root_validator

# Credentials model that checks the two password fields against each other.
class UserAuth(BaseModel):
    username: str
    password: str
    confirm_password: str

    # Pydantic v2 refuses to build a class that uses a post (pre=False)
    # root_validator without skip_on_failure=True. With the flag set, the
    # check is skipped when a field has already failed validation.
    @root_validator(skip_on_failure=True)
    def validate_passwords(cls, values):
        """Ensure `password` and `confirm_password` are equal.

        Raises:
            ValueError: if the two values differ.
        """
        if values.get('password') != values.get('confirm_password'):
            raise ValueError('密码和确认密码不匹配')
        return values

配置类

from pydantic import BaseModel

# Model demonstrating model-wide settings declared on the inner Config class.
class User(BaseModel):
    name: str
    age: int

    class Config:
        # Validate values again whenever an attribute is assigned.
        validate_assignment = True
        # Store enum members as their underlying values.
        use_enum_values = True
        # Let aliased fields also be populated by their field name. This is
        # the Pydantic v2 key; the v1 spelling `allow_population_by_field_name`
        # is only reported as renamed under v2 and is not applied.
        populate_by_name = True
        # Reject fields that are not declared on the model.
        extra = 'forbid'
        # Custom JSON encoders, keyed by type.
        json_encoders = {
            datetime: lambda v: v.isoformat()
        }

实际应用案例

API 请求验证

from pydantic import BaseModel, Field
from typing import Optional
from enum import Enum

# Order status values. Mixing in `str` makes each member a str instance
# whose value is the lowercase text on the right.
class OrderStatus(str, Enum):
    PENDING = "pending"
    PROCESSING = "processing"
    SHIPPED = "shipped"
    DELIVERED = "delivered"
    CANCELLED = "cancelled"

# Request body for creating an order; every constraint is declared on Field.
class CreateOrderRequest(BaseModel):
    product_id: int = Field(..., gt=0)
    quantity: int = Field(..., ge=1, le=100)
    # `pattern` is the Pydantic v2 keyword; the v1 spelling `regex` is
    # rejected by v2 when the class is defined.
    customer_email: str = Field(..., pattern=r'^[^@]+@[^@]+\.[^@]+$')
    shipping_address: str = Field(..., min_length=10)
    notes: Optional[str] = Field(None, max_length=500)

# Order response model: id, status enum, total amount and creation time.
# NOTE(review): `datetime` is not imported by the import lines directly above
# this snippet; it relies on an import made earlier in the file - confirm
# before copying the snippet on its own.
class OrderResponse(BaseModel):
    order_id: int
    status: OrderStatus
    total_amount: float
    created_at: datetime

    class Config:
        # Encode datetime values through isoformat() when dumping to JSON.
        json_encoders = {
            datetime: lambda v: v.isoformat()
        }

配置文件验证

from pydantic import BaseModel, Field
from typing import Optional

# Database connection settings; host and port have defaults, the credentials
# and database name are required.
class DatabaseConfig(BaseModel):
    host: str = "localhost"
    port: int = Field(5432, ge=1, le=65535)  # must be within 1..65535
    username: str
    password: str
    database: str

# Redis connection settings; every field has a default.
class RedisConfig(BaseModel):
    host: str = "localhost"
    port: int = Field(6379, ge=1, le=65535)  # must be within 1..65535
    password: Optional[str] = None
    db: int = Field(0, ge=0, le=15)  # database index, limited to 0..15

# Top-level application settings that nest the database and Redis sections.
class AppConfig(BaseModel):
    app_name: str
    debug: bool = False
    secret_key: str = Field(..., min_length=32)
    database: DatabaseConfig
    redis: RedisConfig

    @validator('secret_key')
    def validate_secret_key(cls, v):
        """Reject a secret key that consists only of whitespace."""
        is_blank = not v.strip()
        if is_blank:
            raise ValueError('密钥不能为空')
        return v

# Load the configuration from a plain dict.
config_data = dict(
    app_name="我的应用",
    debug=True,
    secret_key="your-super-secret-key-here-32-chars-long",
    database=dict(
        host="localhost",
        port=5432,
        username="user",
        password="password",
        database="myapp",
    ),
    redis=dict(
        host="localhost",
        port=6379,
        db=0,
    ),
)

config = AppConfig(**config_data)

最佳实践

1. 使用类型注解

# ✅ 好的做法
from typing import List, Optional, Dict
from pydantic import BaseModel

# Recommended style: fully parameterised annotations and a None default for
# the optional mapping.
class GoodModel(BaseModel):
    items: List[str]
    metadata: Optional[Dict[str, str]] = None

# ❌ Practice to avoid (kept as a deliberate counter-example)
class BadModel(BaseModel):
    items: list  # not specific enough
    metadata: dict = {}  # mutable default value

2. 合理使用 Field

from pydantic import BaseModel, Field

# Product model whose fields carry a description and sample values.
# `examples` (a list) is the Field parameter Pydantic v2 provides for sample
# values; passing an arbitrary extra keyword such as `example=` is deprecated
# there.
class ProductModel(BaseModel):
    name: str = Field(..., description="产品名称", examples=["iPhone 14"])
    price: float = Field(..., gt=0, description="价格(元)", examples=[5999.0])
    stock: int = Field(0, ge=0, description="库存数量", examples=[100])

3. 错误处理

from pydantic import ValidationError

# Report each validation error with its location, message and input value.
# NOTE(review): `invalid_data` is not defined in the visible source; it has
# to be supplied before this snippet runs.
try:
    user = User(**invalid_data)
except ValidationError as e:
    print("验证错误:")
    for error in e.errors():
        # `loc` is a tuple path: it has several parts for nested fields and
        # can be empty for model-level errors, so indexing [0] could raise
        # IndexError or hide the full path. Join all parts instead.
        location = ".".join(str(part) for part in error['loc'])
        print(f"字段: {location}")
        print(f"错误: {error['msg']}")
        print(f"输入值: {error['input']}")

4. 性能优化

from pydantic import BaseModel

# Model with validation-on-assignment switched off.
#
# The former `json_loads` / `json_dumps` / `copy_on_model_validation` entries
# were dropped: they referenced `orjson`, which is never imported in this
# file (a NameError as soon as the class body runs), and Pydantic v2 removed
# all three Config keys.
class OptimizedModel(BaseModel):
    name: str
    age: int

    class Config:
        # Do not re-validate values on attribute assignment.
        validate_assignment = False

总结

Pydantic 是一个强大而灵活的数据验证库,它能够:

  • 🔍 自动验证数据类型和格式
  • 🔄 智能转换数据类型
  • 📋 生成文档和 JSON Schema
  • 🚀 借助 Rust 实现提升性能
  • 🛡️ 通过类型检查保证安全

无论是构建 API、处理配置文件还是进行数据清洗,Pydantic 都是 Python 开发者的优秀选择。


网站公告

今日签到

点亮在社区的每一天
去签到