JavaScript系列(83)--正则表达式高级详解

发布于:2025-02-24 ⋅ 阅读:(15) ⋅ 点赞:(0)

JavaScript 正则表达式高级详解 🎯

正则表达式是处理文本的强大工具,掌握其高级特性可以让我们更高效地完成复杂的文本匹配与处理任务。让我们深入探讨JavaScript中正则表达式的高级应用。

正则表达式基础回顾 🌟

💡 小知识:正则表达式由模式和标志组成。模式定义了要匹配的文本规则,标志决定了匹配的行为(如是否区分大小写、是否全局匹配等)。

高级匹配模式 📊

// 1. 前瞻和后顾
/**
 * Demonstrates the four lookaround assertions supported by JavaScript:
 * positive/negative lookahead and positive/negative lookbehind.
 *
 * All example patterns carry the `g` flag, so use them with
 * `String.prototype.match` / `matchAll` (stateless) rather than repeated
 * `test` / `exec` calls, which advance `lastIndex`.
 */
class LookAroundPatterns {
    static examples = {
        // Positive lookahead: digits immediately followed by "px".
        positiveAhead: /\d+(?=px)/g,

        // Negative lookahead: a whole number NOT followed by "px".
        // The `\d` alternative prevents the engine from backtracking to a
        // shorter prefix (a bare `\d+(?!px)` would match "10" in "100px").
        negativeAhead: /\d+(?!\d|px)/g,

        // Positive lookbehind: digits immediately preceded by "$".
        positiveBehind: /(?<=\$)\d+/g,

        // Negative lookbehind: a whole number NOT preceded by "$".
        // Excluding a preceding digit prevents matching the tail of a
        // prefixed number (a bare `(?<!\$)\d+` would match "00" in "$300").
        negativeBehind: /(?<![$\d])\d+/g
    };

    /**
     * Logs the matches of every example pattern against a fixed sample text.
     */
    static test() {
        const text = '100px 200em $300 400';

        console.log('Numbers followed by px:',
            text.match(this.examples.positiveAhead));

        console.log('Numbers not followed by px:',
            text.match(this.examples.negativeAhead));

        console.log('Numbers with $ prefix:',
            text.match(this.examples.positiveBehind));

        console.log('Numbers without $ prefix:',
            text.match(this.examples.negativeBehind));
    }
}

// 2. 条件匹配
/**
 * Emulates "conditional" regex constructs in JavaScript.
 *
 * JavaScript has no `(?(cond)yes|no)` syntax; writing it in a regex literal is
 * a SyntaxError. Each pattern below expresses the same intent with
 * alternation and lookaheads instead.
 */
class ConditionalPatterns {
    static patterns = {
        // "If a leading number was captured, expect word characters,
        // otherwise expect uppercase letters": each branch of the alternation
        // is one outcome of the condition.
        ifGroup: /(?:(\d+)\w+|[A-Z]+)/,

        // "If followed by px, consume px, otherwise consume em": the
        // lookaheads make the two branches mutually exclusive.
        ifLookahead: /\d+(?:(?=px)px|(?!px)em)/,

        // Nested form: the lookahead-based choice sits inside the branch taken
        // when the leading number is present.
        nestedCondition: /(?:(\d+)\w+(?:(?=px)px|(?!px)em)|[A-Z]+)/
    };

    /**
     * Tests the input against every pattern.
     *
     * @param {string} input - Text to test.
     * @returns {Array<{name: string, matches: boolean}>} One entry per
     *   pattern, in declaration order.
     */
    static validate(input) {
        return Object.entries(this.patterns).map(([name, pattern]) => ({
            name,
            matches: pattern.test(input)
        }));
    }
}

// 3. 命名捕获组
/**
 * Examples of extracting structured data with named capture groups.
 */
class NamedGroups {
    /**
     * Parses the first `YYYY-MM-DD` occurrence in a string.
     *
     * @param {string} dateString - Text containing a date.
     * @returns {Date|null} A Date at local midnight, or null when no date is
     *   found or the matched date does not exist on the calendar
     *   (e.g. 2024-02-30).
     */
    static parseDate(dateString) {
        const pattern = /(?<year>\d{4})-(?<month>\d{2})-(?<day>\d{2})/;
        const match = dateString.match(pattern);

        if (!match) {
            return null;
        }

        const year = Number(match.groups.year);
        const monthIndex = Number(match.groups.month) - 1;
        const day = Number(match.groups.day);

        // setFullYear is used instead of `new Date(y, m, d)` because the
        // constructor remaps years 0-99 to 1900-1999.
        const date = new Date(2000, 0, 1);
        date.setFullYear(year, monthIndex, day);

        // Date silently rolls over out-of-range fields (month 13, day 31 in a
        // 30-day month); reject those instead of returning a different date.
        const isRealDate = date.getFullYear() === year
            && date.getMonth() === monthIndex
            && date.getDate() === day;

        return isRealDate ? date : null;
    }

    /**
     * Splits an http(s) URL into protocol, domain and optional path.
     *
     * @param {string} url - URL to parse.
     * @returns {Object|null} The named groups (`protocol`, `domain`, `path`;
     *   `path` is undefined when absent), or null when the URL does not match.
     */
    static parseURL(url) {
        const pattern = /^(?<protocol>https?):\/\/(?<domain>[^\/]+)(?<path>\/.*)?$/;
        const match = url.match(pattern);

        return match ? match.groups : null;
    }
}

正则表达式优化 ⚡

// 1. 性能优化
/**
 * Helpers for rewriting and timing regular expressions.
 */
class RegexOptimization {
    /**
     * Rewrites greedy `\w+`, `\s+` and `[^]+` tokens in a pattern to their
     * lazy forms (`\w+?`, `\s+?`, `[^]+?`).
     *
     * Escaped backslashes and the contents of character classes are skipped,
     * so `\\w+` and `[\w+]` are left untouched, as are quantifiers that are
     * already lazy.
     *
     * NOTE: a lazy quantifier changes which text is matched and does not by
     * itself rule out catastrophic backtracking; treat this as a heuristic
     * rewrite and verify the result against real inputs.
     *
     * @param {string|RegExp} pattern - Pattern source, or a RegExp whose
     *   flags are preserved.
     * @returns {RegExp} The rewritten regular expression.
     * @throws {SyntaxError} If the pattern is not a valid regular expression.
     */
    static optimizePattern(pattern) {
        const isRegExp = pattern instanceof RegExp;
        const source = isRegExp ? pattern.source : String(pattern);
        const flags = isRegExp ? pattern.flags : '';

        // Alternatives, in order: (1) a target token with a greedy `+`,
        // (2) any escape sequence, (3) a whole character class. Only (1)
        // captures a group; (2) and (3) exist to consume text that must not
        // be rewritten.
        const tokens = /(\\[ws]|\[\^\])\+(?!\?)|\\[^]|\[(?:\\[^]|[^\]\\])*\]/g;

        const optimized = source.replace(tokens, (match, target) =>
            (target === undefined ? match : `${target}+?`));

        return new RegExp(optimized, flags);
    }

    /**
     * Times repeated `test` calls of a pattern against a text.
     *
     * @param {RegExp} pattern - Pattern to time.
     * @param {string} text - Input text.
     * @param {number} [iterations=1000] - Number of `test` calls.
     * @returns {number} Elapsed milliseconds.
     */
    static measurePerformance(pattern, text, iterations = 1000) {
        const start = performance.now();

        for (let i = 0; i < iterations; i++) {
            // Global/sticky regexes resume from lastIndex; reset it so every
            // iteration measures the same scan.
            pattern.lastIndex = 0;
            pattern.test(text);
        }

        return performance.now() - start;
    }

    /**
     * Times several patterns against the same text.
     *
     * @param {RegExp[]} patterns - Patterns to compare.
     * @param {string} text - Input text.
     * @returns {Array<{pattern: string, time: number}>} One timing per
     *   pattern, in input order.
     */
    static comparePatterns(patterns, text) {
        return patterns.map(pattern => ({
            pattern: pattern.toString(),
            time: this.measurePerformance(pattern, text)
        }));
    }
}

// 2. 缓存和重用
/**
 * Memoizes compiled regular expressions by their pattern source so the same
 * source is compiled only once.
 */
class RegexCache {
    constructor() {
        // Pattern source -> compiled RegExp.
        this.cache = new Map();
    }

    /**
     * Returns the cached RegExp for a pattern, compiling it on first use.
     *
     * @param {string} pattern - Pattern source passed to `new RegExp`.
     * @returns {RegExp} The shared compiled instance.
     */
    getPattern(pattern) {
        let compiled = this.cache.get(pattern);

        // Stored values are always RegExp objects, so undefined means "miss".
        if (compiled === undefined) {
            compiled = new RegExp(pattern);
            this.cache.set(pattern, compiled);
        }

        return compiled;
    }

    /** Drops every cached RegExp. */
    clearCache() {
        this.cache.clear();
    }
}

// 3. 动态构建
/**
 * Fluent builder that assembles a regular expression from parts.
 * Every `add*` method appends one fragment and returns the builder.
 */
class RegexBuilder {
    constructor() {
        // Pattern fragments, concatenated in order by build().
        this.parts = [];
    }

    /**
     * Escapes text so it matches literally inside a pattern.
     * Uses the native `RegExp.escape` when the runtime provides it and falls
     * back to escaping the regex syntax characters otherwise, because
     * `RegExp.escape` is missing on many runtimes still in use.
     */
    static #escapeLiteral(text) {
        const literal = String(text);

        if (typeof RegExp.escape === 'function') {
            return RegExp.escape(literal);
        }

        // Only syntax characters are escaped, so the result stays valid under
        // the `u` flag as well.
        return literal.replace(/[.*+?^${}()|[\]\\\/]/g, '\\$&');
    }

    /**
     * Appends text that must match literally.
     * @param {string} text - Literal text.
     * @returns {RegexBuilder} This builder.
     */
    addLiteral(text) {
        this.parts.push(RegexBuilder.#escapeLiteral(text));
        return this;
    }

    /**
     * Appends exactly `length` digits.
     * @param {number} length - Digit count.
     * @returns {RegexBuilder} This builder.
     */
    addDigits(length) {
        this.parts.push(`\\d{${length}}`);
        return this;
    }

    /**
     * Appends one or more word characters.
     * @returns {RegexBuilder} This builder.
     */
    addWord() {
        this.parts.push('\\w+');
        return this;
    }

    /**
     * Appends an optional non-capturing group around a raw pattern.
     * @param {string} pattern - Raw (unescaped) pattern source.
     * @returns {RegexBuilder} This builder.
     */
    addOptional(pattern) {
        this.parts.push(`(?:${pattern})?`);
        return this;
    }

    /**
     * Appends a named capture group around a raw pattern.
     * @param {string} name - Group name.
     * @param {string} pattern - Raw (unescaped) pattern source.
     * @returns {RegexBuilder} This builder.
     */
    addGroup(name, pattern) {
        this.parts.push(`(?<${name}>${pattern})`);
        return this;
    }

    /**
     * Compiles the accumulated parts.
     * @param {string} [flags=''] - RegExp flags.
     * @returns {RegExp} The compiled expression.
     * @throws {SyntaxError} If the assembled pattern or flags are invalid.
     */
    build(flags = '') {
        return new RegExp(this.parts.join(''), flags);
    }
}

实战应用示例 💼

// 1. 表单验证
/**
 * Validates common form fields against a fixed set of patterns.
 */
class FormValidator {
    static patterns = {
        email: /^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$/,
        phone: /^(?:\+?\d{1,3}[-. ]?)?\(?\d{3}\)?[-. ]?\d{3}[-. ]?\d{4}$/,
        password: /^(?=.*[A-Za-z])(?=.*\d)(?=.*[@$!%*#?&])[A-Za-z\d@$!%*#?&]{8,}$/,
        username: /^[a-zA-Z0-9_]{3,16}$/
    };

    /**
     * Validates one value.
     *
     * @param {string} type - A key of `FormValidator.patterns`.
     * @param {*} value - Value to validate; null/undefined are always invalid.
     * @returns {{isValid: boolean, type: string, value: *}} Validation result.
     * @throws {Error} If `type` is not a known validation type.
     */
    static validate(type, value) {
        // Own-property check: inherited keys such as "constructor" must not
        // be treated as validation types.
        if (!Object.hasOwn(this.patterns, type)) {
            throw new Error(`Unknown validation type: ${type}`);
        }

        const pattern = this.patterns[type];

        // RegExp.prototype.test coerces its argument to a string, so a
        // missing value would be tested as "undefined"/"null" and could pass
        // (e.g. as a username). Reject missing values explicitly.
        const isValid = value != null && pattern.test(String(value));

        return {
            isValid,
            type,
            value
        };
    }

    /**
     * Validates every entry of an object whose keys are validation types.
     *
     * @param {Object} data - Map of validation type to value.
     * @returns {Array<{isValid: boolean, type: string, value: *}>} Results in
     *   key order.
     * @throws {Error} If any key is not a known validation type.
     */
    static validateAll(data) {
        return Object.entries(data).map(([type, value]) =>
            this.validate(type, value)
        );
    }
}

// 2. 文本解析器
/**
 * Regex-based helpers for converting and mining plain text.
 */
class TextParser {
    /**
     * Converts a small Markdown subset (ATX headings, bold, italic, links,
     * inline code) to HTML.
     *
     * NOTE(review): the input is not HTML-escaped and link targets are not
     * validated, so the output must not be inserted into a page when the
     * Markdown comes from an untrusted source.
     *
     * @param {string} text - Markdown source.
     * @returns {string} HTML string.
     */
    static parseMarkdown(text) {
        const patterns = {
            // Hashes are captured so the level comes from their count, and
            // the separator is limited to spaces/tabs so a heading can never
            // span a line break.
            heading: /^(#{1,6})[ \t]+(.+)$/gm,
            bold: /\*\*(.+?)\*\*/g,
            italic: /\_(.+?)\_/g,
            link: /\[(.+?)\]\((.+?)\)/g,
            code: /`(.+?)`/g
        };

        let html = text;

        // Convert headings.
        html = html.replace(patterns.heading, (match, hashes, content) => {
            const level = hashes.length;
            return `<h${level}>${content}</h${level}>`;
        });

        // Convert inline formatting.
        html = html
            .replace(patterns.bold, '<strong>$1</strong>')
            .replace(patterns.italic, '<em>$1</em>')
            .replace(patterns.link, '<a href="$2">$1</a>')
            .replace(patterns.code, '<code>$1</code>');

        return html;
    }

    /**
     * Extracts dates, e-mail addresses, URLs and hashtags from text.
     *
     * @param {string} text - Text to scan.
     * @returns {{dates: string[], emails: string[], urls: string[], hashtags: string[]}}
     *   Matches per category; an empty array when nothing was found.
     */
    static extractData(text) {
        const patterns = {
            dates: /\b\d{4}-\d{2}-\d{2}\b/g,
            // The TLD class is [A-Za-z]; a `|` inside a character class is a
            // literal pipe, not alternation.
            emails: /\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b/g,
            urls: /https?:\/\/(?:www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b(?:[-a-zA-Z0-9()@:%_\+.~#?&\/=]*)/g,
            hashtags: /#[a-zA-Z0-9_]+/g
        };

        return Object.entries(patterns).reduce((acc, [key, pattern]) => {
            acc[key] = text.match(pattern) || [];
            return acc;
        }, {});
    }
}

// 3. 代码分析器
/**
 * Locates functions, classes, imports and exports in JavaScript source text
 * using regular expressions.
 */
class CodeAnalyzer {
    static patterns = {
        functions: /function\s+(\w+)\s*\([^)]*\)\s*{/g,
        classes: /class\s+(\w+)(?:\s+extends\s+(\w+))?\s*{/g,
        imports: /import\s+(?:{[^}]+}|\w+)\s+from\s+['"]([^'"]+)['"]/g,
        exports: /export\s+(?:default\s+)?(?:class|function|const|let|var)\s+(\w+)/g
    };

    /**
     * Runs every pattern over the code and collects its hits.
     *
     * @param {string} code - Source text to scan.
     * @returns {Object} One array per key of `patterns`; each entry holds the
     *   first capture (`name`), the match offset (`position`) and the matched
     *   text (`full`).
     */
    static analyze(code) {
        const report = {};

        Object.entries(this.patterns).forEach(([kind, regex]) => {
            const hits = [];

            // Each exec() call resumes from regex.lastIndex and returns null
            // once the input is exhausted.
            for (let hit = regex.exec(code); hit !== null; hit = regex.exec(code)) {
                const [full, name] = hit;
                hits.push({ name, position: hit.index, full });
            }

            report[kind] = hits;
        });

        return report;
    }

    /**
     * Lists the module specifiers imported by the code.
     *
     * @param {string} code - Source text to scan.
     * @returns {Array<{module: string, position: number}>} Imported modules
     *   with the offset of each import statement.
     */
    static findDependencies(code) {
        return Array.from(
            code.matchAll(this.patterns.imports),
            (hit) => ({ module: hit[1], position: hit.index })
        );
    }
}

调试与测试 🔍

// 1. 正则表达式测试器
/**
 * Utilities for trying out and explaining regular expressions.
 */
class RegexTester {
    /**
     * Runs a pattern against an input and reports every match.
     *
     * @param {RegExp|string} pattern - A RegExp, or a source string compiled
     *   without flags.
     * @param {string} input - Text to search.
     * @returns {{pattern: string, input: string, matches: Array, groups: null,
     *   time: number, error?: string}} Match report; each entry of `matches`
     *   has `text`, `index` and `groups`. `error` is set when matching threw.
     * @throws {SyntaxError} If a string pattern is not a valid expression.
     */
    static test(pattern, input) {
        const regex = pattern instanceof RegExp ? pattern : new RegExp(pattern);
        const start = performance.now();

        const result = {
            pattern: regex.toString(),
            input,
            matches: [],
            groups: null,
            time: 0
        };

        const toEntry = (match) => ({
            text: match[0],
            index: match.index,
            groups: match.groups
        });

        try {
            if (regex.global) {
                // matchAll steps past zero-length matches; a hand-written
                // exec() loop never terminates for patterns such as /a*/g.
                for (const match of String(input).matchAll(regex)) {
                    result.matches.push(toEntry(match));
                }
            } else {
                const match = input.match(regex);
                if (match) {
                    result.matches.push(toEntry(match));
                }
            }
        } catch (error) {
            result.error = error.message;
        }

        result.time = performance.now() - start;
        return result;
    }

    /**
     * Annotates the group openers of a pattern with a readable label.
     *
     * Escaped parentheses (`\(`) and parentheses inside character classes are
     * literals and are left untouched.
     *
     * @param {RegExp|string} pattern - Pattern to annotate.
     * @returns {string} The pattern text with each group opener labelled.
     */
    static visualize(pattern) {
        const labels = new Map([
            ['?:', '(非捕获组 '],
            ['?=', '(正向前瞻 '],
            ['?!', '(负向前瞻 '],
            ['?<=', '(正向后顾 '],
            ['?<!', '(负向后顾 ']
        ]);

        // Alternatives, in order: an escape sequence, a whole character
        // class (both consumed only so they are skipped), then a group
        // opener: named group, known `(?…` prefix, or a plain `(` that is not
        // followed by `?`.
        const token = /\\[^]|\[(?:\\[^]|[^\]\\])*\]|\((?:\?<(\w+)>|(\?:|\?=|\?!|\?<=|\?<!)|(?!\?))/g;

        return pattern.toString().replace(token, (match, groupName, prefix) => {
            if (match[0] !== '(') {
                return match;
            }
            if (groupName !== undefined) {
                return `(命名组 ${groupName} `;
            }
            if (prefix !== undefined) {
                return labels.get(prefix);
            }
            return '(捕获组 ';
        });
    }
}

// 2. 错误处理
/**
 * Error raised for an invalid regular expression; carries the offending
 * pattern.
 */
class RegexError extends Error {
    /**
     * @param {string} message - Error description.
     * @param {string|RegExp} pattern - The pattern that failed.
     * @param {{cause?: unknown}} [options] - Standard Error options, used to
     *   keep the underlying error as `cause`.
     */
    constructor(message, pattern, options) {
        super(message, options);
        this.name = 'RegexError';
        this.pattern = pattern;
    }

    /**
     * Checks that a pattern compiles.
     *
     * @param {string|RegExp} pattern - Pattern to compile.
     * @returns {boolean} Always true when the pattern is valid.
     * @throws {RegexError} If the pattern cannot be compiled; the original
     *   error is available as `cause`.
     */
    static validate(pattern) {
        try {
            new RegExp(pattern);
            return true;
        } catch (error) {
            // Keep the engine's error so its type and stack are not lost.
            throw new RegexError(error.message, pattern, { cause: error });
        }
    }
}

// 3. 单元测试
/**
 * Minimal test runner for regular expressions.
 */
class RegexTesting {
    /**
     * Runs a list of test cases.
     *
     * @param {Array<{pattern: string|RegExp, input: string, expected: boolean}>} tests
     *   Cases to run; extra fields are copied to the result.
     * @returns {Array<Object>} Each case extended with `passed` and either
     *   `actual` or, when the pattern failed to compile or run, `error`.
     */
    static runTests(tests) {
        return tests.map(test => {
            const { pattern, input, expected } = test;

            try {
                // Compiling a fresh copy keeps the run independent of any
                // lastIndex state on a caller-supplied RegExp.
                const regex = new RegExp(pattern);
                const actual = regex.test(input);

                return {
                    ...test,
                    passed: actual === expected,
                    actual
                };
            } catch (error) {
                return {
                    ...test,
                    passed: false,
                    error: error.message
                };
            }
        });
    }

    /**
     * Generates baseline test cases for a pattern.
     *
     * Each case carries the pattern itself so the result can be passed
     * straight to `runTests`.
     *
     * @param {RegExp|string} pattern - Pattern under test.
     * @returns {Array<{pattern: RegExp|string, input: string, expected: boolean, description: string}>}
     *   Generated cases.
     */
    static generateTestCases(pattern) {
        // A fresh copy starts at lastIndex 0, so a /g or /y pattern that was
        // used before does not skew the computed expectation, and the
        // caller's RegExp is not mutated.
        const probe = new RegExp(pattern);

        return [
            {
                pattern,
                input: '',
                expected: false,
                description: '空字符串'
            },
            {
                pattern,
                input: 'test',
                expected: probe.test('test'),
                description: '基本测试'
            },
            // Add more boundary cases here.
        ];
    }
}

最佳实践 ⭐

// 1. 模式库
/**
 * Library of commonly used validation patterns.
 */
class RegexPatterns {
    static common = {
        email: /^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$/,
        url: /^https?:\/\/(?:www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b(?:[-a-zA-Z0-9()@:%_\+.~#?&\/=]*)$/,
        ipv4: /^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$/,
        date: /^(?:\d{4})-(?:0[1-9]|1[0-2])-(?:0[1-9]|[12]\d|3[01])$/
    };

    /**
     * Looks up a pattern by name.
     *
     * @param {string} name - A key of `RegexPatterns.common`.
     * @returns {RegExp|null} The pattern, or null when the name is unknown.
     */
    static getPattern(name) {
        // Own-property check so inherited keys ("toString", "constructor")
        // are reported as unknown instead of returning a function.
        return Object.hasOwn(this.common, name) ? this.common[name] : null;
    }
}

// 2. 安全考虑
/**
 * Heuristic checks against regular expressions that are prone to
 * catastrophic backtracking (ReDoS).
 */
class RegexSecurity {
    /**
     * Reports whether a pattern looks free of catastrophic backtracking.
     *
     * A pattern is flagged when it equals one of the known-bad examples or
     * when it contains a quantified group whose body itself contains `+` or
     * `*` (e.g. `(x+)+`). This is a heuristic: it can flag harmless patterns
     * and cannot prove that an unflagged pattern is safe.
     *
     * @param {RegExp|string} pattern - Pattern to inspect.
     * @returns {boolean} False when the pattern is flagged as dangerous.
     */
    static isSafe(pattern) {
        const text = pattern.toString();

        // Known-bad examples, compared by their textual form.
        const dangerous = [
            /^(a+)+$/,  // nested repetition
            /^([a-z]+)*$/,  // unbounded repetition of a repeated class
            /^(a|a)*$/  // overlapping alternatives
        ];

        if (dangerous.some(d => d.toString() === text)) {
            return false;
        }

        // A group without nested parentheses whose body contains an unescaped
        // `+`/`*` and which is itself followed by `+`, `*` or `{n,}`.
        const nestedQuantifier =
            /\((?:[^()\\]|\\[^])*[+*](?:[^()\\]|\\[^])*\)(?:[+*]|\{\d+,\})/;

        return !nestedQuantifier.test(text);
    }

    /**
     * Returns the pattern unchanged.
     *
     * A timer cannot interrupt a regular expression: matching is synchronous,
     * so a timeout callback only runs after the match has already finished.
     * Enforcing a real time limit requires running the match in a separate
     * thread or process that can be terminated.
     *
     * @param {RegExp|string} pattern - Pattern to pass through.
     * @returns {RegExp|string} The same pattern.
     */
    static sanitize(pattern) {
        return pattern;
    }
}

// 3. 文档生成
/**
 * Generates a small documentation record for a regular expression.
 */
class RegexDocumentation {
    /**
     * Builds the documentation record.
     *
     * @param {RegExp} pattern - Pattern to document.
     * @returns {{pattern: string, flags: string, description: string,
     *   examples: Array<{input: string, matches: boolean}>}} Documentation.
     */
    static generate(pattern) {
        const docs = {
            pattern: pattern.toString(),
            flags: pattern.flags,
            description: this.describePattern(pattern),
            examples: this.generateExamples(pattern)
        };

        return docs;
    }

    /**
     * Describes which anchors and group constructs a pattern uses.
     *
     * Escape sequences and character classes are removed before inspection,
     * so `\$`, `\^` and `[^a]` are not mistaken for anchors.
     *
     * @param {RegExp|string} pattern - Pattern to describe.
     * @returns {string} Comma-separated description; empty when none of the
     *   recognised features is present.
     */
    static describePattern(pattern) {
        const parts = [];
        const source = pattern instanceof RegExp ? pattern.source : String(pattern);
        const bare = source.replace(/\\[^]|\[(?:\\[^]|[^\]\\])*\]/g, '');

        if (bare.includes('^')) parts.push('从开始位置匹配');
        if (bare.includes('$')) parts.push('匹配到结束位置');
        if (bare.includes('(?:')) parts.push('包含非捕获组');
        // `(?<` also opens the lookbehinds `(?<=` / `(?<!`; only a following
        // name character makes it a named group.
        if (/\(\?<(?![=!])/.test(bare)) parts.push('包含命名捕获组');
        if (bare.includes('(?=')) parts.push('包含前瞻断言');
        if (bare.includes('(?!')) parts.push('包含负向前瞻');
        if (bare.includes('(?<=')) parts.push('包含后顾断言');
        if (bare.includes('(?<!')) parts.push('包含负向后顾');

        return parts.join(',');
    }

    /**
     * Tests the pattern against sample inputs.
     *
     * @param {RegExp|string} pattern - Pattern to exercise.
     * @param {string[]} [samples] - Inputs to try.
     * @returns {Array<{input: string, matches: boolean}>} One entry per
     *   sample, in order.
     */
    static generateExamples(pattern, samples = ['', 'test', '123', 'test@example.com']) {
        // Work on a copy so a caller-supplied /g or /y RegExp is not mutated.
        const regex = new RegExp(pattern);

        return samples.map((input) => {
            // Start every sample at offset 0 for global/sticky patterns.
            regex.lastIndex = 0;
            return { input, matches: regex.test(input) };
        });
    }
}

结语 📝

正则表达式是一个强大的文本处理工具,掌握其高级特性可以让我们更高效地处理复杂的文本匹配需求。我们学习了:

  1. 高级匹配模式的使用
  2. 正则表达式的优化技巧
  3. 实战应用场景
  4. 调试和测试方法
  5. 安全性考虑
  6. 最佳实践和模式库

💡 学习建议:

  1. 从基础模式开始,逐步掌握高级特性
  2. 注意性能和安全性问题
  3. 多进行实战练习
  4. 建立自己的模式库
  5. 保持代码的可维护性

如果你觉得这篇文章有帮助,欢迎点赞收藏,也期待在评论区看到你的想法和建议!👇

终身学习,共同成长。

咱们下一期见

💻