PHP 拆词搜索(常用于搜索内容)

发布于:2025-04-14 ⋅ 阅读:(19) ⋅ 点赞:(0)
// Search keyword from the query string. A missing parameter or a non-string value
// (e.g. ?mkey[]=x) is treated as "no keyword" instead of raising a notice.
$mkey = (isset($_GET["mkey"]) && is_string($_GET["mkey"])) ? trim($_GET["mkey"]) : '';

// The three SQL fragments are interpolated into $sql below even when no keyword is
// given, so make sure they exist. `??` keeps any value assigned by earlier code that
// is not visible in this snippet.
$tj_param = $tj_param ?? '';
$tj_mkey  = $tj_mkey ?? '';
$tj_sort  = $tj_sort ?? '';

// Compare against '' rather than relying on truthiness so that the keyword "0" is searchable.
if ($mkey !== '') {
    // Fallback filter: plain substring match on the whole keyword. It stays in effect
    // only when the tokenizer below yields no tokens.
    // NOTE(review): addslashes() is a stop-gap because the DB handle is not visible here;
    // prefer bound parameters (or the driver's own escaping) where $sql is executed.
    $mkeyEscaped = addslashes($mkey);
    $tj_mkey = 'and (xtitle like "%' . $mkeyEscaped . '%" or danhao like "%' . $mkeyEscaped . '%" or addr like "%' . $mkeyEscaped . '%")';

    // Token-based search
    include_once dirname(__FILE__, 3) .'/DetachStr.class.php';
    $Detach = new DetachStr;
    $mkeyArr = $Detach->dualDecom($mkey);
    if ($mkeyArr) {
        // Tokens originate from user input. Escaping them is defence in depth: it is a
        // no-op for plain letter/digit/CJK tokens but keeps the query safe if the
        // tokenizer ever starts emitting quotes or backslashes.
        $tokens = array_map('addslashes', $mkeyArr);

        // Relevance score: one point per token found anywhere in the three searched columns.
        $scoreTerms = [];
        foreach ($tokens as $val) {
            $scoreTerms[] = "IF(CONCAT_WS(' ', xtitle, danhao, addr) LIKE '%{$val}%', 1, 0)";
        }
        $tj_param = ",(" . implode("+", $scoreTerms) . ") AS keyNum";

        // A row qualifies when any token matches any of the searched columns.
        $tokenRegexp = implode("|", $tokens);
        $tj_mkey = "and (xtitle REGEXP '{$tokenRegexp}' or danhao REGEXP '{$tokenRegexp}' or addr REGEXP '{$tokenRegexp}')";

        // Rows matching more tokens come first.
        $tj_sort = "keyNum DESC,";
    }
}

// Build the final SQL statement ($start and $count are defined outside this snippet).
$sql = "select *{$tj_param} from php_jigui where jia=1 $tj_mkey";
$sql .= " order by {$tj_sort}id desc limit $start,$count";

执行结果
 
DetachStr.class.php文件内容

<?php

/**
 * Text tokenizer that breaks a phrase into candidate search keywords.
 * @version 1.0.1
 */
class DetachStr
{
	/**
	 * Split a text into search tokens.
	 *
	 * Tokens are collected in this order:
	 *  1. every run of ASCII letters, every run of ASCII digits and every single
	 *     character from the symbol/emoji ranges of the pattern below, in order
	 *     of appearance;
	 *  2. for every run of characters in U+4E00..U+9FA5: the run itself when it
	 *     is 1 or 2 characters long, otherwise each of its 2- to 5-character
	 *     substrings.
	 *
	 * @param string $text Input text (expected to be valid UTF-8)
	 * @return array De-duplicated tokens as a sequentially indexed list; empty
	 *               when nothing can be extracted or the pattern match fails
	 *               (e.g. on malformed UTF-8)
	 */
	public function dualDecom($text)
	{
		// English words, numbers and symbol/emoji characters. Only the matched
		// text is needed, so no capture groups or offsets are requested.
		$pattern = '/[a-zA-Z]+|[0-9]+|[\x{1F600}-\x{1F64F}\x{1F300}-\x{1F5FF}\x{1F680}-\x{1F6FF}\x{2600}-\x{26FF}]/u';
		if (preg_match_all($pattern, $text, $matches) === false) {
			// With the /u modifier PCRE refuses malformed UTF-8; nothing can be tokenized.
			return [];
		}
		$result = $matches[0];

		// Each maximal run of Chinese characters is one "word"; everything else
		// acts as a separator.
		if (preg_match_all('/[\x{4e00}-\x{9fa5}]+/u', $text, $runs) === false) {
			$runs = [[]];
		}

		foreach ($runs[0] as $word) {
			$length = mb_strlen($word, 'UTF-8');

			// Words of one or two characters are kept as they are.
			if ($length <= 2) {
				$result[] = $word;
				continue;
			}

			// Longer words: emit every substring of 2 to 5 characters, scanning
			// start positions left to right and lengths from short to long.
			for ($i = 0; $i + 2 <= $length; $i++) {
				$maxLen = min(5, $length - $i);
				for ($j = 2; $j <= $maxLen; $j++) {
					$result[] = mb_substr($word, $i, $j, 'UTF-8');
				}
			}
		}

		// array_unique() keeps the original keys, which leaves gaps after
		// duplicates are dropped; reindex so callers receive a proper list.
		return array_values(array_unique($result));
	}
}