Using Elasticsearch in Node.js (Part 1): Installation and Usage

Published: 2025-02-21

Installing Elasticsearch and Kibana with Docker

1) Create the data directory and its subdirectories, used to persist Elasticsearch and Kibana data, as sketched below.
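A minimal sketch of that layout, assuming the relative paths mirror the host paths mounted in the docker-compose.yml of step 4 (adjust to your machine):

# Directories for Elasticsearch data, plugins, and config
mkdir -p data/elasticsearch/data data/elasticsearch/plugins data/elasticsearch/config

# Directories for Kibana data and config
mkdir -p data/kibana/data data/kibana/config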

2) Create the Elasticsearch config file data/elasticsearch/config/elasticsearch.yml in advance:

# ======================== Elasticsearch Configuration =========================
#
# NOTE: Elasticsearch comes with reasonable defaults for most settings.
#       Before you set out to tweak and tune the configuration, make sure you
#       understand what you are trying to accomplish and the consequences.
#
# The primary way of configuring a node is via this file. This template lists
# the most important settings you may want to configure for a production cluster.
#
# Please consult the documentation for further information on configuration options:
# https://www.elastic.co/guide/en/elasticsearch/reference/index.html
#
xpack.security.enabled: false
network.host: 0.0.0.0
# xpack.security.enrollment.enabled: true

# xpack.security.http.ssl.enabled: true
# xpack.security.http.ssl.keystore.path: certs/http.p12

# xpack.security.transport.ssl.enabled: true
# xpack.security.transport.ssl.verification_mode: certificate
# xpack.security.transport.ssl.client_authentication: required
# xpack.security.transport.ssl.keystore.path: elastic-certificates.p12
# xpack.security.transport.ssl.truststore.path: elastic-certificates.p12

# Disable disk watermark checks
cluster.routing.allocation.disk.watermark.low: 100%
cluster.routing.allocation.disk.watermark.high: 100%
cluster.routing.allocation.disk.watermark.flood_stage: 100%

# Disable disk-watermark-based allocation decisions entirely
cluster.routing.allocation.disk.threshold_enabled: false

# ---------------------------------- Cluster -----------------------------------
#
# Use a descriptive name for your cluster:
#
#cluster.name: my-application
#
# ------------------------------------ Node ------------------------------------
#
# Use a descriptive name for the node:
#
#node.name: node-1
#
# Add custom attributes to the node:
#
#node.attr.rack: r1
#
# ----------------------------------- Paths ------------------------------------
#
# Path to directory where to store the data (separate multiple locations by comma):
#
#path.data: /path/to/data
#
# Path to log files:
#
#path.logs: /path/to/logs
#
# ----------------------------------- Memory -----------------------------------
#
# Lock the memory on startup:
#
#bootstrap.memory_lock: true
#
# Make sure that the heap size is set to about half the memory available
# on the system and that the owner of the process is allowed to use this
# limit.
#
# Elasticsearch performs poorly when the system is swapping the memory.
#
# ---------------------------------- Network -----------------------------------
#
# By default Elasticsearch is only accessible on localhost. Set a different
# address here to expose this node on the network:
#
#network.host: 192.168.0.1
#
# By default Elasticsearch listens for HTTP traffic on the first free port it
# finds starting at 9200. Set a specific HTTP port here:
#
#http.port: 9200
#
# For more information, consult the network module documentation.
#
# --------------------------------- Discovery ----------------------------------
#
# Pass an initial list of hosts to perform discovery when this node is started:
# The default list of hosts is ["127.0.0.1", "[::1]"]
#
#discovery.seed_hosts: ["host1", "host2"]
#
# Bootstrap the cluster using an initial set of master-eligible nodes:
#
#cluster.initial_master_nodes: ["node-1", "node-2"]
#
# For more information, consult the discovery and cluster formation module documentation.
#
# ---------------------------------- Various -----------------------------------
#
# Allow wildcard deletion of indices:
#
#action.destructive_requires_name: false

3) Create the Kibana config file data/kibana/config/kibana.yml in advance. To switch the UI to Chinese, add the i18n.locale: "zh-CN" setting to kibana.yml:

#
# ** THIS IS AN AUTO-GENERATED FILE **
#

# Default Kibana configuration for docker target
server.host: "0.0.0.0"
server.shutdownTimeout: "5s"
elasticsearch.hosts: [ "http://elasticsearch:9200" ]
monitoring.ui.container.elasticsearch.enabled: true
i18n.locale: "zh-CN" # set the UI language to Chinese

4) Create the docker-compose.yml file. The host paths under volumes, where the persisted data is stored, can be customized.

services:
  elasticsearch:
    image: elasticsearch:8.17.2
    container_name: elasticsearch
    environment:
      - ES_JAVA_OPTS=-Xms1g -Xmx1g
      - discovery.type=single-node
      - ELASTICSEARCH_PASSWORD=admin1234 # password for the elastic user (has no effect while xpack security is disabled)
    volumes:
      - D:/VScode/nodejs/intensify/docker/elasticsearch/data/elasticsearch/data:/usr/share/elasticsearch/data
      - D:/VScode/nodejs/intensify/docker/elasticsearch/data/elasticsearch/plugins:/usr/share/elasticsearch/plugins
      - D:/VScode/nodejs/intensify/docker/elasticsearch/data/elasticsearch/config/elasticsearch.yml:/usr/share/elasticsearch/config/elasticsearch.yml
    privileged: true
    networks:
      - elastic-net
    ports:
      - "9200:9200"
      - "9300:9300"

  kibana:
    image: kibana:8.17.2
    container_name: kibana
    environment:
      - ELASTICSEARCH_HOSTS=http://elasticsearch:9200
    volumes:
      - D:/VScode/nodejs/intensify/docker/elasticsearch/data/kibana/data:/usr/share/kibana/data
      - D:/VScode/nodejs/intensify/docker/elasticsearch/data/kibana/config/kibana.yml:/usr/share/kibana/config/kibana.yml
    networks:
      - elastic-net
    ports:
      - "5601:5601"

networks:
  elastic-net:
    driver: bridge

5) From the directory containing docker-compose.yml, run the command below, and the services become directly accessible (a quick verification sketch follows).

docker compose up -d
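To verify the stack came up (a quick check, assuming the default port mappings above):

# Both containers should show a running state
docker compose ps

# Elasticsearch answers on 9200 with its cluster info JSON
curl http://localhost:9200

# Kibana is then reachable in the browser at http://localhost:5601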


Installing analysis plugins

1. The analysis-icu analyzer

The analysis-icu plugin is an analysis plugin built on the ICU (International Components for Unicode) library. It is designed for Unicode-aware text processing and supports the character sets and complex text handling of many languages. ICU itself is an open-source, cross-platform library for internationalization and localization, particularly strong at processing text in multilingual environments.

1) Enter the running Elasticsearch container:

docker exec -it containerID sh

2) From the Elasticsearch installation root, install the analysis plugin, then restart once installation completes:

./bin/elasticsearch-plugin install analysis-icu

3) List the installed plugins:

./bin/elasticsearch-plugin list

4) Remove the plugin:

./bin/elasticsearch-plugin remove analysis-icu

2. The analysis-ik analyzer

analysis-ik is a Chinese analysis plugin based on IK Analyzer. IK is a very popular and efficient Chinese tokenizer that segments text using dictionaries and rules, which makes it especially well suited to Chinese text. Its strengths are high accuracy and strong performance, a good fit for Chinese text analysis.

Download page (note: the plugin version must match your Elasticsearch version): Index of: analysis-ik/stable/

After downloading, unzip it directly into the plugins folder under the root directory, then restart Elasticsearch for it to take effect. (Since the volume mount was set up earlier, Docker automatically syncs it into the container.) See the sketch below.
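A minimal sketch of that, run on the host (elasticsearch-analysis-ik-8.17.2.zip is a hypothetical file name; substitute the release that matches your Elasticsearch version):

# Unpack the IK release into its own folder under the mounted plugins dir
mkdir -p data/elasticsearch/plugins/ik
unzip elasticsearch-analysis-ik-8.17.2.zip -d data/elasticsearch/plugins/ik

# Restart so Elasticsearch loads the new plugin
docker restart elasticsearch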

Main differences between the analysis-icu and analysis-ik analyzers

| Feature | analysis-icu | analysis-ik |
|---|---|---|
| Language support | Supports many languages; especially suited to multilingual environments. | Focused on Chinese segmentation, with various Chinese-specific optimizations. |
| Segmentation accuracy | Lower accuracy on Chinese; suited to multilingual text. | Higher accuracy on Chinese; suited to Chinese text analysis. |
| Use cases | Multilingual text analysis, especially Unicode text. | Mainly Chinese corpora and fine-tuned Chinese segmentation. |
| Segmentation modes | ICU-based, language-agnostic tokenization. | Offers fine-grained and coarse-grained modes. |
| Extensibility | Supports complex character filtering and Unicode operations. | Supports user-defined dictionaries; suited to domain-specific vocabularies. |
| Performance | Average; geared to multilingual use. | Efficient; purpose-built for Chinese, with excellent performance. |

Examples

Test the following in Kibana under the left-hand menu: Management → Dev Tools.

# Create an index
PUT /my-index

# Index a document
POST /my-index/_doc
{
    "id": "park_rocky-mountain",
    "title": "Rocky Mountain",
    "description": "Bisected north to south by the Continental Divide, this portion of the Rockies has ecosystems varying from over 150 riparian lakes to montane and subalpine forests to treeless alpine tundra."
}

# Search the index
GET /my-index/_search?q="rocky mountain"

# Analyze with the ICU analyzer
POST /_analyze
{
  "analyzer": "icu_analyzer",
  "text": "中华人民共和国"
}

# The default (standard) analyzer splits Chinese into single characters
POST /_analyze
{
  "analyzer": "standard",
  "text": "中华人民共和国"
}

# IK coarse-grained segmentation
POST /_analyze
{
  "analyzer": "ik_smart",
  "text": "中华人民共和国"
}

# IK fine-grained segmentation
POST /_analyze
{
  "analyzer": "ik_max_word",
  "text": "中华人民共和国"
}

Connecting to Elasticsearch from Node.js (note: Bun has a compatibility issue and cannot run this correctly; Deno and Node both work). @elastic/elasticsearch docs: https://www.elastic.co/guide/en/elasticsearch/client/javascript-api/current/getting-started-js.html#_indexing_documents

import { Client } from '@elastic/elasticsearch';


/**
 * Create an Elasticsearch client instance.
 * Notes:
 * - elasticsearch.yml sets `xpack.security.enabled: false`,
 *   so Elasticsearch does not verify credentials.
 * - Although a password was configured for the `elastic` user, the `auth`
 *   option can be omitted while security is disabled.
 * - In fact, the password below is wrong, and Elasticsearch still connects fine.
 */
const client = new Client({
  node: 'http://localhost:9200',
  auth: {
    // For connecting to Elastic Cloud
    // apiKey: {
    //   id: 'foo',
    //   api_key: 'bar',
    // },
    username: 'elastic',
    password: 'elastic1234',
  },
});
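
// Optional connectivity check (an addition, not in the original article):
// ping() resolves to true when the cluster responds.
console.log(await client.ping());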


// Create an index
console.log(await client.indices.create({ index: 'my_index' }));

// Index a document
console.log(await client.index({
  index: 'my_index',
  id: 'my_document_id',
  document: {
    foo: 'foo',
    bar: 'bar',
  },
}));

// Get a document by id
console.log(await client.get({
  index: 'my_index',
  id: 'my_document_id',
}));

// Search documents (no index specified, so all indices are searched)
console.log(await client.search({
  query: {
    match: {
      foo: 'foo'
    }
  }
}));

// Update a document
console.log(await client.update({
  index: 'my_index',
  id: 'my_document_id',
  doc: {
    foo: 'bar',
    new_field: 'new value'
  }
}));

// Delete a document
console.log(await client.delete({
  index: 'my_index',
  id: 'my_document_id',
}));

// Delete the index
console.log(await client.indices.delete({ index: 'my_index' }));

// Text analysis
// Helper that runs the given analyzer over the given text
async function analyzeText(analyzer, text) {
  try {
    // The v8 client accepts the request parameters directly (no `body` wrapper)
    const response = await client.indices.analyze({
      analyzer,
      text,
    });
    console.log(`Using analyzer: ${analyzer}`);
    console.log(response);
  } catch (error) {
    console.error(`Error using analyzer ${analyzer}:`, error);
  }
}

// Run each analyzer over the same sample text
async function performAnalysis() {
  // ICU analyzer
  await analyzeText('icu_analyzer', '中华人民共和国');

  // Standard analyzer
  await analyzeText('standard', '中华人民共和国');

  // IK coarse-grained analyzer
  await analyzeText('ik_smart', '中华人民共和国');

  // IK fine-grained analyzer
  await analyzeText('ik_max_word', '中华人民共和国');
}

// Run the analysis
await performAnalysis();
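
To run the script with Node (one assumption about project setup: the code uses ES-module syntax and top-level await, so save it as e.g. index.mjs, a hypothetical file name, or set "type": "module" in package.json):

npm install @elastic/elasticsearch
node index.mjs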