一、Deploy milvus-standalone
1、Install Docker
yum install -y yum-utils device-mapper-persistent-data lvm2
yum-config-manager --add-repo http://mirrors.aliyun.com/docker-ce/linux/centos/docker-ce.repo
sudo yum install docker-ce docker-ce-cli containerd.io
systemctl start docker
systemctl enable docker
docker version
2、Configure registry mirrors in daemon.json
vim /etc/docker/daemon.json
{
  "registry-mirrors": [
    "https://docker.m.daocloud.io/",
    "https://huecker.io/",
    "https://dockerhub.timeweb.cloud",
    "https://noohub.ru/",
    "https://dockerproxy.com",
    "https://docker.mirrors.ustc.edu.cn",
    "https://docker.nju.edu.cn",
    "https://xx4bwyg2.mirror.aliyuncs.com",
    "http://f1361db2.m.daocloud.io",
    "https://registry.docker-cn.com",
    "http://hub-mirror.c.163.com"
  ],
  "exec-opts": ["native.cgroupdriver=systemd"]
}
systemctl daemon-reload
systemctl restart docker
3、Install docker-compose
# Download the binary
wget https://github.com/docker/compose/releases/download/v2.16.0/docker-compose-linux-x86_64
mv docker-compose-linux-x86_64 /usr/local/bin/docker-compose
# Make it executable
chmod +x /usr/local/bin/docker-compose
echo 'export PATH=$PATH:/usr/local/bin' >> ~/.bashrc
source ~/.bashrc
# Verify the installation
docker-compose --version
4、Install Milvus
# Create a directory named milvus to hold the data (any directory name works)
mkdir milvus
cd milvus
# Download and edit docker-compose.yml
# CPU standalone version
wget https://github.com/milvus-io/milvus/releases/download/v2.3.5/milvus-standalone-docker-compose.yml -O docker-compose.yml
# After downloading, edit its contents as follows
Change the MinIO image name:
image: docker.io/minio/minio:RELEASE.2023-03-20T20-16-18Z
Change the Milvus image name:
image: docker.io/milvusdb/milvus:v2.5.2
Add the following above the third-to-last line:
  attu:
    container_name: attu
    image: zilliz/attu:v2.4
    restart: always
    environment:
      MILVUS_URL: 192.168.32.66:19530
    ports:
      - "8000:3000"
    depends_on:
      - "standalone"
5、Pull the images
docker pull swr.cn-north-4.myhuaweicloud.com/ddn-k8s/docker.io/minio/minio:RELEASE.2023-03-20T20-16-18Z
docker tag swr.cn-north-4.myhuaweicloud.com/ddn-k8s/docker.io/minio/minio:RELEASE.2023-03-20T20-16-18Z docker.io/minio/minio:RELEASE.2023-03-20T20-16-18Z
docker pull quay.io/coreos/etcd:v3.5.5
docker pull swr.cn-north-4.myhuaweicloud.com/ddn-k8s/docker.io/zilliz/attu:v2.4
docker tag swr.cn-north-4.myhuaweicloud.com/ddn-k8s/docker.io/zilliz/attu:v2.4 zilliz/attu:v2.4
docker pull swr.cn-north-4.myhuaweicloud.com/ddn-k8s/docker.io/milvusdb/milvus:v2.5.2
docker tag swr.cn-north-4.myhuaweicloud.com/ddn-k8s/docker.io/milvusdb/milvus:v2.5.2 docker.io/milvusdb/milvus:v2.5.2
6、Deploy Milvus with docker-compose
docker-compose up -d
7、Log in to Attu
Access the Attu web UI at: ip:8000
Connect to the database: 192.168.32.66:19530
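You can also verify the connection from Python with pymilvus (a minimal sketch; it only assumes pymilvus is installed and the address above is reachable):
from pymilvus import connections, utility

# Connect to the standalone instance (same address Attu uses)
connections.connect(alias="default", host="192.168.32.66", port="19530")
# Print the server version to confirm connectivity
print(utility.get_server_version())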
8、Log in to MinIO
http://ip:9001
minioadmin/minioadmin
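Optionally, MinIO can be checked from Python too (a hedged sketch that assumes the `minio` client package is installed; it uses the default credentials above and the S3 API port 9000):
from minio import Minio

# Connect to the standalone deployment's MinIO and list its buckets
client = Minio("192.168.32.66:9000", access_key="minioadmin", secret_key="minioadmin", secure=False)
print([b.name for b in client.list_buckets()])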
9、Create collections, load them, and insert data
① Create data
from pymilvus import Collection, connections, utility, FieldSchema, CollectionSchema, DataType
import numpy as np
import random
# Connect to Milvus
connections.connect(host='192.168.32.66', port='19530')
# Create the first collection
fields1 = [
FieldSchema(name='id', dtype=DataType.INT64, is_primary=True, auto_id=True),
FieldSchema(name='vector_field', dtype=DataType.FLOAT_VECTOR, dim=128)
]
schema1 = CollectionSchema(fields=fields1)
collection1 = Collection(name='collection1', schema=schema1)
# Create the second collection
fields2 = [
FieldSchema(name='id', dtype=DataType.INT64, is_primary=True, auto_id=True),
FieldSchema(name='vector_field', dtype=DataType.FLOAT_VECTOR, dim=128)
]
schema2 = CollectionSchema(fields=fields2)
collection2 = Collection(name='collection2', schema=schema2)
# Create indexes for both collections
index_params = {
"index_type": "IVF_FLAT",
"metric_type": "COSINE",
"params": {"nlist": 128}
}
# Create an index for the first collection
collection1.create_index(
field_name="vector_field",
index_params=index_params
)
# Create an index for the second collection
collection2.create_index(
field_name="vector_field",
index_params=index_params
)
# Prepare data and insert it into the first collection (1-100)
vectors1 = [[random.random() for _ in range(128)] for _ in range(100)]
data1 = [
{
"vector_field": vectors1[i]
} for i in range(100)
]
# Insert the data into the first collection
collection1.insert(data1)
# Prepare data and insert it into the second collection (101-200)
vectors2 = [[random.random() for _ in range(128)] for _ in range(100)]
data2 = [
{
"vector_field": vectors2[i]
} for i in range(100)
]
# Insert the data into the second collection
collection2.insert(data2)
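The script above only buffers the inserts; as a quick sanity check (a small hedged addition reusing the two collections just created), flush and print the entity counts:
# Flush so the buffered rows are persisted and counted
collection1.flush()
collection2.flush()
print("collection1 entities:", collection1.num_entities)
print("collection2 entities:", collection2.num_entities)
The next script creates a good_lucky collection with two partitions (blue and red) and an HNSW index: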
import uuid
import numpy as np
from pymilvus import (
connections,
FieldSchema, CollectionSchema, DataType,
Collection,
)
collection_name = "good_lucky"
host = "192.168.32.66"
port = 19530
username = ""
password = ""
num_entities, dim = 3000, 3
def generate_uuids(number_of_uuids):
    uuids = [str(uuid.uuid4()) for _ in range(number_of_uuids)]
    return uuids
print("Start connecting to Milvus")
connections.connect("default", host=host, port=port, user=username, password=password)
fields = [
    FieldSchema(name="pk", dtype=DataType.INT64, is_primary=True, auto_id=False),
    FieldSchema(name="random", dtype=DataType.DOUBLE),
    FieldSchema(name="comment", dtype=DataType.VARCHAR, max_length=200),
    FieldSchema(name="embeddings", dtype=DataType.FLOAT_VECTOR, dim=dim)
]
schema = CollectionSchema(fields, "good_lucky is the simplest demo to introduce the APIs")
print("Create collection `good_lucky`")
coll = Collection(collection_name, schema, consistency_level="Bounded", shards_num=1)
print("Create partitions `blue` and `red`")
coll.create_partition(partition_name="blue")
coll.create_partition(partition_name="red")
print("Start inserting entities")
rng = np.random.default_rng(seed=19530)
entities1 = [
    [i for i in range(num_entities)],
    rng.random(num_entities).tolist(),
    generate_uuids(num_entities),
    rng.random((num_entities, dim)),
]
coll.insert(data=entities1, partition_name="blue")
entities2 = [
    [i + 3000 for i in range(num_entities)],
    rng.random(num_entities).tolist(),
    generate_uuids(num_entities),
    rng.random((num_entities, dim)),
]
coll.insert(data=entities2, partition_name="red")
print("Start flush")
coll.flush()
print("Start creating index")
index_params = {
"index_type": "HNSW",
"metric_type": "L2",
"params": {
"M": 16,
"efConstruction": 40
}
}
coll.create_index(
field_name="embeddings",
index_params=index_params,
index_name="idx_em"
)
coll.load(replica_number=1)
print("done")
import uuid
import numpy as np
from pymilvus import (
connections,
FieldSchema, CollectionSchema, DataType,
Collection,
)
collection_name = "hi_milvus"
host = "192.168.32.66"
port = 19530
username = ""
password = ""
num_entities1 = 100
num_entities2 = 5000
num_entities3 = 20000
dim = 3
def generate_uuids(number_of_uuids):
    uuids = [str(uuid.uuid4()) for _ in range(number_of_uuids)]
    return uuids
print("Start connecting to Milvus")
connections.connect("default", host=host, port=port, user=username, password=password)
fields = [
    FieldSchema(name="pk", dtype=DataType.INT64, is_primary=True, auto_id=False),
    FieldSchema(name="random", dtype=DataType.DOUBLE),
    FieldSchema(name="comment", dtype=DataType.VARCHAR, max_length=200),
    FieldSchema(name="embeddings", dtype=DataType.FLOAT_VECTOR, dim=dim)
]
schema = CollectionSchema(fields, "hi_milvus is the simplest demo to introduce the APIs")
print("Create collection `hi_milvus`")
coll = Collection(collection_name, schema, consistency_level="Bounded", shards_num=1)
print("Create partitions `blue`, `red`, and `yellow`")
coll.create_partition(partition_name="blue")
coll.create_partition(partition_name="red")
coll.create_partition(partition_name="yellow")
print("Start inserting entities")
rng = np.random.default_rng(seed=19530)
entities_1 = [
    [i for i in range(num_entities1)],
    rng.random(num_entities1).tolist(),
    generate_uuids(num_entities1),
    rng.random((num_entities1, dim)),
]
coll.insert(data=entities_1, partition_name="blue")
entities_2 = [
    [i + 100 for i in range(num_entities2)],
    rng.random(num_entities2).tolist(),
    generate_uuids(num_entities2),
    rng.random((num_entities2, dim)),
]
coll.insert(data=entities_2, partition_name="red")
entities_3 = [
    [i + 5100 for i in range(num_entities3)],
    rng.random(num_entities3).tolist(),
    generate_uuids(num_entities3),
    rng.random((num_entities3, dim)),
]
coll.insert(data=entities_3, partition_name="yellow")
print("Start flush")
coll.flush()
print("Start creating index")
index_params = {
"index_type": "HNSW",
"metric_type": "L2",
"params": {
"M": 16,
"efConstruction": 40
}
}
coll.create_index(
field_name="embeddings",
index_params=index_params,
index_name="idx_em"
)
coll.load(replica_number=1)
print("done")
② Load the data
Newly created collections are not loaded; click Load in Attu, or load them from Python as shown below.
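A minimal pymilvus sketch for loading the collections programmatically (reusing the collection names created above):
from pymilvus import Collection, connections

connections.connect(host="192.168.32.66", port="19530")
# Load each collection into memory so it becomes searchable
for name in ["collection1", "collection2", "good_lucky", "hi_milvus"]:
    Collection(name).load()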
③ View the data
from pymilvus import Collection, connections, utility
# Connect to Milvus
connections.connect(host='192.168.32.66', port='19530')
# List all resource groups
rgs = utility.list_resource_groups(using='default')
print(f"@All resource groups: {rgs}")
# List all collections
collections = utility.list_collections()
print(f"@All collections: {collections}")
info = utility.describe_resource_group(name='__default_resource_group')
print(f"-------------Default resource group-------------: {info}")
二、Deploy milvus-distributed (omitted)
Note: on the cluster's MinIO, create a bucket named test and have the access key and secret key ready; make sure the cluster Milvus contains no data after it is set up (see my earlier article on deploying the Milvus cluster).
In my environment:
192.168.32.10:30237 is the connection address of the Milvus cluster
192.168.32.20:9000 is the connection address of one node of the MinIO cluster
三、Data migration from milvus-standalone to milvus-distributed
Download milvus-backup-0.4.28.zip and milvus-backup_Linux_x86_64.tar.gz
# Create a backup working directory
mkdir -p /root/milvus-back/backup
# Extracting the tarball yields 3 files: milvus-backup, LICENSE, README.md
tar -xf milvus-backup_Linux_x86_64.tar.gz
# Move the milvus-backup binary into the new directory
mv milvus-backup /root/milvus-back/backup
# Then extract the milvus-backup source archive
unzip milvus-backup-0.4.28.zip
# Move or copy the configs directory into the new directory
cd milvus-backup-0.4.28
cp -r configs /root/milvus-back/backup
The resulting layout:
[root@master01 backup]# pwd
/root/milvus-back/backup
[root@master01 backup]# tree
.
├── configs
│ └── backup.yaml
├── logs
│ └── backup.log
└── milvus-backup
2 directories, 3 files
Configure the backup.yaml file. Two versions are listed below; their roles are explained after the listings.
# Configures the system log output.
log:
  level: info # Only supports debug, info, warn, error, panic, or fatal. Default 'info'.
  console: true # whether print log to console
  file:
    rootPath: "logs/backup.log"

http:
  simpleResponse: true

# milvus proxy address, compatible to milvus.yaml
milvus:
  address: 192.168.32.66
  port: 19530
  authorizationEnabled: false
  # tls mode values [0, 1, 2]
  # 0 is close, 1 is one-way authentication, 2 is two-way authentication.
  tlsMode: 0
  user: ""
  password: ""

# Related configuration of minio, which is responsible for data persistence for Milvus.
minio:
  # Milvus storage configs, make them the same with milvus config
  storageType: "minio" # support storage type: local, minio, s3, aws, gcp, ali(aliyun), azure, tc(tencent)
  address: 192.168.32.66 # Address of MinIO/S3
  port: 9000 # Port of MinIO/S3
  accessKeyID: minioadmin
  secretAccessKey: minioadmin
  useSSL: false # Access to MinIO/S3 with SSL
  useIAM: false
  iamEndpoint: ""
  bucketName: "a-bucket" # Milvus Bucket name in MinIO/S3, make it the same as your milvus instance
  rootPath: "files" # Milvus storage root path in MinIO/S3, make it the same as your milvus instance

  # Backup storage configs, the storage you want to put the backup data
  backupStorageType: "minio" # support storage type: local, minio, s3, aws, gcp, ali(aliyun), azure, tc(tencent)
  backupAddress: 192.168.32.20 # Address of MinIO/S3
  backupPort: 9000 # Port of MinIO/S3
  backupAccessKeyID: BHWn0og7yzc02HlaMxN8
  backupSecretAccessKey: 9DXRxfvlCIrAFi4J7vWvcpMErII8OfWZzXZpOruV
  backupBucketName: "test" # Bucket name to store backup data. Backup data will store to backupBucketName/backupRootPath
  backupRootPath: "backup" # Rootpath to store backup data. Backup data will store to backupBucketName/backupRootPath

  # If you need to back up or restore data between two different storage systems, direct client-side copying is not supported.
  # Set this option to true to enable data transfer through Milvus Backup.
  # Note: This option will be automatically set to true if `minio.storageType` and `minio.backupStorageType` differ.
  # However, if they are the same but belong to different services, you must manually set this option to `true`.
  crossStorage: true

backup:
  maxSegmentGroupSize: 2G
  parallelism:
    # collection level parallelism to backup
    backupCollection: 4
    # thread pool to copy data. reduce it if blocks your storage's network bandwidth
    copydata: 128
    # Collection level parallelism to restore
    restoreCollection: 2
  # keep temporary files during restore, only use to debug
  keepTempFiles: false
  # Pause GC during backup through Milvus Http API.
  gcPause:
    enable: true
    seconds: 7200
    address: http://localhost:9091
# Configures the system log output.
log:
  level: info # Only supports debug, info, warn, error, panic, or fatal. Default 'info'.
  console: true # whether print log to console
  file:
    rootPath: "logs/backup.log"

http:
  simpleResponse: true

# milvus proxy address, compatible to milvus.yaml
milvus:
  address: 192.168.32.10
  port: 30237
  authorizationEnabled: false
  # tls mode values [0, 1, 2]
  # 0 is close, 1 is one-way authentication, 2 is two-way authentication.
  tlsMode: 0
  user: ""
  password: ""

# Related configuration of minio, which is responsible for data persistence for Milvus.
minio:
  # Milvus storage configs, make them the same with milvus config
  storageType: "minio" # support storage type: local, minio, s3, aws, gcp, ali(aliyun), azure, tc(tencent)
  address: 192.168.32.20 # Address of MinIO/S3
  port: 9000 # Port of MinIO/S3
  accessKeyID: BHWn0og7yzc02HlaMxN8
  secretAccessKey: 9DXRxfvlCIrAFi4J7vWvcpMErII8OfWZzXZpOruV
  useSSL: false # Access to MinIO/S3 with SSL
  useIAM: false
  iamEndpoint: ""
  bucketName: "test" # Milvus Bucket name in MinIO/S3, make it the same as your milvus instance
  rootPath: "file" # Milvus storage root path in MinIO/S3, make it the same as your milvus instance

  # Backup storage configs, the storage you want to put the backup data
  backupStorageType: "minio" # support storage type: local, minio, s3, aws, gcp, ali(aliyun), azure, tc(tencent)
  backupAddress: 192.168.32.20 # Address of MinIO/S3
  backupPort: 9000 # Port of MinIO/S3
  backupAccessKeyID: BHWn0og7yzc02HlaMxN8
  backupSecretAccessKey: 9DXRxfvlCIrAFi4J7vWvcpMErII8OfWZzXZpOruV
  backupBucketName: "test" # Bucket name to store backup data. Backup data will store to backupBucketName/backupRootPath
  backupRootPath: "backup" # Rootpath to store backup data. Backup data will store to backupBucketName/backupRootPath

  # If you need to back up or restore data between two different storage systems, direct client-side copying is not supported.
  # Set this option to true to enable data transfer through Milvus Backup.
  # Note: This option will be automatically set to true if `minio.storageType` and `minio.backupStorageType` differ.
  # However, if they are the same but belong to different services, you must manually set this option to `true`.
  crossStorage: false

backup:
  maxSegmentGroupSize: 2G
  parallelism:
    # collection level parallelism to backup
    backupCollection: 4
    # thread pool to copy data. reduce it if blocks your storage's network bandwidth
    copydata: 128
    # Collection level parallelism to restore
    restoreCollection: 2
  # keep temporary files during restore, only use to debug
  keepTempFiles: false
  # Pause GC during backup through Milvus Http API.
  gcPause:
    enable: true
    seconds: 7200
    address: http://localhost:9091
The first file is the configuration for migrating the data from the standalone Milvus's MinIO to the cluster's MinIO.
The second file is the configuration for restoring the migrated data into the cluster Milvus.
- First, run the backup against the first config file:
./milvus-backup create -n my_backup
- Check the backup data in MinIO
- Then run the restore against the second config file:
./milvus-backup restore -n my_backup --restore_index
# -s              rename: add a suffix to the restored collection names
# --restore_index restore the indexes as well
# --config        specify the config file to use
- Check the restored data
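A quick way to verify the restore (a hedged sketch using the cluster address from this setup) is to connect to the cluster and list the collections with their row counts:
from pymilvus import Collection, connections, utility

# Connect to the distributed Milvus cluster
connections.connect(host="192.168.32.10", port="30237")
for name in utility.list_collections():
    print(f"{name}: {Collection(name).num_entities} entities")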
At this point, the data migration from standalone Milvus to the Milvus cluster is complete!