BGE-M3+Milvus上传知识到指定的collection

发布于:2025-03-20 ⋅ 阅读:(19) ⋅ 点赞:(0)
package org.example.deepseek4jdemo2.controller;

import cn.hutool.core.io.FileUtil;
import cn.hutool.core.util.StrUtil;
import com.google.gson.Gson;
import com.google.gson.JsonArray;
import com.google.gson.JsonObject;
import io.github.pigmesh.ai.deepseek.core.EmbeddingClient;
import io.github.pigmesh.ai.deepseek.core.embedding.EmbeddingRequest;
import io.milvus.client.MilvusClient;
import io.milvus.param.dml.InsertParam;
import io.milvus.v2.service.vector.request.InsertReq;
import org.example.deepseek4jdemo2.config.MilvusConnectPool;
import org.springframework.ai.document.Document;
import org.springframework.ai.embedding.EmbeddingResponse;
import org.springframework.ai.ollama.OllamaEmbeddingModel;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.ResponseBody;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.multipart.MultipartFile;

import java.io.File;
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.UUID;

/**
 * REST endpoints that turn text into embeddings with the configured Ollama embedding
 * model and store uploaded documents, chunk by chunk, in a Milvus collection.
 */
@RestController
@RequestMapping("/embedding")
public class EmbeddingController {

    /** Milvus collection that receives the uploaded knowledge chunks. */
    private static final String COLLECTION_NAME = "crm_pro";

    /** Maximum number of characters per chunk handed to the embedding model. */
    private static final int CHUNK_LENGTH = 300;

    @Autowired
    private MilvusConnectPool milvusConnectPool;
    @Autowired
    private OllamaEmbeddingModel ollamaEmbeddingModel;

    /** Directory (from {@code file.uploads}) where uploaded files are stored. */
    @Value("${file.uploads}")
    private String fileUploads;

    /**
     * Smoke test for the embedding model: embeds one fixed sentence.
     *
     * @return the string form of the embedding results
     */
    @RequestMapping("/test")
    public String testConnection() {
        List<String> samples = new ArrayList<>();
        samples.add("白日依山尽,黄河入海流。欲穷千里目,更上一层楼。");
        EmbeddingResponse embeddingResponse = ollamaEmbeddingModel.embedForResponse(samples);
        System.out.println(embeddingResponse.getResults().size());
        return embeddingResponse.getResults().toString();
    }

    /**
     * Stores the uploaded file in the upload directory, splits its text into chunks of at
     * most {@value #CHUNK_LENGTH} characters, embeds every chunk and inserts the rows
     * ({@code id}, {@code vector}, {@code fileName}, {@code text}) into Milvus.
     *
     * @param file the uploaded document; its content is decoded as UTF-8
     * @return 200 with a success message, or 400 when the file name is unusable or the
     *         file has no text content
     * @throws RuntimeException if the file cannot be stored
     * @throws IllegalStateException if the upload directory cannot be created or the
     *         Milvus insert fails
     */
    @RequestMapping(value = "/uploadFile")
    public ResponseEntity<String> uploadFile(@RequestParam("file") MultipartFile file) {
        // The original file name is supplied by the client and must not be trusted as a path.
        String fileName = safeFileName(file.getOriginalFilename());
        if (fileName == null) {
            return ResponseEntity.badRequest().body("文件名无效");
        }
        if (file.isEmpty()) {
            return ResponseEntity.badRequest().body("文件内容为空");
        }

        File destFile = storeUpload(file, fileName);

        // Decode with a fixed charset so the result does not depend on the host platform.
        String content = FileUtil.readUtf8String(destFile);
        if (StrUtil.isBlank(content)) {
            return ResponseEntity.badRequest().body("文件内容为空");
        }

        List<JsonObject> rows = new ArrayList<>();
        for (String chunk : StrUtil.split(content, CHUNK_LENGTH)) {
            rows.add(toRow(fileName, chunk));
        }
        insertRows(rows);

        return ResponseEntity.ok("成功" + fileName);
    }

    /**
     * Reduces a client-supplied file name to its last path segment.
     *
     * @return the bare file name, or {@code null} when nothing usable remains
     */
    private static String safeFileName(String originalName) {
        if (originalName == null) {
            return null;
        }
        // Normalize Windows separators so File#getName strips directories on every OS.
        String baseName = new File(originalName.replace('\\', '/')).getName();
        if (StrUtil.isBlank(baseName) || ".".equals(baseName) || "..".equals(baseName)) {
            return null;
        }
        return baseName;
    }

    /** Saves the upload as {@code fileName} inside the configured upload directory. */
    private File storeUpload(MultipartFile file, String fileName) {
        File uploadDir = new File(fileUploads).getAbsoluteFile();
        // Re-check isDirectory after mkdirs: a concurrent request may have created it.
        if (!uploadDir.isDirectory() && !uploadDir.mkdirs() && !uploadDir.isDirectory()) {
            throw new IllegalStateException("Cannot create upload directory: " + uploadDir);
        }
        File destFile = new File(uploadDir, fileName);
        try {
            file.transferTo(destFile);
        } catch (IOException e) {
            throw new RuntimeException("Failed to store uploaded file " + fileName, e);
        }
        return destFile;
    }

    /** Embeds one text chunk and wraps it as a Milvus row. */
    private JsonObject toRow(String fileName, String chunk) {
        float[] embedding = ollamaEmbeddingModel.embed(chunk);
        JsonArray vector = new JsonArray();
        for (float component : embedding) {
            vector.add(component);
        }

        JsonObject row = new JsonObject();
        // Non-negative 63-bit id: the previous Math.abs(hashCode()) had only 31 bits and
        // is negative for Integer.MIN_VALUE.
        // NOTE(review): assumes the collection's "id" field is Int64 - confirm the schema.
        row.addProperty("id", UUID.randomUUID().getMostSignificantBits() & Long.MAX_VALUE);
        row.add("vector", vector);
        row.addProperty("fileName", fileName);
        row.addProperty("text", chunk);
        return row;
    }

    /** Inserts the rows into {@link #COLLECTION_NAME} and fails loudly on any error. */
    private void insertRows(List<JsonObject> rows) {
        InsertParam insertParam = InsertParam.newBuilder()
                .withCollectionName(COLLECTION_NAME)
                .withRows(rows)
                .build();

        MilvusClient milvusClient;
        try {
            milvusClient = milvusConnectPool.getMilvusClient();
        } catch (Exception e) {
            throw new IllegalStateException("Failed to obtain a Milvus client from the pool", e);
        }
        if (milvusClient == null) {
            throw new IllegalStateException("Milvus connection pool returned no client");
        }
        // NOTE(review): the client is never handed back to MilvusConnectPool; the pool's
        // release API is not visible in this file - confirm and release it here.

        // insert(...) returns the SDK's R<MutationResult> wrapper rather than throwing;
        // status 0 is the SDK's success code - verify against the SDK version in use.
        var insertResult = milvusClient.insert(insertParam);
        if (insertResult.getStatus() != 0) {
            throw new IllegalStateException(
                    "Milvus insert into " + COLLECTION_NAME + " failed with status "
                            + insertResult.getStatus(),
                    insertResult.getException());
        }
    }

}

package org.example.deepseek4jdemo2.config;

import io.github.pigmesh.ai.deepseek.core.EmbeddingClient;
import org.springframework.ai.ollama.OllamaEmbeddingModel;
import org.springframework.ai.ollama.api.OllamaApi;
import org.springframework.ai.ollama.api.OllamaOptions;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.context.annotation.Primary;

/**
 * Spring configuration that exposes the Ollama-backed embedding model as the primary
 * {@link OllamaEmbeddingModel} bean.
 */
@Configuration
public class AppConfig {

    /** Base URL of the Ollama API server ({@code spring.ai.ollama.base-url}). */
    @Value("${spring.ai.ollama.base-url}")
    private String baseUrl;

    /** Name of the embedding model ({@code spring.ai.ollama.embedding.model}). */
    @Value("${spring.ai.ollama.embedding.model}")
    private String model;

    /**
     * Builds the embedding model from the configured base URL and model name.
     *
     * @return the embedding model, marked as the primary bean of its type
     */
    @Bean
    @Primary
    public OllamaEmbeddingModel primaryEmbeddingClient() {
        // Only the model name is set on the options; everything else keeps its default.
        OllamaOptions embeddingOptions = new OllamaOptions();
        embeddingOptions.setModel(model);
        return new OllamaEmbeddingModel(new OllamaApi(baseUrl), embeddingOptions);
    }
}

application.yml 

# Application configuration: HTTP port, Milvus connection pool, Ollama endpoints/models
# and the directory used for uploaded files.
server:
  port: 8889
spring:
  # Milvus connection
  datasource:
    milvus-connect-pool:
      max-idle: 5
      min-idle: 2
      max-total: 10
      milvus:
        username: root
        password: # left empty in this sample
        host: 192.168.1.44
        port: 19530
  application:
    name: Cleaner-AI
  ai:
    ollama:
      # Ollama API server address (11434 is its default port)
      base-url: http://192.168.1.44:11434
      # Embedding model connection settings
      # NOTE(review): AppConfig only reads base-url and embedding.model; confirm that the
      # options below actually reach the OllamaEmbeddingModel bean it builds.
      embedding:
        model: bge-m3:latest
        options:
          num-ctx: 4096
          num-g-p-u: 1
          low-v-r-a-m: true
          temperature: 0.0
      # Equivalent programmatic setup, kept for reference:
      #  options.setModel("bge-m3");  // must match the local model name
#  options.setNumCtx(4096);     // context length
#  options.setNumGPU(1);        // use 1 GPU
#  options.setLowVRAM(true);    // enable low-VRAM mode
#  options.setTemperature(0.0); // deterministic output (suited to embedding tasks)
      chat:
        enabled: true
          # Name of the chat model to use
        model: deepseek-r1:14b
#            alternative model: deepseek-v2:16b
        options:
          temperature: 0.7
# Directory where uploaded files are saved (read as ${file.uploads})
file:
  uploads: E:\home\uploads