本文使用 .NET + 向量数据库 Qdrant,基于本地 ONNX 模型实现一个简单的 RAG 示例,代码仅演示基本功能。
一、下载大模型文件
首先从 Hugging Face 国内镜像找到所需的模型并下载文件。本文使用 all-MiniLM-L6-v2 模型(文本向量化模型)。需要下载两个文件:
all-MiniLM-L6-v2.onnx
tokenizer.json
二、新建.NetCore WebApi项目,并导入Nuget包
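项目中需要导入的包如下(后文用到的 QdrantVectorStore / QdrantClient 还需要引入 Qdrant 连接器包,例如 Microsoft.SemanticKernel.Connectors.Qdrant):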
Microsoft.Bcl.AsyncInterfaces
Microsoft.Extensions.VectorData.Abstractions
Tokenizers.HuggingFace
Microsoft.ML.OnnxRuntime
三、调用本地大模型进行文本向量化
using Microsoft.ML.OnnxRuntime;
using Microsoft.ML.OnnxRuntime.Tensors;
using System.Numerics;
using Tokenizers.HuggingFace.Tokenizer;
namespace AI.Study.Demo
{
/// <summary>
/// Turns text into a fixed-size embedding by tokenizing it, running a local ONNX model,
/// and mean-pooling the per-token output vectors.
/// </summary>
public class OnnxEmbeddingGenerator : IDisposable
{
    private readonly InferenceSession _session;
    private readonly Tokenizer _tokenizer;

    /// <summary>
    /// Loads the ONNX model and the tokenizer definition from disk.
    /// </summary>
    /// <param name="onnxPath">Path of the ONNX model file.</param>
    /// <param name="tokenizerPath">Path of the tokenizer file passed to <c>Tokenizer.FromFile</c>.</param>
    public OnnxEmbeddingGenerator(string onnxPath, string tokenizerPath)
    {
        _session = new InferenceSession(onnxPath);
        try
        {
            _tokenizer = Tokenizer.FromFile(tokenizerPath);
        }
        catch
        {
            // The caller never receives an instance when construction fails, so nobody
            // else could release the session that was already created.
            _session.Dispose();
            throw;
        }
    }

    /// <summary>
    /// Releases the inference session.
    /// </summary>
    public void Dispose()
    {
        _session?.Dispose();
        GC.SuppressFinalize(this);
    }

    /// <summary>
    /// Computes the embedding vector of a text.
    /// </summary>
    /// <param name="text">Text to embed.</param>
    /// <param name="embeddingDimension">Number of components per token vector produced by the model.</param>
    /// <returns>An array of <paramref name="embeddingDimension"/> floats: the mean of all token vectors.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="embeddingDimension"/> is not positive.</exception>
    /// <exception cref="ArgumentException">The model output size does not match <paramref name="embeddingDimension"/>.</exception>
    public float[] GenerateEmbeddings(string text, int embeddingDimension = 384)
    {
        ArgumentNullException.ThrowIfNull(text);
        if (embeddingDimension <= 0)
        {
            throw new ArgumentOutOfRangeException(nameof(embeddingDimension), embeddingDimension, "Embedding dimension must be positive.");
        }

        return GetEmbeddings(text, embeddingDimension);
    }

    /// <summary>
    /// Tokenizes the text and wraps ids, type ids and attention mask as 1 x N int64 tensors
    /// under the input names "input_ids", "token_type_ids" and "attention_mask".
    /// </summary>
    /// <returns>The token count N together with the model inputs.</returns>
    private (int SequenceLength, NamedOnnxValue[] Inputs) PrepareInputs(string text)
    {
        var encoding = _tokenizer.Encode(text, true, include_type_ids: true, include_attention_mask: true).Encodings[0];
        int sequenceLength = encoding.Ids.Count;
        if (sequenceLength == 0)
        {
            // Mean pooling divides by the token count; fail clearly instead of producing NaN.
            throw new InvalidOperationException("The tokenizer produced no tokens for the given text.");
        }

        ReadOnlySpan<int> shape = new int[] { 1, sequenceLength };
        var inputIds = new DenseTensor<long>(encoding.Ids.Select(id => (long)id).ToArray(), shape);
        var typeIds = new DenseTensor<long>(encoding.TypeIds.Select(id => (long)id).ToArray(), shape);
        var attentionMask = new DenseTensor<long>(encoding.AttentionMask.Select(m => (long)m).ToArray(), shape);

        var inputs = new[]
        {
            NamedOnnxValue.CreateFromTensor("input_ids", inputIds),
            NamedOnnxValue.CreateFromTensor("token_type_ids", typeIds),
            NamedOnnxValue.CreateFromTensor("attention_mask", attentionMask),
        };
        return (sequenceLength, inputs);
    }

    /// <summary>
    /// Runs the model and averages the token vectors of its first output component-wise.
    /// </summary>
    private float[] GetEmbeddings(string text, int embeddingDimension)
    {
        var (sequenceLength, inputs) = PrepareInputs(text);
        using IDisposableReadOnlyCollection<DisposableNamedOnnxValue> results = _session.Run(inputs);
        float[] tokenVectors = results.First().AsEnumerable<float>().ToArray();

        // The pooling below indexes the flat output as [token * dimension + component].
        // Reject a mismatching dimension instead of pooling the wrong values or reading out of range.
        long expectedLength = (long)sequenceLength * embeddingDimension;
        if (tokenVectors.Length != expectedLength)
        {
            throw new ArgumentException(
                $"Model output has {tokenVectors.Length} values but {sequenceLength} tokens x {embeddingDimension} dimensions were expected.",
                nameof(embeddingDimension));
        }

        int lanes = Vector<float>.Count;
        // Last index (exclusive) that can be processed with full SIMD vectors; the rest is handled scalar.
        int simdEnd = embeddingDimension - (embeddingDimension % lanes);
        float[] pooled = new float[embeddingDimension];

        // Sum all token vectors component-wise.
        for (int token = 0; token < sequenceLength; token++)
        {
            int rowStart = token * embeddingDimension;
            int k = 0;
            for (; k < simdEnd; k += lanes)
            {
                Vector<float> sum = new Vector<float>(pooled, k) + new Vector<float>(tokenVectors, rowStart + k);
                sum.CopyTo(pooled, k);
            }
            for (; k < embeddingDimension; k++)
            {
                pooled[k] += tokenVectors[rowStart + k];
            }
        }

        // Divide the sums by the token count to get the mean.
        float tokenCount = sequenceLength;
        Vector<float> divisor = new Vector<float>(tokenCount);
        int c = 0;
        for (; c < simdEnd; c += lanes)
        {
            Vector<float> mean = new Vector<float>(pooled, c) / divisor;
            mean.CopyTo(pooled, c);
        }
        for (; c < embeddingDimension; c++)
        {
            pooled[c] /= tokenCount;
        }

        return pooled;
    }

    /// <summary>
    /// Computes the cosine similarity of two vectors of length <paramref name="embeddingDimension"/>.
    /// </summary>
    /// <param name="a">First vector.</param>
    /// <param name="b">Second vector.</param>
    /// <param name="embeddingDimension">Required length of both vectors.</param>
    /// <returns>dot(a, b) / (|a| * |b|), or 0 when either vector has zero length (norm).</returns>
    /// <exception cref="ArgumentNullException"><paramref name="a"/> or <paramref name="b"/> is null.</exception>
    /// <exception cref="ArgumentException">A vector does not have <paramref name="embeddingDimension"/> components.</exception>
    public double CosineSimilarity(float[] a, float[] b, int embeddingDimension = 384)
    {
        ArgumentNullException.ThrowIfNull(a);
        ArgumentNullException.ThrowIfNull(b);
        if (a.Length != embeddingDimension || b.Length != embeddingDimension)
        {
            throw new ArgumentException($"向量维度必须为{embeddingDimension}");
        }

        int lanes = Vector<float>.Count;
        int simdEnd = embeddingDimension - (embeddingDimension % lanes);
        double ab = 0, aa = 0, bb = 0;

        int i = 0;
        for (; i < simdEnd; i += lanes)
        {
            Vector<float> vecA = new(a, i);
            Vector<float> vecB = new(b, i);
            ab += Vector.Dot(vecA, vecB);
            aa += Vector.Dot(vecA, vecA);
            bb += Vector.Dot(vecB, vecB);
        }
        // Components that do not fill a whole SIMD vector.
        for (; i < embeddingDimension; i++)
        {
            ab += a[i] * b[i];
            aa += a[i] * a[i];
            bb += b[i] * b[i];
        }

        double denominator = Math.Sqrt(aa) * Math.Sqrt(bb);
        // A zero vector has no direction; report "no similarity" rather than NaN.
        return denominator == 0 ? 0 : ab / denominator;
    }
}
Controller中进行测试
/// <summary>
/// Test endpoint: returns the embedding vector of the posted text.
/// </summary>
/// <param name="text">Text read from the request body.</param>
/// <returns>The embedding produced by <c>GenerateEmbeddings</c> with its default dimension.</returns>
[HttpPost]
public float[] EmbeddingsGenerate([FromBody] string text)
{
    // A generator is constructed from OnnxPath/TokenizerPath for this call and disposed on exit.
    using var generator = new OnnxEmbeddingGenerator(OnnxPath, TokenizerPath);
    return generator.GenerateEmbeddings(text);
}
四、保存入Qdrant中
定义模型实体
因为 all-MiniLM-L6-v2 的向量维度是 384,所以下面的模型使用 [VectorStoreVector(384)] 标注向量属性。
/// <summary>
/// Vector-store record for a movie: key, title, description and a vector declared with 384 dimensions.
/// </summary>
public class Movie384
{
    /// <summary>Record key.</summary>
    [VectorStoreKey] public Guid Key { get; set; }

    /// <summary>Movie title, stored as a data field.</summary>
    [VectorStoreData] public string Title { get; set; }

    /// <summary>Movie description, stored as a data field.</summary>
    [VectorStoreData] public string Description { get; set; }

    /// <summary>Embedding vector of the record.</summary>
    [VectorStoreVector(384)] public ReadOnlyMemory<float> Vector { get; set; }
}
准备测试数据(测试数据由 AI 生成)
// Sample movies used as test data. Each record gets a freshly generated key whenever this
// initializer runs; the Vector property is not set here.
private readonly Movie384[] Movies384 =
{
    new Movie384
    {
        Key = Guid.NewGuid(),
        Title = "星际穿越",
        Description = "一部关于宇航员穿越虫洞寻找新家园的科幻史诗,探索了时间膨胀、五维空间和父女情感的深刻主题,由克里斯托弗·诺兰执导,马修·麦康纳主演",
    },
    new Movie384
    {
        Key = Guid.NewGuid(),
        Title = "教父",
        Description = "黑帮电影经典之作,讲述科莱昂家族在美国的崛起与权力斗争,马龙·白兰度饰演的维托·科莱昂创造了电影史上最令人难忘的角色之一",
    },
    new Movie384
    {
        Key = Guid.NewGuid(),
        Title = "机器人总动员",
        Description = "皮克斯动画工作室制作的感人科幻动画,描述地球上最后一个垃圾处理机器人瓦力与高级探测机器人伊娃之间的故事,几乎没有对白却充满深意",
    },
    new Movie384
    {
        Key = Guid.NewGuid(),
        Title = "盗梦空间",
        Description = "诺兰执导的复杂心理惊悚片,探索梦境共享技术,团队必须在多层梦境中执行不可能的\"植入\"任务,模糊了现实与梦境的界限",
    },
    new Movie384
    {
        Key = Guid.NewGuid(),
        Title = "泰坦尼克号",
        Description = "詹姆斯·卡梅隆执导的灾难爱情片,以1912年泰坦尼克号沉没为背景,讲述不同社会阶层的年轻恋人杰克和露丝的悲剧爱情故事",
    },
};
保存进Qdrant中
// Location of the ONNX model file: "Onnx/all-MiniLM-L6-v2.onnx" under the application's base directory.
public string OnnxPath = Path.Combine(AppContext.BaseDirectory, "Onnx/all-MiniLM-L6-v2.onnx");
// Location of the tokenizer definition: "Onnx/tokenizer.json" under the application's base directory.
public string TokenizerPath = Path.Combine(AppContext.BaseDirectory, "Onnx/tokenizer.json");
// Connect to Qdrant. The store is constructed with `true` as its second argument
// (presumably ownsClient — confirm against the connector version in use), so disposing the
// store is what releases the client; `using` makes sure that happens when this scope ends.
using var vectorStore = new QdrantVectorStore(new QdrantClient("192.168.4.9", 6334), true);
var ragVectorRecordCollection = vectorStore.GetCollection<Guid, Movie384>("Movies384");
await ragVectorRecordCollection.EnsureCollectionExistsAsync();

// One generator instance is reused for every record in the loop.
using OnnxEmbeddingGenerator onnxEmbeddingGenerator = new OnnxEmbeddingGenerator(OnnxPath, TokenizerPath);
foreach (var item in Movies384)
{
    // The vector is derived from the description only; the title is not embedded.
    item.Vector = onnxEmbeddingGenerator.GenerateEmbeddings(item.Description);
    await ragVectorRecordCollection.UpsertAsync(item);
}
进入Qdrant查看数据,成功存入数据库中
五、向量搜索
/// <summary>
/// Embeds the query text and streams the closest records of the "Movies384" collection.
/// </summary>
/// <param name="query">Search text read from the query string.</param>
/// <param name="top">Number of results requested from the vector search.</param>
/// <returns>The search results, yielded one by one as the search produces them.</returns>
[HttpGet]
public async IAsyncEnumerable<VectorSearchResult<Movie384>> QueryQdrant([FromQuery] string query, [FromQuery] int top = 2)
{
    // Disposed when enumeration finishes (or is abandoned), which releases the Qdrant connection
    // the store was created with.
    using var vectorStore = new QdrantVectorStore(new QdrantClient("192.168.4.9", 6334), true);
    var ragVectorRecordCollection = vectorStore.GetCollection<Guid, Movie384>("Movies384");
    await ragVectorRecordCollection.EnsureCollectionExistsAsync();

    // The generator is only needed to embed the query, so release it before streaming
    // results instead of holding the inference session for the whole enumeration.
    ReadOnlyMemory<float> searchVector;
    using (var onnxEmbeddingGenerator = new OnnxEmbeddingGenerator(OnnxPath, TokenizerPath))
    {
        searchVector = onnxEmbeddingGenerator.GenerateEmbeddings(query);
    }

    await foreach (var result in ragVectorRecordCollection.SearchAsync(searchVector, top))
    {
        yield return result;
    }
}
六、下一步动作
当前直接使用向量数据库的搜索结果,可能存在很多问题,下一步需要对代码进行增强。
搜索完向量数据库以后,将内容组合成prompt,提交给问答大模型,交由大模型进行整理,清洗数据,再进行更精准回答。例如:
/// <summary>
/// Sketch of the next step: search the "Movies384" collection for the query, then assemble a
/// prompt from the hits so that a question-answering model can produce the final answer.
/// </summary>
/// <param name="query">Search text read from the query string.</param>
/// <param name="top">Number of results requested from the vector search.</param>
/// <returns>
/// A fixed message when nothing was found; otherwise the placeholder value standing in for the
/// answer of a question-answering model (not implemented in this snippet).
/// </returns>
[HttpGet]
public async Task<string> QueryQdrant([FromQuery] string query, [FromQuery] int top )
{
    // Dispose the store when the method completes so the Qdrant connection it was created with is released.
    using var vectorStore = new QdrantVectorStore(new QdrantClient("192.168.4.9", 6334), true);
    var ragVectorRecordCollection = vectorStore.GetCollection<Guid, Movie384>("Movies384");
    await ragVectorRecordCollection.EnsureCollectionExistsAsync();

    // The generator is only needed to embed the query; release it before awaiting the search.
    ReadOnlyMemory<float> searchVector;
    using (var onnxEmbeddingGenerator = new OnnxEmbeddingGenerator(OnnxPath, TokenizerPath))
    {
        searchVector = onnxEmbeddingGenerator.GenerateEmbeddings(query);
    }

    var resultsList = new List<VectorSearchResult<Movie384>>();
    await foreach (var result in ragVectorRecordCollection.SearchAsync(searchVector, top))
    {
        resultsList.Add(result);
    }

    // Nothing matched the query.
    if (resultsList.Count == 0)
    {
        return "没有找到相关的电影信息。";
    }

    // Build the prompt that asks the model to tidy up the search hits.
    var promptBuilder = new StringBuilder();
    promptBuilder.AppendLine("请整理以下电影信息,用自然语言简洁明了地回答用户的查询:");
    promptBuilder.AppendLine($"用户查询:{query}\n");
    promptBuilder.AppendLine("相关电影信息:");
    foreach (var item in resultsList)
    {
        promptBuilder.AppendLine($"- 电影名:{item.Record.Title}");
        promptBuilder.AppendLine($" 描述:{item.Record.Description}");
        promptBuilder.AppendLine($" 相似度:{item.Score:F4}\n");
    }
    promptBuilder.AppendLine("请根据以上信息,用友好自然的语言回答用户的问题。");

    // A different model is required here: all-MiniLM can only produce embeddings, it cannot
    // answer questions. The prompt built above is not sent anywhere in this snippet; the
    // identifier returned below is a placeholder for the answer of that model.
    return 问答大模型返回的结果;
}