C# Qdrant向量数据库实战指南
【代码】C# Qdrant向量数据库实战指南。
·
using LangChain.Databases;
using LangChain.Databases.Qdrant;
using LangChain.DocumentLoaders;
using LangChain.Extensions;
using LangChain.Providers;
using LangChain.Splitters.Text;
using Microsoft.Extensions.AI;
using Microsoft.SemanticKernel.Connectors.Qdrant;
using OllamaSharp;
using Peak.Abp.Silver.AI.Host.Core;
using Peak.Abp.Silver.AI.Model.Request;
using Peak.Abp.Silver.Basic;
using Qdrant.Client;
using Qdrant.Client.Grpc;
namespace Peak.Abp.Silver.AI.Host
{
/// <summary>
/// Qdrant向量
/// </summary>
public class QdrantBaseUtils: IQdrantBaseUtils
{
private Qdrant.Client.QdrantClient client;
/// <summary>
/// Creates the Qdrant client from the "Qdrant:Host", "Qdrant:Port", "Qdrant:Key" and
/// "Qdrant:Https" configuration entries.
/// </summary>
public QdrantBaseUtils()
{
    var qdrantHost = ConfigurationUtil.GetSection("Qdrant:Host").ToString();
    var qdrantPort = ConfigurationUtil.GetSection("Qdrant:Port").ToInt();
    var qdrantKey = ConfigurationUtil.GetSection("Qdrant:Key").ToString();
    // "Qdrant:Https" is read as a number; only the value 1 switches HTTPS on.
    var useHttps = ConfigurationUtil.GetSection("Qdrant:Https").ToInt() == 1;
    client = new QdrantClient(host: qdrantHost, port: qdrantPort, https: useHttps, apiKey: qdrantKey);
}
/// <summary>
/// Creates the Qdrant client for an explicit endpoint address.
/// </summary>
/// <param name="address">Address handed to the Qdrant client.</param>
/// <param name="apiKey">Optional API key handed to the Qdrant client.</param>
public QdrantBaseUtils(System.Uri address, string? apiKey = null)
    => client = new QdrantClient(address: address, apiKey: apiKey);
/// <summary>
/// Creates the Qdrant client from explicit connection settings.
/// </summary>
/// <param name="host">Host name handed to the Qdrant client.</param>
/// <param name="port">Port handed to the Qdrant client.</param>
/// <param name="https">Whether the client is asked to use HTTPS.</param>
/// <param name="apiKey">Optional API key handed to the Qdrant client.</param>
public QdrantBaseUtils(string host = "localhost", int port = 6334, bool https = false, string? apiKey = null)
    => client = new QdrantClient(host: host, port: port, https: https, apiKey: apiKey);
#region 向量库
/// <summary>
/// Collection - create it unless a collection with that name already exists.
/// </summary>
/// <param name="collectName">Name of the collection.</param>
/// <param name="size">Vector size used when the collection is created.</param>
/// <param name="distance">Distance metric used when the collection is created.</param>
/// <returns>A task that completes once the check (and, if needed, the creation) has finished.</returns>
public async Task CreateCollectionAsync(string collectName, ulong size = 768, Distance distance = Distance.Cosine)
{
    var alreadyExists = await client.CollectionExistsAsync(collectName);
    if (!alreadyExists)
    {
        var vectorParams = new VectorParams { Size = size, Distance = distance };
        await client.CreateCollectionAsync(collectName, vectorParams);
    }
}
/// <summary>
/// Collection - check whether it exists.
/// </summary>
/// <param name="collectName">Name of the collection.</param>
/// <returns>The result reported by the Qdrant client's existence check.</returns>
public async Task<bool> CollectionExistsAsync(string collectName)
    => await client.CollectionExistsAsync(collectName);
/// <summary>
/// Collection - fetch its information.
/// </summary>
/// <param name="collectName">Name of the collection.</param>
/// <returns>
/// The collection info returned by the Qdrant client, for example:
/// point count: {info.PointsCount}
/// vector size: {info.VectorsConfig?.Params?.Size}
/// distance metric: {info.VectorsConfig?.Params?.Distance}
/// </returns>
public async Task<CollectionInfo> GetCollectionInfoAsync(string collectName)
{
    var collectionInfo = await client.GetCollectionInfoAsync(collectName);
    return collectionInfo;
}
/// <summary>
/// Collection - delete it; does nothing when the collection does not exist.
/// </summary>
/// <param name="collectName">Name of the collection.</param>
/// <returns>A task that completes once the check (and, if needed, the deletion) has finished.</returns>
public async Task DeleteCollectionAsync(string collectName)
{
    var collectionPresent = await client.CollectionExistsAsync(collectName);
    if (collectionPresent)
    {
        await client.DeleteCollectionAsync(collectName);
    }
}
/// <summary>
/// Collection - list all collections.
/// </summary>
/// <returns>The collection names returned by the Qdrant client.</returns>
public async Task<IReadOnlyList<string>> ListCollectionAsync()
    => await client.ListCollectionsAsync();
#endregion
#region 向量
/// <summary>
/// Vector - compute the embedding of a single text through Ollama.
/// </summary>
/// <remarks>
/// The model id is read from the "Ollama:EmbeddingModel" configuration entry and the
/// endpoint from "Ollama:Url".
/// </remarks>
/// <param name="value">Text to embed.</param>
/// <returns>The embedding vector produced for <paramref name="value"/>.</returns>
public async Task<ReadOnlyMemory<float>> GetEmbeddingValueAsync(string value)
{
    var embeddingModelId = ConfigurationUtil.GetSection("Ollama:EmbeddingModel").ToString();
    var ollamaUrl = ConfigurationUtil.GetSection("Ollama:Url").ToString();

    // A client is created for every call, so it is disposed here once the request has
    // completed instead of being left for the garbage collector.
    using var ollama = new OllamaApiClient(ollamaUrl);

    return await ollama.GenerateVectorAsync(value, new EmbeddingGenerationOptions
    {
        ModelId = embeddingModelId
    });
}
/// <summary>
/// Vector - compute the embeddings of several texts in one request.
/// </summary>
/// <remarks>
/// The embedding model is obtained from OllamaBaseUtils.GetEmbeddingAsync.
/// </remarks>
/// <param name="value">Texts to embed.</param>
/// <returns>The Values of the embedding response (presumably one vector per input text - confirm).</returns>
public async Task<float[][]> GetEmbeddingValueAsync(List<string> value)
{
    var embeddingModel = await new OllamaBaseUtils().GetEmbeddingAsync();
    var request = EmbeddingRequest.ToEmbeddingRequest(value.ToArray());
    var response = await embeddingModel.CreateEmbeddingsAsync(request, new EmbeddingSettings());
    return response.Values;
}
/// <summary>
/// Vector - send the given point vectors to the collection, creating the collection
/// with the default settings of CreateCollectionAsync when it does not exist yet.
/// </summary>
/// <remarks>
/// The points are passed to the Qdrant client's UpdateVectorsAsync.
/// NOTE(review): that call looks like it updates vectors of existing points rather than
/// inserting new points - confirm this matches the intent of the method name.
/// </remarks>
/// <param name="collectName">Name of the collection.</param>
/// <param name="points">Point vectors to send.</param>
/// <returns>True when the reported status is UpdateStatus.Completed; otherwise false.</returns>
public async Task<bool> InsertVectorsAsync(string collectName, IReadOnlyList<PointVectors> points)
{
    var collectionPresent = await CollectionExistsAsync(collectName);
    if (!collectionPresent)
    {
        await CreateCollectionAsync(collectName);
    }

    var updateResult = await client.UpdateVectorsAsync(collectName, points);
    return updateResult.Status == UpdateStatus.Completed;
}
/// <summary>
/// Vector - embed a text and upsert it as a single point whose "page_content" payload
/// holds the text.
/// </summary>
/// <remarks>
/// When the collection does not exist it is created with a vector size equal to the
/// length of the computed embedding, so the collection always matches the embedding
/// model instead of relying on the 768 default of CreateCollectionAsync.
/// </remarks>
/// <param name="collectName">Name of the collection.</param>
/// <param name="value">Text to embed and store.</param>
/// <param name="pointId">
/// Id of the point; when zero or negative, the current DateTime.Now.Ticks value is used.
/// </param>
/// <returns>
/// True when the upsert status is UpdateStatus.Completed; false otherwise, including
/// when no embedding was produced.
/// </returns>
public async Task<bool> InsertVectorsAsync(string collectName, string value, long pointId = 0)
{
    // Embed first: the embedding length decides the vector size of a new collection.
    var embeddings = await GetEmbeddingValueAsync(new List<string> { value });
    if (embeddings.Length == 0 || embeddings[0].Length == 0)
    {
        return false;
    }

    var embedding = embeddings[0];

    // CreateCollectionAsync checks for existence itself and leaves an existing collection untouched.
    await CreateCollectionAsync(collectName, (ulong)embedding.Length);

    if (pointId <= 0)
    {
        // NOTE(review): tick-based ids depend on the local clock; two calls in the same
        // tick would overwrite each other - confirm this is acceptable.
        pointId = DateTime.Now.Ticks;
    }

    var points = new List<PointStruct>
    {
        new PointStruct
        {
            Id = (ulong)pointId,
            Vectors = embedding,
            Payload =
            {
                ["page_content"] = value,
            }
        }
    };

    var result = await client.UpsertAsync(collectName, points);
    return result.Status == UpdateStatus.Completed;
}
/// <summary>
/// Vector - embed several texts and upsert them as points whose "page_content" payload
/// holds the corresponding text.
/// </summary>
/// <remarks>
/// Point ids are consecutive, starting at DateTime.Now.Ticks + 1. When the collection
/// does not exist it is created with a vector size equal to the length of the first
/// computed embedding, so it matches the embedding model instead of relying on the 768
/// default of CreateCollectionAsync.
/// </remarks>
/// <param name="collectName">Name of the collection.</param>
/// <param name="values">Texts to embed and store.</param>
/// <returns>
/// True when the upsert status is UpdateStatus.Completed; false otherwise, including
/// when there is nothing to insert.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="values"/> is null.</exception>
public async Task<bool> BatchVectorsAsync(string collectName, List<string> values)
{
    if (values is null)
    {
        throw new ArgumentNullException(nameof(values));
    }

    // Nothing to insert: skip the embedding request and the collection creation.
    if (values.Count == 0)
    {
        return false;
    }

    var embeddings = await GetEmbeddingValueAsync(values);
    if (embeddings.Length == 0 || embeddings[0].Length == 0)
    {
        return false;
    }

    // CreateCollectionAsync checks for existence itself and leaves an existing collection untouched.
    await CreateCollectionAsync(collectName, (ulong)embeddings[0].Length);

    // NOTE(review): tick-based ids depend on the local clock; overlapping batches started
    // close together could produce the same ids - confirm this is acceptable.
    long pointId = DateTime.Now.Ticks;
    var points = new List<PointStruct>(embeddings.Length);
    for (int i = 0; i < embeddings.Length; i++)
    {
        pointId++;
        points.Add(new PointStruct
        {
            Id = (ulong)pointId,
            Vectors = embeddings[i],
            Payload =
            {
                ["page_content"] = values[i],
            }
        });
    }

    var result = await client.UpsertAsync(collectName, points);
    return result.Status == UpdateStatus.Completed;
}
/// <summary>
/// Vector - search the collection with the embedding of a text and return one payload
/// field of every hit as a string.
/// </summary>
/// <remarks>
/// NOTE(review): a sibling overload shares the first three parameters; a call passing
/// only (collectName, value) may be ambiguous between the two - confirm at call sites.
/// </remarks>
/// <param name="collectName">Name of the collection.</param>
/// <param name="value">Query text; it is embedded with GetEmbeddingValueAsync.</param>
/// <param name="limit">Maximum number of hits requested from Qdrant.</param>
/// <param name="payloadName">Payload key whose string value is returned for each hit.</param>
/// <returns>
/// The string values of <paramref name="payloadName"/> for the hits that carry that key;
/// an empty list when the collection does not exist.
/// </returns>
public async Task<List<string>> SearchVectorsAsync(string collectName, string value, ulong limit = 5,string payloadName= "page_content")
{
    if (!await CollectionExistsAsync(collectName))
    {
        return new List<string>();
    }

    var vector = await GetEmbeddingValueAsync(value);
    var searchResults = await client.SearchAsync(
        collectName,
        vector,
        limit: limit
    );

    var listValues = new List<string>(searchResults.Count);
    foreach (var point in searchResults)
    {
        // A hit stored without this payload key is skipped instead of failing the whole
        // search with a KeyNotFoundException.
        if (point.Payload.TryGetValue(payloadName, out var payloadValue))
        {
            listValues.Add(payloadValue.StringValue);
        }
    }
    return listValues;
}
/// <summary>
/// Vector - search the collection with the embedding of a text and return the scored
/// points as delivered by the Qdrant client.
/// </summary>
/// <remarks>
/// NOTE(review): a sibling overload shares the first three parameters; a call passing
/// only (collectName, value) may be ambiguous between the two - confirm at call sites.
/// </remarks>
/// <param name="collectName">Name of the collection.</param>
/// <param name="value">Query text; it is embedded with GetEmbeddingValueAsync.</param>
/// <param name="limit">Maximum number of hits requested from Qdrant.</param>
/// <returns>
/// The scored points of the search; an empty list (never null) when the collection does
/// not exist.
/// </returns>
public async Task<IReadOnlyList<ScoredPoint>> SearchVectorsAsync(string collectName, string value, ulong limit = 5)
{
    if (!await CollectionExistsAsync(collectName))
    {
        // The return type is non-nullable, so a missing collection yields an empty result
        // that callers can enumerate safely, matching the string-returning overload.
        return Array.Empty<ScoredPoint>();
    }

    var vector = await GetEmbeddingValueAsync(value);
    return await client.SearchAsync(
        collectName,
        vector,
        limit: limit
    );
}
#endregion
#region 工具
/// <summary>
/// Import a local file or a URL into a vector collection.
/// </summary>
/// <remarks>
/// The content is split with a RecursiveCharacterTextSplitter configured from the
/// request, embedded with the model from OllamaBaseUtils.GetEmbeddingAsync, and stored
/// through a QdrantVectorDatabase built from the "Qdrant:Url" configuration entry.
/// The document loader is selected by <c>info.FileMode</c>.
/// </remarks>
/// <param name="info">Import settings (source, collection, chunking, loader type, write behavior).</param>
/// <returns>The vector collection returned by AddDocumentsFromAsync.</returns>
/// <exception cref="ArgumentNullException"><paramref name="info"/> is null.</exception>
/// <exception cref="NotSupportedException"><c>info.FileMode</c> is not a known file type.</exception>
public async Task<IVectorCollection> ImportFileVectorAsync(ImportFileRequest info)
{
    if (info is null)
    {
        throw new ArgumentNullException(nameof(info));
    }

    var ollamaBase = new OllamaBaseUtils();
    var embeddingModel = await ollamaBase.GetEmbeddingAsync();

    // Text splitter used to cut the loaded documents into chunks.
    var textSplitter = new RecursiveCharacterTextSplitter(
        chunkSize: info.ChunkSize,
        chunkOverlap: info.ChunkOverlap
    );

    // Ordinal, case-insensitive prefix test: no lower-cased copy of the path and no
    // dependency on the current culture.
    var isUrl = info.FilePath.StartsWith("http", StringComparison.OrdinalIgnoreCase);
    var source = isUrl
        ? DataSource.FromUrl(info.FilePath)
        : DataSource.FromPath(info.FilePath);

    var urls = ConfigurationUtil.GetSection("Qdrant:Url").ToString();
    var vectorDatabase = new QdrantVectorDatabase(new QdrantMemoryStore(urls, info.VectorSize));

    switch (info.FileMode)
    {
        case FileTypeSource.PDF:
            return await vectorDatabase.AddDocumentsFromAsync<PdfPigPdfLoader>(
                embeddingModel,
                dimensions: info.Dimensions,
                dataSource: source,
                collectionName: info.CollectName,
                textSplitter: textSplitter,
                behavior: info.Behavior);
        case FileTypeSource.WORD:
            return await vectorDatabase.AddDocumentsFromAsync<WordLoader>(
                embeddingModel,
                dimensions: info.Dimensions,
                dataSource: source,
                collectionName: info.CollectName,
                textSplitter: textSplitter,
                behavior: info.Behavior);
        case FileTypeSource.EXCELHEAD:
            return await vectorDatabase.AddDocumentsFromAsync<FirstRowHeaderExcelLoader>(
                embeddingModel,
                dimensions: info.Dimensions,
                dataSource: source,
                collectionName: info.CollectName,
                textSplitter: textSplitter,
                behavior: info.Behavior);
        case FileTypeSource.EXCELNOHEAD:
            return await vectorDatabase.AddDocumentsFromAsync<NoHeaderExcelLoader>(
                embeddingModel,
                dimensions: info.Dimensions,
                dataSource: source,
                collectionName: info.CollectName,
                textSplitter: textSplitter,
                behavior: info.Behavior);
        case FileTypeSource.WEB:
            return await vectorDatabase.AddDocumentsFromAsync<HtmlLoader>(
                embeddingModel,
                dimensions: info.Dimensions,
                dataSource: source,
                collectionName: info.CollectName,
                textSplitter: textSplitter,
                behavior: info.Behavior);
        case FileTypeSource.TEXT:
            return await vectorDatabase.AddDocumentsFromAsync<FileLoader>(
                embeddingModel,
                dimensions: info.Dimensions,
                dataSource: source,
                collectionName: info.CollectName,
                textSplitter: textSplitter,
                behavior: info.Behavior);
        default:
            // Same message as before; the more specific type still derives from Exception,
            // so existing catch blocks keep working.
            throw new NotSupportedException("文件类型错误");
    }
}
#endregion
}
}
using LangChain.Extensions;
namespace Peak.Abp.Silver.AI.Model.Request
{
/// <summary>
/// Settings for importing a file or URL into a vector collection; consumed by
/// QdrantBaseUtils.ImportFileVectorAsync.
/// </summary>
public class ImportFileRequest
{
/// <summary>
/// Name of the vector collection; passed as collectionName when adding documents.
/// </summary>
public string CollectName { get; set; } = "";
/// <summary>
/// Vector size; passed as the second constructor argument of QdrantMemoryStore.
/// </summary>
public int VectorSize { get; set; } = 768;
/// <summary>
/// Dimensions; passed as the dimensions argument when adding documents.
/// NOTE(review): the default differs from VectorSize - confirm both values are intended.
/// </summary>
public int Dimensions { get; set; } = 1384;
/// <summary>
/// Chunk size handed to the text splitter.
/// </summary>
public int ChunkSize { get; set; } = 1000;
/// <summary>
/// Chunk overlap handed to the text splitter.
/// </summary>
public int ChunkOverlap { get; set; } = 200;
/// <summary>
/// File type; selects the document loader used for the import.
/// </summary>
public FileTypeSource FileMode { get; set; } = FileTypeSource.PDF;
/// <summary>
/// File path; supports local files and URLs. A value starting with "http" is loaded as a URL.
/// </summary>
public string FilePath { get; set; } = "";
/// <summary>
/// Write behavior:
/// JustReturnCollectionIfCollectionIsAlreadyExists: if the collection already exists, it is returned without any changes.
/// OverwriteExistingCollection: if the collection already exists, it is deleted and recreated.
/// AlwaysAddDocuments: if the collection already exists, all documents are added to the existing collection.
/// </summary>
public AddDocumentsToDatabaseBehavior Behavior { get; set; } = AddDocumentsToDatabaseBehavior.AlwaysAddDocuments;
/// <summary>
/// Excel-only flag.
/// NOTE(review): the original comment described this as "whether headerless", which
/// conflicts with the property name; ImportFileVectorAsync does not read it and picks
/// the Excel loader from FileMode instead - confirm the intended meaning.
/// </summary>
public bool IsHeader { get; set; }
}
/// <summary>
/// File type of an import source; ImportFileVectorAsync uses it to choose the document loader.
/// </summary>
public enum FileTypeSource
{
/// <summary>Loaded with PdfPigPdfLoader.</summary>
PDF = 1,
/// <summary>Loaded with WordLoader.</summary>
WORD = 2,
/// <summary>Loaded with FirstRowHeaderExcelLoader.</summary>
EXCELHEAD = 3,
/// <summary>Loaded with NoHeaderExcelLoader.</summary>
EXCELNOHEAD = 4,
/// <summary>Loaded with HtmlLoader.</summary>
WEB = 5,
/// <summary>Loaded with FileLoader.</summary>
TEXT = 6
}
}
火山引擎开发者社区是火山引擎打造的AI技术生态平台,聚焦Agent与大模型开发,提供豆包系列模型(图像/视频/视觉)、智能分析与会话工具,并配套评测集、动手实验室及行业案例库。社区通过技术沙龙、挑战赛等活动促进开发者成长,新用户可领50万Tokens权益,助力构建智能应用。
更多推荐
所有评论(0)