using LangChain.Databases;
using LangChain.Databases.Qdrant;
using LangChain.DocumentLoaders;
using LangChain.Extensions;
using LangChain.Providers;
using LangChain.Splitters.Text;
using Microsoft.Extensions.AI;
using Microsoft.SemanticKernel.Connectors.Qdrant;
using OllamaSharp;
using Peak.Abp.Silver.AI.Host.Core;
using Peak.Abp.Silver.AI.Model.Request;
using Peak.Abp.Silver.Basic;
using Qdrant.Client;
using Qdrant.Client.Grpc;

namespace Peak.Abp.Silver.AI.Host
{
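    /// <summary>
    /// Helper around the Qdrant client: collection management, embedding generation,
    /// vector insert/search, and importing files or web pages into a vector collection.
    /// </summary>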
    public class QdrantBaseUtils: IQdrantBaseUtils
    {
        /// <summary>
        /// Qdrant client used by every operation in this class. It is assigned exactly once,
        /// by whichever constructor runs, hence <c>readonly</c>.
        /// </summary>
        private readonly Qdrant.Client.QdrantClient client;

        /// <summary>
        /// Builds the client from the <c>Qdrant:Host</c>, <c>Qdrant:Port</c>, <c>Qdrant:Key</c>
        /// and <c>Qdrant:Https</c> configuration entries. HTTPS is enabled only when
        /// <c>Qdrant:Https</c> equals 1.
        /// </summary>
        public QdrantBaseUtils()
        {
            var qdrantHost = ConfigurationUtil.GetSection("Qdrant:Host").ToString();
            var qdrantPort = ConfigurationUtil.GetSection("Qdrant:Port").ToInt();
            var qdrantKey = ConfigurationUtil.GetSection("Qdrant:Key").ToString();
            var httpsFlag = ConfigurationUtil.GetSection("Qdrant:Https").ToInt();

            // The flag is stored as a number in configuration; only the value 1 means "use HTTPS".
            var useHttps = httpsFlag == 1;

            client = new QdrantClient(
                host: qdrantHost,
                port: qdrantPort,
                https: useHttps,
                apiKey: qdrantKey);
        }

        /// <summary>
        /// Builds the client for an explicit endpoint address.
        /// </summary>
        /// <param name="address">Endpoint handed to <see cref="QdrantClient"/>.</param>
        /// <param name="apiKey">Optional API key; <c>null</c> is passed through unchanged.</param>
        public QdrantBaseUtils(System.Uri address, string? apiKey = null)
            => client = new QdrantClient(address: address, apiKey: apiKey);

        /// <summary>
        /// Builds the client from explicit connection settings.
        /// </summary>
        /// <param name="host">Server host name; defaults to <c>localhost</c>.</param>
        /// <param name="port">Server port; defaults to 6334.</param>
        /// <param name="https">Whether to connect over HTTPS; defaults to <c>false</c>.</param>
        /// <param name="apiKey">Optional API key; <c>null</c> is passed through unchanged.</param>
        public QdrantBaseUtils(string host = "localhost", int port = 6334, bool https = false, string? apiKey = null)
            => client = new QdrantClient(
                host: host,
                port: port,
                https: https,
                apiKey: apiKey);

        #region 向量库

        /// <summary>
        /// Creates a collection unless one with the same name already exists.
        /// </summary>
        /// <param name="collectName">Name of the collection.</param>
        /// <param name="size">Vector size written into the collection's <see cref="VectorParams"/>; defaults to 768.</param>
        /// <param name="distance">Distance metric written into the collection's <see cref="VectorParams"/>; defaults to cosine.</param>
        /// <returns>A task that completes after the existence check and, when needed, the create call.</returns>
        public async Task CreateCollectionAsync(string collectName, ulong size = 768, Distance distance = Distance.Cosine)
        {
            bool alreadyPresent = await client.CollectionExistsAsync(collectName);
            if (!alreadyPresent)
            {
                var vectorConfig = new VectorParams { Size = size, Distance = distance };
                await client.CreateCollectionAsync(collectName, vectorConfig);
            }
        }

        /// <summary>
        /// Reports whether a collection with the given name exists.
        /// </summary>
        /// <param name="collectName">Name of the collection to look up.</param>
        /// <returns>The result of the client's existence check.</returns>
        public async Task<bool> CollectionExistsAsync(string collectName)
            => await client.CollectionExistsAsync(collectName);

        /// <summary>
        /// Fetches the description of a collection.
        /// </summary>
        /// <param name="collectName">Name of the collection.</param>
        /// <returns>
        /// The <see cref="CollectionInfo"/> returned by the client. Per the original notes it exposes,
        /// among other things, the number of stored points and the vector configuration
        /// (size and distance metric).
        /// </returns>
        public async Task<CollectionInfo> GetCollectionInfoAsync(string collectName)
            => await client.GetCollectionInfoAsync(collectName);

        /// <summary>
        /// Deletes a collection; does nothing when no collection with that name exists.
        /// </summary>
        /// <param name="collectName">Name of the collection to remove.</param>
        /// <returns>A task that completes after the existence check and, when needed, the delete call.</returns>
        public async Task DeleteCollectionAsync(string collectName)
        {
            bool present = await client.CollectionExistsAsync(collectName);
            if (present)
            {
                await client.DeleteCollectionAsync(collectName);
            }
        }

        /// <summary>
        /// Lists the names of all collections known to the server.
        /// </summary>
        /// <returns>The collection names as returned by the client.</returns>
        public async Task<IReadOnlyList<string>> ListCollectionAsync()
            => await client.ListCollectionsAsync();

        #endregion

        #region 向量

        /// <summary>
        /// Computes the embedding vector for a single text using the Ollama endpoint and
        /// embedding model named in configuration (<c>Ollama:Url</c>, <c>Ollama:EmbeddingModel</c>).
        /// </summary>
        /// <param name="value">Text to embed.</param>
        /// <returns>The embedding vector produced for <paramref name="value"/>.</returns>
        public async Task<ReadOnlyMemory<float>> GetEmbeddingValueAsync(string value)
        {
            var embeddingId = ConfigurationUtil.GetSection("Ollama:EmbeddingModel").ToString();
            var apiUrl = ConfigurationUtil.GetSection("Ollama:Url").ToString();

            // A fresh client is created per call, so it must also be released per call;
            // otherwise every embedding request leaves the client's HTTP resources behind.
            using var ollama = new OllamaApiClient(apiUrl);

            // Awaited inside the method (not returned as a bare task) so the client is
            // disposed only after the request has finished.
            return await ollama.GenerateVectorAsync(value, new EmbeddingGenerationOptions
            {
                ModelId = embeddingId
            });
        }

        /// <summary>
        /// Computes embedding vectors for several texts in one request, using the embedding
        /// model supplied by <c>OllamaBaseUtils</c>.
        /// </summary>
        /// <param name="value">Texts to embed.</param>
        /// <returns>The <c>Values</c> of the embedding response, one vector per entry.</returns>
        public async Task<float[][]> GetEmbeddingValueAsync(List<string> value)
        {
            var embeddingModel = await new OllamaBaseUtils().GetEmbeddingAsync();
            var request = EmbeddingRequest.ToEmbeddingRequest(value.ToArray());
            var response = await embeddingModel.CreateEmbeddingsAsync(request, new EmbeddingSettings());
            return response.Values;
        }

        /// <summary>
        /// Sends the given point vectors to a collection, creating the collection with default
        /// settings first when it does not exist.
        /// </summary>
        /// <remarks>
        /// NOTE(review): despite the name, this calls the client's <c>UpdateVectorsAsync</c>;
        /// presumably that only touches points that already exist — confirm this is intended.
        /// </remarks>
        /// <param name="collectName">Target collection.</param>
        /// <param name="points">Point vectors forwarded to the client.</param>
        /// <returns><c>true</c> when the operation status is <see cref="UpdateStatus.Completed"/>; otherwise <c>false</c>.</returns>
        public async Task<bool> InsertVectorsAsync(string collectName, IReadOnlyList<PointVectors> points)
        {
            bool collectionPresent = await CollectionExistsAsync(collectName);
            if (!collectionPresent)
            {
                await CreateCollectionAsync(collectName);
            }

            var updateResult = await client.UpdateVectorsAsync(collectName, points);
            return updateResult.Status == UpdateStatus.Completed;
        }

        /// <summary>
        /// Embeds a single text and upserts it as one point whose <c>page_content</c> payload
        /// field holds the original text.
        /// </summary>
        /// <param name="collectName">Target collection; created on demand when missing.</param>
        /// <param name="value">Text to embed and store.</param>
        /// <param name="pointId">
        /// Id of the point. Values less than or equal to zero are replaced by the current
        /// <see cref="DateTime.Ticks"/> value.
        /// </param>
        /// <returns>
        /// <c>true</c> when the upsert status is <see cref="UpdateStatus.Completed"/>;
        /// <c>false</c> otherwise, including when no embedding was produced.
        /// </returns>
        public async Task<bool> InsertVectorsAsync(string collectName, string value, long pointId = 0)
        {
            // Embed first: a collection that has to be created can then be sized from the
            // model's actual output instead of the hard-coded 768 default, which would make
            // the upsert fail for models with a different dimension.
            var embeddings = await GetEmbeddingValueAsync(new List<string> { value });
            if (embeddings is null || embeddings.Length == 0)
            {
                return false;
            }

            var vector = embeddings[0];
            if (vector is null || vector.Length == 0)
            {
                return false;
            }

            if (!await CollectionExistsAsync(collectName))
            {
                await CreateCollectionAsync(collectName, (ulong)vector.Length);
            }

            if (pointId <= 0)
            {
                // Auto-generated id; kept on the same clock as before so new ids stay in
                // the same range as ids generated by earlier versions.
                pointId = DateTime.Now.Ticks;
            }

            var points = new List<PointStruct>
            {
                new PointStruct
                {
                    Id = (ulong)pointId,
                    Vectors = vector,
                    Payload =
                    {
                        ["page_content"] = value,
                    }
                }
            };

            var result = await client.UpsertAsync(collectName, points);
            return result.Status == UpdateStatus.Completed;
        }

        /// <summary>
        /// Embeds several texts and upserts them as points whose <c>page_content</c> payload
        /// field holds the corresponding text.
        /// </summary>
        /// <remarks>
        /// Point ids are consecutive values following the current <see cref="DateTime.Ticks"/>.
        /// NOTE(review): two batches started within a few ticks of each other could produce
        /// overlapping ids — confirm this is acceptable for the callers.
        /// </remarks>
        /// <param name="collectName">Target collection; created on demand when missing.</param>
        /// <param name="values">Texts to embed and store.</param>
        /// <returns>
        /// <c>true</c> when the upsert status is <see cref="UpdateStatus.Completed"/>;
        /// <c>false</c> otherwise, including when there is nothing to insert.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="values"/> is <c>null</c>.</exception>
        public async Task<bool> BatchVectorsAsync(string collectName, List<string> values)
        {
            if (values is null)
            {
                throw new ArgumentNullException(nameof(values));
            }
            if (values.Count == 0)
            {
                // Nothing to embed; avoid a pointless embedding request and collection creation.
                return false;
            }

            var embeddings = await GetEmbeddingValueAsync(values);

            // Pair texts and vectors only as far as both exist, so a short embedding
            // response cannot push the index past either collection.
            var pairCount = Math.Min(values.Count, embeddings?.Length ?? 0);
            if (pairCount == 0)
            {
                return false;
            }

            var dimension = embeddings[0]?.Length ?? 0;
            if (dimension == 0)
            {
                return false;
            }

            if (!await CollectionExistsAsync(collectName))
            {
                // Size a new collection from the model's real output rather than the 768 default.
                await CreateCollectionAsync(collectName, (ulong)dimension);
            }

            long pointId = DateTime.Now.Ticks;
            var points = new List<PointStruct>(pairCount);
            for (int i = 0; i < pairCount; i++)
            {
                pointId++;
                points.Add(new PointStruct
                {
                    Id = (ulong)pointId,
                    Vectors = embeddings[i],
                    Payload =
                    {
                        ["page_content"] = values[i],
                    }
                });
            }

            var result = await client.UpsertAsync(collectName, points);
            return result.Status == UpdateStatus.Completed;
        }

        /// <summary>
        /// Embeds the query text, searches the collection and returns one payload field of
        /// each hit as a string.
        /// </summary>
        /// <param name="collectName">Collection to search.</param>
        /// <param name="value">Query text to embed.</param>
        /// <param name="limit">Maximum number of hits requested; defaults to 5.</param>
        /// <param name="payloadName">Payload field to read from each hit; defaults to <c>page_content</c>.</param>
        /// <returns>
        /// The string value of <paramref name="payloadName"/> for every hit that carries that
        /// field, in result order. Empty when the collection does not exist.
        /// </returns>
        public async Task<List<string>> SearchVectorsAsync(string collectName, string value, ulong limit = 5,string payloadName= "page_content")
        {
            if (!await CollectionExistsAsync(collectName))
            {
                return new List<string>();
            }

            var vector = await GetEmbeddingValueAsync(value);
            var searchResults = await client.SearchAsync(
                collectName,
                vector,
                limit: limit
            );

            var listValues = new List<string>(searchResults.Count);
            foreach (var point in searchResults)
            {
                // A hit stored without the requested field is skipped instead of aborting
                // the whole search with a KeyNotFoundException from the payload indexer.
                if (point.Payload.TryGetValue(payloadName, out var payloadValue))
                {
                    listValues.Add(payloadValue.StringValue);
                }
            }
            return listValues;
        }

        /// <summary>
        /// Embeds the query text, searches the collection and returns the raw scored hits.
        /// </summary>
        /// <param name="collectName">Collection to search.</param>
        /// <param name="value">Query text to embed.</param>
        /// <param name="limit">Maximum number of hits requested; defaults to 5.</param>
        /// <returns>
        /// The scored points returned by the client. An empty list (never <c>null</c>) when the
        /// collection does not exist, matching the string-returning overload.
        /// </returns>
        public async Task<IReadOnlyList<ScoredPoint>> SearchVectorsAsync(string collectName, string value, ulong limit = 5)
        {
            if (!await CollectionExistsAsync(collectName))
            {
                // Empty rather than null so callers can enumerate the result unconditionally.
                return Array.Empty<ScoredPoint>();
            }

            var vector = await GetEmbeddingValueAsync(value);
            return await client.SearchAsync(
                collectName,
                vector,
                limit: limit
            );
        }

        #endregion

        #region 工具

        /// <summary>
        /// Loads a local file or a URL, splits it into chunks, embeds the chunks and adds them
        /// to a vector collection.
        /// </summary>
        /// <remarks>
        /// The source is treated as a URL when <c>FilePath</c> starts with <c>http</c>
        /// (case-insensitive) and as a local path otherwise. The document loader is chosen
        /// from <c>FileMode</c>. The vector store endpoint comes from the <c>Qdrant:Url</c>
        /// configuration entry.
        /// </remarks>
        /// <param name="info">Import settings: source path, loader type, chunking, collection name and write behavior.</param>
        /// <returns>The vector collection returned by the import call.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="info"/> is <c>null</c>.</exception>
        /// <exception cref="ArgumentException"><c>FilePath</c> is null, empty or whitespace.</exception>
        /// <exception cref="NotSupportedException"><c>FileMode</c> is not one of the handled values.</exception>
        public async Task<IVectorCollection> ImportFileVectorAsync(ImportFileRequest info)
        {
            if (info is null)
            {
                throw new ArgumentNullException(nameof(info));
            }
            if (string.IsNullOrWhiteSpace(info.FilePath))
            {
                throw new ArgumentException("FilePath must be a local path or an http(s) URL.", nameof(info));
            }

            OllamaBaseUtils ollamaBase = new OllamaBaseUtils();
            var embeddingModel = await ollamaBase.GetEmbeddingAsync();

            // Splits loaded documents into overlapping chunks before embedding.
            var textSplitter = new RecursiveCharacterTextSplitter(
                chunkSize: info.ChunkSize,
                chunkOverlap: info.ChunkOverlap
            );

            // Ordinal, case-insensitive scheme check: no culture-dependent lower-casing and
            // no temporary string.
            DataSource source = info.FilePath.StartsWith("http", StringComparison.OrdinalIgnoreCase)
                ? DataSource.FromUrl(info.FilePath)
                : DataSource.FromPath(info.FilePath);

            var urls = ConfigurationUtil.GetSection("Qdrant:Url").ToString();
            var vectorDatabase = new QdrantVectorDatabase(new QdrantMemoryStore(urls, info.VectorSize));

            // Every branch performs the same import; only the loader type differs.
            switch (info.FileMode)
            {
                case FileTypeSource.PDF:
                    return await vectorDatabase.AddDocumentsFromAsync<PdfPigPdfLoader>(
                        embeddingModel,
                        dimensions: info.Dimensions,
                        dataSource: source,
                        collectionName: info.CollectName,
                        textSplitter: textSplitter,
                        behavior: info.Behavior);

                case FileTypeSource.WORD:
                    return await vectorDatabase.AddDocumentsFromAsync<WordLoader>(
                        embeddingModel,
                        dimensions: info.Dimensions,
                        dataSource: source,
                        collectionName: info.CollectName,
                        textSplitter: textSplitter,
                        behavior: info.Behavior);

                case FileTypeSource.EXCELHEAD:
                    return await vectorDatabase.AddDocumentsFromAsync<FirstRowHeaderExcelLoader>(
                        embeddingModel,
                        dimensions: info.Dimensions,
                        dataSource: source,
                        collectionName: info.CollectName,
                        textSplitter: textSplitter,
                        behavior: info.Behavior);

                case FileTypeSource.EXCELNOHEAD:
                    return await vectorDatabase.AddDocumentsFromAsync<NoHeaderExcelLoader>(
                        embeddingModel,
                        dimensions: info.Dimensions,
                        dataSource: source,
                        collectionName: info.CollectName,
                        textSplitter: textSplitter,
                        behavior: info.Behavior);

                case FileTypeSource.WEB:
                    return await vectorDatabase.AddDocumentsFromAsync<HtmlLoader>(
                        embeddingModel,
                        dimensions: info.Dimensions,
                        dataSource: source,
                        collectionName: info.CollectName,
                        textSplitter: textSplitter,
                        behavior: info.Behavior);

                case FileTypeSource.TEXT:
                    return await vectorDatabase.AddDocumentsFromAsync<FileLoader>(
                        embeddingModel,
                        dimensions: info.Dimensions,
                        dataSource: source,
                        collectionName: info.CollectName,
                        textSplitter: textSplitter,
                        behavior: info.Behavior);

                default:
                    // Same message as before; the type is now specific while still being
                    // caught by existing catch (Exception) handlers.
                    throw new NotSupportedException("文件类型错误");
            }
        }
         
        #endregion


    }
}
using LangChain.Extensions;

namespace Peak.Abp.Silver.AI.Model.Request
{
    /// <summary>
    /// Settings for importing a file or web page into a vector collection.
    /// </summary>
    public class ImportFileRequest
    {
        /// <summary>Name of the target vector collection. Defaults to an empty string.</summary>
        public string CollectName { get; set; } = string.Empty;

        /// <summary>Vector size passed to the vector store. Defaults to 768.</summary>
        public int VectorSize { get; set; } = 768;

        /// <summary>
        /// Dimensions value passed to the document import call. Defaults to 1384.
        /// NOTE(review): this default differs from the <see cref="VectorSize"/> default (768) —
        /// confirm both are intended.
        /// </summary>
        public int Dimensions { get; set; } = 1384;

        /// <summary>Chunk size used by the text splitter. Defaults to 1000.</summary>
        public int ChunkSize { get; set; } = 1000;

        /// <summary>Overlap between consecutive chunks used by the text splitter. Defaults to 200.</summary>
        public int ChunkOverlap { get; set; } = 200;

        /// <summary>Kind of source being imported. Defaults to <see cref="FileTypeSource.PDF"/>.</summary>
        public FileTypeSource FileMode { get; set; } = FileTypeSource.PDF;

        /// <summary>Location of the source; both local file paths and URLs are supported. Defaults to an empty string.</summary>
        public string FilePath { get; set; } = string.Empty;

        /// <summary>
        /// How documents are written when the collection may already exist:
        /// <list type="bullet">
        /// <item><description><c>JustReturnCollectionIfCollectionIsAlreadyExists</c>: an existing collection is returned without changes.</description></item>
        /// <item><description><c>OverwriteExistingCollection</c>: an existing collection is deleted and recreated.</description></item>
        /// <item><description><c>AlwaysAddDocuments</c>: all documents are added to the existing collection.</description></item>
        /// </list>
        /// Defaults to <c>AlwaysAddDocuments</c>.
        /// </summary>
        public AddDocumentsToDatabaseBehavior Behavior { get; set; } = AddDocumentsToDatabaseBehavior.AlwaysAddDocuments;

        /// <summary>
        /// Excel-only header flag.
        /// NOTE(review): the original comment describes this as "whether headerless" while the
        /// property name suggests the opposite — confirm the intended meaning.
        /// </summary>
        public bool IsHeader { get; set; }
    }

    /// <summary>
    /// Kind of source document handled by the import.
    /// </summary>
    public enum FileTypeSource
    {
        /// <summary>PDF document.</summary>
        PDF = 1,

        /// <summary>Word document.</summary>
        WORD = 2,

        /// <summary>Excel workbook whose first row is a header.</summary>
        EXCELHEAD = 3,

        /// <summary>Excel workbook without a header row.</summary>
        EXCELNOHEAD = 4,

        /// <summary>Web page.</summary>
        WEB = 5,

        /// <summary>Text file.</summary>
        TEXT = 6,
    }


}

Logo

火山引擎开发者社区是火山引擎打造的AI技术生态平台,聚焦Agent与大模型开发,提供豆包系列模型(图像/视频/视觉)、智能分析与会话工具,并配套评测集、动手实验室及行业案例库。社区通过技术沙龙、挑战赛等活动促进开发者成长,新用户可领50万Tokens权益,助力构建智能应用。

更多推荐