SpringBoot 集成 LangChain4j 内嵌模型 RAG 搜索

LangChain4j 提供了一系列官方封装的 ONNX 嵌入(Embedding)模型包:模型文件随依赖一起打包,在应用进程内运行,无需调用外部服务即可离线使用。

| 模型名 | artifactId | 语言 | 向量维度 | 特点 |
| --- | --- | --- | --- | --- |
| all-MiniLM-L6-v2 | langchain4j-embeddings-all-minilm-l6-v2 | 英文 | 384 | 轻量、快速、英文检索推荐 |
| bge-small-en-v1.5-q | langchain4j-embeddings-bge-small-en-v15-q | 英文 | 384 | 比 MiniLM 准确度更高(特别是长句) |
| bge-small-zh-v1.5-q | langchain4j-embeddings-bge-small-zh-v15-q | 中文 | 512 | 中文优化模型,性能优异 |
| bge-large-zh-v1.5-q | langchain4j-embeddings-bge-large-zh-v15-q | 中文 | 1024 | 高精度中文语义模型 |
| e5-small-v2 | langchain4j-embeddings-e5-small-v2 | 英文 | 384 | 优化了通用信息检索性能(英文) |
| gte-small | langchain4j-embeddings-gte-small | 英文 | 384 | 平衡速度与精度的通用英文模型 |
| gte-base | langchain4j-embeddings-gte-base | 英文 | 768 | 准确度更高,速度略慢 |
| gte-large | langchain4j-embeddings-gte-large | 英文 | 1024 | 高精度版本(对语义检索更强) |
| bge-m3 | langchain4j-embeddings-bge-m3 | 多语言 | 1024 | 多语言语义统一空间 |

注:更换模型时,向量库配置中的 dimension 必须与所选模型的向量维度一致;各 artifactId 是否已发布,请以 Maven Central 上 dev.langchain4j 的实际构件为准。

1 依赖

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">

    <modelVersion>4.0.0</modelVersion>

    <parent>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-parent</artifactId>
        <version>3.5.6</version>
        <relativePath/> <!-- lookup parent from repository -->
    </parent>

    <groupId>com.xu</groupId>
    <artifactId>lang-chain-redis</artifactId>
    <version>0.0.1-SNAPSHOT</version>
    <name>lang-chain-redis</name>

    <description>Demo project for Spring Boot</description>

    <dependencyManagement>
        <dependencies>
            <dependency>
                <groupId>dev.langchain4j</groupId>
                <artifactId>langchain4j-bom</artifactId>
                <version>1.7.1</version>
                <type>pom</type>
                <scope>import</scope>
            </dependency>
            <dependency>
                <groupId>dev.langchain4j</groupId>
                <artifactId>langchain4j-community-bom</artifactId>
                <version>1.7.1-beta14</version>
                <type>pom</type>
                <scope>import</scope>
            </dependency>
        </dependencies>
    </dependencyManagement>

    <properties>
        <java.version>25</java.version>
    </properties>

    <dependencies>

        <!-- Spring Boot web starter -->
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-web</artifactId>
        </dependency>

        <!-- LangChain4j Spring Boot starter -->
        <!-- NOTE(review): the dev.langchain4j dependencies declare no version here; presumably
             they are resolved through the BOMs imported in dependencyManagement - confirm. -->
        <dependency>
            <groupId>dev.langchain4j</groupId>
            <artifactId>langchain4j-spring-boot-starter</artifactId>
        </dependency>

        <!-- LangChain4j support library for Redis vector storage -->
        <dependency>
            <groupId>dev.langchain4j</groupId>
            <artifactId>langchain4j-community-redis-spring-boot-starter</artifactId>
        </dependency>

        <!-- Embedded (in-process) embedding model -->
        <dependency>
            <groupId>dev.langchain4j</groupId>
            <artifactId>langchain4j-embeddings-all-minilm-l6-v2</artifactId>
        </dependency>

        <!-- Document parsing -->
        <dependency>
            <groupId>dev.langchain4j</groupId>
            <artifactId>langchain4j-document-parser-apache-tika</artifactId>
        </dependency>

        <!-- Spring Boot developer tools: hot reload, automatic restart, etc. to speed up development -->
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-devtools</artifactId>
            <scope>runtime</scope>
            <optional>true</optional>
        </dependency>

        <!-- Utility library that reduces Java boilerplate -->
        <dependency>
            <groupId>org.projectlombok</groupId>
            <artifactId>lombok</artifactId>
            <optional>true</optional>
        </dependency>

        <!-- Spring Boot test starter -->
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-test</artifactId>
            <scope>test</scope>
        </dependency>

    </dependencies>

    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <configuration>
                    <annotationProcessorPaths>
                        <path>
                            <groupId>org.projectlombok</groupId>
                            <artifactId>lombok</artifactId>
                        </path>
                    </annotationProcessorPaths>
                </configuration>
            </plugin>
            <plugin>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-maven-plugin</artifactId>
                <configuration>
                    <excludes>
                        <exclude>
                            <groupId>org.projectlombok</groupId>
                            <artifactId>lombok</artifactId>
                        </exclude>
                    </excludes>
                </configuration>
            </plugin>
        </plugins>
    </build>
    <repositories>
        <repository>
            <id>spring-snapshots</id>
            <name>Spring Snapshots</name>
            <url>https://repo.spring.io/snapshot</url>
            <releases>
                <enabled>false</enabled>
            </releases>
        </repository>
    </repositories>
    <pluginRepositories>
        <pluginRepository>
            <id>spring-snapshots</id>
            <name>Spring Snapshots</name>
            <url>https://repo.spring.io/snapshot</url>
            <releases>
                <enabled>false</enabled>
            </releases>
        </pluginRepository>
    </pluginRepositories>

</project>

2 配置

# HTTP server settings
server:
  port: 8080
  servlet:
    context-path: /

spring:
  application:
    name: lang-chain-rag

logging:
  file:
    # No space after the slash: the value is used verbatim as the log file path.
    name: logs/lang-chain-rag.log
  level:
    root: INFO
    com.xu: INFO

# Redis embedding store settings
langchain4j:
  community:
    redis:
      host: localhost
      port: 6379
      user: langchain4j
      password: 123456
      indexName: langchain4j
      prefix: ":lc4j:"
      # Must equal the embedding model's vector size (all-MiniLM-L6-v2 produces 384 dimensions).
      dimension: 384

3 代码

3.1 RagConf

package com.xu.conf;

import dev.langchain4j.community.store.embedding.redis.RedisEmbeddingStore;
import dev.langchain4j.data.segment.TextSegment;
import dev.langchain4j.model.embedding.EmbeddingModel;
import dev.langchain4j.model.embedding.onnx.allminilml6v2.AllMiniLmL6V2EmbeddingModelFactory;
import dev.langchain4j.store.embedding.EmbeddingStore;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.context.annotation.Primary;
import redis.clients.jedis.JedisPooled;

/**
 * Spring configuration that exposes the two beans the RAG endpoints depend on:
 * an embedding model and a Redis-backed embedding store.
 */
@Configuration
public class RagConf {

    /**
     * Creates the all-MiniLM-L6-v2 embedding model through its factory class.
     *
     * @return the embedding model used for both ingestion and querying
     */
    @Bean
    public EmbeddingModel embeddingModel() {
        var modelFactory = new AllMiniLmL6V2EmbeddingModelFactory();
        return modelFactory.create();
    }

    /**
     * Creates the Redis embedding store and marks it as the primary {@link EmbeddingStore} bean.
     *
     * <p>Connection details are hard-coded here (host, port, user, password), and the vector
     * dimension is fixed at 384.
     *
     * @return an embedding store for {@link TextSegment} entries
     */
    @Bean
    @Primary
    public EmbeddingStore<TextSegment> embeddingStore() {
        // Build the Redis client first so the builder chain below stays short.
        var redisClient = new JedisPooled("127.0.0.1", 6379, "langchain4j", "123456");
        var storeBuilder = RedisEmbeddingStore.builder()
                .jedisPooled(redisClient)
                .dimension(384);
        return storeBuilder.build();
    }

}

3.2 RagController

package com.xu.controller;

import dev.langchain4j.data.document.loader.FileSystemDocumentLoader;
import dev.langchain4j.data.document.parser.apache.tika.ApacheTikaDocumentParser;
import dev.langchain4j.data.document.splitter.DocumentByWordSplitter;
import dev.langchain4j.data.segment.TextSegment;
import dev.langchain4j.model.embedding.EmbeddingModel;
import dev.langchain4j.store.embedding.EmbeddingSearchRequest;
import dev.langchain4j.store.embedding.EmbeddingStore;
import dev.langchain4j.store.embedding.EmbeddingStoreIngestor;
import lombok.AllArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.RestController;

import java.util.HashMap;
import java.util.Map;
import java.util.stream.Collectors;

@Slf4j
@RestController
@AllArgsConstructor
@RequestMapping("/rag")
public class RagController {

    private final EmbeddingModel embeddingModel;

    private final EmbeddingStore<TextSegment> embeddingStore;

    /**
     * RAG数据入库
     *
     * @return 数量
     */
    @GetMapping("/save")
    public Object chat() {
        // 1️⃣ 使用 Tika 解析 docx/pdf 等文件
        var documents = FileSystemDocumentLoader.loadDocuments(
                "D:\\SourceCode\\简历",
                new ApacheTikaDocumentParser()
        );
        // 2️⃣ 定义自定义的文本拆分器 chunkSize=50 表示每段最大 50 tokens,overlap=10 表示重叠 10 tokens
        var splitter = new DocumentByWordSplitter(50, 10);
        // 3️⃣ 构建带自定义拆分器的 ingestor
        var ingestor = EmbeddingStoreIngestor.builder()
                .documentSplitter(splitter)
                .embeddingStore(embeddingStore)
                .embeddingModel(embeddingModel)
                .build();
        // 4️⃣ 执行嵌入生成与存储
        ingestor.ingest(documents);
        return documents.size();
    }

    /**
     * RAG数据查询
     *
     * @param content 查询
     * @return 结果
     */
    @GetMapping("/search")
    public Object search(@RequestParam String content) {
        var embedding = embeddingModel.embed(TextSegment.from(content)).content();

        var search = EmbeddingSearchRequest.builder()
                .queryEmbedding(embedding)
                .maxResults(5)
                .minScore(0.6)
                .build();

        var matches = embeddingStore.search(search).matches();

        var results = matches.stream()
                .map(match -> {
                    Map<String, Object> map = new HashMap<>();
                    map.put("embeddingScore", match.score());
                    map.put("embeddingId", match.embeddingId());
                    // 添加文本内容以便调试
                    map.put("embedded", match.embedded().text());
                    return map;
                }).collect(Collectors.toList());

        return ResponseEntity.ok(results);
    }

}

4 结果

在这里插入图片描述
在这里插入图片描述

Logo

火山引擎开发者社区是火山引擎打造的AI技术生态平台,聚焦Agent与大模型开发,提供豆包系列模型(图像/视频/视觉)、智能分析与会话工具,并配套评测集、动手实验室及行业案例库。社区通过技术沙龙、挑战赛等活动促进开发者成长,新用户可领50万Tokens权益,助力构建智能应用。

更多推荐