SpringBoot 集成 LangChain4j 内嵌模型 RAG 搜索
LangChain4j 提供了一系列官方封装的 ONNX 模型包,全部可离线使用。
·
SpringBoot 集成 LangChain4j 内嵌模型 RAG 搜索
LangChain4j 提供了一系列官方封装的 ONNX 模型包,全部可离线使用。
| 模型名 | artifactId | 语言 | 向量维度 | 特点 |
|---|---|---|---|---|
| all-MiniLM-L6-v2 | langchain4j-embeddings-all-minilm-l6-v2 | 英文 | 384 | 轻量、快速、英文检索推荐 |
| bge-small-en-v1.5-q | langchain4j-embeddings-bge-small-en-v15-q | 英文 | 384 | 比 MiniLM 准确度更高(特别是长句) |
| bge-small-zh-v1.5-q | langchain4j-embeddings-bge-small-zh-v15-q | 中文 | 384 | 中文优化模型,性能优异 |
| bge-large-zh-v1.5-q | langchain4j-embeddings-bge-large-zh-v15-q | 中文 | 1024 | 高精度中文语义模型 |
| e5-small-v2 | langchain4j-embeddings-e5-small-v2 | 英文 | 384 | 优化了通用信息检索性能(英文) |
| gte-small | langchain4j-embeddings-gte-small | 英文 | 384 | 平衡速度与精度的通用英文模型 |
| gte-base | langchain4j-embeddings-gte-base | 英文 | 768 | 准确度更高,速度略慢 |
| gte-large | langchain4j-embeddings-gte-large | 英文 | 1024 | 高精度版本(对语义检索更强) |
| bge-m3 | langchain4j-embeddings-bge-m3 | 多语言 | 1024 | 多语言语义统一空间 |
1 依赖
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build descriptor for the lang-chain-redis demo application. -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<!-- Inherits from spring-boot-starter-parent 3.5.6. -->
<parent>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-parent</artifactId>
<version>3.5.6</version>
<relativePath/> <!-- lookup parent from repository -->
</parent>
<groupId>com.xu</groupId>
<artifactId>lang-chain-redis</artifactId>
<version>0.0.1-SNAPSHOT</version>
<name>lang-chain-redis</name>
<description>Demo project for Spring Boot</description>
<!-- Import the LangChain4j core and community BOMs; the LangChain4j dependencies below declare no version of their own. -->
<dependencyManagement>
<dependencies>
<dependency>
<groupId>dev.langchain4j</groupId>
<artifactId>langchain4j-bom</artifactId>
<version>1.7.1</version>
<type>pom</type>
<scope>import</scope>
</dependency>
<dependency>
<groupId>dev.langchain4j</groupId>
<artifactId>langchain4j-community-bom</artifactId>
<version>1.7.1-beta14</version>
<type>pom</type>
<scope>import</scope>
</dependency>
</dependencies>
</dependencyManagement>
<properties>
<java.version>25</java.version>
</properties>
<dependencies>
<!-- Spring Boot Web starter -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId>
</dependency>
<!-- LangChain4j Spring Boot starter -->
<dependency>
<groupId>dev.langchain4j</groupId>
<artifactId>langchain4j-spring-boot-starter</artifactId>
</dependency>
<!-- LangChain4j support library for Redis vector storage -->
<dependency>
<groupId>dev.langchain4j</groupId>
<artifactId>langchain4j-community-redis-spring-boot-starter</artifactId>
</dependency>
<!-- Embedded (in-process) embedding model -->
<dependency>
<groupId>dev.langchain4j</groupId>
<artifactId>langchain4j-embeddings-all-minilm-l6-v2</artifactId>
</dependency>
<!-- Document parsing -->
<dependency>
<groupId>dev.langchain4j</groupId>
<artifactId>langchain4j-document-parser-apache-tika</artifactId>
</dependency>
<!-- Spring Boot developer tools: hot deployment, automatic restart, etc. to speed up development -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-devtools</artifactId>
<scope>runtime</scope>
<optional>true</optional>
</dependency>
<!-- Utility library that reduces Java boilerplate -->
<dependency>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
<optional>true</optional>
</dependency>
<!-- Spring Boot test starter -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-test</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<plugins>
<!-- Register Lombok on the compiler's annotation processor path. -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<annotationProcessorPaths>
<path>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
</path>
</annotationProcessorPaths>
</configuration>
</plugin>
<!-- Spring Boot Maven plugin, configured to exclude Lombok. -->
<plugin>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-maven-plugin</artifactId>
<configuration>
<excludes>
<exclude>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
</exclude>
</excludes>
</configuration>
</plugin>
</plugins>
</build>
<!-- Spring snapshot repository for dependencies; release artifacts are disabled for it. -->
<repositories>
<repository>
<id>spring-snapshots</id>
<name>Spring Snapshots</name>
<url>https://repo.spring.io/snapshot</url>
<releases>
<enabled>false</enabled>
</releases>
</repository>
</repositories>
<!-- Same snapshot repository, declared for plugin resolution. -->
<pluginRepositories>
<pluginRepository>
<id>spring-snapshots</id>
<name>Spring Snapshots</name>
<url>https://repo.spring.io/snapshot</url>
<releases>
<enabled>false</enabled>
</releases>
</pluginRepository>
</pluginRepositories>
</project>
2 配置
# HTTP server settings.
server:
  port: 8080
  servlet:
    context-path: /

spring:
  application:
    name: lang-chain-rag

# Log file location and per-logger levels.
logging:
  file:
    # No space after the slash: the log file lives directly under logs/.
    name: logs/lang-chain-rag.log
  level:
    root: INFO
    com.xu: INFO

# Redis embedding store settings for the LangChain4j community Redis starter.
langchain4j:
  community:
    redis:
      host: localhost
      port: 6379
      user: langchain4j
      password: 123456
      indexName: langchain4j
      prefix: ":lc4j:"
      # Vector size; 384 is the dimension listed for all-MiniLM-L6-v2.
      dimension: 384
3 代码
3.1 RagConf
package com.xu.conf;
import dev.langchain4j.community.store.embedding.redis.RedisEmbeddingStore;
import dev.langchain4j.data.segment.TextSegment;
import dev.langchain4j.model.embedding.EmbeddingModel;
import dev.langchain4j.model.embedding.onnx.allminilml6v2.AllMiniLmL6V2EmbeddingModelFactory;
import dev.langchain4j.store.embedding.EmbeddingStore;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.context.annotation.Primary;
import redis.clients.jedis.JedisPooled;
/**
 * Spring configuration that wires the two beans the RAG flow depends on:
 * an in-process embedding model and a Redis-backed embedding store.
 */
@Configuration
public class RagConf {

    /**
     * Creates the embedding model through {@link AllMiniLmL6V2EmbeddingModelFactory}.
     *
     * @return the embedding model produced by the factory
     */
    @Bean
    public EmbeddingModel embeddingModel() {
        AllMiniLmL6V2EmbeddingModelFactory factory = new AllMiniLmL6V2EmbeddingModelFactory();
        return factory.create();
    }

    /**
     * Creates the primary {@link EmbeddingStore}, backed by Redis.
     *
     * <p>The connection (host, port, user, password) is fixed in code, and the
     * store is built for 384-dimensional vectors.
     *
     * @return a Redis embedding store for {@link TextSegment}s
     */
    @Bean
    @Primary
    public EmbeddingStore<TextSegment> embeddingStore() {
        JedisPooled redisClient = new JedisPooled("127.0.0.1", 6379, "langchain4j", "123456");
        var storeBuilder = RedisEmbeddingStore.builder();
        storeBuilder.jedisPooled(redisClient);
        storeBuilder.dimension(384);
        return storeBuilder.build();
    }
}
3.2 RagController
package com.xu.controller;
import dev.langchain4j.data.document.loader.FileSystemDocumentLoader;
import dev.langchain4j.data.document.parser.apache.tika.ApacheTikaDocumentParser;
import dev.langchain4j.data.document.splitter.DocumentByWordSplitter;
import dev.langchain4j.data.segment.TextSegment;
import dev.langchain4j.model.embedding.EmbeddingModel;
import dev.langchain4j.store.embedding.EmbeddingSearchRequest;
import dev.langchain4j.store.embedding.EmbeddingStore;
import dev.langchain4j.store.embedding.EmbeddingStoreIngestor;
import lombok.AllArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.RestController;
import java.util.HashMap;
import java.util.Map;
import java.util.stream.Collectors;
/**
 * REST endpoints for loading documents into the embedding store and for
 * running similarity searches against it.
 */
@Slf4j
@RestController
@AllArgsConstructor
@RequestMapping("/rag")
public class RagController {

    /** Directory whose files are parsed and ingested by {@link #chat()}. */
    private static final String DOCUMENT_DIR = "D:\\SourceCode\\简历";

    /** Maximum segment size passed to the word splitter, in characters. */
    private static final int MAX_SEGMENT_CHARS = 50;

    /** Overlap between neighbouring segments, in characters. */
    private static final int MAX_OVERLAP_CHARS = 10;

    /** Maximum number of matches returned by {@link #search(String)}. */
    private static final int MAX_RESULTS = 5;

    /** Minimum similarity score a match must reach to be returned. */
    private static final double MIN_SCORE = 0.6;

    private final EmbeddingModel embeddingModel;
    private final EmbeddingStore<TextSegment> embeddingStore;

    /**
     * Ingests the documents found in {@link #DOCUMENT_DIR} into the embedding store.
     *
     * @return the number of documents that were loaded
     */
    @GetMapping("/save")
    public Object chat() {
        // 1. Parse the files (docx, pdf, ...) with Apache Tika.
        var documents = FileSystemDocumentLoader.loadDocuments(
                DOCUMENT_DIR,
                new ApacheTikaDocumentParser()
        );
        // 2. Split by words. The two-int constructor takes its limits in
        //    CHARACTERS, not tokens; token-based sizing needs the overload
        //    that also accepts a token count estimator.
        var splitter = new DocumentByWordSplitter(MAX_SEGMENT_CHARS, MAX_OVERLAP_CHARS);
        // 3. Build an ingestor that uses the custom splitter.
        var ingestor = EmbeddingStoreIngestor.builder()
                .documentSplitter(splitter)
                .embeddingStore(embeddingStore)
                .embeddingModel(embeddingModel)
                .build();
        // 4. Generate the embeddings and store them.
        ingestor.ingest(documents);
        return documents.size();
    }

    /**
     * Runs a similarity search for the given query text.
     *
     * @param content the query text
     * @return HTTP 200 with a list of matches (score, id, and stored text, which
     *         is {@code null} when the match has no embedded segment), or
     *         HTTP 400 when {@code content} is blank
     */
    @GetMapping("/search")
    public Object search(@RequestParam String content) {
        // A blank query carries nothing to embed; reject it up front.
        if (content == null || content.isBlank()) {
            return ResponseEntity.badRequest().body("content must not be blank");
        }
        var embedding = embeddingModel.embed(TextSegment.from(content)).content();
        var search = EmbeddingSearchRequest.builder()
                .queryEmbedding(embedding)
                .maxResults(MAX_RESULTS)
                .minScore(MIN_SCORE)
                .build();
        var matches = embeddingStore.search(search).matches();
        var results = matches.stream()
                .map(match -> {
                    Map<String, Object> map = new HashMap<>();
                    map.put("embeddingScore", match.score());
                    map.put("embeddingId", match.embeddingId());
                    // Include the stored text for debugging; guard against
                    // matches that carry no embedded segment.
                    var segment = match.embedded();
                    map.put("embedded", segment == null ? null : segment.text());
                    return map;
                })
                .collect(Collectors.toList());
        return ResponseEntity.ok(results);
    }
}
4 结果


火山引擎开发者社区是火山引擎打造的AI技术生态平台,聚焦Agent与大模型开发,提供豆包系列模型(图像/视频/视觉)、智能分析与会话工具,并配套评测集、动手实验室及行业案例库。社区通过技术沙龙、挑战赛等活动促进开发者成长,新用户可领50万Tokens权益,助力构建智能应用。
更多推荐
所有评论(0)