一、系统架构设计
本教程将基于 ThinkPHP 6.1 构建一个由知识图谱驱动的智能问答系统，实现自然语言理解和精准答案检索。
技术架构:
- 核心框架:ThinkPHP 6.1 + Swoole
- 知识存储:Neo4j图数据库 + MySQL
- NLP处理:Jieba分词 + 腾讯云NLP
- 搜索引擎:Elasticsearch 8.x
- 前端交互:Vue3 + Element Plus
核心功能模块:
- 知识图谱构建系统
- 问句语义解析引擎
- 多策略答案检索
- 对话上下文管理
- 智能推荐系统
二、项目初始化与配置
1. 项目创建与扩展安装
# Create the ThinkPHP 6 project
composer create-project topthink/think tp-qa-system
# Install the required packages
cd tp-qa-system
composer require topthink/think-swoole
# jieba-php is published on Packagist under the "fukuball" vendor
# (it provides the Fukuball\Jieba namespace used by NlpService)
composer require fukuball/jieba-php
composer require elasticsearch/elasticsearch
# 配置数据库连接
// config/database.php
// Connection settings. Every value is read from the environment (.env)
// first so that credentials do not have to be committed; the second
// argument of env() is the fallback used for local development.
return [
    // Name of the connection used when a model does not pick one.
    'default' => env('database.driver', 'mysql'),
    'connections' => [
        'mysql' => [
            'type' => 'mysql',
            'hostname' => env('database.hostname', '127.0.0.1'),
            'database' => env('database.database', 'qa_system'),
            'username' => env('database.username', 'root'),
            'password' => env('database.password', ''),
            'charset' => 'utf8mb4',
        ],
        // NOTE(review): 'neo4j' is not a connector type shipped with
        // think-orm; a custom connector class is presumably required —
        // confirm before relying on this entry.
        'neo4j' => [
            'type' => 'neo4j',
            'host' => env('neo4j.host', 'bolt://localhost'),
            'username' => env('neo4j.username', 'neo4j'),
            'password' => env('neo4j.password', 'password'),
        ],
    ],
];
2. 目录结构设计
tp-qa-system/
├── app/
│ ├── controller/
│ ├── service/ # 服务层
│ │ ├── QaService.php
│ │ ├── NlpService.php
│ │ └── KgService.php
│ ├── model/
│ │ ├── entity/ # 知识实体
│ │ └── relation/ # 知识关系
│ ├── library/ # 类库
│ │ ├── Neo4jClient.php
│ │ └── EsClient.php
│ ├── middleware/ # 中间件
│ └── util/ # 工具类
├── config/
│ ├── nlp.php # NLP配置
│ └── elastic.php # ES配置
├── extend/
└── public/
三、知识图谱构建
1. 知识实体建模
// app/model/entity/Concept.php
namespace app\model\entity;

use think\Model;

/**
 * Knowledge-graph node model for the "Concept" label.
 *
 * Each method builds a Cypher statement and passes it, together with a
 * parameter map, to query() on the "neo4j" connection.
 */
class Concept extends Model
{
    protected $connection = 'neo4j';

    // Node label placed into every generated Cypher statement.
    public $label = 'Concept';

    // Node properties and their types.
    protected $schema = [
        'id' => 'string',
        'name' => 'string',
        'desc' => 'string',
        'aliases' => 'array',
    ];

    /**
     * Create a node carrying the given properties.
     *
     * @param array $data property name => value
     * @return mixed the result of query() for "RETURN n"
     * @throws \InvalidArgumentException when a property name is not a plain identifier
     */
    public function createNode($data)
    {
        $assignments = [];
        $params = [];
        foreach ($data as $key => $value) {
            // Property names end up in the statement text itself (only the
            // values are bound), so anything but a plain identifier is refused.
            if (!preg_match('/^[A-Za-z_][A-Za-z0-9_]*$/', (string) $key)) {
                throw new \InvalidArgumentException("Invalid property name: {$key}");
            }
            // Produces e.g. "name: $name" - a Cypher parameter reference.
            $assignments[] = "{$key}: \${$key}";
            $params[$key] = $value;
        }

        $query = "CREATE (n:{$this->label} {" . implode(', ', $assignments) . '}) RETURN n';

        return $this->query($query, $params);
    }

    /**
     * Find nodes whose name contains $name or whose aliases include it.
     *
     * @param string $name  text to look for
     * @param int    $limit maximum number of nodes to return
     * @return mixed the result of query() for "RETURN n"
     */
    public function findSimilar($name, $limit = 5)
    {
        // Single-quoted segments keep "$name" / "$limit" as Cypher
        // parameters; inside double quotes PHP would paste the raw values
        // into the statement instead of binding them.
        $query = "MATCH (n:{$this->label}) "
            . 'WHERE n.name CONTAINS $name OR $name IN n.aliases '
            . 'RETURN n LIMIT $limit';

        return $this->query($query, [
            'name' => (string) $name,
            'limit' => max(0, (int) $limit),
        ]);
    }
}
2. 知识关系管理
// app/model/relation/Relation.php
namespace app\model\relation;

use think\Model;

/**
 * Knowledge-graph relationship model.
 *
 * Builds Cypher statements for creating relationships and for finding
 * paths between two nodes, and runs them through query() on the
 * "neo4j" connection.
 */
class Relation extends Model
{
    protected $connection = 'neo4j';

    /**
     * Create a relationship of the given type from one node to another.
     *
     * @param mixed  $fromId value matched against a.id
     * @param mixed  $toId   value matched against b.id
     * @param string $type   relationship type
     * @param array  $props  relationship property name => value
     * @return mixed the result of query() for "RETURN r"
     * @throws \InvalidArgumentException when the type or a property name is not a plain identifier
     */
    public function createRelation($fromId, $toId, $type, $props = [])
    {
        // The type and the property names are written into the statement
        // text, so they are restricted to plain identifiers.
        $identifier = '/^[A-Za-z_][A-Za-z0-9_]*$/';
        if (!preg_match($identifier, (string) $type)) {
            throw new \InvalidArgumentException("Invalid relationship type: {$type}");
        }

        $assignments = [];
        $params = ['fromId' => $fromId, 'toId' => $toId];
        foreach ($props as $key => $value) {
            if (!preg_match($identifier, (string) $key)) {
                throw new \InvalidArgumentException("Invalid property name: {$key}");
            }
            // The "p_" prefix keeps a property called "fromId" or "toId"
            // from overwriting the parameters used in the WHERE clause.
            $assignments[] = "{$key}: \$p_{$key}";
            $params['p_' . $key] = $value;
        }

        // "$fromId" / "$toId" stay literal here so they are bound as
        // parameters rather than interpolated by PHP.
        $query = 'MATCH (a), (b) WHERE a.id = $fromId AND b.id = $toId '
            . "CREATE (a)-[r:{$type} {" . implode(', ', $assignments) . '}]->(b) RETURN r';

        return $this->query($query, $params);
    }

    /**
     * Find the paths connecting two nodes within a number of hops.
     *
     * @param mixed $fromId value matched against a.id
     * @param mixed $toId   value matched against b.id
     * @param int   $depth  maximum path length (at least 1)
     * @return mixed the result of query() for "RETURN path"
     */
    public function findRelations($fromId, $toId, $depth = 3)
    {
        // The hop bound is part of the pattern text, so it is forced to a
        // positive integer before being placed into the statement.
        $depth = max(1, (int) $depth);

        $query = "MATCH path = (a)-[*1..{$depth}]-(b) "
            . 'WHERE a.id = $fromId AND b.id = $toId '
            . 'RETURN path';

        return $this->query($query, [
            'fromId' => $fromId,
            'toId' => $toId,
        ]);
    }
}
四、自然语言处理
1. 问句解析服务
// app/service/NlpService.php
namespace app\service;

use Fukuball\Jieba\Finalseg;
use Fukuball\Jieba\Jieba;
use Fukuball\Jieba\Posseg;

/**
 * Question parsing: word segmentation with part-of-speech tags, entity
 * detection and keyword-based question classification.
 */
class NlpService
{
    // Part-of-speech tags treated as entity candidates.
    private const ENTITY_TAGS = ['n', 'nr', 'ns', 'nt', 'nz'];

    // Question type => keywords; the first type with a matching keyword wins.
    private const QUESTION_TYPES = [
        'definition' => ['是什么', '什么是', '定义'],
        'relation' => ['关系', '关联', '联系'],
        'property' => ['属性', '特点', '特征'],
    ];

    /**
     * Load the jieba dictionaries and, when configured, the user dictionary.
     */
    public function __construct()
    {
        // jieba-php exposes a static API; Posseg supplies the POS tags.
        Jieba::init(['dict' => 'small']);
        Finalseg::init();
        Posseg::init();

        // The user dictionary is loaded through its own call rather than
        // through an init() option.
        $userDict = config('nlp.user_dict');
        if (!empty($userDict)) {
            Jieba::loadUserDict($userDict);
        }
    }

    /**
     * Segment a question and tag every word.
     *
     * @param string $question
     * @return array list of ['word' => string, 'tag' => string, 'is_entity' => bool]
     */
    public function segment($question)
    {
        $segments = [];
        // Posseg::cut() yields word/tag pairs in sentence order.
        foreach (Posseg::cut($question) as $token) {
            $segments[] = [
                'word' => $token['word'],
                'tag' => $token['tag'],
                'is_entity' => $this->isEntity($token['word'], $token['tag']),
            ];
        }

        return $segments;
    }

    /**
     * A word counts as an entity when its tag is one of ENTITY_TAGS and
     * it is longer than one character.
     */
    private function isEntity($word, $tag)
    {
        return in_array($tag, self::ENTITY_TAGS, true) && mb_strlen($word) > 1;
    }

    /**
     * Classify a question by the first keyword found in it.
     *
     * @param string $question
     * @return string 'definition', 'relation', 'property' or 'general'
     */
    public function classify($question)
    {
        foreach (self::QUESTION_TYPES as $type => $keywords) {
            foreach ($keywords as $keyword) {
                if (strpos($question, $keyword) !== false) {
                    return $type;
                }
            }
        }

        return 'general';
    }
}
2. 语义相似度计算
// 使用腾讯云NLP计算相似度
/**
 * Ask the Tencent Cloud NLP service how similar two texts are.
 *
 * @param string $text1
 * @param string $text2
 * @return mixed the 'similarity' field of the response, or 0 when absent
 * @throws \Exception with the client's error when the call returns false
 */
public function semanticSimilarity($text1, $text2)
{
    // NOTE(review): the class name had lost its namespace separators;
    // \QcloudApi\QcloudApi is the presumed original - confirm against the
    // SDK version that is actually installed.
    $client = new \QcloudApi\QcloudApi([
        'module' => 'wenzhi',
        'secretId' => config('nlp.secret_id'),
        'secretKey' => config('nlp.secret_key'),
    ]);

    $response = $client->TextSimilarity([
        'text1' => $text1,
        'text2' => $text2,
    ]);

    if ($response === false) {
        // Fully qualified so it resolves to the built-in class even when
        // this method lives inside a namespaced class.
        throw new \Exception($client->getError());
    }

    return $response['similarity'] ?? 0;
}
// 本地词向量相似度计算
/**
 * Cosine similarity between the sentence vectors of two word lists.
 *
 * Both vectors come from getSentenceVector() and are treated as maps
 * whose keys identify the components; a key missing from the second
 * vector contributes 0 to the dot product.
 *
 * @param mixed $words1 input for the first sentence vector
 * @param mixed $words2 input for the second sentence vector
 * @return int|float 0 when either vector is empty or has zero length
 */
public function wordVectorSimilarity($words1, $words2)
{
    $left = $this->getSentenceVector($words1);
    $right = $this->getSentenceVector($words2);

    if (empty($left) || empty($right)) {
        return 0;
    }

    $square = fn($component) => $component * $component;

    // Dot product over the components present in the first vector.
    $dot = 0;
    foreach ($left as $term => $weight) {
        $dot += $weight * ($right[$term] ?? 0);
    }

    // Product of the two Euclidean lengths.
    $norm = sqrt(array_sum(array_map($square, $left)))
        * sqrt(array_sum(array_map($square, $right)));

    return $norm ? $dot / $norm : 0;
}
五、问答引擎实现
1. 多策略答案检索
// app/service/QaService.php
namespace app\service;

use app\library\EsClient;
use app\model\entity\Concept;
use app\model\relation\Relation;

/**
 * Question answering entry point: parses the question, collects
 * candidate answers from the knowledge graph and from Elasticsearch,
 * and returns the three best-scored ones.
 */
class QaService
{
    private $nlpService;
    private $esClient;

    public function __construct()
    {
        $this->nlpService = new NlpService();
        $this->esClient = new EsClient();
    }

    /**
     * Answer a question.
     *
     * @param string $question
     * @param array  $context  accepted for callers; not used by this method
     * @return array up to three answers ['type', 'content', 'source', 'score'], best first
     */
    public function answer($question, $context = [])
    {
        // 1. Parse the question.
        $segments = $this->nlpService->segment($question);
        $questionType = $this->nlpService->classify($question);

        // 2. Keep only the words flagged as entities.
        $entityNames = array_column(
            array_filter($segments, fn($segment) => $segment['is_entity']),
            'word'
        );

        // 3. Collect candidates: knowledge graph first (only when the
        //    question mentions an entity), then full-text search.
        $answers = [];
        if (!empty($entityNames)) {
            $answers = $this->searchFromKnowledgeGraph($entityNames, $questionType);
        }
        $answers = array_merge($answers, $this->searchFromElasticsearch($question));

        // 4. Highest score first.
        usort($answers, fn($a, $b) => $b['score'] <=> $a['score']);

        return array_slice($answers, 0, 3);
    }

    /**
     * Look the entities up in the knowledge graph.
     *
     * 'definition' questions produce one candidate per similar concept of
     * every entity; 'relation' questions produce one candidate per path
     * between the first two entities.
     *
     * NOTE(review): calculateScore() and formatPath() are called below but
     * are not defined in this class - they have to be provided.
     * NOTE(review): findRelations() matches on node id while entity names
     * are passed here - confirm that names are used as ids.
     *
     * @param array  $entities     list of entity names
     * @param string $questionType result of NlpService::classify()
     * @return array list of candidate answers
     */
    private function searchFromKnowledgeGraph($entities, $questionType)
    {
        $results = [];

        if ($questionType === 'definition') {
            $conceptModel = new Concept();
            foreach ($entities as $entity) {
                foreach ($conceptModel->findSimilar($entity) as $node) {
                    $results[] = [
                        'type' => 'definition',
                        'content' => $node['desc'],
                        'source' => $node['name'],
                        'score' => $this->calculateScore($entity, $node['name']),
                    ];
                }
            }
        }

        // Queried once, outside the per-entity loop: the lookup only ever
        // involves the first two entities, so repeating it per entity
        // would just duplicate every path in the result.
        if ($questionType === 'relation' && count($entities) >= 2) {
            $relationModel = new Relation();
            foreach ($relationModel->findRelations($entities[0], $entities[1]) as $path) {
                $results[] = [
                    'type' => 'relation',
                    'content' => $this->formatPath($path),
                    'source' => implode('->', $entities),
                    'score' => 0.8,
                ];
            }
        }

        return $results;
    }

    /**
     * Full-text search over the 'qa_index' index.
     *
     * @param string $question
     * @return array list of candidate answers of type 'document'
     */
    private function searchFromElasticsearch($question)
    {
        $response = $this->esClient->search([
            'index' => 'qa_index',
            'body' => [
                'query' => [
                    'multi_match' => [
                        'query' => $question,
                        // A match in the question field weighs three times
                        // as much as a match in the answer field.
                        'fields' => ['question^3', 'answer'],
                    ],
                ],
            ],
        ]);

        $results = [];
        // Defaults guard against an empty response and against documents
        // that lack one of the expected fields.
        foreach ($response['hits']['hits'] ?? [] as $hit) {
            $results[] = [
                'type' => 'document',
                'content' => $hit['_source']['answer'] ?? '',
                'source' => $hit['_source']['title'] ?? '',
                // Scaled down to be comparable with the graph scores; the
                // result is not capped at 1.
                'score' => ($hit['_score'] ?? 0) / 10,
            ];
        }

        return $results;
    }
}
2. 对话上下文管理
// app/service/DialogService.php
namespace app\service;

use think\facade\Cache;

/**
 * Keeps the recent turns of a conversation in the cache, keyed by a
 * session id.
 */
class DialogService
{
    // Maximum number of question/answer turns kept per session.
    private const MAX_TURNS = 5;

    private $expire = 1800; // 30 minutes

    /**
     * Start a conversation session for a user.
     *
     * @param mixed $userId
     * @return string 32-character hexadecimal session id
     * @throws \Exception when no source of randomness is available
     */
    public function createSession($userId)
    {
        // Random rather than derived from the user id and the clock, so a
        // session id cannot be guessed.
        $sessionId = bin2hex(random_bytes(16));

        Cache::set("dialog:$sessionId", [
            'user_id' => $userId,
            'context' => [],
            'created_at' => time(),
        ], $this->expire);

        return $sessionId;
    }

    /**
     * Append one question/answer turn and refresh the session's lifetime.
     *
     * @param string $sessionId
     * @param string $question
     * @param string $answer
     * @return bool false when the session does not exist (or has expired)
     */
    public function updateContext($sessionId, $question, $answer)
    {
        $data = Cache::get("dialog:$sessionId");
        if (!$data) {
            return false;
        }

        $data['context'][] = [
            'question' => $question,
            'answer' => $answer,
            'time' => time(),
        ];

        // Keep only the most recent turns.
        $data['context'] = array_slice($data['context'], -self::MAX_TURNS);

        Cache::set("dialog:$sessionId", $data, $this->expire);

        return true;
    }

    /**
     * Render the stored turns as plain text.
     *
     * @param string $sessionId
     * @return string '' when the session is missing or has no turns yet
     */
    public function getContextSummary($sessionId)
    {
        $data = Cache::get("dialog:$sessionId");
        if (!$data || empty($data['context'])) {
            return '';
        }

        $summary = "之前的对话内容:\n";
        foreach ($data['context'] as $item) {
            $summary .= "问:{$item['question']}\n";
            $summary .= "答:{$item['answer']}\n\n";
        }

        return $summary;
    }
}
六、性能优化策略
1. 缓存优化方案
// app/service/KgService.php
namespace app\service;

use app\model\entity\Concept;
use think\facade\Cache;

/**
 * Cached access to concept lookups in the knowledge graph.
 */
class KgService
{
    private $cachePrefix = 'kg:';
    private $expire = 86400; // 24 hours

    /**
     * Look up the concepts similar to $name, using the cache when possible.
     *
     * Only non-empty lookup results are stored, so a name without any
     * match is queried again on the next call.
     *
     * @param string $name
     * @return mixed the cached value or the result of Concept::findSimilar()
     */
    public function getConceptWithCache($name)
    {
        $cacheKey = $this->cacheKey($name);

        // One read instead of has() followed by get(): saves a round trip
        // and cannot miss an entry that expires between the two calls.
        $cached = Cache::get($cacheKey);
        if ($cached !== null) {
            return $cached;
        }

        $concept = (new Concept())->findSimilar($name);
        if ($concept) {
            Cache::set($cacheKey, $concept, $this->expire);
        }

        return $concept;
    }

    /**
     * Fill the cache for a list of concept names that are not cached yet.
     *
     * @param iterable $conceptNames
     * @return void
     */
    public function warmUpCache($conceptNames)
    {
        foreach ($conceptNames as $name) {
            $this->getConceptWithCache($name);
        }
    }

    /**
     * Drop the cached lookup for one concept name.
     *
     * @param string $conceptName
     * @return void
     */
    public function clearCache($conceptName)
    {
        Cache::delete($this->cacheKey($conceptName));
    }

    /**
     * Cache key for a concept name; the name is hashed so that any
     * characters may appear in it.
     */
    private function cacheKey($name)
    {
        return $this->cachePrefix . 'concept:' . md5($name);
    }
}
2. Swoole协程优化
// 并行查询优化
/**
 * Run the knowledge-graph, Elasticsearch and FAQ searches concurrently
 * and return the three best-scored answers.
 *
 * NOTE(review): searchFromFaq() is not defined in the code shown here -
 * it has to be provided by the enclosing class.
 * NOTE(review): presumably has to be called from inside a coroutine -
 * confirm for the deployment in use.
 *
 * @param string $question
 * @return array up to three answers, best first
 */
public function parallelSearch($question)
{
    $segments = $this->nlpService->segment($question);
    // searchFromKnowledgeGraph() needs the question type as well.
    $questionType = $this->nlpService->classify($question);
    $entities = array_column(
        array_filter($segments, fn($segment) => $segment['is_entity']),
        'word'
    );

    // One coroutine per source; results come back under the same keys.
    $results = \Swoole\Coroutine\batch([
        'kg' => fn() => $this->searchFromKnowledgeGraph($entities, $questionType),
        'es' => fn() => $this->searchFromElasticsearch($question),
        'faq' => fn() => $this->searchFromFaq($question),
    ]);

    // A task that produced nothing leaves null under its key; treat that
    // as "no answers" so that array_merge() receives arrays only.
    $answers = array_merge(
        $results['kg'] ?? [],
        $results['es'] ?? [],
        $results['faq'] ?? []
    );

    usort($answers, fn($a, $b) => $b['score'] <=> $a['score']);

    return array_slice($answers, 0, 3);
}
// 协程HTTP客户端
/**
 * POST a text to the remote NLP endpoint as JSON using the coroutine
 * HTTP client.
 *
 * @param string $text
 * @return mixed the client's response object for the request
 */
public function queryRemoteNlp($text)
{
    // post() takes the request body as its second argument and the
    // headers as its fourth; an array body would be sent as form fields.
    return \Swoole\Coroutine\Http\post(
        'http://nlp-api.com/process',
        json_encode(['text' => $text]),
        null,
        ['Content-Type' => 'application/json']
    );
}
七、系统部署方案
1. Docker生产环境部署
# docker-compose.yml
version: '3.8'
services:
  # Application container (Swoole HTTP server on port 9501).
  app:
    build:
      context: .
      dockerfile: Dockerfile
    image: tp-qa-system
    container_name: qa-app
    restart: unless-stopped
    ports:
      - "9501:9501"
    depends_on:
      - neo4j
      - elasticsearch
      - redis
  neo4j:
    image: neo4j:4.4
    container_name: qa-neo4j
    ports:
      - "7474:7474"
      - "7687:7687"
    volumes:
      - neo4j_data:/data
    environment:
      # Change this password for any non-local deployment.
      NEO4J_AUTH: neo4j/password
  elasticsearch:
    image: elasticsearch:8.5.0
    container_name: qa-es
    environment:
      - discovery.type=single-node
      # Security is switched off here; do not expose port 9200 publicly.
      - xpack.security.enabled=false
    ports:
      - "9200:9200"
    volumes:
      - es_data:/usr/share/elasticsearch/data
  # Listed under depends_on of the app service, so it has to be defined.
  redis:
    image: redis:7-alpine
    container_name: qa-redis
    restart: unless-stopped
volumes:
  neo4j_data:
  es_data:
八、总结与扩展
本教程构建了一个智能问答系统:
- 实现了知识图谱构建
- 开发了自然语言处理模块
- 设计了多策略问答引擎
- 优化了系统性能
- 配置了生产环境
扩展方向:
- 深度学习模型集成
- 多语言支持
- 语音交互接口
- 知识自动抽取
完整项目代码已开源:https://github.com/example/tp-qa-system