ThinkPHP6知识图谱问答系统开发 | 自然语言处理与智能搜索实战

2025-08-14 0 776

一、系统架构设计

本教程将基于ThinkPHP 6.1构建一个知识图谱驱动的智能问答系统,实现自然语言理解和精准答案检索。

技术架构:

  • 核心框架:ThinkPHP 6.1 + Swoole
  • 知识存储:Neo4j图数据库 + MySQL
  • NLP处理:Jieba分词 + 腾讯云NLP
  • 搜索引擎:Elasticsearch 8.x
  • 前端交互:Vue3 + Element Plus

核心功能模块:

  1. 知识图谱构建系统
  2. 问句语义解析引擎
  3. 多策略答案检索
  4. 对话上下文管理
  5. 智能推荐系统

二、项目初始化与配置

1. 项目创建与扩展安装

# Create the ThinkPHP 6 project
composer create-project topthink/think tp-qa-system

# Install the required packages
cd tp-qa-system
composer require topthink/think-swoole
# The Jieba port that provides the Fukuball\Jieba namespace is published
# on Packagist as fukuball/jieba-php
composer require fukuball/jieba-php
composer require elasticsearch/elasticsearch

# Configure the database connections
// config/database.php
// Returns the connection map: one MySQL connection and one Neo4j connection.
return [
    'connections' => [
        // Relational store for the QA system
        'mysql' => [
            'type'      => 'mysql',
            'hostname'  => '127.0.0.1',
            'database'  => 'qa_system',
            'username'  => 'root',
            // Empty root password as shown here; NOTE(review): presumably a local
            // development setting only - confirm before deploying
            'password'  => '',
            'charset'   => 'utf8mb4',
        ],
        // Graph store reached over the Bolt protocol.
        // NOTE(review): a 'neo4j' connection type presumably needs a custom
        // connector class registered with the ORM - confirm it exists
        'neo4j' => [
            'type'      => 'neo4j',
            'host'      => 'bolt://localhost',
            'username'  => 'neo4j',
            'password'  => 'password',
        ]
    ]
];

2. 目录结构设计

tp-qa-system/
├── app/
│   ├── controller/
│   ├── service/      # 服务层
│   │   ├── QaService.php
│   │   ├── NlpService.php
│   │   ├── DialogService.php
│   │   └── KgService.php
│   ├── model/
│   │   ├── entity/   # 知识实体
│   │   └── relation/ # 知识关系
│   ├── library/      # 类库
│   │   ├── Neo4jClient.php
│   │   └── EsClient.php
│   ├── middleware/   # 中间件
│   └── util/         # 工具类
├── config/
│   ├── nlp.php       # NLP配置
│   └── elastic.php   # ES配置
├── extend/
└── public/

三、知识图谱构建

1. 知识实体建模

// app/model/entity/Concept.php
namespace app\model\entity;

use think\Model;

/**
 * Knowledge-graph node model for the "Concept" label, bound to the 'neo4j'
 * connection.
 *
 * All values are sent to the database as Cypher parameters. Labels and
 * property names cannot be parameterised in Cypher, so they are validated
 * before being placed into the query text.
 */
class Concept extends Model
{
    protected $connection = 'neo4j';

    // Node label used in every query built by this model
    public $label = 'Concept';

    // Node property schema
    protected $schema = [
        'id'      => 'string',
        'name'    => 'string',
        'desc'    => 'string',
        'aliases' => 'array'
    ];

    /**
     * Create a node carrying the given properties.
     *
     * @param array $data Property name => value map
     * @return mixed Whatever the underlying query() call returns
     * @throws \InvalidArgumentException When the label or a property name is not a plain identifier
     */
    public function createNode($data)
    {
        $this->assertCypherIdentifier($this->label);

        $assignments = [];
        $params = [];

        foreach ($data as $key => $value) {
            $this->assertCypherIdentifier($key);
            // Produces e.g. "name: $name" - the value itself travels as a parameter
            $assignments[] = "{$key}: \${$key}";
            $params[$key] = $value;
        }

        $query = "CREATE (n:{$this->label} {" . implode(',', $assignments) . "}) RETURN n";

        return $this->query($query, $params);
    }

    /**
     * Find concepts whose name contains $name or whose aliases include it.
     *
     * @param string $name  Text to look for
     * @param int    $limit Maximum number of nodes to return
     * @return mixed Whatever the underlying query() call returns
     * @throws \InvalidArgumentException When the label is not a plain identifier
     */
    public function findSimilar($name, $limit = 5)
    {
        $this->assertCypherIdentifier($this->label);

        // The dollar signs are escaped so that PHP leaves $name / $limit in the
        // query text as Cypher parameters instead of splicing the raw values in
        $query = "MATCH (n:{$this->label})
                 WHERE n.name CONTAINS \$name OR \$name IN n.aliases
                 RETURN n LIMIT \$limit";

        return $this->query($query, [
            'name'  => $name,
            'limit' => max(0, (int) $limit),
        ]);
    }

    /**
     * Reject anything that is not a plain identifier before it is interpolated
     * into Cypher text.
     *
     * @param mixed $identifier Label or property name
     * @throws \InvalidArgumentException
     */
    private function assertCypherIdentifier($identifier)
    {
        if (!is_string($identifier) || preg_match('/^[A-Za-z_][A-Za-z0-9_]*$/', $identifier) !== 1) {
            throw new \InvalidArgumentException('Invalid Cypher identifier');
        }
    }
}

2. 知识关系管理

// app/model/relation/Relation.php
namespace app\model\relation;

use think\Model;

/**
 * Relationship model for the knowledge graph, bound to the 'neo4j' connection.
 *
 * Node ids and property values are passed as Cypher parameters. Relationship
 * types, property names and path depth cannot be parameterised, so they are
 * validated or cast before being placed into the query text.
 */
class Relation extends Model
{
    protected $connection = 'neo4j';

    /**
     * Create a relationship of type $type from node $fromId to node $toId.
     *
     * @param mixed  $fromId Value of the `id` property of the start node
     * @param mixed  $toId   Value of the `id` property of the end node
     * @param string $type   Relationship type
     * @param array  $props  Property name => value map for the relationship
     * @return mixed Whatever the underlying query() call returns
     * @throws \InvalidArgumentException When the type or a property name is not a plain identifier
     */
    public function createRelation($fromId, $toId, $type, $props = [])
    {
        $this->assertCypherIdentifier($type);

        $params = ['fromId' => $fromId, 'toId' => $toId];
        $assignments = [];

        foreach ($props as $key => $value) {
            $this->assertCypherIdentifier($key);
            // Property parameters are prefixed so that a property called
            // "fromId" or "toId" cannot overwrite the node-matching parameters
            $assignments[] = "{$key}: \$prop_{$key}";
            $params["prop_{$key}"] = $value;
        }

        // Escaped dollar signs keep $fromId / $toId as Cypher parameters
        $query = "MATCH (a), (b)
                 WHERE a.id = \$fromId AND b.id = \$toId
                 CREATE (a)-[r:{$type} {" . implode(',', $assignments) . "}]->(b) RETURN r";

        return $this->query($query, $params);
    }

    /**
     * Find undirected paths between two nodes, up to $depth hops long.
     *
     * @param mixed $fromId Value of the `id` property of one end node
     * @param mixed $toId   Value of the `id` property of the other end node
     * @param int   $depth  Maximum path length; values below 1 are raised to 1
     * @return mixed Whatever the underlying query() call returns
     */
    public function findRelations($fromId, $toId, $depth = 3)
    {
        // The upper bound of a variable-length pattern must be a literal, so it
        // is forced to a positive integer before interpolation
        $depth = max(1, (int) $depth);

        $query = "MATCH path = (a)-[*1..{$depth}]-(b)
                 WHERE a.id = \$fromId AND b.id = \$toId
                 RETURN path";

        return $this->query($query, [
            'fromId' => $fromId,
            'toId' => $toId
        ]);
    }

    /**
     * Reject anything that is not a plain identifier before it is interpolated
     * into Cypher text.
     *
     * @param mixed $identifier Relationship type or property name
     * @throws \InvalidArgumentException
     */
    private function assertCypherIdentifier($identifier)
    {
        if (!is_string($identifier) || preg_match('/^[A-Za-z_][A-Za-z0-9_]*$/', $identifier) !== 1) {
            throw new \InvalidArgumentException('Invalid Cypher identifier');
        }
    }
}

四、自然语言处理

1. 问句解析服务

// app/service/NlpService.php
namespace app\service;

use Fukuball\Jieba\Finalseg;
use Fukuball\Jieba\Jieba;
use Fukuball\Jieba\Posseg;

/**
 * Question parsing: word segmentation with part-of-speech tags, entity
 * detection and keyword-based question classification.
 */
class NlpService
{
    // Set once the segmenter dictionaries are loaded, so that creating several
    // service instances does not reload them
    private static $initialised = false;

    /**
     * Load the jieba-php dictionaries on first use.
     *
     * jieba-php exposes a static API; the optional user dictionary is loaded
     * with Jieba::loadUserDict() from the path stored under `nlp.user_dict`.
     */
    public function __construct()
    {
        if (self::$initialised) {
            return;
        }

        Jieba::init(['dict' => 'small']);
        Finalseg::init();
        Posseg::init();

        $userDict = config('nlp.user_dict');
        if (!empty($userDict)) {
            Jieba::loadUserDict($userDict);
        }

        self::$initialised = true;
    }

    /**
     * Segment a question and tag every token.
     *
     * @param string $question Raw question text
     * @return array List of ['word' => string, 'tag' => string, 'is_entity' => bool]
     */
    public function segment($question)
    {
        // Posseg::cut() yields one ['word' => ..., 'tag' => ...] pair per token
        $tokens = Posseg::cut($question);

        return array_map(function ($token) {
            return [
                'word' => $token['word'],
                'tag' => $token['tag'],
                'is_entity' => $this->isEntity($token['word'], $token['tag'])
            ];
        }, $tokens);
    }

    /**
     * A token counts as an entity when it carries a noun-type tag and is longer
     * than one character.
     *
     * @param string $word Token text
     * @param string $tag  Part-of-speech tag
     * @return bool
     */
    private function isEntity($word, $tag)
    {
        $entityTags = ['n', 'nr', 'ns', 'nt', 'nz'];
        return in_array($tag, $entityTags, true) && mb_strlen($word) > 1;
    }

    /**
     * Classify a question by the first keyword group that matches.
     *
     * Groups are tested in the order definition, relation, property.
     *
     * @param string $question Raw question text
     * @return string 'definition', 'relation', 'property' or 'general'
     */
    public function classify($question)
    {
        $types = [
            'definition' => ['是什么', '什么是', '定义'],
            'relation' => ['关系', '关联', '联系'],
            'property' => ['属性', '特点', '特征']
        ];

        foreach ($types as $type => $keywords) {
            foreach ($keywords as $keyword) {
                if (strpos($question, $keyword) !== false) {
                    return $type;
                }
            }
        }

        return 'general';
    }
}

2. 语义相似度计算

// Compute text similarity through the Tencent Cloud NLP ("wenzhi") service
/**
 * @param string $text1 First text
 * @param string $text2 Second text
 * @return mixed The `similarity` field of the response, or 0 when it is absent
 * @throws \Exception When the client reports a failed request
 */
public function semanticSimilarity($text1, $text2)
{
    // NOTE(review): the class name is reconstructed as \QcloudApi\QcloudApi from
    // a listing that lost its namespace separators - confirm against the SDK
    // version actually installed
    $client = new \QcloudApi\QcloudApi([
        'module' => 'wenzhi',
        'secretId' => config('nlp.secret_id'),
        'secretKey' => config('nlp.secret_key')
    ]);

    $response = $client->TextSimilarity([
        'text1' => $text1,
        'text2' => $text2
    ]);

    if ($response === false) {
        // Fully qualified so that it resolves to the built-in class from inside
        // a namespaced service class
        throw new \Exception($client->getError());
    }

    return $response['similarity'] ?? 0;
}

// Cosine similarity between the locally computed sentence vectors of two word lists
/**
 * @param mixed $words1 Words of the first sentence, passed to getSentenceVector()
 * @param mixed $words2 Words of the second sentence, passed to getSentenceVector()
 * @return int|float 0 when either vector is empty or has zero length,
 *                   otherwise dot product divided by the product of the norms
 */
public function wordVectorSimilarity($words1, $words2)
{
    $left = $this->getSentenceVector($words1);
    $right = $this->getSentenceVector($words2);

    // Nothing to compare when either sentence produced no vector
    if (empty($left) || empty($right)) {
        return 0;
    }

    $dot = 0;
    $leftSquares = 0;
    foreach ($left as $dimension => $weight) {
        // Dimensions missing from the right-hand vector contribute nothing
        $dot += $weight * ($right[$dimension] ?? 0);
        $leftSquares += $weight * $weight;
    }

    $rightSquares = 0;
    foreach ($right as $weight) {
        $rightSquares += $weight * $weight;
    }

    $norm = sqrt($leftSquares) * sqrt($rightSquares);
    if (!$norm) {
        return 0;
    }

    return $dot / $norm;
}

五、问答引擎实现

1. 多策略答案检索

// app/service/QaService.php
namespace app\service;

use app\library\EsClient;
use app\model\entity\Concept;
use app\model\relation\Relation;

/**
 * Question-answering entry point combining knowledge-graph lookups with
 * full-text search.
 *
 * NOTE(review): calculateScore() and formatPath() are called below but are not
 * defined in this listing; they must be supplied before the class is usable.
 */
class QaService
{
    private $nlpService;
    private $esClient;

    public function __construct()
    {
        $this->nlpService = new NlpService();
        $this->esClient = new EsClient();
    }

    /**
     * Answer a question with the three best-scoring candidates.
     *
     * @param string $question Raw question text
     * @param array  $context  Dialogue context; accepted but not used here
     * @return array Up to three answers, highest score first
     */
    public function answer($question, $context = [])
    {
        // 1. Parse the question
        $segments = $this->nlpService->segment($question);
        $questionType = $this->nlpService->classify($question);

        // 2. Extract entity words; duplicates are dropped so that the same
        //    entity is not looked up twice or paired with itself
        $entities = array_filter($segments, fn($s) => $s['is_entity']);
        $entityNames = array_values(array_unique(array_column($entities, 'word')));

        // 3. Collect candidates from every strategy
        $answers = [];

        if (!empty($entityNames)) {
            $answers = array_merge(
                $answers,
                $this->searchFromKnowledgeGraph($entityNames, $questionType)
            );
        }

        $answers = array_merge(
            $answers,
            $this->searchFromElasticsearch($question)
        );

        // 4. Rank by score, best first
        usort($answers, function ($a, $b) {
            return $b['score'] <=> $a['score'];
        });

        return array_slice($answers, 0, 3);
    }

    /**
     * Look the entities up in the knowledge graph.
     *
     * @param array  $entities     Entity words in question order
     * @param string $questionType Result of NlpService::classify()
     * @return array List of ['type', 'content', 'source', 'score'] answers
     */
    private function searchFromKnowledgeGraph($entities, $questionType)
    {
        $results = [];

        // Definition questions: one lookup per entity
        if ($questionType === 'definition') {
            $conceptModel = new Concept();
            foreach ($entities as $entity) {
                $nodes = $conceptModel->findSimilar($entity);
                foreach ($nodes as $node) {
                    $results[] = [
                        'type' => 'definition',
                        'content' => $node['desc'],
                        'source' => $node['name'],
                        'score' => $this->calculateScore($entity, $node['name'])
                    ];
                }
            }
        }

        // Relation questions: the first two entities are compared once. Doing
        // this inside the per-entity loop would repeat the identical query and
        // duplicate every path in the result.
        // NOTE(review): Relation::findRelations() matches on the node `id`
        // property while entity words are passed here - confirm that ids and
        // names coincide, or resolve names to ids first
        if ($questionType === 'relation' && count($entities) >= 2) {
            $relationModel = new Relation();
            $paths = $relationModel->findRelations($entities[0], $entities[1]);
            foreach ($paths as $path) {
                $results[] = [
                    'type' => 'relation',
                    'content' => $this->formatPath($path),
                    'source' => implode('->', $entities),
                    'score' => 0.8
                ];
            }
        }

        return $results;
    }

    /**
     * Full-text search over the `qa_index` index.
     *
     * @param string $question Raw question text
     * @return array List of ['type', 'content', 'source', 'score'] answers
     */
    private function searchFromElasticsearch($question)
    {
        $results = [];
        $response = $this->esClient->search([
            'index' => 'qa_index',
            'body' => [
                'query' => [
                    'multi_match' => [
                        'query' => $question,
                        // A match in the question field weighs three times as much
                        'fields' => ['question^3', 'answer']
                    ]
                ]
            ]
        ]);

        // A response without hits yields no answers instead of a warning
        foreach ($response['hits']['hits'] ?? [] as $hit) {
            $results[] = [
                'type' => 'document',
                'content' => $hit['_source']['answer'] ?? '',
                'source' => $hit['_source']['title'] ?? '',
                // Scales the raw relevance score down by ten; the result is not
                // capped, so it can still exceed 1
                'score' => ($hit['_score'] ?? 0) / 10
            ];
        }

        return $results;
    }
}

2. 对话上下文管理

// app/service/DialogService.php
namespace app\service;

use think\facade\Cache;

/**
 * Keeps short-lived dialogue sessions in the cache so that follow-up questions
 * can see the most recent exchanges.
 */
class DialogService
{
    private $expire = 1800; // 30 minutes

    /**
     * Open a new dialogue session.
     *
     * @param mixed $userId Owner of the session
     * @return string 32-character hexadecimal session id
     * @throws \Exception When no source of randomness is available
     */
    public function createSession($userId)
    {
        // The id is the only key guarding the stored conversation, so it is
        // drawn from a CSPRNG rather than derived from the user id and the clock
        $sessionId = bin2hex(random_bytes(16));
        Cache::set("dialog:$sessionId", [
            'user_id' => $userId,
            'context' => [],
            'created_at' => time()
        ], $this->expire);

        return $sessionId;
    }

    /**
     * Append one question/answer round and refresh the session lifetime.
     *
     * @param string $sessionId Id returned by createSession()
     * @param string $question  Question asked in this round
     * @param string $answer    Answer given in this round
     * @return bool False when the session does not exist (or has expired)
     */
    public function updateContext($sessionId, $question, $answer)
    {
        $data = Cache::get("dialog:$sessionId");
        if (!$data) {
            return false;
        }

        $data['context'][] = [
            'question' => $question,
            'answer' => $answer,
            'time' => time()
        ];

        // Keep only the five most recent rounds
        $data['context'] = array_slice($data['context'], -5);

        Cache::set("dialog:$sessionId", $data, $this->expire);
        return true;
    }

    /**
     * Render the stored rounds as plain text.
     *
     * @param string $sessionId Id returned by createSession()
     * @return string Empty string when there is no session or no history
     */
    public function getContextSummary($sessionId)
    {
        $data = Cache::get("dialog:$sessionId");
        if (!$data || empty($data['context'])) {
            return '';
        }

        $summary = "之前的对话内容:\n";
        foreach ($data['context'] as $item) {
            $summary .= "问:{$item['question']}\n";
            // A blank line separates consecutive rounds
            $summary .= "答:{$item['answer']}\n\n";
        }

        return $summary;
    }
}

六、性能优化策略

1. 缓存优化方案

// app/service/KgService.php
namespace app\service;

use app\model\entity\Concept;
use think\facade\Cache;

/**
 * Cached access to concept lookups in the knowledge graph.
 */
class KgService
{
    private $cachePrefix = 'kg:';
    private $expire = 86400; // 24 hours

    /**
     * Look a concept up, serving it from the cache when possible.
     *
     * @param string $name Concept name to search for
     * @return mixed Cached or freshly queried result of Concept::findSimilar()
     */
    public function getConceptWithCache($name)
    {
        return $this->resolveConcept(new Concept(), $name);
    }

    /**
     * Pre-populate the cache for a batch of concept names.
     *
     * @param iterable $conceptNames Names to warm up
     * @return void
     */
    public function warmUpCache($conceptNames)
    {
        // One model instance is shared by the whole batch
        $conceptModel = new Concept();
        foreach ($conceptNames as $name) {
            $this->resolveConcept($conceptModel, $name);
        }
    }

    /**
     * Drop the cached entry of one concept.
     *
     * @param string $conceptName Concept name whose entry is removed
     * @return void
     */
    public function clearCache($conceptName)
    {
        Cache::delete($this->conceptCacheKey($conceptName));
    }

    /**
     * Return the cached result for $name, querying and caching it on a miss.
     *
     * A single get() replaces the has()/get() pair: one round trip, and no
     * window in which the entry can expire between the two calls. Only truthy
     * results are ever stored, so null reliably means "not cached".
     *
     * @param Concept $conceptModel Model used for the lookup on a miss
     * @param string  $name         Concept name to search for
     * @return mixed
     */
    private function resolveConcept(Concept $conceptModel, $name)
    {
        $cacheKey = $this->conceptCacheKey($name);

        $cached = Cache::get($cacheKey);
        if ($cached !== null) {
            return $cached;
        }

        $concept = $conceptModel->findSimilar($name);
        if ($concept) {
            Cache::set($cacheKey, $concept, $this->expire);
        }

        return $concept;
    }

    /**
     * Build the cache key of a concept name.
     *
     * @param string $name Concept name
     * @return string Prefix, the literal "concept:" and the MD5 of the name
     */
    private function conceptCacheKey($name)
    {
        return $this->cachePrefix . 'concept:' . md5($name);
    }
}

2. Swoole协程优化

// Run every retrieval strategy concurrently in coroutines
/**
 * NOTE(review): \Swoole\Coroutine\batch() presumably has to be called from
 * inside a coroutine; searchFromFaq() is not defined in the listings shown -
 * confirm both against the surrounding class.
 *
 * @param string $question Raw question text
 * @return array Up to three answers, highest score first
 */
public function parallelSearch($question)
{
    $segments = $this->nlpService->segment($question);
    // The knowledge-graph search needs the question type as its second argument
    $questionType = $this->nlpService->classify($question);
    $entities = array_column(array_filter($segments,
        fn($s) => $s['is_entity']), 'word');

    $results = \Swoole\Coroutine\batch([
        'kg' => function () use ($entities, $questionType) {
            return $this->searchFromKnowledgeGraph($entities, $questionType);
        },
        'es' => function () use ($question) {
            return $this->searchFromElasticsearch($question);
        },
        'faq' => function () use ($question) {
            return $this->searchFromFaq($question);
        }
    ]);

    // A task that produced no result is treated as an empty answer list
    $answers = array_merge(
        $results['kg'] ?? [],
        $results['es'] ?? [],
        $results['faq'] ?? []
    );

    // Best score first
    usort($answers, fn($a, $b) => $b['score'] <=> $a['score']);

    return array_slice($answers, 0, 3);
}

// Coroutine HTTP client: send the text to the remote NLP endpoint as JSON
/**
 * @param string $text Text to process
 * @return mixed The response object returned by \Swoole\Coroutine\Http\post()
 */
public function queryRemoteNlp($text)
{
    // post() takes the request body as its second argument and the headers as
    // its fourth; an array holding 'headers' and 'body' keys in second position
    // would be sent as form fields instead
    return \Swoole\Coroutine\Http\post(
        'http://nlp-api.com/process',
        json_encode(['text' => $text]),
        null,
        ['Content-Type' => 'application/json']
    );
}

七、系统部署方案

1. Docker生产环境部署

# docker-compose.yml
version: '3.8'

services:
  # Application container, exposing port 9501
  app:
    build:
      context: .
      dockerfile: Dockerfile
    image: tp-qa-system
    container_name: qa-app
    restart: unless-stopped
    ports:
      - "9501:9501"
    depends_on:
      - neo4j
      - elasticsearch
      - redis

  # Graph database: 7474 is the HTTP port, 7687 the Bolt port
  neo4j:
    image: neo4j:4.4
    container_name: qa-neo4j
    ports:
      - "7474:7474"
      - "7687:7687"
    volumes:
      - neo4j_data:/data
    environment:
      NEO4J_AUTH: neo4j/password

  # Single-node Elasticsearch with security switched off
  elasticsearch:
    image: elasticsearch:8.5.0
    container_name: qa-es
    environment:
      - discovery.type=single-node
      - xpack.security.enabled=false
    ports:
      - "9200:9200"
    volumes:
      - es_data:/usr/share/elasticsearch/data

  # Defined because `app` lists it under depends_on; without this service
  # Compose rejects the file
  redis:
    image: redis:7
    container_name: qa-redis
    restart: unless-stopped

volumes:
  neo4j_data:
  es_data:

八、总结与扩展

本教程构建了一个智能问答系统:

  1. 实现了知识图谱构建
  2. 开发了自然语言处理模块
  3. 设计了多策略问答引擎
  4. 优化了系统性能
  5. 配置了生产环境

扩展方向:

  • 深度学习模型集成
  • 多语言支持
  • 语音交互接口
  • 知识自动抽取

完整项目代码已开源:https://github.com/example/tp-qa-system

ThinkPHP6知识图谱问答系统开发 | 自然语言处理与智能搜索实战
收藏 (0) 打赏

感谢您的支持,我会继续努力的!

打开微信/支付宝扫一扫,即可进行扫码打赏哦,分享从这里开始,精彩与您同在
点赞 (0)

淘吗网 thinkphp ThinkPHP6知识图谱问答系统开发 | 自然语言处理与智能搜索实战 https://www.taomawang.com/server/thinkphp/826.html

常见问题

相关文章

发表评论
暂无评论
官方客服团队

为您解决烦忧 - 24小时在线 专业服务