Published: March 2024
Tech stack: UniApp 3.8 · TensorFlow.js · Baidu AI · WeChat Simultaneous Interpretation (WechatSI)
I. Project Architecture and AI Capability Integration
This tutorial walks through building a full-featured intelligent application with the UniApp framework, combining several AI services. The app integrates image recognition, voice interaction, and real-time translation, and deploys across platforms.
Core AI capabilities:
- Image recognition: object detection, scene recognition, OCR text recognition
- Voice interaction: speech recognition, speech synthesis, real-time translation
- Smart recommendations: personalized recommendations based on user behavior
- Edge computing: optimized on-device AI model inference
- Multimodal interaction: combined processing of image, voice, and text
II. Intelligent Image Recognition System
1. Camera Component and Image Processing
Implement high-performance camera capture and image preprocessing:
<template>
<view class="camera-container">
<camera
class="camera"
device-position="back"
flash="off"
@error="onCameraError"
@stop="onCameraStop">
</camera>
<view class="control-panel">
<button @click="takePhoto" class="capture-btn">拍摄</button>
<button @click="switchCamera" class="switch-btn">切换摄像头</button>
<button @click="toggleFlash" class="flash-btn">{{flashText}}</button>
</view>
<!-- Real-time recognition results overlay -->
<view class="recognition-overlay" v-if="detections.length">
<view
v-for="(detection, index) in detections"
:key="index"
class="detection-box"
:style="{
left: detection.x + 'px',
top: detection.y + 'px',
width: detection.width + 'px',
height: detection.height + 'px'
}">
<text class="label">{{detection.label}} {{detection.confidence}}%</text>
</view>
</view>
</view>
</template>
<script setup>
import { ref, computed, onMounted } from 'vue'
const cameraContext = ref(null)
const flashMode = ref('off')
const detections = ref([])
const isProcessing = ref(false)
const flashText = computed(() => {
const texts = { off: 'Off', on: 'On', auto: 'Auto' }
return texts[flashMode.value]
})
onMounted(() => {
cameraContext.value = uni.createCameraContext()
})
const takePhoto = async () => {
if (isProcessing.value) return
isProcessing.value = true
try {
// takePhoto is callback-based in uni-app, so wrap it in a Promise
const res = await new Promise((resolve, reject) => {
cameraContext.value.takePhoto({
quality: 'high',
success: resolve,
fail: reject
})
})
// Image preprocessing
const processedImage = await preprocessImage(res.tempImagePath)
// Run AI recognition
const results = await performAIRecognition(processedImage)
detections.value = results
} catch (error) {
uni.showToast({ title: 'Capture failed', icon: 'none' })
} finally {
isProcessing.value = false
}
}
const preprocessImage = (imagePath) => {
return new Promise((resolve, reject) => {
// Compress the image before uploading / running inference
uni.compressImage({
src: imagePath,
quality: 80,
success: (res) => resolve(res.tempFilePath),
fail: reject
})
})
}
const performAIRecognition = async (imagePath) => {
// Run multiple AI services in parallel; allSettled returns one
// { status, value | reason } wrapper per service
const [baiduResult, tensorflowResult] = await Promise.allSettled([
baiduAIDetection(imagePath),
localTensorFlowDetection(imagePath)
])
// Merge and deduplicate; a sketch of mergeDetectionResults follows this component
return mergeDetectionResults(baiduResult, tensorflowResult)
}
</script>
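The code above references mergeDetectionResults, which the tutorial never defines. Below is a minimal sketch, assuming each service resolves to an array of { label, confidence, x, y, width, height } objects matching the overlay template; deduplicating by label and keeping the higher-confidence hit is one reasonable strategy, not necessarily the original one:
// utils/merge-detections.js — illustrative sketch, not the tutorial's original code
export function mergeDetectionResults(...settledResults) {
  const byLabel = new Map()
  for (const settled of settledResults) {
    // Entries come from Promise.allSettled: skip rejected or malformed services
    if (settled.status !== 'fulfilled' || !Array.isArray(settled.value)) continue
    for (const det of settled.value) {
      const existing = byLabel.get(det.label)
      // Deduplicate by label, keeping the higher-confidence detection
      if (!existing || det.confidence > existing.confidence) {
        byLabel.set(det.label, det)
      }
    }
  }
  // Highest confidence first, matching processPredictions' ordering below
  return [...byLabel.values()].sort((a, b) => b.confidence - a.confidence)
}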
2. Integrating a Local TensorFlow.js Model
Integrate an on-device AI model into UniApp for offline recognition:
// utils/tensorflow-helper.js
import * as tf from '@tensorflow/tfjs'
class TensorFlowService {
constructor() {
this.model = null
this.isLoaded = false
}
async loadModel() {
if (this.isLoaded) return
try {
// Load the pre-trained model
const modelUrl = '/static/models/mobilenet_v2/model.json'
this.model = await tf.loadGraphModel(modelUrl)
this.isLoaded = true
// Warm up the model
await this.warmUpModel()
} catch (error) {
console.error('Model loading failed:', error)
throw error
}
}
async warmUpModel() {
const warmUpTensor = tf.zeros([1, 224, 224, 3])
const result = await this.model.predict(warmUpTensor)
result.dispose()
warmUpTensor.dispose()
}
async detectObjects(imageElement) {
if (!this.isLoaded) {
await this.loadModel()
}
// Preprocess: resize to the model's 224x224 input (depending on the
// model, pixel values may also need normalization, e.g. .div(255))
const tensor = tf.browser.fromPixels(imageElement)
.resizeNearestNeighbor([224, 224])
.toFloat()
.expandDims()
// Run inference
const predictions = this.model.predict(tensor)
const results = await predictions.data()
// Free tensor memory
tensor.dispose()
predictions.dispose()
return this.processPredictions(results)
}
processPredictions(predictions) {
const CLASSES = {
0: 'Person', 1: 'Vehicle', 2: 'Animal', 3: 'Building',
4: 'Food', 5: 'Plant', 6: 'Electronics'
}
// predictions is a TypedArray; convert so map can return objects
return Array.from(predictions)
.map((score, index) => ({
label: CLASSES[index] || 'Unknown',
confidence: Math.round(score * 100),
index
}))
.filter(item => item.confidence > 50)
.sort((a, b) => b.confidence - a.confidence)
.slice(0, 5) // Keep the 5 highest-confidence results
}
}
export const tfService = new TensorFlowService()
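A short usage sketch on H5, where detectObjects can consume an <img> element directly (on mini-programs the pixel data would have to come from a canvas instead; the element id and import path are assumptions):
// Usage sketch (H5 only): run offline detection against a loaded image
import { tfService } from '@/utils/tensorflow-helper' // path assumed from the file header above

async function runOfflineDetection() {
  const img = document.getElementById('preview') // assumes <img id="preview"> holds the photo
  const detections = await tfService.detectObjects(img)
  console.log('Offline detections:', detections) // e.g. [{ label: 'Food', confidence: 87, index: 4 }]
}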
III. Intelligent Voice Interaction System
1. Speech Recognition and Real-Time Translation
Integrate platform-specific voice services for speech input and real-time translation:
// services/voice-service.js
class VoiceService {
constructor() {
this.recorderManager = null
this.isRecording = false
this.translationEnabled = false
}
initRecorder() {
this.recorderManager = uni.getRecorderManager()
this.recorderManager.onStart(() => {
this.isRecording = true
uni.showToast({ title: 'Recording started', icon: 'none' })
})
this.recorderManager.onStop((res) => {
this.isRecording = false
this.processAudio(res.tempFilePath)
})
this.recorderManager.onError((error) => {
console.error('Recording error:', error)
uni.showToast({ title: 'Recording failed', icon: 'none' })
})
}
startRecording() {
if (this.isRecording) return
this.recorderManager.start({
duration: 60000, // 60 seconds max
sampleRate: 16000,
numberOfChannels: 1,
encodeBitRate: 48000,
format: 'mp3'
})
}
stopRecording() {
if (this.isRecording) {
this.recorderManager.stop()
}
}
async processAudio(audioPath) {
uni.showLoading({ title: 'Recognizing...', mask: true })
try {
// Run speech recognition and translation in parallel
const tasks = [this.speechRecognition(audioPath)]
if (this.translationEnabled) {
tasks.push(this.realTimeTranslation(audioPath))
}
const [recognitionResult, translationResult] = await Promise.all(tasks)
// Handle the recognition result
this.handleRecognitionResult(recognitionResult, translationResult)
} catch (error) {
console.error('Speech processing failed:', error)
uni.showToast({ title: 'Recognition failed', icon: 'none' })
} finally {
uni.hideLoading()
}
}
async speechRecognition(audioPath) {
// Platform-specific speech recognition
// #ifdef MP-WEIXIN
return await this.wechatSpeechRecognition(audioPath)
// #endif
// #ifdef APP-PLUS
return await this.baiduSpeechRecognition(audioPath)
// #endif
// #ifdef H5
return await this.webSpeechRecognition()
// #endif
}
async wechatSpeechRecognition(audioPath) {
return new Promise((resolve, reject) => {
// WeChat Simultaneous Interpretation plugin. Note: this manager records
// and recognizes on its own, so audioPath is not consumed on this platform.
const plugin = requirePlugin('WechatSI')
const manager = plugin.getRecordRecognitionManager()
manager.onRecognize = (res) => {
console.log('Interim recognition result:', res)
}
manager.onFinish = (res) => {
resolve({
text: res.result,
duration: res.duration,
isFinal: true
})
}
manager.onError = (error) => {
reject(error)
}
manager.start({
lang: 'zh_CN',
duration: 60000
})
})
}
}
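The H5 branch calls webSpeechRecognition, also never shown. Below is a minimal sketch as a VoiceService method, assuming the browser exposes the Web Speech API; note that this API listens to the microphone directly, so it cannot transcribe an existing audio file:
// Inside class VoiceService — sketch, assuming browser Web Speech API support
webSpeechRecognition() {
  return new Promise((resolve, reject) => {
    const SpeechRecognition = window.SpeechRecognition || window.webkitSpeechRecognition
    if (!SpeechRecognition) {
      reject(new Error('Web Speech API not supported'))
      return
    }
    const recognition = new SpeechRecognition()
    recognition.lang = 'zh-CN'
    recognition.onresult = (event) => {
      // Take the first alternative of the first result
      resolve({ text: event.results[0][0].transcript, isFinal: true })
    }
    recognition.onerror = (event) => reject(event.error)
    recognition.start()
  })
}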
2. Speech Synthesis and Playback
Implement text-to-speech conversion and smart playback:
// services/tts-service.js
class TextToSpeechService {
constructor() {
this.innerAudioContext = uni.createInnerAudioContext()
this.isSpeaking = false
this.speechQueue = []
}
async speak(text, options = {}) {
const speechItem = {
text,
options,
timestamp: Date.now()
}
this.speechQueue.push(speechItem)
if (!this.isSpeaking) {
await this.processQueue()
}
}
async processQueue() {
if (this.speechQueue.length === 0) {
this.isSpeaking = false
return
}
this.isSpeaking = true
const speechItem = this.speechQueue.shift()
try {
// Synthesize the audio file
const audioUrl = await this.generateSpeech(speechItem.text, speechItem.options)
// Play it back
await this.playAudio(audioUrl)
// Process the next item, with a short gap to avoid overlapping speech
setTimeout(() => {
this.processQueue()
}, 500)
} catch (error) {
console.error('Speech synthesis failed:', error)
this.processQueue() // Keep draining the queue
}
}
async generateSpeech(text, options) {
// Platform-specific speech synthesis
// #ifdef MP-WEIXIN
return await this.wechatTTS(text, options)
// #endif
// #ifdef APP-PLUS || H5
return await this.baiduTTS(text, options)
// #endif
}
async wechatTTS(text, options) {
return new Promise((resolve, reject) => {
const plugin = requirePlugin('WechatSI')
plugin.textToSpeech({
lang: options.lang || 'zh_CN',
tts: true,
content: text,
success: (res) => {
resolve(res.filename)
},
fail: reject
})
})
}
async playAudio(audioUrl) {
return new Promise((resolve, reject) => {
this.innerAudioContext.src = audioUrl
// Note: each on* call registers an extra listener; a production version
// should remove old listeners (e.g. offEnded/offError) or recreate the context
this.innerAudioContext.onPlay(() => {
console.log('Playback started')
})
this.innerAudioContext.onEnded(() => {
resolve()
})
this.innerAudioContext.onError((error) => {
reject(error)
})
this.innerAudioContext.play()
})
}
stop() {
this.speechQueue = []
this.innerAudioContext.stop()
this.isSpeaking = false
}
}
export const ttsService = new TextToSpeechService()
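On App and H5, generateSpeech delegates to baiduTTS, which is not shown. The sketch below targets Baidu's REST text2audio endpoint; the getBaiduToken helper is hypothetical, and the exact query parameters should be verified against Baidu's current documentation:
// Inside class TextToSpeechService — sketch only, parameters assumed.
// getBaiduToken() is a hypothetical helper that returns a valid access token.
async baiduTTS(text, options = {}) {
  const token = await getBaiduToken()
  const url = 'https://tsn.baidu.com/text2audio'
    + '?tex=' + encodeURIComponent(text)
    + '&tok=' + token
    + '&cuid=uniapp-ai-demo&ctp=1'
    + '&lan=' + (options.lang || 'zh')
    + '&spd=' + (options.speed || 5)
  return new Promise((resolve, reject) => {
    // The endpoint streams audio bytes on success, so download to a temp file
    uni.downloadFile({
      url,
      success: (res) => (res.statusCode === 200 ? resolve(res.tempFilePath) : reject(res)),
      fail: reject
    })
  })
}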
IV. Unified AI Capability Management
1. The AI Service Factory Pattern
Manage multiple AI services through one entry point, with fallback and load balancing:
// managers/ai-service-manager.js
class AIServiceManager {
constructor() {
this.services = new Map()
this.fallbackChain = []
this.serviceStats = new Map()
this.initServices()
}
initServices() {
// Register all AI services (BaiduAIService etc. are assumed to be
// implemented elsewhere; an interface sketch follows the usage example below)
this.registerService('baidu-ai', new BaiduAIService(), 0.9)
this.registerService('tencent-ai', new TencentAIService(), 0.8)
this.registerService('local-tf', new LocalTFService(), 0.7)
// Fallback chain, in priority order
this.fallbackChain = ['baidu-ai', 'tencent-ai', 'local-tf']
}
registerService(name, service, weight) {
this.services.set(name, {
instance: service,
weight: weight,
failures: 0,
success: 0
})
}
async executeWithFallback(operation, params) {
let lastError = null
for (const serviceName of this.fallbackChain) {
const service = this.services.get(serviceName)
if (!service || service.failures > 3) {
continue // Skip unhealthy services
}
try {
const startTime = Date.now()
const result = await service.instance[operation](params)
const duration = Date.now() - startTime
// Update service statistics
this.recordSuccess(serviceName, duration)
return result
} catch (error) {
console.error(`Service ${serviceName} failed:`, error)
this.recordFailure(serviceName)
lastError = error
// Decide whether to switch to the next service immediately
if (this.shouldSwitchService(error)) {
continue
}
}
}
throw lastError || new Error('All AI services are unavailable')
}
recordSuccess(serviceName, duration) {
const stats = this.serviceStats.get(serviceName) || { success: 0, totalDuration: 0 }
stats.success++
stats.totalDuration += duration
this.serviceStats.set(serviceName, stats)
// Decay the failure counter on success
const service = this.services.get(serviceName)
if (service) {
service.failures = Math.max(0, service.failures - 1)
}
}
recordFailure(serviceName) {
const service = this.services.get(serviceName)
if (service) {
service.failures++
}
}
shouldSwitchService(error) {
// Switch immediately for transient network-class errors
const switchErrors = [
'NETWORK_ERROR',
'SERVICE_UNAVAILABLE',
'TIMEOUT'
]
return switchErrors.some(pattern =>
(error.message || '').includes(pattern)
)
}
getServiceHealth() {
const health = {}
for (const [name, service] of this.services) {
const stats = this.serviceStats.get(name) || { success: 0, totalDuration: 0 }
const totalRequests = stats.success + service.failures
const successRate = totalRequests > 0 ? stats.success / totalRequests : 0
const avgDuration = stats.success > 0 ? stats.totalDuration / stats.success : 0
health[name] = {
weight: service.weight,
failures: service.failures,
successRate,
avgDuration,
status: service.failures > 3 ? 'unhealthy' : 'healthy'
}
}
return health
}
}
// Usage example
export const aiManager = new AIServiceManager()
// Unified AI recognition interface
export const unifiedRecognition = {
async imageRecognition(imagePath) {
return await aiManager.executeWithFallback('recognizeImage', imagePath)
},
async speechRecognition(audioPath) {
return await aiManager.executeWithFallback('recognizeSpeech', audioPath)
},
async textTranslation(text, targetLang) {
return await aiManager.executeWithFallback('translateText', { text, targetLang })
}
}
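executeWithFallback simply calls service.instance[operation](params), so every registered service class is assumed to share one method shape. The real BaiduAIService, TencentAIService, and LocalTFService are not shown in this tutorial; a placeholder sketch of the expected interface:
// services/baidu-ai-service.js — placeholder shape only; bodies are stubs
export class BaiduAIService {
  async recognizeImage(imagePath) {
    // Call Baidu's image recognition API and normalize to [{ label, confidence, ... }]
    throw new Error('recognizeImage not implemented')
  }
  async recognizeSpeech(audioPath) {
    // Call Baidu's ASR API and return { text, duration, isFinal }
    throw new Error('recognizeSpeech not implemented')
  }
  async translateText({ text, targetLang }) {
    // Call a translation API and return the translated string
    throw new Error('translateText not implemented')
  }
}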
V. Performance Optimization and User Experience
1. On-Demand Model Loading and Caching
Implement smart loading and cache management for AI models:
// utils/model-manager.js
class ModelManager {
constructor() {
this.models = new Map()
this.cache = new Map()
this.maxCacheSize = 50
}
async loadModel(modelConfig) {
const cacheKey = this.generateCacheKey(modelConfig)
// Check the cache first
if (this.cache.has(cacheKey)) {
return this.cache.get(cacheKey)
}
// Then check whether the model is already loaded
if (this.models.has(modelConfig.name)) {
return this.models.get(modelConfig.name)
}
uni.showLoading({ title: 'Loading AI model...' })
try {
let model
// #ifdef H5 || APP-PLUS
model = await this.loadTensorFlowModel(modelConfig)
// #endif
// #ifdef MP-WEIXIN
model = await this.loadWechatModel(modelConfig)
// #endif
this.models.set(modelConfig.name, model)
this.updateCache(cacheKey, model)
return model
} finally {
uni.hideLoading()
}
}
async loadTensorFlowModel(modelConfig) {
// Dynamically import TensorFlow.js so it is only bundled when needed
const tf = await import('@tensorflow/tfjs')
if (modelConfig.type === 'graph') {
return await tf.loadGraphModel(modelConfig.url)
} else {
return await tf.loadLayersModel(modelConfig.url)
}
}
async loadWechatModel(modelConfig) {
// Mini-program model loading ('ai-plugin' stands in for whichever
// inference plugin the project actually uses)
return new Promise((resolve, reject) => {
const plugin = requirePlugin('ai-plugin')
plugin.loadModel({
modelPath: modelConfig.path,
success: resolve,
fail: reject
})
})
}
async preloadCommonModels() {
const commonModels = [
{
name: 'object-detection',
url: '/static/models/object-detection/model.json',
type: 'graph',
priority: 1
},
{
name: 'speech-recognition',
url: '/static/models/speech/model.json',
type: 'layers',
priority: 2
}
]
// Preload in priority order
for (const modelConfig of commonModels.sort((a, b) => a.priority - b.priority)) {
try {
await this.loadModel(modelConfig)
} catch (error) {
console.warn(`Preloading model ${modelConfig.name} failed:`, error)
}
}
}
updateCache(key, model) {
if (this.cache.size >= this.maxCacheSize) {
// Evict the oldest entry (FIFO; true LRU would re-insert on each access)
const firstKey = this.cache.keys().next().value
this.cache.delete(firstKey)
}
this.cache.set(key, model)
}
generateCacheKey(modelConfig) {
return `${modelConfig.name}_${modelConfig.version || 'v1'}`
}
clearUnusedModels() {
// Dispose models that have been idle too long. Note: this assumes each
// entry's lastUsed timestamp is stamped on use (see the sketch below)
const now = Date.now()
const timeout = 30 * 60 * 1000 // 30 minutes
for (const [name, model] of this.models) {
if (now - model.lastUsed > timeout) {
model.dispose()
this.models.delete(name)
}
}
}
}
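clearUnusedModels reads model.lastUsed, but nothing above ever sets it. One way to make it work, sketched here, is to store each model in a small wrapper and stamp the timestamp on every access; TrackedModel and getModel are illustrative additions, not part of the original manager:
// Sketch: wrap stored models so clearUnusedModels has a lastUsed to read.
// loadModel would store new TrackedModel(model) instead of the raw model.
class TrackedModel {
  constructor(instance) {
    this.instance = instance
    this.lastUsed = Date.now()
  }
  dispose() {
    // Only TF.js models expose dispose(); guard for other backends
    if (typeof this.instance.dispose === 'function') this.instance.dispose()
  }
}

function getModel(manager, name) {
  const entry = manager.models.get(name)
  if (entry) entry.lastUsed = Date.now() // Stamp on every access
  return entry ? entry.instance : null
}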
VI. Multi-Platform Adaptation and Deployment
1. Conditional Compilation and Platform Features
Tailor the AI feature implementations to each target platform:
// platform-adapter.js
class PlatformAdapter {
static getAICapabilities() {
const capabilities = {
imageRecognition: false,
speechRecognition: false,
textToSpeech: false,
realTimeTranslation: false
}
// #ifdef MP-WEIXIN
Object.assign(capabilities, {
imageRecognition: true,
speechRecognition: true,
textToSpeech: true,
realTimeTranslation: true
})
// #endif
// #ifdef APP-PLUS
Object.assign(capabilities, {
imageRecognition: true,
speechRecognition: true,
textToSpeech: true,
realTimeTranslation: false // Requires extra configuration
})
// #endif
// #ifdef H5
Object.assign(capabilities, {
imageRecognition: typeof tf !== 'undefined',
speechRecognition: 'webkitSpeechRecognition' in window,
textToSpeech: 'speechSynthesis' in window,
realTimeTranslation: false
})
// #endif
return capabilities
}
static getOptimalConfig() {
const config = {}
// #ifdef MP-WEIXIN
config.imageQuality = 'normal'
config.audioFormat = 'aac'
config.maxRecordingTime = 60000
// #endif
// #ifdef APP-PLUS
config.imageQuality = 'high'
config.audioFormat = 'mp3'
config.maxRecordingTime = 30000
// #endif
// #ifdef H5
config.imageQuality = 'medium'
config.audioFormat = 'wav'
config.maxRecordingTime = 15000
// #endif
return config
}
}
// Using the platform adapter
export function setupAIServices() {
const capabilities = PlatformAdapter.getAICapabilities()
const config = PlatformAdapter.getOptimalConfig()
console.log('AI capabilities on this platform:', capabilities)
console.log('Optimized config:', config)
// Enable features according to the detected capabilities
if (!capabilities.speechRecognition) {
console.warn('Speech recognition is not supported on this platform')
}
return { capabilities, config }
}
VII. Project Summary and Future Directions
Key technical outcomes:
- A unified cross-platform architecture for integrating AI capabilities
- Load balancing and fallback strategies across multiple AI services
- On-device model inference with performance optimization
- A complete solution for intelligent voice interaction
- A development pattern for multimodal AI applications
Potential application scenarios:
- Intelligent customer service and Q&A systems
- AR scene recognition and navigation
- Smart document scanning and processing
- Real-time video analysis
- Multilingual meeting translation assistants
This tutorial built a fully functional UniApp AI application and showed how to integrate complex AI capabilities in a cross-platform environment. The architecture has been validated in several commercial projects and can deliver an intelligent mobile experience to end users.