Files
sad_test01/aicodes/module3_voice_assistant/voice_assistant_app.py

333 lines
11 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
"""
CardioAI - voice assistant module.

Cardiovascular-health Q&A voice assistant built on Deepseek and CosyVoice.
"""
import os
import base64
from flask import Flask, request, jsonify, render_template
from langchain_openai import ChatOpenAI
from dotenv import load_dotenv
import dashscope
from dashscope.audio.tts_v2 import SpeechSynthesizer, AudioFormat, ResultCallback
import json
import traceback
# Create the Flask application; HTML templates are looked up in ./templates.
app = Flask(__name__, template_folder='templates')
# Location of the .env file: taken from the ENV_PATH environment variable,
# falling back to the hard-coded default below.
# NOTE(review): the default is an absolute, machine-specific path and contains
# a space before "sad_test01" - confirm this is intended and not a typo.
ENV_PATH = os.getenv('ENV_PATH', '/Users/anthony/PycharmProjects/ sad_test01/.env')
def load_environment_variables():
    """Load the .env file and verify that the required API keys are present.

    The file at ``ENV_PATH`` is loaded when it exists; otherwise
    ``load_dotenv()`` is called without a path so it searches its default
    location.

    Returns:
        True when both ``DEEPSEEK_API_KEY1`` and ``DASHSCOPE_API_KEY`` are
        set to non-empty values; False when either is missing or when any
        exception is raised while loading.
    """
    try:
        env_file_present = os.path.exists(ENV_PATH)
        if not env_file_present:
            print(f"⚠️ 环境变量文件不存在: {ENV_PATH},尝试从默认位置加载")
            load_dotenv()  # fall back to python-dotenv's default lookup
        else:
            print(f"📋 从 {ENV_PATH} 加载环境变量")
            load_dotenv(dotenv_path=ENV_PATH)

        # Both keys are mandatory; an empty value counts as missing.
        mandatory_names = ['DEEPSEEK_API_KEY1', 'DASHSCOPE_API_KEY']
        absent_names = [name for name in mandatory_names if not os.getenv(name)]

        if not absent_names:
            print("✅ 环境变量加载成功")
            print(f" DeepSeek API密钥: {'已设置' if os.getenv('DEEPSEEK_API_KEY1') else '未设置'}")
            print(f" DashScope API密钥: {'已设置' if os.getenv('DASHSCOPE_API_KEY') else '未设置'}")
            print(f" DeepSeek基础URL: {os.getenv('base_url1', '默认: https://api.deepseek.com/v1')}")
            return True

        print(f"❌ 缺少必要的环境变量: {absent_names}")
        print("⚠️ 请在环境变量文件中设置以下变量:")
        print(" - DEEPSEEK_API_KEY1: DeepSeek API密钥")
        print(" - DASHSCOPE_API_KEY: DashScope (阿里云) API密钥")
        print(" - base_url1: DeepSeek API基础URL (可选,默认: https://api.deepseek.com/v1)")
        return False
    except Exception as exc:
        print(f"❌ 加载环境变量时出错: {exc}")
        traceback.print_exc()
        return False
def initialize_llm():
    """Build the DeepSeek chat client through the OpenAI-compatible wrapper.

    Reads ``DEEPSEEK_API_KEY1`` (required) and ``base_url1`` (optional,
    defaults to ``https://api.deepseek.com/v1``) from the environment.

    Returns:
        A ``ChatOpenAI`` instance on success, or None when the API key is
        missing or construction raises; the error is printed with a traceback.
    """
    try:
        api_key = os.environ.get('DEEPSEEK_API_KEY1')
        if not api_key:
            raise ValueError("DEEPSEEK_API_KEY1环境变量未设置")

        endpoint = os.environ.get('base_url1', 'https://api.deepseek.com/v1')
        client_options = {
            'base_url': endpoint,
            'api_key': api_key,
            'model': "deepseek-chat",
            'temperature': 0.7,
            'max_tokens': 1000,
        }
        client = ChatOpenAI(**client_options)
        print("✅ DeepSeek LLM初始化成功")
        return client
    except Exception as exc:
        print(f"❌ 初始化DeepSeek LLM时出错: {exc}")
        traceback.print_exc()
        return None
def initialize_tts():
    """Configure the DashScope SDK with the API key from the environment.

    Reads ``DASHSCOPE_API_KEY`` and assigns it to ``dashscope.api_key``.
    A missing key or any other failure is printed with a traceback and
    swallowed; the function always returns None.
    """
    try:
        key = os.environ.get('DASHSCOPE_API_KEY')
        if not key:
            raise ValueError("DASHSCOPE_API_KEY环境变量未设置")
        dashscope.api_key = key
        print("✅ CosyVoice语音合成初始化成功")
    except Exception as exc:
        print(f"❌ 初始化语音合成时出错: {exc}")
        traceback.print_exc()
def get_config_status():
    """Report which configuration values are currently available.

    Returns:
        A dict with three entries: ``deepseek`` (``api_key_set``,
        ``base_url_set``, ``status``), ``dashscope`` (``api_key_set``,
        ``status``) and ``env_file_exists``. ``status`` is ``'configured'``
        when the corresponding API key is non-empty, otherwise
        ``'missing_api_key'``.
    """
    deepseek_key_present = bool(os.getenv('DEEPSEEK_API_KEY1'))
    dashscope_key_present = bool(os.getenv('DASHSCOPE_API_KEY'))

    deepseek_section = {
        'api_key_set': deepseek_key_present,
        'base_url_set': bool(os.getenv('base_url1')),
        'status': 'configured' if deepseek_key_present else 'missing_api_key',
    }
    dashscope_section = {
        'api_key_set': dashscope_key_present,
        'status': 'configured' if dashscope_key_present else 'missing_api_key',
    }
    return {
        'deepseek': deepseek_section,
        'dashscope': dashscope_section,
        'env_file_exists': os.path.exists(ENV_PATH),
    }
def get_system_prompt():
    """Return the fixed system prompt sent to the LLM with every question.

    The prompt (in Chinese) casts the model as a cardiovascular health
    adviser and lists five numbered guidelines followed by a closing
    sentence. Lines are joined with a newline and there is no trailing
    newline.
    """
    prompt_lines = (
        "你是一名专业的心血管健康顾问,拥有丰富的医学知识和临床经验。你的任务是:",
        "1. **专业准确**基于最新的医学研究和临床指南提供准确信息",
        "2. **通俗易懂**用通俗易懂的语言解释医学术语和概念",
        "3. **个性化建议**根据用户的具体情况提供个性化建议",
        "4. **预防为主**强调心血管疾病的预防和早期干预",
        "5. **安全提醒**明确指出哪些情况需要立即就医",
        "请保持回答的专业性准确性和实用性同时要富有同理心和耐心",
    )
    return "\n".join(prompt_lines)
def _split_text_for_tts(text, max_length, min_length=100):
    """Split *text* into chunks that each fit into one synthesis call.

    A chunk is closed at the first sentence terminator (Chinese or ASCII
    full stop, exclamation mark or question mark) seen once the chunk is
    longer than *min_length* characters, and is force-closed as soon as it
    reaches *max_length* so that unpunctuated text cannot exceed the limit.
    Chunks are stripped, and whitespace-only chunks are dropped.
    """
    sentence_endings = frozenset('。!?.!?')
    chunks = []
    buffer = []
    for char in text:
        buffer.append(char)
        at_sentence_end = char in sentence_endings and len(buffer) > min_length
        if at_sentence_end or len(buffer) >= max_length:
            chunks.append(''.join(buffer))
            buffer = []
    if buffer:
        chunks.append(''.join(buffer))
    stripped_chunks = (chunk.strip() for chunk in chunks)
    return [chunk for chunk in stripped_chunks if chunk]


def _create_synthesizer():
    """Create a CosyVoice synthesizer with the voice settings used by this app."""
    return SpeechSynthesizer(
        model="cosyvoice-v2",
        voice="longxiaochun_v2",
        format=AudioFormat.MP3_22050HZ_MONO_256KBPS,
        speech_rate=1.0,
        pitch_rate=1.0,
        volume=50
    )


def synthesize_speech(text):
    """Synthesize *text* to MP3 audio and return it base64-encoded.

    Text longer than 2000 characters is split at sentence boundaries and
    synthesized piece by piece; the resulting MP3 byte strings are
    concatenated in order.

    Args:
        text: The text to speak.

    Returns:
        The audio as a base64 string (UTF-8 decoded), or None when the text
        is empty/whitespace-only or when synthesis fails for any reason.
        Failures are printed with a traceback and never raised.
    """
    try:
        if not text or not text.strip():
            raise ValueError("文本内容为空")
        print(f"🔊 开始语音合成,文本长度: {len(text)} 字符")

        # Per-call text length limit assumed for CosyVoice by this app.
        max_text_length = 2000
        needs_split = len(text) > max_text_length
        if needs_split:
            print(f"⚠️ 文本长度超过{max_text_length}字符,将进行分段处理")
            segments = _split_text_for_tts(text, max_text_length)
        else:
            segments = [text.strip()]

        audio_parts = []
        for index, segment in enumerate(segments, start=1):
            if needs_split:
                print(f" 合成第 {index}/{len(segments)} 段,长度: {len(segment)} 字符")
            # A fresh synthesizer is created for every call: the DashScope
            # SDK documentation requires re-instantiating SpeechSynthesizer
            # before each call() rather than reusing one instance.
            segment_audio = _create_synthesizer().call(segment)
            if not segment_audio:
                # Make the failure explicit instead of relying on a
                # TypeError from concatenating or measuring None.
                raise RuntimeError("语音合成未返回音频数据")
            audio_parts.append(segment_audio)

        audio_data = b"".join(audio_parts)
        print(f"✅ 语音合成完成,音频大小: {len(audio_data)} 字节")
        return base64.b64encode(audio_data).decode('utf-8')
    except Exception as e:
        print(f"❌ 语音合成失败: {e}")
        traceback.print_exc()
        return None
# Module-level LLM client; stays None until init_app() or ask_question()
# assigns the result of initialize_llm().
llm = None
@app.route('/')
def home():
    """Serve the voice assistant page rendered from ``voice_index.html``."""
    page = render_template('voice_index.html')
    return page
@app.route('/api/health', methods=['GET'])
def health_check():
    """Health-check endpoint.

    Returns a JSON document describing whether the LLM client and the
    DashScope API key are initialized, the detailed configuration status
    from ``get_config_status()``, and whether further setup is required.
    ``status`` is ``'healthy'`` only when both the LLM and DashScope are
    ready, otherwise ``'degraded'``.
    """
    config_status = get_config_status()
    deepseek_missing = not config_status['deepseek']['api_key_set']
    dashscope_missing = not config_status['dashscope']['api_key_set']
    needs_setup = deepseek_missing or dashscope_missing

    # Runtime readiness is judged from the live objects, not the env vars.
    llm_ready = llm is not None
    tts_ready = dashscope.api_key is not None
    if llm_ready and tts_ready:
        overall_status = 'healthy'
    else:
        overall_status = 'degraded'

    if needs_setup:
        instructions = '请配置.env文件中的API密钥'
    else:
        instructions = '配置完成'

    payload = {
        'status': overall_status,
        'service': 'CardioAI Voice Assistant',
        'llm_initialized': llm_ready,
        'dashscope_initialized': tts_ready,
        'config_status': config_status,
        'missing_config': {
            'deepseek': deepseek_missing,
            'dashscope': dashscope_missing,
        },
        'setup_required': needs_setup,
        'setup_instructions': instructions,
    }
    return jsonify(payload)
@app.route('/api/ask', methods=['POST'])
def ask_question():
    """Question endpoint: answer the user's question as text and speech.

    The question is read from the ``question`` field of a JSON body, or of
    form data when the request is not JSON.

    Responses (all JSON):
        400 - the question is missing, empty, not a string, or the JSON
              body is malformed or not an object.
        503 - the LLM client could not be initialized.
        200 - ``status: success`` with ``text_answer`` and ``audio_base64``;
              ``audio_base64`` is None (with an explanatory ``message``)
              when speech synthesis failed.
        500 - any other error, with the exception text in ``message``.
    """
    global llm
    try:
        # Extract the question defensively: the payload is untrusted.
        if request.is_json:
            # silent=True returns None for malformed JSON instead of raising
            # BadRequest, which the broad handler below would report as 500.
            payload = request.get_json(silent=True)
            raw_question = payload.get('question') if isinstance(payload, dict) else None
        else:
            raw_question = request.form.get('question')
        # null / numeric / list values are treated as a missing question.
        question = raw_question.strip() if isinstance(raw_question, str) else ''
        if not question:
            return jsonify({
                'status': 'error',
                'message': '请提供问题内容'
            }), 400
        print(f"🤔 用户提问: {question[:100]}...")

        # Lazily (re)initialize the LLM if startup initialization failed.
        if llm is None:
            print("⚠️ LLM未初始化尝试重新初始化")
            llm = initialize_llm()
            if llm is None:
                return jsonify({
                    'status': 'error',
                    'message': '语言模型未初始化,请检查配置'
                }), 503

        # Build the chat messages: system prompt followed by the question.
        system_prompt = get_system_prompt()
        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": question}
        ]

        # Ask DeepSeek for the answer.
        print("🧠 正在生成回答...")
        response = llm.invoke(messages)
        text_answer = response.content if hasattr(response, 'content') else str(response)
        print(f"✅ 回答生成完成,长度: {len(text_answer)} 字符")

        # Speech synthesis is best-effort: fall back to a text-only answer.
        audio_base64 = synthesize_speech(text_answer)
        if audio_base64 is None:
            print("⚠️ 语音合成失败,仅返回文本回答")
            return jsonify({
                'status': 'success',
                'text_answer': text_answer,
                'audio_base64': None,
                'message': '语音合成失败,仅返回文本回答'
            })

        return jsonify({
            'status': 'success',
            'text_answer': text_answer,
            'audio_base64': audio_base64,
            'audio_format': 'mp3',
            'audio_sample_rate': '22050Hz'
        })
    except Exception as e:
        print(f"❌ 处理问题时出错: {e}")
        traceback.print_exc()
        return jsonify({
            'status': 'error',
            'message': f'处理问题时出错: {str(e)}'
        }), 500
def init_app():
    """Initialize the application: environment, LLM client and TTS.

    Prints a startup banner, loads the environment variables (continuing
    with a warning when that fails), stores the LLM client in the
    module-level ``llm``, configures DashScope, and prints the available
    endpoints together with the initialization state of both services.
    """
    global llm

    banner_rule = "=" * 60
    print(banner_rule)
    print("🎤 CardioAI - 心血管健康语音助手")
    print(banner_rule)

    # A failed load is not fatal; dependent features may be unavailable.
    env_loaded = load_environment_variables()
    if not env_loaded:
        print("⚠️ 环境变量加载失败,某些功能可能无法使用")

    llm = initialize_llm()
    initialize_tts()

    print("\n📡 API端点:")
    endpoint_lines = (
        " GET / - 语音助手界面",
        " GET /api/health - 健康检查",
        " POST /api/ask - 提问并获取语音回答",
    )
    for endpoint_line in endpoint_lines:
        print(endpoint_line)

    llm_state = '未初始化' if llm is None else '已初始化'
    tts_state = '已初始化' if dashscope.api_key else '未初始化'
    print(f"\n🧠 LLM状态: {llm_state}")
    print(f"🔊 语音合成: {tts_state}")
if __name__ == '__main__':
    # Run as a script: initialize, then start the Flask development server.
    init_app()
    # The Werkzeug interactive debugger allows arbitrary code execution, so
    # debug mode is opt-in through FLASK_DEBUG and, when enabled, the server
    # binds to loopback only instead of every network interface.
    debug_enabled = os.getenv('FLASK_DEBUG', '').strip().lower() in ('1', 'true', 'yes', 'on')
    bind_host = '127.0.0.1' if debug_enabled else '0.0.0.0'
    print("\n🌍 启动服务器: http://127.0.0.1:5002")
    print(" 按 Ctrl+C 停止\n")
    app.run(
        host=bind_host,
        port=5002,
        debug=debug_enabled,
        threaded=True
    )
else:
    # Imported (e.g. by a WSGI server): initialize at import time.
    init_app()