333 lines
11 KiB
Python
333 lines
11 KiB
Python
|
|
#!/usr/bin/env python3
|
|||
|
|
"""
|
|||
|
|
CardioAI - 语音助手模块
|
|||
|
|
基于Deepseek和CosyVoice的心血管健康问答语音助手
|
|||
|
|
"""
|
|||
|
|
|
|||
|
|
import os
|
|||
|
|
import base64
|
|||
|
|
from flask import Flask, request, jsonify, render_template
|
|||
|
|
from langchain_openai import ChatOpenAI
|
|||
|
|
from dotenv import load_dotenv
|
|||
|
|
import dashscope
|
|||
|
|
from dashscope.audio.tts_v2 import SpeechSynthesizer, AudioFormat, ResultCallback
|
|||
|
|
import json
|
|||
|
|
import traceback
|
|||
|
|
|
|||
|
|
# Initialize the Flask application; HTML templates are looked up in ./templates.
app = Flask(__name__, template_folder='templates')

# Location of the dotenv file. The ENV_PATH environment variable overrides it;
# otherwise default to the `.env` file that sits next to this module rather
# than a developer-specific absolute path that exists on one machine only.
# NOTE(review): assumes this module lives in the project root - confirm.
ENV_PATH = os.getenv(
    'ENV_PATH',
    os.path.join(os.path.dirname(os.path.abspath(__file__)), '.env'),
)
|
|||
|
|
|
|||
|
|
def load_environment_variables():
    """Load settings from the dotenv file and verify the required API keys.

    Loads ``ENV_PATH`` when that file exists; otherwise calls ``load_dotenv()``
    with no arguments so python-dotenv uses its default lookup. Progress and
    problems are reported on stdout.

    Returns:
        bool: True when both ``DEEPSEEK_API_KEY1`` and ``DASHSCOPE_API_KEY``
        are set to non-empty values; False when either is missing or when
        loading raised an exception.
    """
    try:
        if os.path.exists(ENV_PATH):
            print(f"📋 从 {ENV_PATH} 加载环境变量")
            load_dotenv(dotenv_path=ENV_PATH)
        else:
            print(f"⚠️ 环境变量文件不存在: {ENV_PATH},尝试从默认位置加载")
            load_dotenv()  # fall back to python-dotenv's default lookup

        # Both keys are mandatory; an empty value counts as missing.
        required_vars = ['DEEPSEEK_API_KEY1', 'DASHSCOPE_API_KEY']
        missing_vars = [var for var in required_vars if not os.getenv(var)]

        if missing_vars:
            print(f"❌ 缺少必要的环境变量: {missing_vars}")
            print("⚠️ 请在环境变量文件中设置以下变量:")
            print(" - DEEPSEEK_API_KEY1: DeepSeek API密钥")
            print(" - DASHSCOPE_API_KEY: DashScope (阿里云) API密钥")
            print(" - base_url1: DeepSeek API基础URL (可选,默认: https://api.deepseek.com/v1)")
            return False

        # Reaching this point means both keys are set, so the status lines are
        # fixed text (the former "not set" alternative could never be printed).
        print("✅ 环境变量加载成功")
        print(" DeepSeek API密钥: 已设置")
        print(" DashScope API密钥: 已设置")
        print(f" DeepSeek基础URL: {os.getenv('base_url1', '默认: https://api.deepseek.com/v1')}")
        return True

    except Exception as e:
        # Startup boundary: report the problem and let the caller continue
        # in a degraded state instead of crashing the process.
        print(f"❌ 加载环境变量时出错: {e}")
        traceback.print_exc()
        return False
|
|||
|
|
|
|||
|
|
def initialize_llm():
    """Create the DeepSeek chat client through the OpenAI-compatible interface.

    Reads ``DEEPSEEK_API_KEY1`` (required) and ``base_url1`` (optional) from
    the environment.

    Returns:
        The configured ``ChatOpenAI`` instance, or None when the API key is
        missing or construction fails (the error is printed, not raised).
    """
    try:
        deepseek_api_key = os.getenv('DEEPSEEK_API_KEY1')
        # `or` instead of a getenv default so that a variable which is set
        # but empty also falls back to the public endpoint.
        deepseek_base_url = os.getenv('base_url1') or 'https://api.deepseek.com/v1'

        if not deepseek_api_key:
            raise ValueError("DEEPSEEK_API_KEY1环境变量未设置")

        # Named `client` so the module-level `llm` global is not shadowed.
        client = ChatOpenAI(
            base_url=deepseek_base_url,
            api_key=deepseek_api_key,
            model="deepseek-chat",
            temperature=0.7,
            max_tokens=1000,
        )

        print("✅ DeepSeek LLM初始化成功")
        return client

    except Exception as e:
        print(f"❌ 初始化DeepSeek LLM时出错: {e}")
        traceback.print_exc()
        return None
|
|||
|
|
|
|||
|
|
def initialize_tts():
    """Configure the DashScope SDK with the API key used for speech synthesis.

    Reads ``DASHSCOPE_API_KEY`` from the environment and assigns it to
    ``dashscope.api_key``. A missing key or any other failure is printed and
    swallowed, leaving ``dashscope.api_key`` untouched.

    Returns:
        None in every case.
    """
    try:
        dashscope_api_key = os.getenv('DASHSCOPE_API_KEY')

        if not dashscope_api_key:
            raise ValueError("DASHSCOPE_API_KEY环境变量未设置")

        dashscope.api_key = dashscope_api_key
        print("✅ CosyVoice语音合成初始化成功")

    except Exception as e:
        print(f"❌ 初始化语音合成时出错: {e}")
        traceback.print_exc()
|
|||
|
|
|
|||
|
|
def get_config_status():
    """Summarize which configuration values are currently available.

    Returns:
        dict: ``deepseek`` and ``dashscope`` sub-dicts with ``api_key_set``
        flags and a ``status`` of ``'configured'`` or ``'missing_api_key'``
        (``deepseek`` also carries ``base_url_set``), plus
        ``env_file_exists`` telling whether ``ENV_PATH`` exists on disk.
    """
    # Read each variable once so the flag and the status cannot disagree.
    deepseek_key_set = bool(os.getenv('DEEPSEEK_API_KEY1'))
    dashscope_key_set = bool(os.getenv('DASHSCOPE_API_KEY'))

    return {
        'deepseek': {
            'api_key_set': deepseek_key_set,
            'base_url_set': bool(os.getenv('base_url1')),
            'status': 'configured' if deepseek_key_set else 'missing_api_key',
        },
        'dashscope': {
            'api_key_set': dashscope_key_set,
            'status': 'configured' if dashscope_key_set else 'missing_api_key',
        },
        'env_file_exists': os.path.exists(ENV_PATH),
    }
|
|||
|
|
|
|||
|
|
def get_system_prompt():
    """Return the system prompt that frames the model as a cardiovascular advisor.

    Returns:
        str: A fixed multi-line prompt: an introduction, a blank line, five
        numbered requirements, a blank line, and a closing instruction.
    """
    # Adjacent literals keep the prompt text independent of source indentation.
    return (
        "你是一名专业的心血管健康顾问,拥有丰富的医学知识和临床经验。你的任务是:\n"
        "\n"
        "1. **专业准确**:基于最新的医学研究和临床指南提供准确信息\n"
        "2. **通俗易懂**:用通俗易懂的语言解释医学术语和概念\n"
        "3. **个性化建议**:根据用户的具体情况提供个性化建议\n"
        "4. **预防为主**:强调心血管疾病的预防和早期干预\n"
        "5. **安全提醒**:明确指出哪些情况需要立即就医\n"
        "\n"
        "请保持回答的专业性、准确性和实用性,同时要富有同理心和耐心。"
    )
|
|||
|
|
|
|||
|
|
# Characters treated as sentence ends when long text has to be split:
# CJK full stop, full-width and ASCII exclamation/question marks, ASCII period.
_SENTENCE_ENDINGS = frozenset('。!?.!?')


def _split_text_for_tts(text, max_length, min_length=100):
    """Split text into pieces of at most max_length characters, preferring sentence ends."""
    segments = []
    start = 0
    for index, char in enumerate(text):
        length = index - start + 1
        at_sentence_end = char in _SENTENCE_ENDINGS and length > min_length
        # Hard cut at max_length so text without punctuation cannot yield
        # an oversized segment.
        if at_sentence_end or length >= max_length:
            segments.append(text[start:index + 1])
            start = index + 1
    if start < len(text):
        segments.append(text[start:])
    return segments


def _synthesize_segment(segment):
    """Synthesize one piece of text with a fresh synthesizer and return its MP3 bytes."""
    # A new instance per call: the DashScope SDK documentation says a
    # SpeechSynthesizer must be re-created before each call().
    # Model cosyvoice-v2, voice longxiaochun_v2, MP3 output.
    synthesizer = SpeechSynthesizer(
        model="cosyvoice-v2",
        voice="longxiaochun_v2",
        format=AudioFormat.MP3_22050HZ_MONO_256KBPS,
        speech_rate=1.0,
        pitch_rate=1.0,
        volume=50,
    )
    audio = synthesizer.call(segment)
    if not audio:
        raise RuntimeError("语音合成未返回音频数据")
    return audio


def synthesize_speech(text):
    """Convert text to speech and return the audio as a base64 string.

    Text longer than 2000 characters is split into segments that are
    synthesized one after another and concatenated.

    Args:
        text: The text to speak.

    Returns:
        str | None: Base64-encoded MP3 data, or None when the text is empty
        or synthesis fails (the error is printed, not raised).
    """
    try:
        if not text or not text.strip():
            raise ValueError("文本内容为空")

        print(f"🔊 开始语音合成,文本长度: {len(text)} 字符")

        # Per-call text limit assumed by the original code.
        # NOTE(review): counted with len(); confirm how the service counts
        # CJK characters against this limit.
        max_text_length = 2000
        needs_split = len(text) > max_text_length
        if needs_split:
            print(f"⚠️ 文本长度超过{max_text_length}字符,将进行分段处理")
            raw_segments = _split_text_for_tts(text, max_text_length)
        else:
            raw_segments = [text]

        # Drop pieces that are only whitespace; they would be empty requests.
        segments = [piece.strip() for piece in raw_segments]
        segments = [piece for piece in segments if piece]

        audio_chunks = []
        for number, segment in enumerate(segments, start=1):
            if needs_split:
                print(f" 合成第 {number}/{len(segments)} 段,长度: {len(segment)} 字符")
            audio_chunks.append(_synthesize_segment(segment))

        audio_data = b"".join(audio_chunks)
        print(f"✅ 语音合成完成,音频大小: {len(audio_data)} 字节")

        return base64.b64encode(audio_data).decode('utf-8')

    except Exception as e:
        # Best effort: the caller falls back to a text-only answer on None.
        print(f"❌ 语音合成失败: {e}")
        traceback.print_exc()
        return None
|
|||
|
|
|
|||
|
|
# Module-level handle to the chat model; assigned by init_app() and lazily
# re-initialized by ask_question() while it is still None.
llm = None
|
|||
|
|
|
|||
|
|
@app.route('/')
def home():
    """Serve the voice assistant page by rendering ``voice_index.html``."""
    return render_template('voice_index.html')
|
|||
|
|
|
|||
|
|
@app.route('/api/health', methods=['GET'])
def health_check():
    """Report service health and configuration state as JSON.

    ``status`` is ``'healthy'`` only when the LLM client exists and a
    DashScope API key is configured; otherwise it is ``'degraded'``.
    """
    config_status = get_config_status()

    llm_ready = llm is not None
    # Truthiness (not `is not None`) so an empty key does not count as ready,
    # matching the check init_app() prints at startup.
    tts_ready = bool(dashscope.api_key)
    overall_healthy = llm_ready and tts_ready

    deepseek_key_set = config_status['deepseek']['api_key_set']
    dashscope_key_set = config_status['dashscope']['api_key_set']
    setup_required = not (deepseek_key_set and dashscope_key_set)

    return jsonify({
        'status': 'healthy' if overall_healthy else 'degraded',
        'service': 'CardioAI Voice Assistant',
        'llm_initialized': llm_ready,
        'dashscope_initialized': tts_ready,
        'config_status': config_status,
        'missing_config': {
            'deepseek': not deepseek_key_set,
            'dashscope': not dashscope_key_set,
        },
        'setup_required': setup_required,
        'setup_instructions': '请配置.env文件中的API密钥' if setup_required else '配置完成',
    })
|
|||
|
|
|
|||
|
|
@app.route('/api/ask', methods=['POST'])
def ask_question():
    """Answer a user question with text and, when possible, synthesized speech.

    Accepts the question as the ``question`` field of a JSON body or of form
    data.

    Returns:
        A JSON response:
        - 400 when no usable question was supplied;
        - 503 when the language model cannot be initialized;
        - 500 on unexpected errors;
        - otherwise 200 with ``text_answer`` and ``audio_base64``
          (``audio_base64`` is None when speech synthesis failed).
    """
    global llm

    try:
        if request.is_json:
            # silent=True: malformed JSON yields None instead of raising, so
            # it is reported as a 400 below rather than turning into a 500.
            payload = request.get_json(silent=True)
            raw_question = payload.get('question', '') if isinstance(payload, dict) else ''
        else:
            raw_question = request.form.get('question', '')

        # A non-string value (number, list, null) is treated as "no question".
        question = raw_question.strip() if isinstance(raw_question, str) else ''

        if not question:
            return jsonify({
                'status': 'error',
                'message': '请提供问题内容'
            }), 400

        print(f"🤔 用户提问: {question[:100]}...")

        # Lazily retry initialization in case startup ran without a valid key.
        if llm is None:
            print("⚠️ LLM未初始化,尝试重新初始化")
            llm = initialize_llm()
            if llm is None:
                return jsonify({
                    'status': 'error',
                    'message': '语言模型未初始化,请检查配置'
                }), 503

        messages = [
            {"role": "system", "content": get_system_prompt()},
            {"role": "user", "content": question},
        ]

        print("🧠 正在生成回答...")
        response = llm.invoke(messages)
        text_answer = response.content if hasattr(response, 'content') else str(response)

        print(f"✅ 回答生成完成,长度: {len(text_answer)} 字符")

        audio_base64 = synthesize_speech(text_answer)

        if audio_base64 is None:
            # Speech is optional: still deliver the text answer.
            print("⚠️ 语音合成失败,仅返回文本回答")
            return jsonify({
                'status': 'success',
                'text_answer': text_answer,
                'audio_base64': None,
                'message': '语音合成失败,仅返回文本回答'
            })

        return jsonify({
            'status': 'success',
            'text_answer': text_answer,
            'audio_base64': audio_base64,
            'audio_format': 'mp3',
            'audio_sample_rate': '22050Hz'
        })

    except Exception as e:
        # Request boundary: log the failure and answer with a JSON 500.
        print(f"❌ 处理问题时出错: {e}")
        traceback.print_exc()
        return jsonify({
            'status': 'error',
            'message': f'处理问题时出错: {str(e)}'
        }), 500
|
|||
|
|
|
|||
|
|
def init_app():
    """Load configuration, create the LLM client and configure speech synthesis.

    Failures do not abort startup: they are printed and the service runs in
    a degraded state. The resulting LLM client (or None) is stored in the
    module-level ``llm``.
    """
    global llm

    banner = "=" * 60
    print(banner)
    print("🎤 CardioAI - 心血管健康语音助手")
    print(banner)

    if not load_environment_variables():
        print("⚠️ 环境变量加载失败,某些功能可能无法使用")

    llm = initialize_llm()
    initialize_tts()

    print("\n📡 API端点:")
    print(" GET / - 语音助手界面")
    print(" GET /api/health - 健康检查")
    print(" POST /api/ask - 提问并获取语音回答")

    print(f"\n🧠 LLM状态: {'已初始化' if llm is not None else '未初始化'}")
    print(f"🔊 语音合成: {'已初始化' if dashscope.api_key else '未初始化'}")
|
|||
|
|
|
|||
|
|
# Initialize on import as well as on direct execution, so a WSGI server that
# imports this module gets a configured app (both original branches did this).
init_app()

if __name__ == '__main__':
    # The server listens on all interfaces, so the interactive debugger must
    # not be on by default: it allows arbitrary code execution for anyone who
    # can reach the port. Opt in explicitly with FLASK_DEBUG=1 during
    # local development.
    debug_enabled = os.getenv('FLASK_DEBUG', '').strip().lower() in ('1', 'true', 'yes', 'on')

    print("\n🌍 启动服务器: http://127.0.0.1:5002")
    print(" 按 Ctrl+C 停止\n")

    app.run(
        host='0.0.0.0',
        port=5002,
        debug=debug_enabled,
        threaded=True,
    )
|