Files
sad_test01/aicodes/module3_voice_assistant/voice_assistant_app.py
anthony-s526 ffc4192ff0 Add CardioAI project with three modules
- Module 1: Dashboard for cardiovascular disease data visualization
- Module 2: Machine learning predictor with Flask API
- Module 3: Voice assistant with DeepSeek and CosyVoice integration
- Add .gitignore for proper file exclusion
- Update requirements and documentation

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-26 21:26:10 +08:00

333 lines
11 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
"""
CardioAI - 语音助手模块
基于Deepseek和CosyVoice的心血管健康问答语音助手
"""
import os
import base64
from flask import Flask, request, jsonify, render_template
from langchain_openai import ChatOpenAI
from dotenv import load_dotenv
import dashscope
from dashscope.audio.tts_v2 import SpeechSynthesizer, AudioFormat, ResultCallback
import json
import traceback
# Flask application object; templates are served from the "templates" folder.
app = Flask(
    __name__,
    template_folder='templates',
)
# Location of the .env file. The ENV_PATH environment variable takes
# precedence. Otherwise default to the .env file in the project root, derived
# from this module's location instead of a hard-coded, machine-specific
# absolute path.
# NOTE(review): assumes the layout <root>/aicodes/module3_voice_assistant/,
# i.e. the project root is two directories above this module's directory.
_DEFAULT_ENV_PATH = os.path.join(
    os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),
    '.env',
)
ENV_PATH = os.getenv('ENV_PATH', _DEFAULT_ENV_PATH)
def load_environment_variables():
    """Load the .env configuration and verify that the required keys exist.

    The file at ENV_PATH is loaded when it exists; otherwise load_dotenv()
    is called without a path. Afterwards DEEPSEEK_API_KEY1 and
    DASHSCOPE_API_KEY must both be non-empty in the environment.

    Returns:
        bool: True when both required variables are set, False when one is
        missing or any exception occurred while loading.
    """
    try:
        if not os.path.exists(ENV_PATH):
            print(f"⚠️ 环境变量文件不存在: {ENV_PATH},尝试从默认位置加载")
            load_dotenv()  # no explicit path given
        else:
            print(f"📋 从 {ENV_PATH} 加载环境变量")
            load_dotenv(dotenv_path=ENV_PATH)

        # Collect every required variable that is unset or empty.
        absent = []
        for name in ('DEEPSEEK_API_KEY1', 'DASHSCOPE_API_KEY'):
            if not os.getenv(name):
                absent.append(name)

        if absent:
            print(f"❌ 缺少必要的环境变量: {absent}")
            print("⚠️ 请在环境变量文件中设置以下变量:")
            print(" - DEEPSEEK_API_KEY1: DeepSeek API密钥")
            print(" - DASHSCOPE_API_KEY: DashScope (阿里云) API密钥")
            print(" - base_url1: DeepSeek API基础URL (可选,默认: https://api.deepseek.com/v1)")
            return False

        deepseek_state = '已设置' if os.getenv('DEEPSEEK_API_KEY1') else '未设置'
        dashscope_state = '已设置' if os.getenv('DASHSCOPE_API_KEY') else '未设置'
        base_url_shown = os.getenv('base_url1', '默认: https://api.deepseek.com/v1')
        print("✅ 环境变量加载成功")
        print(f" DeepSeek API密钥: {deepseek_state}")
        print(f" DashScope API密钥: {dashscope_state}")
        print(f" DeepSeek基础URL: {base_url_shown}")
        return True
    except Exception as err:
        print(f"❌ 加载环境变量时出错: {err}")
        traceback.print_exc()
        return False
def initialize_llm():
    """Build the DeepSeek chat client through the OpenAI-compatible wrapper.

    Reads DEEPSEEK_API_KEY1 (required) and base_url1 (optional, defaults to
    https://api.deepseek.com/v1) from the environment.

    Returns:
        The ChatOpenAI instance, or None when the key is missing or the
        client could not be constructed (the error is printed).
    """
    try:
        api_key = os.getenv('DEEPSEEK_API_KEY1')
        if not api_key:
            raise ValueError("DEEPSEEK_API_KEY1环境变量未设置")

        client_options = {
            'base_url': os.getenv('base_url1', 'https://api.deepseek.com/v1'),
            'api_key': api_key,
            'model': "deepseek-chat",
            'temperature': 0.7,
            'max_tokens': 1000,
        }
        client = ChatOpenAI(**client_options)
        print("✅ DeepSeek LLM初始化成功")
        return client
    except Exception as err:
        print(f"❌ 初始化DeepSeek LLM时出错: {err}")
        traceback.print_exc()
        return None
def initialize_tts():
    """Configure the DashScope SDK with the API key from the environment.

    Reads DASHSCOPE_API_KEY and stores it on ``dashscope.api_key``. A missing
    key or any other failure is printed and swallowed; nothing is returned.
    """
    try:
        key = os.getenv('DASHSCOPE_API_KEY')
        if key:
            dashscope.api_key = key
            print("✅ CosyVoice语音合成初始化成功")
        else:
            raise ValueError("DASHSCOPE_API_KEY环境变量未设置")
    except Exception as err:
        print(f"❌ 初始化语音合成时出错: {err}")
        traceback.print_exc()
def get_config_status():
    """Summarize which configuration values are currently available.

    Returns:
        dict: per-provider flags ('deepseek', 'dashscope') describing whether
        the API key (and, for DeepSeek, the base URL) is set, plus
        'env_file_exists' telling whether ENV_PATH points to an existing file.
    """
    has_deepseek_key = bool(os.getenv('DEEPSEEK_API_KEY1'))
    has_dashscope_key = bool(os.getenv('DASHSCOPE_API_KEY'))

    deepseek_section = {
        'api_key_set': has_deepseek_key,
        'base_url_set': bool(os.getenv('base_url1')),
        'status': 'configured' if has_deepseek_key else 'missing_api_key',
    }
    dashscope_section = {
        'api_key_set': has_dashscope_key,
        'status': 'configured' if has_dashscope_key else 'missing_api_key',
    }
    return {
        'deepseek': deepseek_section,
        'dashscope': dashscope_section,
        'env_file_exists': os.path.exists(ENV_PATH),
    }
def get_system_prompt():
    """Return the system prompt that casts the model as a cardiovascular health advisor."""
    prompt_lines = (
        "你是一名专业的心血管健康顾问,拥有丰富的医学知识和临床经验。你的任务是:",
        "1. **专业准确**:基于最新的医学研究和临床指南提供准确信息",
        "2. **通俗易懂**:用通俗易懂的语言解释医学术语和概念",
        "3. **个性化建议**:根据用户的具体情况提供个性化建议",
        "4. **预防为主**:强调心血管疾病的预防和早期干预",
        "5. **安全提醒**:明确指出哪些情况需要立即就医",
        "请保持回答的专业性、准确性和实用性,同时要富有同理心和耐心。",
    )
    # One prompt line per tuple entry, joined without a trailing newline.
    return "\n".join(prompt_lines)
# Characters treated as sentence ends when long text has to be split
# (CJK full stop / exclamation / question mark plus their ASCII forms).
_SENTENCE_ENDINGS = frozenset('。!?.!?')


def _split_text_for_tts(text, max_length, min_length=100):
    """Split text into non-empty, stripped segments of at most max_length characters.

    A segment is closed at the first sentence terminator seen after it has
    grown past min_length characters. A run without any terminator is cut
    at max_length so no request can exceed the limit.
    """
    segments = []
    current = []
    for char in text:
        current.append(char)
        at_sentence_end = char in _SENTENCE_ENDINGS and len(current) > min_length
        if at_sentence_end or len(current) >= max_length:
            segments.append(''.join(current))
            current = []
    if current:
        segments.append(''.join(current))
    # Drop whitespace-only pieces so no empty request is sent.
    return [piece.strip() for piece in segments if piece.strip()]


def _synthesize_segment(segment):
    """Synthesize one piece of text and return its audio bytes.

    A new SpeechSynthesizer is created for every request.
    NOTE(review): the DashScope SDK documentation is understood to require
    re-initialising the synthesizer before each call() - confirm.
    """
    synthesizer = SpeechSynthesizer(
        model="cosyvoice-v2",
        voice="longxiaochun_v2",
        format=AudioFormat.MP3_22050HZ_MONO_256KBPS,
        speech_rate=1.0,
        pitch_rate=1.0,
        volume=50
    )
    audio = synthesizer.call(segment)
    if not audio:
        # Fail with a clear message instead of a TypeError further down.
        raise RuntimeError("语音合成未返回音频数据")
    return audio


def synthesize_speech(text):
    """Convert text to speech and return the audio as a base64 string.

    Text longer than 2000 characters is split at sentence boundaries and the
    per-segment audio is concatenated.

    Args:
        text: The text to speak; must contain non-whitespace characters.

    Returns:
        str | None: base64-encoded MP3 data, or None when the input is empty
        or synthesis fails (the error is printed, never raised).
    """
    try:
        if not text or not text.strip():
            raise ValueError("文本内容为空")
        print(f"🔊 开始语音合成,文本长度: {len(text)} 字符")

        # Per-request length limit assumed by this module for CosyVoice.
        max_text_length = 2000
        if len(text) > max_text_length:
            print(f"⚠️ 文本长度超过{max_text_length}字符,将进行分段处理")
            segments = _split_text_for_tts(text, max_text_length)
            chunks = []
            for index, segment in enumerate(segments, start=1):
                print(f" 合成第 {index}/{len(segments)} 段,长度: {len(segment)} 字符")
                chunks.append(_synthesize_segment(segment))
            # Join once instead of repeated bytes concatenation.
            audio_data = b"".join(chunks)
        else:
            audio_data = _synthesize_segment(text.strip())

        print(f"✅ 语音合成完成,音频大小: {len(audio_data)} 字节")
        return base64.b64encode(audio_data).decode('utf-8')
    except Exception as e:
        print(f"❌ 语音合成失败: {e}")
        traceback.print_exc()
        return None
# Module-level LLM client; None until initialize_llm() returns a client
# (assigned in init_app() and, lazily, in ask_question()).
llm = None
@app.route('/')
def home():
    """Serve the voice assistant's main page (voice_index.html)."""
    page = render_template('voice_index.html')
    return page
@app.route('/api/health', methods=['GET'])
def health_check():
    """Health-check endpoint reporting initialization and configuration state.

    The service is 'healthy' only when the LLM client exists and a DashScope
    API key has been set on the SDK; otherwise it is 'degraded'.
    """
    config_status = get_config_status()

    llm_ready = llm is not None
    tts_ready = dashscope.api_key is not None

    deepseek_missing = not config_status['deepseek']['api_key_set']
    dashscope_missing = not config_status['dashscope']['api_key_set']
    setup_required = deepseek_missing or dashscope_missing

    if llm_ready and tts_ready:
        overall_status = 'healthy'
    else:
        overall_status = 'degraded'

    if setup_required:
        instructions = '请配置.env文件中的API密钥'
    else:
        instructions = '配置完成'

    payload = {
        'status': overall_status,
        'service': 'CardioAI Voice Assistant',
        'llm_initialized': llm_ready,
        'dashscope_initialized': tts_ready,
        'config_status': config_status,
        'missing_config': {
            'deepseek': deepseek_missing,
            'dashscope': dashscope_missing,
        },
        'setup_required': setup_required,
        'setup_instructions': instructions,
    }
    return jsonify(payload)
@app.route('/api/ask', methods=['POST'])
def ask_question():
    """Q&A endpoint: answer the user's question as text plus synthesized speech.

    Accepts the question either as JSON ({"question": "..."}) or as a form
    field named "question".

    Returns:
        A JSON response:
        - 400 when no usable question was supplied (missing, empty,
          non-string, or an unparsable / non-object JSON body);
        - 503 when the language model cannot be initialized;
        - 200 with 'text_answer' and 'audio_base64' (None if speech
          synthesis failed);
        - 500 for any unexpected error.
    """
    global llm
    try:
        if request.is_json:
            # silent=True returns None for malformed JSON instead of raising,
            # so a bad body becomes a 400 below rather than a 500 from the
            # broad except at the bottom.
            data = request.get_json(silent=True)
            question = data.get('question', '') if isinstance(data, dict) else ''
        else:
            question = request.form.get('question', '')

        # Anything that is not a string is treated as "no question given".
        question = question.strip() if isinstance(question, str) else ''
        if not question:
            return jsonify({
                'status': 'error',
                'message': '请提供问题内容'
            }), 400
        print(f"🤔 用户提问: {question[:100]}...")

        # Initialize the LLM lazily if start-up initialization failed.
        if llm is None:
            print("⚠️ LLM未初始化尝试重新初始化")
            llm = initialize_llm()
            if llm is None:
                return jsonify({
                    'status': 'error',
                    'message': '语言模型未初始化,请检查配置'
                }), 503

        messages = [
            {"role": "system", "content": get_system_prompt()},
            {"role": "user", "content": question}
        ]

        print("🧠 正在生成回答...")
        response = llm.invoke(messages)
        text_answer = response.content if hasattr(response, 'content') else str(response)
        print(f"✅ 回答生成完成,长度: {len(text_answer)} 字符")

        # Speech is best-effort: fall back to a text-only answer on failure.
        audio_base64 = synthesize_speech(text_answer)
        if audio_base64 is None:
            print("⚠️ 语音合成失败,仅返回文本回答")
            return jsonify({
                'status': 'success',
                'text_answer': text_answer,
                'audio_base64': None,
                'message': '语音合成失败,仅返回文本回答'
            })

        return jsonify({
            'status': 'success',
            'text_answer': text_answer,
            'audio_base64': audio_base64,
            'audio_format': 'mp3',
            'audio_sample_rate': '22050Hz'
        })
    except Exception as e:
        print(f"❌ 处理问题时出错: {e}")
        traceback.print_exc()
        return jsonify({
            'status': 'error',
            'message': f'处理问题时出错: {str(e)}'
        }), 500
def init_app():
    """Run start-up initialization and print a status summary.

    Loads the environment variables, creates the module-level LLM client,
    configures the DashScope SDK, then prints the available endpoints and
    whether each component was initialized.
    """
    global llm

    banner = "=" * 60
    print(banner)
    print("🎤 CardioAI - 心血管健康语音助手")
    print(banner)

    env_ok = load_environment_variables()
    if not env_ok:
        print("⚠️ 环境变量加载失败,某些功能可能无法使用")

    llm = initialize_llm()
    initialize_tts()

    print("\n📡 API端点:")
    endpoint_lines = (
        " GET / - 语音助手界面",
        " GET /api/health - 健康检查",
        " POST /api/ask - 提问并获取语音回答",
    )
    for line in endpoint_lines:
        print(line)

    llm_state = '已初始化' if llm is not None else '未初始化'
    tts_state = '已初始化' if dashscope.api_key else '未初始化'
    print(f"\n🧠 LLM状态: {llm_state}")
    print(f"🔊 语音合成: {tts_state}")
if __name__ == '__main__':
    # Script entry point: initialize, then start Flask's built-in server.
    init_app()

    # The server listens on all interfaces (0.0.0.0). Flask's debug mode
    # enables the interactive Werkzeug debugger, which can execute arbitrary
    # code, so it must not be on unconditionally. Opt in explicitly with
    # FLASK_DEBUG=1 during local development.
    debug_enabled = os.getenv('FLASK_DEBUG', '').strip().lower() in ('1', 'true', 'yes', 'on')

    print(f"\n🌍 启动服务器: http://127.0.0.1:5002")
    print(" 按 Ctrl+C 停止\n")
    app.run(
        host='0.0.0.0',
        port=5002,
        debug=debug_enabled,
        threaded=True
    )
else:
    # Imported by a WSGI server: initialize at import time.
    init_app()