diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..be1c5c4 --- /dev/null +++ b/.gitignore @@ -0,0 +1,62 @@ +# OS generated files +.DS_Store +.DS_Store? +._* +.Spotlight-V100 +.Trashes +ehthumbs.db +Thumbs.db + +# IDE and editor files +.idea/ +.vscode/ +*.swp +*.swo +*~ + +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Virtual environment +cardioenv/ + +# Logs +*.log + +# Data files +*.csv +*.xlsx +*.pkl +*.json +*.db +*.sqlite + +# Environment variables +.env +.env.local +.env.development.local +.env.test.local +.env.production.local + +# Temporary files +tmp/ +temp/ + +# Jupyter Notebook +.ipynb_checkpoints + +# Coverage +.coverage +htmlcov/ + +# PyTest +.pytest_cache/ \ No newline at end of file diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..ab1f416 --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,10 @@ +# Default ignored files +/shelf/ +/workspace.xml +# Ignored default folder with query files +/queries/ +# Datasource local storage ignored files +/dataSources/ +/dataSources.local.xml +# Editor-based HTTP Client requests +/httpRequests/ diff --git a/aicodes/.idea/.gitignore b/aicodes/.idea/.gitignore new file mode 100644 index 0000000..ab1f416 --- /dev/null +++ b/aicodes/.idea/.gitignore @@ -0,0 +1,10 @@ +# Default ignored files +/shelf/ +/workspace.xml +# Ignored default folder with query files +/queries/ +# Datasource local storage ignored files +/dataSources/ +/dataSources.local.xml +# Editor-based HTTP Client requests +/httpRequests/ diff --git a/aicodes/.idea/aicodes.iml b/aicodes/.idea/aicodes.iml new file mode 100644 index 0000000..d2507f4 --- /dev/null +++ b/aicodes/.idea/aicodes.iml @@ -0,0 +1,19 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/aicodes/.idea/inspectionProfiles/Project_Default.xml b/aicodes/.idea/inspectionProfiles/Project_Default.xml new file mode 100644 index 0000000..710d32f --- /dev/null +++ 
b/aicodes/.idea/inspectionProfiles/Project_Default.xml @@ -0,0 +1,12 @@ + + + + \ No newline at end of file diff --git a/aicodes/.idea/inspectionProfiles/profiles_settings.xml b/aicodes/.idea/inspectionProfiles/profiles_settings.xml new file mode 100644 index 0000000..105ce2d --- /dev/null +++ b/aicodes/.idea/inspectionProfiles/profiles_settings.xml @@ -0,0 +1,6 @@ + + + + \ No newline at end of file diff --git a/aicodes/.idea/misc.xml b/aicodes/.idea/misc.xml new file mode 100644 index 0000000..5840c03 --- /dev/null +++ b/aicodes/.idea/misc.xml @@ -0,0 +1,7 @@ + + + + + + \ No newline at end of file diff --git a/aicodes/.idea/modules.xml b/aicodes/.idea/modules.xml new file mode 100644 index 0000000..d1cbc82 --- /dev/null +++ b/aicodes/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/aicodes/.idea/vcs.xml b/aicodes/.idea/vcs.xml new file mode 100644 index 0000000..6c0b863 --- /dev/null +++ b/aicodes/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/aicodes/llm_streaming.py b/aicodes/llm_streaming.py new file mode 100644 index 0000000..51a5d37 --- /dev/null +++ b/aicodes/llm_streaming.py @@ -0,0 +1,47 @@ +import os +from langchain_openai import ChatOpenAI +from dotenv import load_dotenv + +# 加载环境变量 +load_dotenv() + + +def get_llm(): + """ + 初始化并返回 DeepSeek LLM 实例 + """ + # 注意:这里的 ChatOpenAI 实际上是兼容 OpenAI 接口的类,用于连接 DeepSeek API + llm = ChatOpenAI( + base_url=os.getenv("base_url1"), + api_key=os.getenv("DEEPSEEK_API_KEY1"), + model="deepseek-chat", + temperature=0, # 设置为0,确保结果稳定 + ) + return llm + + +# 1. 初始化 LLM +llm = get_llm() + +# 2. 用户级 prompt +user_prompt = "你是谁?请详细介绍一下你自己。" + +print(f"--- 用户提问:{user_prompt} ---") +print("--- LLM 流式回复开始 ---") + +# 3. **核心修改:使用 .stream() 方法进行流式调用** +# .stream() 返回一个迭代器,我们可以遍历它来获取分块的输出。 +response_stream = llm.stream(user_prompt) + +# 4. 
遍历并打印流式输出 +full_response = "" +for chunk in response_stream: + # chunk.content 包含当前流片段的内容 + # end="" 确保打印时不换行,模拟流式输出效果 + print(chunk.content, end="", flush=True) + full_response += chunk.content + +print("\n--- LLM 流式回复结束 ---") + +# 5. 可选:打印完整的回复内容 +# print(f"\n完整的回复内容:{full_response}") \ No newline at end of file diff --git a/aicodes/module1_dashboard/cardio_dashboard.py b/aicodes/module1_dashboard/cardio_dashboard.py new file mode 100644 index 0000000..3238876 --- /dev/null +++ b/aicodes/module1_dashboard/cardio_dashboard.py @@ -0,0 +1,354 @@ +#!/usr/bin/env python3 +""" +CardioAI - Cardiovascular Disease Dashboard +Streamlit application for data cleaning, feature engineering, and interactive visualization. +""" + +import streamlit as st +import pandas as pd +import numpy as np +import plotly.express as px +import plotly.graph_objects as go +from plotly.subplots import make_subplots +import warnings +warnings.filterwarnings('ignore') + +# Set page configuration +st.set_page_config( + page_title="CardioAI - Cardiovascular Disease Dashboard", + page_icon="❤️", + layout="wide", + initial_sidebar_state="expanded" +) + +# Constants +DATA_PATH = "/Users/anthony/ai_lesson/ai_zhangzhongshan/心血管疾病.xlsx" + +@st.cache_data +def load_and_clean_data(): + """ + Load and clean cardiovascular disease data. + Returns cleaned DataFrame. 
+ """ + # Load data + try: + df = pd.read_excel(DATA_PATH, engine='openpyxl') + st.sidebar.success(f"数据加载成功!原始记录数: {len(df):,}") + except Exception as e: + st.error(f"数据加载失败: {e}") + return pd.DataFrame() + + # Standardize column names (handle potential naming variations) + # Common column name variations in cardiovascular datasets + column_mapping = { + 'age(天)': 'age', + 'cholestero1': 'cholesterol', + 'cholesterol': 'cholesterol', + 'gluc': 'gluc', + 'ap_hi': 'ap_hi', + 'ap_lo': 'ap_lo', + 'cardio': 'cardio', + 'gender': 'gender', + 'height': 'height', + 'weight': 'weight', + 'smoke': 'smoke', + 'alco': 'alco', + 'active': 'active' + } + + # Rename columns to standardized names + df_columns = df.columns.tolist() + for col in df_columns: + for key, value in column_mapping.items(): + if key.lower() in col.lower(): + df.rename(columns={col: value}, inplace=True) + break + + # Feature Engineering + + # 1. Convert age (in days) to years (age_years) + if 'age' in df.columns: + df['age_years'] = np.round(df['age'] / 365.25).astype(int) + + # 2. Calculate BMI + if 'height' in df.columns and 'weight' in df.columns: + df['bmi'] = df['weight'] / ((df['height'] / 100) ** 2) + df['bmi'] = df['bmi'].round(2) + + # 3. Handle blood pressure outliers + # Remove records where diastolic (ap_lo) >= systolic (ap_hi) + if 'ap_hi' in df.columns and 'ap_lo' in df.columns: + df = df[df['ap_lo'] < df['ap_hi']] + + # Remove extreme blood pressure values + systolic_mask = (df['ap_hi'] >= 90) & (df['ap_hi'] <= 250) + diastolic_mask = (df['ap_lo'] >= 60) & (df['ap_lo'] <= 150) + df = df[systolic_mask & diastolic_mask] + + # 4. 
Convert cholesterol and glucose to descriptive strings + if 'cholesterol' in df.columns: + cholesterol_map = { + 1: "Normal", + 2: "Above Normal", + 3: "Well Above Normal" + } + df['cholesterol_str'] = df['cholesterol'].map(cholesterol_map) + + if 'gluc' in df.columns: + glucose_map = { + 1: "Normal", + 2: "Above Normal", + 3: "Well Above Normal" + } + df['gluc_str'] = df['gluc'].map(glucose_map) + + # 5. Create BMI categories + if 'bmi' in df.columns: + def categorize_bmi(bmi): + if bmi < 18.5: + return "Underweight" + elif 18.5 <= bmi < 25: + return "Normal" + elif 25 <= bmi < 30: + return "Overweight" + else: + return "Obese" + + df['bmi_category'] = df['bmi'].apply(categorize_bmi) + + return df + +def create_filters(df): + """Create interactive filters in sidebar.""" + st.sidebar.header("📊 数据筛选器") + + # Age range slider + if 'age_years' in df.columns: + min_age = int(df['age_years'].min()) + max_age = int(df['age_years'].max()) + age_range = st.sidebar.slider( + "选择年龄范围 (岁)", + min_value=min_age, + max_value=max_age, + value=(min_age, max_age) + ) + else: + age_range = (0, 100) + + # Gender filter (if exists) + gender_filter = None + if 'gender' in df.columns: + gender_options = df['gender'].unique() + gender_options_sorted = sorted(gender_options) + gender_selected = st.sidebar.multiselect( + "选择性别", + options=gender_options_sorted, + default=gender_options_sorted + ) + gender_filter = gender_selected + + # Cardiovascular disease filter + cardio_filter = None + if 'cardio' in df.columns: + cardio_options = df['cardio'].unique() + cardio_options_sorted = sorted(cardio_options) + cardio_selected = st.sidebar.multiselect( + "心血管疾病状态", + options=cardio_options_sorted, + default=cardio_options_sorted, + help="0: 无疾病, 1: 有疾病" + ) + cardio_filter = cardio_selected + + return age_range, gender_filter, cardio_filter + +def apply_filters(df, age_range, gender_filter, cardio_filter): + """Apply filters to DataFrame.""" + filtered_df = df.copy() + + # Apply age filter + 
if 'age_years' in filtered_df.columns: + filtered_df = filtered_df[ + (filtered_df['age_years'] >= age_range[0]) & + (filtered_df['age_years'] <= age_range[1]) + ] + + # Apply gender filter + if gender_filter is not None and 'gender' in filtered_df.columns: + filtered_df = filtered_df[filtered_df['gender'].isin(gender_filter)] + + # Apply cardio filter + if cardio_filter is not None and 'cardio' in filtered_df.columns: + filtered_df = filtered_df[filtered_df['cardio'].isin(cardio_filter)] + + return filtered_df + +def display_summary_metrics(df): + """Display summary metrics.""" + col1, col2, col3, col4 = st.columns(4) + + with col1: + total_records = len(df) + st.metric("📈 总记录数", f"{total_records:,}") + + with col2: + if 'cardio' in df.columns: + risk_rate = df['cardio'].mean() * 100 + st.metric("❤️ 心血管疾病风险率", f"{risk_rate:.2f}%") + else: + st.metric("❤️ 心血管疾病风险率", "N/A") + + with col3: + if 'age_years' in df.columns: + avg_age = df['age_years'].mean() + st.metric("👥 平均年龄", f"{avg_age:.1f} 岁") + else: + st.metric("👥 平均年龄", "N/A") + + with col4: + if 'bmi' in df.columns: + avg_bmi = df['bmi'].mean() + st.metric("⚖️ 平均BMI", f"{avg_bmi:.1f}") + else: + st.metric("⚖️ 平均BMI", "N/A") + + st.markdown("---") + +def create_visualizations(df): + """Create interactive visualizations.""" + # Create two columns for charts + col1, col2 = st.columns(2) + + with col1: + st.subheader("📊 年龄分布 (按心血管疾病状态)") + if 'age_years' in df.columns and 'cardio' in df.columns: + # Convert cardio to string for better legend + df['cardio_str'] = df['cardio'].apply(lambda x: '有疾病' if x == 1 else '无疾病') + + fig1 = px.histogram( + df, + x='age_years', + color='cardio_str', + nbins=30, + barmode='overlay', + opacity=0.7, + color_discrete_sequence=['#FF6B6B', '#4ECDC4'], + labels={'age_years': '年龄 (岁)', 'cardio_str': '心血管疾病状态'} + ) + fig1.update_layout( + legend_title="疾病状态", + xaxis_title="年龄 (岁)", + yaxis_title="人数", + bargap=0.1 + ) + st.plotly_chart(fig1, use_container_width=True) + else: + 
st.info("年龄或心血管疾病状态数据不可用") + + with col2: + st.subheader("📈 BMI类别与心血管疾病关系") + if 'bmi_category' in df.columns and 'cardio' in df.columns: + # Create cross-tabulation + cross_tab = pd.crosstab(df['bmi_category'], df['cardio'], normalize='index') * 100 + + # Prepare data for stacked bar chart + categories = cross_tab.index.tolist() + no_disease = cross_tab[0].values if 0 in cross_tab.columns else [0] * len(categories) + has_disease = cross_tab[1].values if 1 in cross_tab.columns else [0] * len(categories) + + fig2 = go.Figure(data=[ + go.Bar(name='无疾病', x=categories, y=no_disease, marker_color='#4ECDC4'), + go.Bar(name='有疾病', x=categories, y=has_disease, marker_color='#FF6B6B') + ]) + + fig2.update_layout( + barmode='stack', + xaxis_title="BMI 类别", + yaxis_title="百分比 (%)", + legend_title="疾病状态", + yaxis=dict(range=[0, 100]) + ) + st.plotly_chart(fig2, use_container_width=True) + else: + st.info("BMI类别或心血管疾病状态数据不可用") + + # Additional visualizations + st.markdown("---") + st.subheader("🔍 数据详情") + + # Show filtered data + with st.expander("查看筛选后的数据 (前100行)"): + st.dataframe(df.head(100), use_container_width=True) + + # Data statistics + with st.expander("查看数据统计摘要"): + if not df.empty: + st.write("**数值型变量统计:**") + numeric_cols = df.select_dtypes(include=[np.number]).columns + st.dataframe(df[numeric_cols].describe(), use_container_width=True) + + st.write("**分类变量统计:**") + categorical_cols = df.select_dtypes(include=['object', 'category']).columns + for col in categorical_cols: + if col in df.columns: + st.write(f"**{col}**:") + value_counts = df[col].value_counts() + st.dataframe(value_counts, use_container_width=True) + +def main(): + """Main application function.""" + # Title and description + st.title("❤️ CardioAI - 心血管疾病智能辅助系统") + st.markdown(""" + ### 模块1: 数据可视化仪表板 + 本模块提供心血管疾病数据的交互式探索、清洗和可视化功能。 + """) + + # Load and clean data + with st.spinner("正在加载和清洗数据..."): + df = load_and_clean_data() + + if df.empty: + st.error("无法加载数据。请检查数据文件路径和格式。") + return + + # Display 
dataset information + with st.expander("📋 数据集信息", expanded=False): + st.write(f"**总记录数:** {len(df):,}") + st.write(f"**特征数:** {len(df.columns)}") + st.write("**列名:**", ", ".join(df.columns.tolist())) + + # Show missing values + missing_values = df.isnull().sum() + if missing_values.any(): + st.write("**缺失值:**") + st.dataframe(missing_values[missing_values > 0].rename('缺失数量')) + else: + st.write("**缺失值:** 无") + + # Create filters in sidebar + age_range, gender_filter, cardio_filter = create_filters(df) + + # Apply filters + filtered_df = apply_filters(df, age_range, gender_filter, cardio_filter) + + # Display summary metrics + display_summary_metrics(filtered_df) + + # Create visualizations + create_visualizations(filtered_df) + + # Footer + st.markdown("---") + st.markdown( + """ +
+

CardioAI - 心血管疾病智能辅助系统 | 模块1: 数据可视化仪表板

+

使用 Streamlit 和 Plotly 构建

+
+ """, + unsafe_allow_html=True + ) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/aicodes/module1_dashboard/test_import.py b/aicodes/module1_dashboard/test_import.py new file mode 100644 index 0000000..858216b --- /dev/null +++ b/aicodes/module1_dashboard/test_import.py @@ -0,0 +1,30 @@ +#!/usr/bin/env python3 +"""Test script to verify the dashboard module can be imported.""" + +import sys +import os + +# Add parent directory to path +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +try: + # Try to import the module + import cardio_dashboard + print("✅ cardio_dashboard module imported successfully!") + + # Check if required functions exist + required_functions = ['load_and_clean_data', 'create_filters', 'apply_filters', + 'display_summary_metrics', 'create_visualizations', 'main'] + + for func_name in required_functions: + if hasattr(cardio_dashboard, func_name): + print(f"✅ Function '{func_name}' found") + else: + print(f"❌ Function '{func_name}' not found") + +except ImportError as e: + print(f"❌ Import error: {e}") +except Exception as e: + print(f"❌ Other error: {e}") + +print("\n✅ Syntax check passed!") \ No newline at end of file diff --git a/aicodes/module2_predictor/app.py b/aicodes/module2_predictor/app.py new file mode 100644 index 0000000..2fdcfd7 --- /dev/null +++ b/aicodes/module2_predictor/app.py @@ -0,0 +1,421 @@ +#!/usr/bin/env python3 +""" +CardioAI - Cardiovascular Disease Prediction API +Flask application for real-time cardiovascular disease prediction. 
+""" + +from flask import Flask, request, jsonify, render_template +import pandas as pd +import numpy as np +import joblib +import traceback +from datetime import datetime +import os + +# Initialize Flask app +app = Flask(__name__) + +# Constants +MODEL_PATH = os.path.join(os.path.dirname(__file__), 'cardio_predictor_model.pkl') + +# Global variables for model and metadata +model = None +model_metadata = None +expected_features = None + +# Expected feature names for input validation +EXPECTED_FEATURES = [ + 'age_years', # Age in years + 'gender', # Gender (1: female, 2: male) + 'height', # Height in cm + 'weight', # Weight in kg + 'ap_hi', # Systolic blood pressure + 'ap_lo', # Diastolic blood pressure + 'cholesterol', # Cholesterol level (1: normal, 2: above normal, 3: well above normal) + 'gluc', # Glucose level (1: normal, 2: above normal, 3: well above normal) + 'smoke', # Smoking (0: no, 1: yes) + 'alco', # Alcohol intake (0: no, 1: yes) + 'active' # Physical activity (0: no, 1: yes) +] + +# Feature ranges for validation (min, max) +FEATURE_RANGES = { + 'age_years': (20, 100), + 'gender': (1, 2), + 'height': (100, 250), # cm + 'weight': (30, 200), # kg + 'ap_hi': (90, 250), # systolic blood pressure + 'ap_lo': (60, 150), # diastolic blood pressure + 'cholesterol': (1, 3), + 'gluc': (1, 3), + 'smoke': (0, 1), + 'alco': (0, 1), + 'active': (0, 1), + 'bmi': (10, 50) # BMI range +} + +def load_model(): + """ + Load the trained model pipeline from disk. 
+ """ + global model, model_metadata, expected_features + + try: + if os.path.exists(MODEL_PATH): + print(f"📦 Loading model from: {MODEL_PATH}") + model = joblib.load(MODEL_PATH) + print("✅ Model loaded successfully!") + + # Try to extract expected features from model + try: + # Get feature names from the preprocessor + preprocessor = model.named_steps['preprocessor'] + num_features = preprocessor.transformers_[0][2] # Numeric features + cat_features = preprocessor.transformers_[1][2] # Categorical features + expected_features = list(num_features) + list(cat_features) + print(f"📋 Model expects features: {expected_features}") + except Exception as e: + print(f"⚠️ Could not extract feature names from model: {e}") + expected_features = EXPECTED_FEATURES + + # Load metadata if exists + metadata_path = MODEL_PATH.replace('.pkl', '_metadata.json') + if os.path.exists(metadata_path): + import json + with open(metadata_path, 'r') as f: + model_metadata = json.load(f) + print(f"📋 Model metadata loaded: {model_metadata.get('description', 'N/A')}") + else: + model_metadata = {'description': 'CardioAI Prediction Model'} + + else: + print(f"❌ Model file not found at: {MODEL_PATH}") + print("⚠️ Please run train_and_save.py first to train and save the model.") + model = None + + except Exception as e: + print(f"❌ Error loading model: {e}") + traceback.print_exc() + model = None + +def validate_input_features(input_data): + """ + Validate input features for prediction. 
+ Returns (is_valid, error_message, validated_data) + """ + try: + # Convert input to dictionary if it's not already + if isinstance(input_data, dict): + data = input_data + else: + data = dict(input_data) + + # Check for missing features + missing_features = [feat for feat in EXPECTED_FEATURES if feat not in data] + if missing_features: + return False, f"Missing features: {missing_features}", None + + # Check for extra features + extra_features = [feat for feat in data.keys() if feat not in EXPECTED_FEATURES] + if extra_features: + print(f"⚠️ Warning: Extra features provided: {extra_features}") + + # Validate each feature + validated_data = {} + for feature in EXPECTED_FEATURES: + value = data[feature] + + # Convert to appropriate type + try: + if feature in ['gender', 'cholesterol', 'gluc', 'smoke', 'alco', 'active']: + # These should be integers + validated_data[feature] = int(float(value)) + else: + # These should be floats + validated_data[feature] = float(value) + except (ValueError, TypeError): + return False, f"Invalid value for {feature}: {value} (should be numeric)", None + + # Check value ranges + if feature in FEATURE_RANGES: + min_val, max_val = FEATURE_RANGES[feature] + if not (min_val <= validated_data[feature] <= max_val): + return False, f"{feature} value {validated_data[feature]} out of range ({min_val}-{max_val})", None + + # Calculate BMI if height and weight are available + if 'height' in validated_data and 'weight' in validated_data: + height_m = validated_data['height'] / 100 # convert cm to meters + bmi_value = validated_data['weight'] / (height_m ** 2) + bmi_value = round(bmi_value, 2) + validated_data['bmi'] = bmi_value + + # Validate BMI range + if 'bmi' in FEATURE_RANGES: + min_val, max_val = FEATURE_RANGES['bmi'] + if not (min_val <= bmi_value <= max_val): + return False, f"Calculated BMI value {bmi_value} out of range ({min_val}-{max_val})", None + + return True, "Validation passed", validated_data + + except Exception as e: + return 
False, f"Validation error: {str(e)}", None + +def preprocess_input(input_data): + """ + Convert validated input data to DataFrame for model prediction. + """ + try: + # Create DataFrame with a single row + df = pd.DataFrame([input_data]) + + # Determine which features the model expects + # Use expected_features from loaded model, or fallback to EXPECTED_FEATURES + bmi + model_features = expected_features if expected_features is not None else EXPECTED_FEATURES + ['bmi'] if 'bmi' not in EXPECTED_FEATURES else EXPECTED_FEATURES + + # Ensure all expected features are present in the DataFrame + # Add missing features with default values (should not happen if validation passed) + for feature in model_features: + if feature not in df.columns: + print(f"⚠️ Warning: Feature '{feature}' not in input data, adding default value") + if feature == 'bmi' and 'height' in df.columns and 'weight' in df.columns: + # Calculate BMI if missing but height and weight are available + height_m = df['height'].iloc[0] / 100 + df['bmi'] = df['weight'].iloc[0] / (height_m ** 2) + else: + # Add default value (0 for numeric, 1 for categorical) + if feature in ['gender', 'cholesterol', 'gluc', 'smoke', 'alco', 'active']: + df[feature] = 1 + else: + df[feature] = 0.0 + + # Ensure correct column order as expected by the model + df = df[model_features] + + return df + + except Exception as e: + print(f"❌ Error preprocessing input: {e}") + raise + +@app.route('/') +def home(): + """ + Home page - serve the prediction interface. + """ + return render_template('index.html') + +@app.route('/api/model_info', methods=['GET']) +def model_info(): + """ + API endpoint to get model information. + """ + try: + if model is None: + load_model() + + if model is None: + return jsonify({ + 'status': 'error', + 'message': 'Model not loaded. Please run training script first.' 
+ }), 503 + + info = { + 'status': 'success', + 'model': { + 'description': model_metadata.get('description', 'CardioAI Prediction Model'), + 'version': model_metadata.get('model_version', '1.0'), + 'creation_date': model_metadata.get('creation_date', 'N/A'), + 'features': expected_features if expected_features else EXPECTED_FEATURES, + 'feature_ranges': FEATURE_RANGES + } + } + + return jsonify(info) + + except Exception as e: + return jsonify({ + 'status': 'error', + 'message': f'Error getting model info: {str(e)}' + }), 500 + +@app.route('/predict_cardio', methods=['POST']) +def predict_cardio(): + """ + API endpoint for cardiovascular disease prediction. + Accepts JSON POST request with 11 feature values. + Returns prediction result and probability. + """ + global model + + try: + # Load model if not already loaded + if model is None: + load_model() + + if model is None: + return jsonify({ + 'status': 'error', + 'message': 'Model not available. Please run training script first.' + }), 503 + + # Get input data + if request.is_json: + input_data = request.get_json() + else: + # Try form data + input_data = request.form.to_dict() + + if not input_data: + return jsonify({ + 'status': 'error', + 'message': 'No input data provided. Please provide 11 feature values.' 
+ }), 400 + + # Validate input + is_valid, error_msg, validated_data = validate_input_features(input_data) + + if not is_valid: + return jsonify({ + 'status': 'error', + 'message': error_msg, + 'expected_features': EXPECTED_FEATURES, + 'feature_ranges': FEATURE_RANGES + }), 400 + + # Preprocess input + input_df = preprocess_input(validated_data) + + # Make prediction + try: + # Get probability prediction + probability = model.predict_proba(input_df)[0] + + # Get class prediction (0: no disease, 1: disease) + prediction = model.predict(input_df)[0] + + # Calculate risk level based on probability + risk_probability = float(probability[1]) # Probability of class 1 (disease) + risk_percentage = risk_probability * 100 + + if risk_probability < 0.3: + risk_level = "Low" + recommendation = "Continue healthy lifestyle." + elif risk_probability < 0.7: + risk_level = "Medium" + recommendation = "Consider regular health check-ups." + else: + risk_level = "High" + recommendation = "Consult a healthcare professional." + + # Prepare response + response = { + 'status': 'success', + 'prediction': int(prediction), + 'probability': risk_probability, + 'probability_percentage': risk_percentage, + 'risk_level': risk_level, + 'recommendation': recommendation, + 'input_features': validated_data, + 'timestamp': datetime.now().isoformat() + } + + print(f"✅ Prediction made: {prediction}, Probability: {risk_probability:.4f}") + + return jsonify(response) + + except Exception as e: + print(f"❌ Prediction error: {e}") + traceback.print_exc() + return jsonify({ + 'status': 'error', + 'message': f'Prediction error: {str(e)}' + }), 500 + + except Exception as e: + print(f"❌ API error: {e}") + traceback.print_exc() + return jsonify({ + 'status': 'error', + 'message': f'Internal server error: {str(e)}' + }), 500 + +@app.route('/health', methods=['GET']) +def health_check(): + """ + Health check endpoint. 
+ """ + try: + model_status = 'loaded' if model is not None else 'not_loaded' + + health_info = { + 'status': 'healthy', + 'service': 'CardioAI Prediction API', + 'model_status': model_status, + 'timestamp': datetime.now().isoformat() + } + + return jsonify(health_info) + + except Exception as e: + return jsonify({ + 'status': 'unhealthy', + 'error': str(e) + }), 500 + +@app.errorhandler(404) +def not_found(error): + return jsonify({ + 'status': 'error', + 'message': 'Endpoint not found' + }), 404 + +@app.errorhandler(405) +def method_not_allowed(error): + return jsonify({ + 'status': 'error', + 'message': 'Method not allowed' + }), 405 + +@app.errorhandler(500) +def internal_error(error): + return jsonify({ + 'status': 'error', + 'message': 'Internal server error' + }), 500 + +def init_app(): + """ + Initialize the Flask application. + """ + print("=" * 60) + print("🚀 CardioAI - Cardiovascular Disease Prediction API") + print("=" * 60) + + # Load model at startup + load_model() + + print("\n📡 API Endpoints:") + print(" GET / - Prediction web interface") + print(" GET /api/model_info - Get model information") + print(" POST /predict_cardio - Make prediction") + print(" GET /health - Health check") + + print(f"\n📊 Model status: {'Loaded' if model is not None else 'Not loaded'}") + +if __name__ == '__main__': + # Initialize the app + init_app() + + # Run the Flask app + print(f"\n🌍 Starting server on http://127.0.0.1:8080") + print(" Press Ctrl+C to stop\n") + + app.run( + host='0.0.0.0', + port=8080, + debug=True, + threaded=True + ) +else: + # For WSGI deployment + init_app() \ No newline at end of file diff --git a/aicodes/module2_predictor/templates/index.html b/aicodes/module2_predictor/templates/index.html new file mode 100644 index 0000000..67fd29b --- /dev/null +++ b/aicodes/module2_predictor/templates/index.html @@ -0,0 +1,889 @@ + + + + + + CardioAI - Cardiovascular Disease Risk Assessment + + + + + +
+
+ +

Cardiovascular Disease Risk Assessment System

+

Enter your health information below to assess your risk of cardiovascular disease

+
+ +
+
+ Connected to CardioAI Prediction Engine +
+ +
+
+

Patient Information

+ +
+

Please provide accurate health information for the most reliable risk assessment.

+
+ +
+
+
+ +
+ + years +
+
Enter age between 20-100 years
+
+ +
+ + +
1: Female, 2: Male
+
+
+ +
+
+ +
+ + cm +
+
Height in centimeters (100-250 cm)
+
+ +
+ +
+ + kg +
+
Weight in kilograms (30-200 kg)
+
+
+ +
+
+ +
+ + mmHg +
+
Systolic blood pressure (90-250 mmHg)
+
+ +
+ +
+ + mmHg +
+
Diastolic blood pressure (60-150 mmHg)
+
+
+ +
+
+ + +
1: Normal, 2: Above Normal, 3: Well Above Normal
+
+ +
+ + +
1: Normal, 2: Above Normal, 3: Well Above Normal
+
+
+ +
+
+ + +
0: No, 1: Yes
+
+ +
+ + +
0: No, 1: Yes
+
+
+ +
+ + +
0: No, 1: Yes
+
+ + +
+ +
+
+

Analyzing health data and calculating risk...

+
+ +
+
+ +
+

Risk Assessment Results

+ +
+
+ +
+ +
+ +
+ +
+
0%
+
Risk Probability
+
+ +
+ +
+ +
+

Input Summary

+
+ +
+
+ + +
+
+
+ + +
+ + + + \ No newline at end of file diff --git a/aicodes/module2_predictor/train_and_save.py b/aicodes/module2_predictor/train_and_save.py new file mode 100644 index 0000000..ef75817 --- /dev/null +++ b/aicodes/module2_predictor/train_and_save.py @@ -0,0 +1,348 @@ +#!/usr/bin/env python3 +""" +CardioAI - Model Training and Saving Script +Train XGBoost classifier for cardiovascular disease prediction and save the model. +""" + +import pandas as pd +import numpy as np +import warnings +warnings.filterwarnings('ignore') + +from sklearn.model_selection import train_test_split +from sklearn.preprocessing import StandardScaler, OneHotEncoder +from sklearn.compose import ColumnTransformer +from sklearn.pipeline import Pipeline +from sklearn.metrics import classification_report, confusion_matrix, accuracy_score +import xgboost as xgb +from xgboost import XGBClassifier +import joblib + +# Constants +DATA_PATH = "/Users/anthony/ai_lesson/ai_zhangzhongshan/心血管疾病.xlsx" +MODEL_PATH = "/Users/anthony/PycharmProjects/ sad_test01/aicodes/module2_predictor/cardio_predictor_model.pkl" + +def load_and_preprocess_data(): + """ + Load and preprocess cardiovascular disease data. + Returns cleaned DataFrame with features and target. 
+ """ + print("📊 Step 1: Loading data...") + # Load data + df = pd.read_excel(DATA_PATH, engine='openpyxl') + print(f" Original data shape: {df.shape}") + + # Standardize column names (similar to module1) + column_mapping = { + 'age(天)': 'age', + 'cholestero1': 'cholesterol', + 'cholesterol': 'cholesterol', + 'gluc': 'gluc', + 'ap_hi': 'ap_hi', + 'ap_lo': 'ap_lo', + 'cardio': 'cardio', + 'gender': 'gender', + 'height': 'height', + 'weight': 'weight', + 'smoke': 'smoke', + 'alco': 'alco', + 'active': 'active', + 'id': 'id' + } + + # Rename columns to standardized names + df_columns = df.columns.tolist() + for col in df_columns: + for key, value in column_mapping.items(): + if key.lower() in col.lower(): + df.rename(columns={col: value}, inplace=True) + break + + print("📊 Step 2: Performing feature engineering...") + + # 1. Convert age (in days) to years (age_years) + if 'age' in df.columns: + df['age_years'] = np.round(df['age'] / 365.25).astype(int) + + # 2. Calculate BMI + if 'height' in df.columns and 'weight' in df.columns: + df['bmi'] = df['weight'] / ((df['height'] / 100) ** 2) + df['bmi'] = df['bmi'].round(2) + + # 3. Handle blood pressure outliers + # Remove records where diastolic (ap_lo) >= systolic (ap_hi) + if 'ap_hi' in df.columns and 'ap_lo' in df.columns: + initial_count = len(df) + df = df[df['ap_lo'] < df['ap_hi']] + + # Remove extreme blood pressure values + systolic_mask = (df['ap_hi'] >= 90) & (df['ap_hi'] <= 250) + diastolic_mask = (df['ap_lo'] >= 60) & (df['ap_lo'] <= 150) + df = df[systolic_mask & diastolic_mask] + + print(f" Removed {initial_count - len(df)} blood pressure outliers") + + # 4. Convert cholesterol and glucose to categorical (keep as numeric for model) + # XGBoost can handle numeric categorical features directly + + # 5. 
def prepare_features_target(df):
    """Select the feature matrix and target vector used for training.

    Args:
        df: DataFrame produced by the loading / feature-engineering step.

    Returns:
        A tuple ``(X, y, available_features)``. ``X`` is a copy of the
        selected feature columns, ``y`` is a copy of the ``cardio`` column
        (``None`` when that column is absent) and ``available_features`` is
        the ordered list of column names in ``X``.
    """
    print("📊 Step 3: Preparing features and target...")

    # The 11 baseline features of the cardiovascular dataset.
    feature_cols = [
        'age_years',    # Age in years (converted from days)
        'gender',       # Gender (1: female, 2: male)
        'height',       # Height in cm
        'weight',       # Weight in kg
        'ap_hi',        # Systolic blood pressure
        'ap_lo',        # Diastolic blood pressure
        'cholesterol',  # Cholesterol level (1: normal, 2: above normal, 3: well above normal)
        'gluc',         # Glucose level (1: normal, 2: above normal, 3: well above normal)
        'smoke',        # Smoking (0: no, 1: yes)
        'alco',         # Alcohol intake (0: no, 1: yes)
        'active',       # Physical activity (0: no, 1: yes)
    ]

    # Keep only the columns that are really present, then add the engineered
    # BMI column (used in addition to height and weight) when it exists.
    available_features = [col for col in feature_cols if col in df.columns]
    if 'bmi' in df.columns:
        available_features.append('bmi')

    # Logged after BMI is appended so the message matches what the model gets.
    print(f" Available features: {available_features}")

    # Selecting by name already leaves out 'id' and the raw 'age' (days)
    # column, so no explicit drop is needed and the caller's frame is untouched.
    X = df[available_features].copy()
    y = df['cardio'].copy() if 'cardio' in df.columns else None

    print(f" Features shape: {X.shape}")
    print(f" Target shape: {y.shape if y is not None else 'N/A'}")

    return X, y, available_features


def create_preprocessing_pipeline(feature_cols):
    """Create the ColumnTransformer that scales and encodes the features.

    Args:
        feature_cols: Names of the columns present in the feature matrix.

    Returns:
        An unfitted ``ColumnTransformer`` that standard-scales the numeric
        columns, one-hot encodes the categorical columns and drops the rest.
    """
    print("📊 Step 4: Creating preprocessing pipeline...")

    numeric_candidates = ['age_years', 'height', 'weight', 'ap_hi', 'ap_lo', 'bmi']
    categorical_candidates = ['gender', 'cholesterol', 'gluc', 'smoke', 'alco', 'active']

    # Restrict both groups to the columns that exist in the data.
    numeric_features = [feat for feat in numeric_candidates if feat in feature_cols]
    categorical_features = [feat for feat in categorical_candidates if feat in feature_cols]

    print(f" Numeric features: {numeric_features}")
    print(f" Categorical features: {categorical_features}")

    preprocessor = ColumnTransformer(
        transformers=[
            ('num', StandardScaler(), numeric_features),
            ('cat',
             OneHotEncoder(drop='first', sparse_output=False, handle_unknown='ignore'),
             categorical_features),
        ],
        remainder='drop',  # Drop any columns not listed above
    )

    return preprocessor


def train_model(X, y, preprocessor):
    """Fit and evaluate the preprocessing + XGBoost pipeline.

    Args:
        X: Feature matrix.
        y: Binary target vector (also used to stratify the split).
        preprocessor: Unfitted transformer from ``create_preprocessing_pipeline``.

    Returns:
        The fitted ``Pipeline`` (steps ``'preprocessor'`` and ``'classifier'``).
    """
    print("📊 Step 5: Training XGBoost model...")

    # Hold out 20% of the rows, stratified on the target, for evaluation.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y
    )

    print(f" Training set: {X_train.shape}")
    print(f" Test set: {X_test.shape}")

    pipeline = Pipeline([
        ('preprocessor', preprocessor),
        ('classifier', XGBClassifier(
            n_estimators=100,
            max_depth=5,
            learning_rate=0.1,
            subsample=0.8,
            colsample_bytree=0.8,
            random_state=42,
            eval_metric='logloss',
            # NOTE(review): kept from the original; newer XGBoost releases may
            # report this parameter as unused - confirm against the installed version.
            use_label_encoder=False,
        )),
    ])

    pipeline.fit(X_train, y_train)

    print("📊 Step 6: Evaluating model performance...")

    y_pred = pipeline.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    print(f" Accuracy: {accuracy:.4f}")

    print("\n Classification Report:")
    print(classification_report(y_test, y_pred, target_names=['No Disease', 'Disease']))

    cm = confusion_matrix(y_test, y_pred)
    print(" Confusion Matrix:")
    print(f" [[{cm[0,0]:4d} {cm[0,1]:4d}]")
    print(f" [{cm[1,0]:4d} {cm[1,1]:4d}]]")

    # Feature importance is informational only, so any failure here is
    # reported and must not abort training.
    try:
        fitted_preprocessor = pipeline.named_steps['preprocessor']
        # Each fitted entry is (name, transformer, columns); 'num' is
        # registered first and 'cat' second.
        num_features = fitted_preprocessor.transformers_[0][2]
        cat_transformer = fitted_preprocessor.transformers_[1][1]
        cat_features = fitted_preprocessor.transformers_[1][2]

        feature_names = list(num_features)
        if hasattr(cat_transformer, 'get_feature_names_out'):
            feature_names.extend(cat_transformer.get_feature_names_out(cat_features))
        else:
            # Fallback: use the raw categorical column names.
            feature_names.extend(cat_features)

        importances = pipeline.named_steps['classifier'].feature_importances_

        importance_df = pd.DataFrame({
            'feature': feature_names[:len(importances)],
            'importance': importances,
        }).sort_values('importance', ascending=False)

        print("\n Top 10 Feature Importances:")
        print(importance_df.head(10).to_string(index=False))

    except Exception as e:
        print(f" Could not extract feature importance: {e}")

    return pipeline


def save_model(pipeline, model_path):
    """Persist the fitted pipeline and a JSON metadata file next to it.

    The metadata file is named ``<model path without extension>_metadata.json``.

    Args:
        pipeline: Fitted pipeline returned by ``train_model``.
        model_path: Destination path of the serialized model.
    """
    import json
    import os

    print("📊 Step 7: Saving model...")

    joblib.dump(pipeline, model_path)
    print(f" Model saved to: {model_path}")

    fitted_preprocessor = pipeline.named_steps['preprocessor']
    metadata = {
        'model_version': '1.0',
        'description': 'CardioAI Cardiovascular Disease Prediction Model',
        'features': list(fitted_preprocessor.transformers_[0][2])
                    + list(fitted_preprocessor.transformers_[1][2]),
        'creation_date': pd.Timestamp.now().strftime('%Y-%m-%d %H:%M:%S'),
    }

    # Derive the metadata path from the extension-less model path. A plain
    # str.replace('.pkl', ...) returns the model path unchanged when the file
    # has another extension, which would overwrite the model just written.
    path_root, _ = os.path.splitext(os.fspath(model_path))
    metadata_path = path_root + '_metadata.json'

    with open(metadata_path, 'w', encoding='utf-8') as f:
        json.dump(metadata, f, indent=2)

    print(f" Metadata saved to: {metadata_path}")


def main():
    """Run the full training workflow.

    Returns:
        ``0`` on success, ``1`` when the target column is missing or any
        step raises.
    """
    print("=" * 60)
    print("🚀 CardioAI - Model Training Script")
    print("=" * 60)

    try:
        df = load_and_preprocess_data()

        X, y, feature_cols = prepare_features_target(df)

        if y is None:
            print("❌ Error: Target column 'cardio' not found in data!")
            # Non-zero status so callers and CI see the failure.
            return 1

        preprocessor = create_preprocessing_pipeline(feature_cols)

        pipeline = train_model(X, y, preprocessor)

        save_model(pipeline, MODEL_PATH)

        print("\n✅ Model training completed successfully!")
        print("\n📋 Model Summary:")
        print(" - Model type: XGBoost Classifier")
        print(f" - Features used: {len(feature_cols)}")
        print(f" - Training samples: {len(X)}")
        print(f" - Model saved to: {MODEL_PATH}")

    except Exception as e:
        print(f"\n❌ Error during model training: {e}")
        import traceback
        traceback.print_exc()
        return 1

    return 0


if __name__ == "__main__":
    # SystemExit works even when the interactive-only exit() helper is absent.
    raise SystemExit(main())
+
+ +

心血管健康智能问答与语音交互系统

+
+

这是一个基于DeepSeek大模型和CosyVoice语音合成技术的心血管健康语音助手。您可以提问任何关于心血管健康的问题,系统将提供专业的文字回答并转换为语音播放。

+
+
+ +
+
+
+ DeepSeek LLM: 连接中... +
+
+
+ CosyVoice TTS: 连接中... +
+
+ +
+
+

提问心血管健康问题

+ +
+
+ + +
+ +
+ +
+ + + + +
+
+ + +
+ +
+
+

正在生成专业回答并合成语音,请稍候...

+
+ +
+ +
+

系统提示:回答由AI生成,仅供参考。如有医疗问题,请咨询专业医生。

+
+
+ +
+

回答结果

+ +
+ +
+ +
+

语音播报

+
+ +
+
+ 准备播放... + 00:00 +
+
+ +
+
+ + 专业心血管健康顾问 +
+
+ + 基于DeepSeek大模型 +
+
+ + CosyVoice语音合成 +
+
+ + 实时音频播放 +
+
+ + +
+
+ + +
#!/usr/bin/env python3
"""
CardioAI - voice assistant module.

Cardiovascular-health Q&A voice assistant built on DeepSeek (text answers)
and CosyVoice (speech synthesis), served through Flask.
"""

import os
import base64
from flask import Flask, request, jsonify, render_template
from langchain_openai import ChatOpenAI
from dotenv import load_dotenv
import dashscope
from dashscope.audio.tts_v2 import SpeechSynthesizer, AudioFormat, ResultCallback
import json
import traceback

# Flask application
app = Flask(__name__, template_folder='templates')

# Location of the .env file: taken from the ENV_PATH environment variable,
# otherwise the default below.
# NOTE(review): the default is a machine-specific absolute path and contains a
# space after "PycharmProjects/" - confirm this is intended.
ENV_PATH = os.getenv('ENV_PATH', '/Users/anthony/PycharmProjects/ sad_test01/.env')

# Maximum characters sent in one synthesis request (limit assumed by this module).
_TTS_MAX_TEXT_LENGTH = 2000

# Characters treated as sentence ends, i.e. preferred cut points for long text
# (ASCII and full-width forms).
_SENTENCE_ENDINGS = frozenset('。.!?\uff01\uff1f')


def load_environment_variables():
    """Load the .env file and check that the required API keys are set.

    Returns:
        ``True`` when both required variables are present, ``False`` when one
        is missing or loading raised.
    """
    try:
        if os.path.exists(ENV_PATH):
            print(f"📋 从 {ENV_PATH} 加载环境变量")
            load_dotenv(dotenv_path=ENV_PATH)
        else:
            print(f"⚠️ 环境变量文件不存在: {ENV_PATH},尝试从默认位置加载")
            load_dotenv()  # fall back to python-dotenv's default lookup

        # Verify the required variables
        required_vars = ['DEEPSEEK_API_KEY1', 'DASHSCOPE_API_KEY']
        missing_vars = [var for var in required_vars if not os.getenv(var)]

        if missing_vars:
            print(f"❌ 缺少必要的环境变量: {missing_vars}")
            print("⚠️ 请在环境变量文件中设置以下变量:")
            print(" - DEEPSEEK_API_KEY1: DeepSeek API密钥")
            print(" - DASHSCOPE_API_KEY: DashScope (阿里云) API密钥")
            print(" - base_url1: DeepSeek API基础URL (可选,默认: https://api.deepseek.com/v1)")
            return False
        else:
            print("✅ 环境变量加载成功")
            print(f" DeepSeek API密钥: {'已设置' if os.getenv('DEEPSEEK_API_KEY1') else '未设置'}")
            print(f" DashScope API密钥: {'已设置' if os.getenv('DASHSCOPE_API_KEY') else '未设置'}")
            print(f" DeepSeek基础URL: {os.getenv('base_url1', '默认: https://api.deepseek.com/v1')}")
            return True

    except Exception as e:
        print(f"❌ 加载环境变量时出错: {e}")
        traceback.print_exc()
        return False


def initialize_llm():
    """Create the DeepSeek chat client.

    Returns:
        A ``ChatOpenAI`` instance pointed at the DeepSeek endpoint, or
        ``None`` when the API key is missing or construction fails.
    """
    try:
        # Same variable names as llm_streaming.py
        deepseek_api_key = os.getenv('DEEPSEEK_API_KEY1')
        deepseek_base_url = os.getenv('base_url1', 'https://api.deepseek.com/v1')

        if not deepseek_api_key:
            raise ValueError("DEEPSEEK_API_KEY1环境变量未设置")

        # ChatOpenAI is used as an OpenAI-compatible client for DeepSeek
        llm = ChatOpenAI(
            base_url=deepseek_base_url,
            api_key=deepseek_api_key,
            model="deepseek-chat",
            temperature=0.7,
            max_tokens=1000
        )

        print("✅ DeepSeek LLM初始化成功")
        return llm

    except Exception as e:
        print(f"❌ 初始化DeepSeek LLM时出错: {e}")
        traceback.print_exc()
        return None


def initialize_tts():
    """Configure the DashScope SDK with the API key from the environment.

    Failures are logged and swallowed; ``dashscope.api_key`` is then left
    unchanged.
    """
    try:
        dashscope_api_key = os.getenv('DASHSCOPE_API_KEY')

        if not dashscope_api_key:
            raise ValueError("DASHSCOPE_API_KEY环境变量未设置")

        dashscope.api_key = dashscope_api_key
        print("✅ CosyVoice语音合成初始化成功")

    except Exception as e:
        print(f"❌ 初始化语音合成时出错: {e}")
        traceback.print_exc()


def get_config_status():
    """Return a dict describing which configuration values are present."""
    config_status = {
        'deepseek': {
            'api_key_set': bool(os.getenv('DEEPSEEK_API_KEY1')),
            'base_url_set': bool(os.getenv('base_url1')),
            'status': 'configured' if os.getenv('DEEPSEEK_API_KEY1') else 'missing_api_key'
        },
        'dashscope': {
            'api_key_set': bool(os.getenv('DASHSCOPE_API_KEY')),
            'status': 'configured' if os.getenv('DASHSCOPE_API_KEY') else 'missing_api_key'
        },
        'env_file_exists': os.path.exists(ENV_PATH)
    }
    return config_status


def get_system_prompt():
    """Return the system prompt sent to the LLM with every question."""
    return """你是一名专业的心血管健康顾问,拥有丰富的医学知识和临床经验。你的任务是:

1. **专业准确**:基于最新的医学研究和临床指南提供准确信息
2. **通俗易懂**:用通俗易懂的语言解释医学术语和概念
3. **个性化建议**:根据用户的具体情况提供个性化建议
4. **预防为主**:强调心血管疾病的预防和早期干预
5. **安全提醒**:明确指出哪些情况需要立即就医

请保持回答的专业性、准确性和实用性,同时要富有同理心和耐心。"""


def _split_text_for_tts(text, max_length=_TTS_MAX_TEXT_LENGTH):
    """Split *text* into stripped chunks of at most *max_length* characters.

    Each chunk is cut right after the last sentence-ending character that
    fits in the window; text with no such character is cut at *max_length*.
    """
    chunks = []
    remaining = text.strip()
    while len(remaining) > max_length:
        window = remaining[:max_length]
        cut = max(
            (pos for pos, ch in enumerate(window) if ch in _SENTENCE_ENDINGS),
            default=-1,
        ) + 1
        if cut <= 0:
            cut = max_length  # no sentence boundary in range: hard cut
        chunk = remaining[:cut].strip()
        if chunk:
            chunks.append(chunk)
        remaining = remaining[cut:].strip()
    if remaining:
        chunks.append(remaining)
    return chunks


def _create_synthesizer():
    """Build a CosyVoice synthesizer (cosyvoice-v2, longxiaochun_v2 voice, MP3)."""
    return SpeechSynthesizer(
        model="cosyvoice-v2",
        voice="longxiaochun_v2",
        format=AudioFormat.MP3_22050HZ_MONO_256KBPS,
        speech_rate=1.0,
        pitch_rate=1.0,
        volume=50
    )


def synthesize_speech(text):
    """Synthesize *text* to MP3 audio and return it base64-encoded.

    Text longer than ``_TTS_MAX_TEXT_LENGTH`` is split at sentence
    boundaries; the audio of the chunks is concatenated in order.

    Args:
        text: The text to speak.

    Returns:
        The base64-encoded audio as ``str``, or ``None`` when the text is
        empty or synthesis fails (the error is logged).
    """
    try:
        if not text or not text.strip():
            raise ValueError("文本内容为空")

        print(f"🔊 开始语音合成,文本长度: {len(text)} 字符")

        segments = _split_text_for_tts(text, _TTS_MAX_TEXT_LENGTH)
        multi_part = len(segments) > 1
        if multi_part:
            print(f"⚠️ 文本长度超过{_TTS_MAX_TEXT_LENGTH}字符,将进行分段处理")

        audio_parts = []
        for index, segment in enumerate(segments, start=1):
            if multi_part:
                print(f" 合成第 {index}/{len(segments)} 段,长度: {len(segment)} 字符")
            # The CosyVoice SDK notes kept with this project state that the
            # synthesizer must be re-created before every call(), so a fresh
            # instance is used per chunk.
            segment_audio = _create_synthesizer().call(segment)
            if not segment_audio:
                # Report a clear error instead of failing later on None.
                raise RuntimeError("语音合成服务未返回音频数据")
            audio_parts.append(segment_audio)

        audio_data = b"".join(audio_parts)

        print(f"✅ 语音合成完成,音频大小: {len(audio_data)} 字节")

        return base64.b64encode(audio_data).decode('utf-8')

    except Exception as e:
        print(f"❌ 语音合成失败: {e}")
        traceback.print_exc()
        return None


# Shared LLM client; set by init_app() and lazily re-created by ask_question().
llm = None


@app.route('/')
def home():
    """Serve the voice assistant page."""
    return render_template('voice_index.html')


@app.route('/api/health', methods=['GET'])
def health_check():
    """Report whether the LLM client and the TTS key are ready."""
    config_status = get_config_status()

    llm_ready = llm is not None
    tts_ready = dashscope.api_key is not None
    overall_healthy = llm_ready and tts_ready

    deepseek_missing = not config_status['deepseek']['api_key_set']
    dashscope_missing = not config_status['dashscope']['api_key_set']
    setup_required = deepseek_missing or dashscope_missing

    return jsonify({
        'status': 'healthy' if overall_healthy else 'degraded',
        'service': 'CardioAI Voice Assistant',
        'llm_initialized': llm_ready,
        'dashscope_initialized': tts_ready,
        'config_status': config_status,
        'missing_config': {
            'deepseek': deepseek_missing,
            'dashscope': dashscope_missing
        },
        'setup_required': setup_required,
        'setup_instructions': '请配置.env文件中的API密钥' if setup_required else '配置完成'
    })


@app.route('/api/ask', methods=['POST'])
def ask_question():
    """Answer a question with text and, when possible, synthesized speech.

    Accepts the question as JSON (``{"question": ...}``) or as a form field.

    Returns:
        JSON with ``text_answer`` and ``audio_base64`` on success; HTTP 400
        when no usable question was supplied, 503 when the LLM cannot be
        initialized and 500 on unexpected errors.
    """
    global llm

    try:
        if request.is_json:
            # silent=True: a malformed body yields None instead of raising,
            # so bad input is answered with 400 below rather than 500.
            data = request.get_json(silent=True)
            question = data.get('question', '') if isinstance(data, dict) else ''
        else:
            question = request.form.get('question', '')

        # Non-string values (numbers, lists, null) are treated as missing.
        question = question.strip() if isinstance(question, str) else ''

        if not question:
            return jsonify({
                'status': 'error',
                'message': '请提供问题内容'
            }), 400

        print(f"🤔 用户提问: {question[:100]}...")

        # Make sure the LLM client exists
        if llm is None:
            print("⚠️ LLM未初始化,尝试重新初始化")
            llm = initialize_llm()
            if llm is None:
                return jsonify({
                    'status': 'error',
                    'message': '语言模型未初始化,请检查配置'
                }), 503

        messages = [
            {"role": "system", "content": get_system_prompt()},
            {"role": "user", "content": question}
        ]

        print("🧠 正在生成回答...")
        response = llm.invoke(messages)
        text_answer = response.content if hasattr(response, 'content') else str(response)

        print(f"✅ 回答生成完成,长度: {len(text_answer)} 字符")

        audio_base64 = synthesize_speech(text_answer)

        if audio_base64 is None:
            # Speech is optional: still return the text answer.
            print("⚠️ 语音合成失败,仅返回文本回答")
            return jsonify({
                'status': 'success',
                'text_answer': text_answer,
                'audio_base64': None,
                'message': '语音合成失败,仅返回文本回答'
            })

        return jsonify({
            'status': 'success',
            'text_answer': text_answer,
            'audio_base64': audio_base64,
            'audio_format': 'mp3',
            'audio_sample_rate': '22050Hz'
        })

    except Exception as e:
        print(f"❌ 处理问题时出错: {e}")
        traceback.print_exc()
        return jsonify({
            'status': 'error',
            'message': f'处理问题时出错: {str(e)}'
        }), 500


def init_app():
    """Load configuration, create the LLM client and configure TTS."""
    print("=" * 60)
    print("🎤 CardioAI - 心血管健康语音助手")
    print("=" * 60)

    if not load_environment_variables():
        print("⚠️ 环境变量加载失败,某些功能可能无法使用")

    global llm
    llm = initialize_llm()

    initialize_tts()

    print("\n📡 API端点:")
    print(" GET / - 语音助手界面")
    print(" GET /api/health - 健康检查")
    print(" POST /api/ask - 提问并获取语音回答")

    print(f"\n🧠 LLM状态: {'已初始化' if llm is not None else '未初始化'}")
    print(f"🔊 语音合成: {'已初始化' if dashscope.api_key else '未初始化'}")


if __name__ == '__main__':
    init_app()

    print(f"\n🌍 启动服务器: http://127.0.0.1:5002")
    print(" 按 Ctrl+C 停止\n")

    # The server listens on all interfaces, so the interactive debugger is
    # opt-in (FLASK_DEBUG=1) instead of always on.
    debug_enabled = os.getenv('FLASK_DEBUG', '').strip().lower() in ('1', 'true', 'yes', 'on')

    app.run(
        host='0.0.0.0',
        port=5002,
        debug=debug_enabled,
        threaded=True
    )
else:
    # Imported by a WSGI server: initialize at import time.
    init_app()
No newline at end of file diff --git a/aicodes/语音合成CosyVoice.md b/aicodes/语音合成CosyVoice.md new file mode 100644 index 0000000..9a52b33 --- /dev/null +++ b/aicodes/语音合成CosyVoice.md @@ -0,0 +1,274 @@ +# 语音合成CosyVoice Python SDK + +https://help.aliyun.com/zh/model-studio/cosyvoice-python-sdk#915a935d871ak + +```python +dashscope.api_key = "your-api-key" +``` + +### **流式调用** + +在同一个语音合成任务中分多次提交文本,并通过回调的方式实时获取合成结果。 + +**说明** + +- 流式输入时可多次调用`streaming_call`按顺序提交文本片段。服务端接收文本片段后自动进行分句: + + - 完整语句立即合成 + - 不完整语句缓存至完整后合成 + + 调用 `streaming_complete` 时,服务端会强制合成所有已接收但未处理的文本片段(包括未完成的句子)。 + +- 发送文本片段的间隔不得超过23秒,否则触发“request timeout after 23 seconds”异常。 + + 若无待发送文本,需及时调用 `streaming_complete`结束任务。 + + > 服务端强制设定23秒超时机制,客户端无法修改该配置。 + + ![image](https://help-static-aliyun-doc.aliyuncs.com/assets/img/zh-CN/0165103671/CAEQVRiBgMDb7PahrBkiIDVkNjEwOTMxYjEwOTRmOWFhMmI1OTRiY2Q3ZDgzZmE54709861_20241015153444.149.svg) + +1. 实例化SpeechSynthesizer类 + + 实例化[SpeechSynthesizer类](https://help.aliyun.com/zh/model-studio/cosyvoice-python-sdk#d6bc1f133f871)绑定[请求参数](https://help.aliyun.com/zh/model-studio/cosyvoice-python-sdk#2fe363ace1l4k)和[回调接口(ResultCallback)](https://help.aliyun.com/zh/model-studio/cosyvoice-python-sdk#85d698b9f9g8s)。 + +2. 流式传输 + + 多次调用[SpeechSynthesizer类](https://help.aliyun.com/zh/model-studio/cosyvoice-python-sdk#d6bc1f133f871)的`streaming_call`方法分片提交待合成文本,将待合成文本分段发送至服务端。 + + 在发送文本的过程中,服务端会通过[回调接口(ResultCallback)](https://help.aliyun.com/zh/model-studio/cosyvoice-python-sdk#85d698b9f9g8s)的`on_data`方法,将合成结果实时返回给客户端。 + + 每次调用`streaming_call`方法发送的文本片段(即`text`)长度不得超过2000字符,累计发送的文本总长度不得超过20万字符。 + +3. 
结束处理 + + 调用[SpeechSynthesizer类](https://help.aliyun.com/zh/model-studio/cosyvoice-python-sdk#d6bc1f133f871)的`streaming_complete`方法结束语音合成。 + + 该方法会阻塞当前线程,直到[回调接口(ResultCallback)](https://help.aliyun.com/zh/model-studio/cosyvoice-python-sdk#85d698b9f9g8s)的`on_complete`或者`on_error`回调触发后才会释放线程阻塞。 + + 请务必确保调用该方法,否则可能会导致结尾部分的文本无法成功转换为语音。 + +~~~python +# coding=utf-8 +# +# pyaudio安装说明: +# 如果是macOS操作系统,执行如下命令: +# brew install portaudio +# pip install pyaudio +# 如果是Debian/Ubuntu操作系统,执行如下命令: +# sudo apt-get install python-pyaudio python3-pyaudio +# 或者 +# pip install pyaudio +# 如果是CentOS操作系统,执行如下命令: +# sudo yum install -y portaudio portaudio-devel && pip install pyaudio +# 如果是Microsoft Windows,执行如下命令: +# python -m pip install pyaudio + +import time +import pyaudio +import dashscope +from dashscope.api_entities.dashscope_response import SpeechSynthesisResponse +from dashscope.audio.tts_v2 import * + +from datetime import datetime + +def get_timestamp(): + now = datetime.now() + formatted_timestamp = now.strftime("[%Y-%m-%d %H:%M:%S.%f]") + return formatted_timestamp + +# 若没有将API Key配置到环境变量中,需将your-api-key替换为自己的API Key +# dashscope.api_key = "your-api-key" + +# 模型 +model = "cosyvoice-v2" +# 音色 +voice = "longxiaochun_v2" + + +# 定义回调接口 +class Callback(ResultCallback): + _player = None + _stream = None + + def on_open(self): + print("连接建立:" + get_timestamp()) + self._player = pyaudio.PyAudio() + self._stream = self._player.open( + format=pyaudio.paInt16, channels=1, rate=22050, output=True + ) + + def on_complete(self): + print("语音合成完成,所有合成结果已被接收:" + get_timestamp()) + + def on_error(self, message: str): + print(f"语音合成出现异常:{message}") + + def on_close(self): + print("连接关闭:" + get_timestamp()) + # 停止播放器 + self._stream.stop_stream() + self._stream.close() + self._player.terminate() + + def on_event(self, message): + pass + + def on_data(self, data: bytes) -> None: + print(get_timestamp() + " 二进制音频长度为:" + str(len(data))) + self._stream.write(data) + + +callback = Callback() + 
+test_text = [ + "流式文本语音合成SDK,", + "可以将输入的文本", + "合成为语音二进制数据,", + "相比于非流式语音合成,", + "流式合成的优势在于实时性", + "更强。用户在输入文本的同时", + "可以听到接近同步的语音输出,", + "极大地提升了交互体验,", + "减少了用户等待时间。", + "适用于调用大规模", + "语言模型(LLM),以", + "流式输入文本的方式", + "进行语音合成的场景。", +] + +# 实例化SpeechSynthesizer,并在构造方法中传入模型(model)、音色(voice)等请求参数 +synthesizer = SpeechSynthesizer( + model=model, + voice=voice, + format=AudioFormat.PCM_22050HZ_MONO_16BIT, + callback=callback, +) + + +# 流式发送待合成文本。在回调接口的on_data方法中实时获取二进制音频 +for text in test_text: + synthesizer.streaming_call(text) + time.sleep(0.1) +# 结束流式语音合成 +synthesizer.streaming_complete() + +# 首次发送文本时需建立 WebSocket 连接,因此首包延迟会包含连接建立的耗时 +print('[Metric] requestId为:{},首包延迟为:{}毫秒'.format( + synthesizer.get_last_request_id(), + synthesizer.get_first_package_delay())) +~~~ + +## **请求参数** + +请求参数通过[SpeechSynthesizer类](https://help.aliyun.com/zh/model-studio/cosyvoice-python-sdk#d6bc1f133f871)的构造方法进行设置。 + +| | | | | | +| ---- | ---- | ---- | ---- | ---- | +| | | | | | + +| **参数** | **类型** | **默认值** | **是否必须** | **说明** | +| ---------------------- | -------------- | ---------- | ------------ | ------------------------------------------------------------ | +| model | str | - | 是 | 指定模型。不同版本的模型编码方式一致,但使用时须确保模型(`model`)与音色(`voice`)匹配:每个版本的模型只能使用本版本的默认音色或专属音色。 | +| voice | str | - | 是 | 指定语音合成所使用的音色。支持默认音色和专属音色:**默认音色**:参见**音色列表**章节。**专属音色**:通过[声音复刻](https://help.aliyun.com/zh/model-studio/voice-replica-1/)功能定制。使用复刻音色时,请确保声音复刻与语音合成使用同一账号。详细操作步骤请参见[CosyVoice声音复刻API](https://help.aliyun.com/zh/model-studio/cosyvoice-clone-api#da30eeebc4uwk)。**⚠️ 使用声音复刻系列模型合成语音时,仅能使用该模型复刻生成的专属音色,不能使用默认音色。****⚠️ 使用专属音色合成语音时,语音合成模型(**`**model**`**)必须与声音复刻模型(**`**target_model**`**)相同。** | +| format | enum | 因音色而异 | 否 | 
指定音频编码格式及采样率。若未指定`format`,则合成音频采样率为22.05kHz,格式为mp3。**说明**默认采样率代表当前音色的最佳采样率,缺省条件下默认按照该采样率输出,同时支持降采样或升采样。可指定的音频编码格式及采样率如下:所有模型均支持的音频编码格式及采样率:AudioFormat.WAV_8000HZ_MONO_16BIT,代表音频格式为wav,采样率为8kHzAudioFormat.WAV_16000HZ_MONO_16BIT,代表音频格式为wav,采样率为16kHzAudioFormat.WAV_22050HZ_MONO_16BIT,代表音频格式为wav,采样率为22.05kHzAudioFormat.WAV_24000HZ_MONO_16BIT,代表音频格式为wav,采样率为24kHzAudioFormat.WAV_44100HZ_MONO_16BIT,代表音频格式为wav,采样率为44.1kHzAudioFormat.WAV_48000HZ_MONO_16BIT,代表音频格式为wav,采样率为48kHzAudioFormat.MP3_8000HZ_MONO_128KBPS,代表音频格式为mp3,采样率为8kHzAudioFormat.MP3_16000HZ_MONO_128KBPS,代表音频格式为mp3,采样率为16kHzAudioFormat.MP3_22050HZ_MONO_256KBPS,代表音频格式为mp3,采样率为22.05kHzAudioFormat.MP3_24000HZ_MONO_256KBPS,代表音频格式为mp3,采样率为24kHzAudioFormat.MP3_44100HZ_MONO_256KBPS,代表音频格式为mp3,采样率为44.1kHzAudioFormat.MP3_48000HZ_MONO_256KBPS,代表音频格式为mp3,采样率为48kHzAudioFormat.PCM_8000HZ_MONO_16BIT,代表音频格式为pcm,采样率为8kHzAudioFormat.PCM_16000HZ_MONO_16BIT,代表音频格式为pcm,采样率为16kHzAudioFormat.PCM_22050HZ_MONO_16BIT,代表音频格式为pcm,采样率为22.05kHzAudioFormat.PCM_24000HZ_MONO_16BIT,代表音频格式为pcm,采样率为24kHzAudioFormat.PCM_44100HZ_MONO_16BIT,代表音频格式为pcm,采样率为44.1kHzAudioFormat.PCM_48000HZ_MONO_16BIT,代表音频格式为pcm,采样率为48kHz除`cosyvoice-v1`外,其他模型支持的音频编码格式及采样率:音频格式为opus时,支持通过`bit_rate`参数调整码率。仅对1.24.0及之后版本的DashScope适用。AudioFormat.OGG_OPUS_8KHZ_MONO_32KBPS,代表音频格式为opus,采样率为8kHz,码率为32kbpsAudioFormat.OGG_OPUS_16KHZ_MONO_16KBPS,代表音频格式为opus,采样率为16kHz,码率为16kbpsAudioFormat.OGG_OPUS_16KHZ_MONO_32KBPS,代表音频格式为opus,采样率为16kHz,码率为32kbpsAudioFormat.OGG_OPUS_16KHZ_MONO_64KBPS,代表音频格式为opus,采样率为16kHz,码率为64kbpsAudioFormat.OGG_OPUS_24KHZ_MONO_16KBPS,代表音频格式为opus,采样率为24kHz,码率为16kbpsAudioFormat.OGG_OPUS_24KHZ_MONO_32KBPS,代表音频格式为opus,采样率为24kHz,码率为32kbpsAudioFormat.OGG_OPUS_24KHZ_MONO_64KBPS,代表音频格式为opus,采样率为24kHz,码率为64kbpsAudioFormat.OGG_OPUS_48KHZ_MONO_16KBPS,代表音频格式为opus,采样率为48kHz,码率为16kbpsAudioFormat.OGG_OPUS_48KHZ_MONO_32KBPS,代表音频格式为opus,采样率为48kHz,码率为32kbpsAudioFormat.OGG_OPUS_48KHZ_MONO_64KBPS,代表音频格式为opus,采样率为48kHz,码率为64kbps | +| volume | int | 50 | 否 | 
合成音频的音量,取值范围:0~100。**重要**该字段在不同版本的DashScope SDK中有所不同:1.20.10及以后版本的SDK:volume1.20.10以前版本的SDK:volumn | +| speech_rate | float | 1.0 | 否 | 合成音频的语速,取值范围:0.5~2。0.5:表示默认语速的0.5倍速。1:表示默认语速。默认语速是指模型默认输出的合成语速,语速会因音色不同而略有不同。约每秒钟4个字。2:表示默认语速的2倍速。 | +| pitch_rate | float | 1.0 | 否 | 合成音频的语调,取值范围:0.5~2。 | +| bit_rate | int | 32 | 否 | 指定音频的[码率](https://opus-codec.org/),取值范围:6~510kbps。码率越大,音质越好,音频文件体积越大。仅在音频格式(`format`)为opus时可用。`cosyvoice-v1`模型不支持该参数。**说明**`bit_rate`需要通过`additional_params`参数进行设置: `synthesizer = SpeechSynthesizer(model="cosyvoice-v2", voice="longxiaochun_v2", format=AudioFormat.OGG_OPUS_16KHZ_MONO_16KBPS, additional_params={"bit_rate": 32})` | +| word_timestamp_enabled | bool | False | 否 | 是否开启字级别时间戳,默认关闭。仅cosyvoice-v2支持该功能。时间戳结果仅能通过回调接口获取**说明**`word_timestamp_enabled`需要通过`additional_params`参数进行设置: `synthesizer = SpeechSynthesizer(model="cosyvoice-v2", voice="longxiaochun_v2", callback=callback, # 时间戳结果仅能通过回调接口获取 additional_params={'word_timestamp_enabled': True})`**点击查看完整示例代码** | +| seed | int | 0 | 否 | 生成时使用的随机数种子,使合成的效果产生变化。默认值0。取值范围:0~65535。cosyvoice-v1不支持该功能。 | +| language_hints | list[str] | - | 否 | 合成文本语言提示,可选值为 `zh`(中文)或 `en`(英文),列表中仅第一个语言生效。仅cosyvoice-v3、cosyvoice-v3-plus支持该功能。此设置会影响阿拉伯数字等内容的读法。例如,当合成“123”时,若设置为`zh`,则读作“一百二十三”;而`en`则会读作“one hundred and twenty-three”。如果不设置,系统会根据文本内容自动判断并应用相应的合成规则。 | +| instruction | String | - | 否 | 设置提示词。仅cosyvoice-v3、cosyvoice-v3-plus支持该功能。目前仅支持设置情感。格式:“`你说话的情感是<情感值>。`”(注意,结尾一定不要遗漏句号,使用时将“`<情感值>`”替换为具体的情感值,例如替换为`neutral`)。示例:“`你说话的情感是neutral。`”支持的情感值:`neutral`、`fearful`、`angry`、`sad`、`surprised`、`happy`、`disgusted`。 | +| callback | ResultCallback | - | 否 | [回调接口(ResultCallback)](https://help.aliyun.com/zh/model-studio/cosyvoice-python-sdk#85d698b9f9g8s). 
| + +## **关键接口** + +### `SpeechSynthesizer`类 + +`SpeechSynthesizer`通过“`from dashscope.audio.tts_v2 import *`”方式引入,提供语音合成的关键接口。 + +| | | | | +| ---- | ---- | ---- | ---- | +| | | | | + +| **方法** | **参数** | **返回值** | **描述** | +| ------------------------------------------------------------ | ------------------------------------------------------------ | ---------------------------------------------------------- | ------------------------------------------------------------ | +| `def call(self, text: str, timeout_millis=None)` | `text`:待合成文本`timeout_millis`:阻塞线程的超时时间,单位为毫秒,不设置或值为0时不生效 | 没有指定`ResultCallback`时返回二进制音频数据,否则返回None | 将整段文本(无论是纯文本还是包含[SSML](https://help.aliyun.com/zh/model-studio/introduction-to-cosyvoice-ssml-markup-language)的文本)转换为语音。在创建`SpeechSynthesizer`实例时,存在以下两种情况:没有指定`ResultCallback`:`call`方法会阻塞当前线程直到语音合成完成并返回二进制音频数据。使用方法请参见[同步调用](https://help.aliyun.com/zh/model-studio/cosyvoice-python-sdk#8341058094tc3)。指定了`ResultCallback`:`call`方法会立刻返回None,并通过[回调接口(ResultCallback)](https://help.aliyun.com/zh/model-studio/cosyvoice-python-sdk#85d698b9f9g8s)的`on_data`方法返回语音合成的结果。使用方法请参见[异步调用](https://help.aliyun.com/zh/model-studio/cosyvoice-python-sdk#cc2a504f344s2)。**重要**每次调用`call`方法前,需要重新初始化`SpeechSynthesizer`实例。 | +| `def streaming_call(self, text: str)` | `text`:待合成文本片段 | 无 | 流式发送待合成文本(不支持包含SSML的文本)。您可以多次调用该接口,将待合成文本分多次发送给服务端。合成结果通过[回调接口(ResultCallback)](https://help.aliyun.com/zh/model-studio/cosyvoice-python-sdk#85d698b9f9g8s)的`on_data`方法获取。使用方法请参见[流式调用](https://help.aliyun.com/zh/model-studio/cosyvoice-python-sdk#ba023aacfbr84)。 | +| `def streaming_complete(self, complete_timeout_millis=600000)` | `complete_timeout_millis`:等待时间,单位为毫秒 | 无 | 
结束流式语音合成。该方法阻塞当前线程N毫秒(具体时长由`complete_timeout_millis`决定),直到任务结束。如果`completeTimeoutMillis`设置为0,则无限期等待。默认情况下,如果等待时间超过10分钟,则停止等待。使用方法请参见[流式调用](https://help.aliyun.com/zh/model-studio/cosyvoice-python-sdk#ba023aacfbr84)。**重要**在[流式调用](https://help.aliyun.com/zh/model-studio/cosyvoice-python-sdk#ba023aacfbr84)时,请务必确保调用该方法,否则可能会出现合成语音缺失的问题。 | +| `def get_last_request_id(self)` | 无 | 上一个任务的request_id | 获取上一个任务的request_id。 | +| `def get_first_package_delay(self)` | 无 | 首包延迟 | 获取首包延迟(一般在500ms左右)。首包延迟是开始发送文本和接收第一个音频包之间的时间,单位为毫秒。在任务完成后使用。首次发送文本时需建立 WebSocket 连接,因此首包延迟会包含连接建立的耗时。 | +| `def get_response(self)` | 无 | 最后一次报文 | 获取最后一次报文(为JSON格式的数据),可以用于获取task-failed报错。 | + +### **回调接口(**`ResultCallback`) + +[异步调用](https://help.aliyun.com/zh/model-studio/cosyvoice-python-sdk#cc2a504f344s2)或[流式调用](https://help.aliyun.com/zh/model-studio/cosyvoice-python-sdk#ba023aacfbr84)时,服务端会通过回调的方式,将关键流程信息和数据返回给客户端。您需要实现回调方法,处理服务端返回的信息或者数据。 + +通过“`from dashscope.audio.tts_v2 import *`”方式引入。 + +点击查看示例 + +| | | | | +| ---- | ---- | ---- | ---- | +| | | | | + +| **方法** | **参数** | **返回值** | **描述** | +| ------------------------------------------- | ---------------------------------- | ---------- | ------------------------------------------------------------ | +| `def on_open(self) -> None` | 无 | 无 | 当和服务端建立连接完成后,该方法立刻被回调。 | +| `def on_event( self, message: str) -> None` | `message`:服务端返回的信息 | 无 | 当服务有回复时会被回调。`message`为JSON字符串,解析可获取Task ID(`task_id`参数)、本次请求中计费的有效字符数(`characters`参数)等信息。 | +| `def on_complete(self) -> None` | 无 | 无 | 当所有合成数据全部返回(语音合成完成)后被回调。 | +| `def on_error(self, message) -> None` | `message`:异常信息 | 无 | 发生异常时该方法被回调。 | +| `def on_data(self, data: bytes) -> None` | `data`:服务器返回的二进制音频数据 | 无 | 当服务器有合成音频返回时被回调。您可以将二进制音频数据合成为一个完整的音频文件后使用播放器播放,也可以通过支持流式播放的播放器实时播放。**重要**流式语音合成中,对于mp3/opus等压缩格式,音频分段传输需使用流式播放器,不可逐帧播放,避免解码失败。支持流式播放的播放器:ffmpeg、pyaudio (Python)、AudioFormat (Java)、MediaSource (Javascript)等。将音频数据合成完整的音频文件时,应以追加模式写入同一文件。流式语音合成的wav/mp3 格式音频仅首帧包含头信息,后续帧为纯音频数据。 | +| `def on_close(self) -> 
None` | 无 | 无 | 当服务已经关闭连接后被回调。 | + +## **响应结果** + +服务器返回二进制音频数据: + +- [同步调用](https://help.aliyun.com/zh/model-studio/cosyvoice-python-sdk#8341058094tc3):对[SpeechSynthesizer类](https://help.aliyun.com/zh/model-studio/cosyvoice-python-sdk#d6bc1f133f871)的`call`方法返回的二进制音频数据进行处理。 +- [异步调用](https://help.aliyun.com/zh/model-studio/cosyvoice-python-sdk#cc2a504f344s2)或[流式调用](https://help.aliyun.com/zh/model-studio/cosyvoice-python-sdk#ba023aacfbr84):对[回调接口(ResultCallback)](https://help.aliyun.com/zh/model-studio/cosyvoice-python-sdk#85d698b9f9g8s)的`on_data`方法的参数(bytes类型数据)进行处理。 + + + +## **错误码** + +如遇报错问题,请参见[错误信息](https://help.aliyun.com/zh/model-studio/error-code)进行排查。 + +若问题仍未解决,请加入[开发者群](https://github.com/aliyun/alibabacloud-bailian-speech-demo)反馈遇到的问题,并提供Request ID,以便进一步排查问题。 + +## **音色列表** + +当前默认支持的音色如下表所示。若您需要更加个性化的音色,可通过声音复刻功能免费定制专属音色,详情请参见[使用复刻的音色进行语音合成](https://help.aliyun.com/zh/model-studio/cosyvoice-clone-api#b6d3449fb336v)。 + +进行语音合成时,`model`参数必须与所选音色对应,否则调用将失败。 + +待合成文本(text)应使用与所选音色语种一致的语言,否则可能出现发音错误或不自然 + +cosyvoice-v3 + +cosyvoice-v2 + +cosyvoice-v1 + +| **音色** | **音频试听(右键保存音频)** | **voice参数** | **适用场景** | **语言** | +| -------- | ---------------------------- | ------------- | ------------------------------------------------------------ | --------------- | +| 龙婉 | | longwan | 语音助手、导航播报、聊天数字人 | 中文普通话 | +| 龙橙 | | longcheng | 语音助手、导航播报、聊天数字人 | 中文普通话 | +| 龙华 | | longhua | 语音助手、导航播报、聊天数字人 | 中文普通话 | +| 龙小淳 | | longxiaochun | 语音助手、导航播报、聊天数字人 | 中文普通话+英文 | +| 龙小夏 | | longxiaoxia | 语音助手、聊天数字人 | 中文普通话 | +| 龙小诚 | | longxiaocheng | 语音助手、导航播报、聊天数字人 | 中文普通话+英文 | +| 龙小白 | | longxiaobai | 聊天数字人、有声书、语音助手 | 中文普通话 | +| 龙老铁 | | longlaotie | 新闻播报、有声书、语音助手、直播带货、导航播报 | 中文东北口音 | +| 龙书 | | longshu | 有声书、语音助手、导航播报、新闻播报、智能客服 | 中文普通话 | +| 龙硕 | | longshuo | 语音助手、导航播报、新闻播报、客服催收 | 中文普通话 | +| 龙婧 | | longjing | 语音助手、导航播报、新闻播报、客服催收 | 中文普通话 | +| 龙妙 | | longmiao | 客服催收、导航播报、有声书、语音助手 | 中文普通话 | +| 龙悦 | | longyue | 语音助手、诗词朗诵、有声书朗读、导航播报、新闻播报、客服催收 | 中文普通话 | +| 龙媛 | | longyuan | 有声书、语音助手、聊天数字人 | 
中文普通话 | +| 龙飞 | | longfei | 会议播报、新闻播报、有声书 | 中文普通话 | +| 龙杰力豆 | | longjielidou | 新闻播报、有声书、聊天助手 | 中文普通话+英文 | +| 龙彤 | | longtong | 有声书、导航播报、聊天数字人 | 中文普通话 | +| 龙祥 | | longxiang | 新闻播报、有声书、导航播报 | 中文普通话 | +| Stella | | loongstella | 语音助手、直播带货、导航播报、客服催收、有声书 | 中文普通话+英文 | +| Bella | | loongbella | 语音助手、客服催收、新闻播报、导航播报 | 中文普通话 | \ No newline at end of file diff --git a/exercise.py b/exercise.py deleted file mode 100644 index e09a980..0000000 --- a/exercise.py +++ /dev/null @@ -1,43 +0,0 @@ -#!/usr/bin/env python3 -""" -Simple calculator program for two numbers. -Supports addition, subtraction, multiplication, and division. -""" - -def calculator(): - """Main calculator function.""" - print("Simple Calculator") - print("Operations: + (addition), - (subtraction), * (multiplication), / (division)") - - try: - # Get input from user - num1 = float(input("Enter first number: ")) - num2 = float(input("Enter second number: ")) - operation = input("Enter operation (+, -, *, /): ").strip() - - # Perform calculation - if operation == '+': - result = num1 + num2 - print(f"{num1} + {num2} = {result}") - elif operation == '-': - result = num1 - num2 - print(f"{num1} - {num2} = {result}") - elif operation == '*': - result = num1 * num2 - print(f"{num1} * {num2} = {result}") - elif operation == '/': - if num2 == 0: - print("Error: Division by zero!") - else: - result = num1 / num2 - print(f"{num1} / {num2} = {result}") - else: - print("Invalid operation! Please use +, -, *, or /.") - - except ValueError: - print("Invalid input! Please enter numeric values.") - except Exception as e: - print(f"An error occurred: {e}") - -if __name__ == "__main__": - calculator() \ No newline at end of file