Add CardioAI project with three modules

- Module 1: Dashboard for cardiovascular disease data visualization
- Module 2: Machine learning predictor with Flask API
- Module 3: Voice assistant with DeepSeek and CosyVoice integration
- Add .gitignore for proper file exclusion
- Update requirements and documentation

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
anthony-s526
2026-02-26 21:26:10 +08:00
parent 5246dce55c
commit ffc4192ff0
21 changed files with 3759 additions and 43 deletions

62
.gitignore vendored Normal file
View File

@@ -0,0 +1,62 @@
# OS generated files
.DS_Store
.DS_Store?
._*
.Spotlight-V100
.Trashes
ehthumbs.db
Thumbs.db
# IDE and editor files
.idea/
.vscode/
*.swp
*.swo
*~
# Python
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
env/
venv/
ENV/
env.bak/
venv.bak/
# Virtual environment
cardioenv/
# Logs
*.log
# Data files
*.csv
*.xlsx
*.pkl
*.json
*.db
*.sqlite
# Environment variables
.env
.env.local
.env.development.local
.env.test.local
.env.production.local
# Temporary files
tmp/
temp/
# Jupyter Notebook
.ipynb_checkpoints
# Coverage
.coverage
htmlcov/
# PyTest
.pytest_cache/

10
.idea/.gitignore generated vendored Normal file
View File

@@ -0,0 +1,10 @@
# Default ignored files
/shelf/
/workspace.xml
# Ignored default folder with query files
/queries/
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml
# Editor-based HTTP Client requests
/httpRequests/

10
aicodes/.idea/.gitignore generated vendored Normal file
View File

@@ -0,0 +1,10 @@
# Default ignored files
/shelf/
/workspace.xml
# Ignored default folder with query files
/queries/
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml
# Editor-based HTTP Client requests
/httpRequests/

19
aicodes/.idea/aicodes.iml generated Normal file
View File

@@ -0,0 +1,19 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="jdk" jdkName="$USER_HOME$/miniconda3/envs/cardioenv" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
<component name="PyDocumentationSettings">
<option name="format" value="PLAIN" />
<option name="myDocStringFormat" value="Plain" />
</component>
<component name="TemplatesService">
<option name="TEMPLATE_FOLDERS">
<list>
<option value="$MODULE_DIR$/module2_predictor/templates" />
</list>
</option>
</component>
</module>

View File

@@ -0,0 +1,12 @@
<component name="InspectionProjectProfileManager">
<profile version="1.0">
<option name="myName" value="Project Default" />
<inspection_tool class="PyStubPackagesAdvertiser" enabled="true" level="WARNING" enabled_by_default="true">
<option name="ignoredPackages">
<list>
<option value="pandas" />
</list>
</option>
</inspection_tool>
</profile>
</component>

View File

@@ -0,0 +1,6 @@
<component name="InspectionProjectProfileManager">
<settings>
<option name="USE_PROJECT_PROFILE" value="false" />
<version value="1.0" />
</settings>
</component>

7
aicodes/.idea/misc.xml generated Normal file
View File

@@ -0,0 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="Black">
<option name="sdkName" value="$USER_HOME$/miniconda3/envs/cardioenv" />
</component>
<component name="ProjectRootManager" version="2" project-jdk-name="$USER_HOME$/miniconda3/envs/cardioenv" project-jdk-type="Python SDK" />
</project>

8
aicodes/.idea/modules.xml generated Normal file
View File

@@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/aicodes.iml" filepath="$PROJECT_DIR$/.idea/aicodes.iml" />
</modules>
</component>
</project>

6
aicodes/.idea/vcs.xml generated Normal file
View File

@@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="$PROJECT_DIR$/.." vcs="Git" />
</component>
</project>

47
aicodes/llm_streaming.py Normal file
View File

@@ -0,0 +1,47 @@
import os
from langchain_openai import ChatOpenAI
from dotenv import load_dotenv
# 加载环境变量
load_dotenv()
def get_llm():
    """
    Build the chat client used to talk to the DeepSeek API.

    ``ChatOpenAI`` is an OpenAI-compatible client class; it is configured
    here from the ``base_url1`` and ``DEEPSEEK_API_KEY1`` environment
    variables.

    Returns:
        A ``ChatOpenAI`` instance for the ``deepseek-chat`` model.
    """
    client_options = {
        "base_url": os.getenv("base_url1"),
        "api_key": os.getenv("DEEPSEEK_API_KEY1"),
        "model": "deepseek-chat",
        "temperature": 0,  # 0 keeps the replies stable between runs
    }
    return ChatOpenAI(**client_options)
# 1. Initialise the LLM
llm = get_llm()

# 2. User-level prompt
user_prompt = "你是谁?请详细介绍一下你自己。"
print(f"--- 用户提问:{user_prompt} ---")
print("--- LLM 流式回复开始 ---")

# 3. Streaming call: .stream() returns an iterator that yields the reply
#    piece by piece.
response_stream = llm.stream(user_prompt)

# 4. Echo every piece as it arrives (end="" avoids line breaks so the output
#    looks like a continuous stream) and keep the pieces for later use.
received_parts = []
for chunk in response_stream:
    piece = chunk.content
    print(piece, end="", flush=True)
    received_parts.append(piece)
full_response = "".join(received_parts)
print("\n--- LLM 流式回复结束 ---")

# 5. Optional: print full_response here to show the complete reply at once.

View File

@@ -0,0 +1,354 @@
#!/usr/bin/env python3
"""
CardioAI - Cardiovascular Disease Dashboard
Streamlit application for data cleaning, feature engineering, and interactive visualization.
"""
import streamlit as st
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import warnings
# NOTE(review): with no category given this filter hides every warning
# raised in the process, not only the noisy ones.
warnings.filterwarnings('ignore')
# Set page configuration
st.set_page_config(
    page_title="CardioAI - Cardiovascular Disease Dashboard",
    page_icon="❤️",
    layout="wide",
    initial_sidebar_state="expanded"
)
# Constants
# NOTE(review): absolute path inside one user's home directory; the workbook
# will not be found on any other machine unless this value is changed.
DATA_PATH = "/Users/anthony/ai_lesson/ai_zhangzhongshan/心血管疾病.xlsx"
@st.cache_data
def load_and_clean_data():
    """
    Load the cardiovascular workbook, clean it and derive display features.

    Steps: read ``DATA_PATH``; rename columns to canonical names; derive
    ``age_years`` and ``bmi``; drop rows with implausible blood pressure;
    add readable cholesterol/glucose labels and a BMI category.

    Returns:
        pandas.DataFrame: the cleaned data, or an empty DataFrame when the
        workbook cannot be read (the error is shown through ``st.error``).
    """
    # Load data
    try:
        df = pd.read_excel(DATA_PATH, engine='openpyxl')
        st.sidebar.success(f"数据加载成功!原始记录数: {len(df):,}")
    except Exception as e:
        st.error(f"数据加载失败: {e}")
        return pd.DataFrame()

    # Canonical column names, keyed by a fragment expected in the raw header
    column_mapping = {
        'age(天)': 'age',
        'cholestero1': 'cholesterol',
        'cholesterol': 'cholesterol',
        'gluc': 'gluc',
        'ap_hi': 'ap_hi',
        'ap_lo': 'ap_lo',
        'cardio': 'cardio',
        'gender': 'gender',
        'height': 'height',
        'weight': 'weight',
        'smoke': 'smoke',
        'alco': 'alco',
        'active': 'active'
    }

    # First mapping key contained in the (case-insensitive) header wins.
    # str() keeps non-string column labels from crashing the lookup.
    renames = {}
    for col in df.columns:
        header = str(col).lower()
        for key, value in column_mapping.items():
            if key.lower() in header:
                renames[col] = value
                break
    df = df.rename(columns=renames)

    # Feature Engineering
    # 1. Convert age (in days) to whole years
    if 'age' in df.columns:
        df['age_years'] = np.round(df['age'] / 365.25).astype(int)

    # 2. Calculate BMI from height in cm and weight in kg
    if 'height' in df.columns and 'weight' in df.columns:
        df['bmi'] = df['weight'] / ((df['height'] / 100) ** 2)
        df['bmi'] = df['bmi'].round(2)

    # 3. Handle blood pressure outliers
    if 'ap_hi' in df.columns and 'ap_lo' in df.columns:
        # Diastolic (ap_lo) must be below systolic (ap_hi)
        df = df[df['ap_lo'] < df['ap_hi']]
        # Remove extreme blood pressure values
        systolic_mask = (df['ap_hi'] >= 90) & (df['ap_hi'] <= 250)
        diastolic_mask = (df['ap_lo'] >= 60) & (df['ap_lo'] <= 150)
        # .copy() so the column assignments below write to an independent
        # frame rather than to a slice of the unfiltered data
        df = df[systolic_mask & diastolic_mask].copy()

    # 4. Convert cholesterol and glucose codes to descriptive strings
    level_labels = {
        1: "Normal",
        2: "Above Normal",
        3: "Well Above Normal"
    }
    if 'cholesterol' in df.columns:
        df['cholesterol_str'] = df['cholesterol'].map(level_labels)
    if 'gluc' in df.columns:
        df['gluc_str'] = df['gluc'].map(level_labels)

    # 5. Create BMI categories. Bins are closed on the left:
    #    <18.5 Underweight, [18.5, 25) Normal, [25, 30) Overweight, >=30 Obese.
    #    Rows whose BMI is missing or not finite stay uncategorised instead of
    #    being counted as "Obese".
    if 'bmi' in df.columns:
        df['bmi_category'] = pd.cut(
            df['bmi'],
            bins=[-np.inf, 18.5, 25, 30, np.inf],
            labels=["Underweight", "Normal", "Overweight", "Obese"],
            right=False
        ).astype(object)

    return df
def create_filters(df):
    """Render the sidebar filter widgets and return the user's selections.

    Returns:
        tuple: ``(age_range, gender_filter, cardio_filter)``. ``age_range``
        is a ``(min, max)`` pair, ``(0, 100)`` when ``age_years`` is absent.
        Each of the other two is the list chosen in its multiselect, or
        ``None`` when the corresponding column is absent.
    """
    st.sidebar.header("📊 数据筛选器")

    # Age slider bounded by the ages present in the data
    age_range = (0, 100)
    if 'age_years' in df.columns:
        youngest = int(df['age_years'].min())
        oldest = int(df['age_years'].max())
        age_range = st.sidebar.slider(
            "选择年龄范围 (岁)",
            min_value=youngest,
            max_value=oldest,
            value=(youngest, oldest)
        )

    # Gender multiselect, everything selected by default
    gender_filter = None
    if 'gender' in df.columns:
        gender_choices = sorted(df['gender'].unique())
        gender_filter = st.sidebar.multiselect(
            "选择性别",
            options=gender_choices,
            default=gender_choices
        )

    # Disease-status multiselect, everything selected by default
    cardio_filter = None
    if 'cardio' in df.columns:
        cardio_choices = sorted(df['cardio'].unique())
        cardio_filter = st.sidebar.multiselect(
            "心血管疾病状态",
            options=cardio_choices,
            default=cardio_choices,
            help="0: 无疾病, 1: 有疾病"
        )

    return age_range, gender_filter, cardio_filter
def apply_filters(df, age_range, gender_filter, cardio_filter):
    """Return a filtered copy of ``df``; the input frame is left untouched.

    Args:
        df: DataFrame to filter.
        age_range: ``(low, high)`` inclusive bounds applied to ``age_years``.
        gender_filter: allowed ``gender`` values, or ``None`` for no filter.
        cardio_filter: allowed ``cardio`` values, or ``None`` for no filter.

    A criterion is skipped when its column is missing from ``df``.
    """
    available = df.columns
    # One row mask accumulates all criteria
    keep = np.ones(len(df), dtype=bool)

    if 'age_years' in available:
        low, high = age_range[0], age_range[1]
        keep &= df['age_years'].between(low, high).to_numpy()

    if gender_filter is not None and 'gender' in available:
        keep &= df['gender'].isin(gender_filter).to_numpy()

    if cardio_filter is not None and 'cardio' in available:
        keep &= df['cardio'].isin(cardio_filter).to_numpy()

    return df.copy()[keep]
def display_summary_metrics(df):
    """Show four headline metrics side by side, then a horizontal rule.

    The metrics are record count, mean of ``cardio`` as a percentage, mean
    ``age_years`` and mean ``bmi``. A metric whose column is missing shows
    "N/A".
    """
    slots = st.columns(4)
    present = df.columns

    def mean_or_na(column, render):
        # Format the column mean, or fall back to the placeholder text
        return render(df[column].mean()) if column in present else "N/A"

    tiles = [
        ("📈 总记录数", f"{len(df):,}"),
        ("❤️ 心血管疾病风险率", mean_or_na('cardio', lambda m: f"{m * 100:.2f}%")),
        ("👥 平均年龄", mean_or_na('age_years', lambda m: f"{m:.1f}")),
        ("⚖️ 平均BMI", mean_or_na('bmi', lambda m: f"{m:.1f}")),
    ]
    for slot, (label, text) in zip(slots, tiles):
        with slot:
            st.metric(label, text)

    st.markdown("---")
def create_visualizations(df):
    """Render the charts and the data-detail expanders for ``df``.

    Draws an age histogram split by disease status, a stacked bar chart of
    disease share per BMI category, a preview of the first 100 rows and
    summary statistics. The caller's DataFrame is not modified.
    """
    # One colour per disease status, shared by both charts
    status_colors = {'有疾病': '#FF6B6B', '无疾病': '#4ECDC4'}

    # Create two columns for charts
    col1, col2 = st.columns(2)

    with col1:
        st.subheader("📊 年龄分布 (按心血管疾病状态)")
        if 'age_years' in df.columns and 'cardio' in df.columns:
            # Readable legend labels. assign() returns a new frame, so the
            # helper column is added to a local copy only.
            df = df.assign(
                cardio_str=df['cardio'].apply(lambda x: '有疾病' if x == 1 else '无疾病')
            )
            fig1 = px.histogram(
                df,
                x='age_years',
                color='cardio_str',
                nbins=30,
                barmode='overlay',
                opacity=0.7,
                # An explicit map pins each status to its colour; a colour
                # sequence would follow the order statuses appear in the data.
                color_discrete_map=status_colors,
                labels={'age_years': '年龄 (岁)', 'cardio_str': '心血管疾病状态'}
            )
            fig1.update_layout(
                legend_title="疾病状态",
                xaxis_title="年龄 (岁)",
                yaxis_title="人数",
                bargap=0.1
            )
            st.plotly_chart(fig1, use_container_width=True)
        else:
            st.info("年龄或心血管疾病状态数据不可用")

    with col2:
        st.subheader("📈 BMI类别与心血管疾病关系")
        if 'bmi_category' in df.columns and 'cardio' in df.columns:
            # Row-normalised cross-tabulation: percentage per BMI category
            cross_tab = pd.crosstab(df['bmi_category'], df['cardio'], normalize='index') * 100
            categories = cross_tab.index.tolist()
            # A status absent from the filtered data contributes zeros
            no_disease = cross_tab[0].values if 0 in cross_tab.columns else [0] * len(categories)
            has_disease = cross_tab[1].values if 1 in cross_tab.columns else [0] * len(categories)
            fig2 = go.Figure(data=[
                go.Bar(name='无疾病', x=categories, y=no_disease,
                       marker_color=status_colors['无疾病']),
                go.Bar(name='有疾病', x=categories, y=has_disease,
                       marker_color=status_colors['有疾病'])
            ])
            fig2.update_layout(
                barmode='stack',
                xaxis_title="BMI 类别",
                yaxis_title="百分比 (%)",
                legend_title="疾病状态",
                yaxis=dict(range=[0, 100])
            )
            st.plotly_chart(fig2, use_container_width=True)
        else:
            st.info("BMI类别或心血管疾病状态数据不可用")

    # Additional visualizations
    st.markdown("---")
    st.subheader("🔍 数据详情")

    # Show filtered data
    with st.expander("查看筛选后的数据 (前100行)"):
        st.dataframe(df.head(100), use_container_width=True)

    # Data statistics
    with st.expander("查看数据统计摘要"):
        if not df.empty:
            st.write("**数值型变量统计:**")
            numeric_cols = df.select_dtypes(include=[np.number]).columns
            st.dataframe(df[numeric_cols].describe(), use_container_width=True)
            st.write("**分类变量统计:**")
            categorical_cols = df.select_dtypes(include=['object', 'category']).columns
            for col in categorical_cols:
                st.write(f"**{col}**:")
                st.dataframe(df[col].value_counts(), use_container_width=True)
def main():
    """Entry point of the dashboard page.

    Loads the data, shows dataset information, builds the sidebar filters
    and renders the metrics and charts for the filtered rows.
    """
    # Title and description
    st.title("❤️ CardioAI - 心血管疾病智能辅助系统")
    st.markdown("""
### 模块1: 数据可视化仪表板
本模块提供心血管疾病数据的交互式探索、清洗和可视化功能。
""")

    # Load and clean data
    with st.spinner("正在加载和清洗数据..."):
        df = load_and_clean_data()

    # Nothing to show without data
    if df.empty:
        st.error("无法加载数据。请检查数据文件路径和格式。")
        return

    # Dataset information
    with st.expander("📋 数据集信息", expanded=False):
        st.write(f"**总记录数:** {len(df):,}")
        st.write(f"**特征数:** {len(df.columns)}")
        st.write("**列名:**", ", ".join(df.columns.tolist()))
        # List only the columns that actually have missing values
        null_counts = df.isnull().sum()
        columns_with_gaps = null_counts[null_counts > 0]
        if columns_with_gaps.empty:
            st.write("**缺失值:** 无")
        else:
            st.write("**缺失值:**")
            st.dataframe(columns_with_gaps.rename('缺失数量'))

    # Sidebar filters, then everything below works on the filtered rows
    selected_ages, selected_genders, selected_status = create_filters(df)
    filtered_df = apply_filters(df, selected_ages, selected_genders, selected_status)
    display_summary_metrics(filtered_df)
    create_visualizations(filtered_df)

    # Footer
    st.markdown("---")
    st.markdown(
        """
<div style='text-align: center; color: gray;'>
<p>CardioAI - 心血管疾病智能辅助系统 | 模块1: 数据可视化仪表板</p>
<p>使用 Streamlit 和 Plotly 构建</p>
</div>
""",
        unsafe_allow_html=True
    )


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,30 @@
#!/usr/bin/env python3
"""Smoke test: verify that the dashboard module imports and exposes its API."""
import sys
import os

# Add parent directory to path
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

# Functions the dashboard module is expected to define
required_functions = ['load_and_clean_data', 'create_filters', 'apply_filters',
                      'display_summary_metrics', 'create_visualizations', 'main']

# Becomes True only when the import works and every function is present, so
# the final verdict can no longer claim success after a failure.
checks_passed = False

try:
    # Try to import the module
    import cardio_dashboard
    print("✅ cardio_dashboard module imported successfully!")

    # Check if required functions exist
    missing_functions = []
    for func_name in required_functions:
        if hasattr(cardio_dashboard, func_name):
            print(f"✅ Function '{func_name}' found")
        else:
            print(f"❌ Function '{func_name}' not found")
            missing_functions.append(func_name)
    checks_passed = not missing_functions
except ImportError as e:
    print(f"❌ Import error: {e}")
except Exception as e:
    print(f"❌ Other error: {e}")

if checks_passed:
    print("\n✅ Syntax check passed!")
else:
    print("\n❌ Syntax check failed!")

View File

@@ -0,0 +1,421 @@
#!/usr/bin/env python3
"""
CardioAI - Cardiovascular Disease Prediction API
Flask application for real-time cardiovascular disease prediction.
"""
from flask import Flask, request, jsonify, render_template
import pandas as pd
import numpy as np
import joblib
import traceback
from datetime import datetime
import os
# Initialize Flask app
app = Flask(__name__)
# Constants
# The serialized model file is looked up in the directory of this source file.
MODEL_PATH = os.path.join(os.path.dirname(__file__), 'cardio_predictor_model.pkl')
# Global variables for model and metadata; all three are filled in by
# load_model() and stay None until it has run successfully.
model = None              # object returned by joblib.load(MODEL_PATH)
model_metadata = None     # dict read from the metadata JSON, or a default
expected_features = None  # feature names in the order the model expects
# Expected feature names for input validation (the fields a request must send)
EXPECTED_FEATURES = [
    'age_years',    # Age in years
    'gender',       # Gender (1: female, 2: male)
    'height',       # Height in cm
    'weight',       # Weight in kg
    'ap_hi',        # Systolic blood pressure
    'ap_lo',        # Diastolic blood pressure
    'cholesterol',  # Cholesterol level (1: normal, 2: above normal, 3: well above normal)
    'gluc',         # Glucose level (1: normal, 2: above normal, 3: well above normal)
    'smoke',        # Smoking (0: no, 1: yes)
    'alco',         # Alcohol intake (0: no, 1: yes)
    'active'        # Physical activity (0: no, 1: yes)
]
# Feature ranges for validation (min, max), both bounds inclusive.
# 'bmi' is not a request field: validate_input_features() derives it from
# height and weight and then checks it against this range.
FEATURE_RANGES = {
    'age_years': (20, 100),
    'gender': (1, 2),
    'height': (100, 250),  # cm
    'weight': (30, 200),   # kg
    'ap_hi': (90, 250),    # systolic blood pressure
    'ap_lo': (60, 150),    # diastolic blood pressure
    'cholesterol': (1, 3),
    'gluc': (1, 3),
    'smoke': (0, 1),
    'alco': (0, 1),
    'active': (0, 1),
    'bmi': (10, 50)        # BMI range
}
def load_model():
    """
    Load the trained model pipeline and its optional metadata from disk.

    Fills the module globals ``model``, ``model_metadata`` and
    ``expected_features``. ``model`` is left as ``None`` when the model file
    is missing or cannot be loaded. A missing, unreadable or malformed
    metadata file only falls back to a default description; it does not
    discard a model that loaded successfully.
    """
    global model, model_metadata, expected_features
    try:
        if not os.path.exists(MODEL_PATH):
            print(f"❌ Model file not found at: {MODEL_PATH}")
            print("⚠️ Please run train_and_save.py first to train and save the model.")
            model = None
            return

        print(f"📦 Loading model from: {MODEL_PATH}")
        # NOTE: joblib.load unpickles arbitrary objects, so MODEL_PATH must
        # only ever point at a file produced by a trusted training run.
        model = joblib.load(MODEL_PATH)
        print("✅ Model loaded successfully!")

        # Try to extract the expected feature order from the model
        try:
            preprocessor = model.named_steps['preprocessor']
            num_features = preprocessor.transformers_[0][2]  # Numeric features
            cat_features = preprocessor.transformers_[1][2]  # Categorical features
            expected_features = list(num_features) + list(cat_features)
            print(f"📋 Model expects features: {expected_features}")
        except Exception as e:
            print(f"⚠️ Could not extract feature names from model: {e}")
            expected_features = EXPECTED_FEATURES

        # Load metadata if it exists; start from the default description
        import json
        model_metadata = {'description': 'CardioAI Prediction Model'}
        # splitext swaps only the final extension, never a '.pkl' that
        # happens to appear in a directory name
        metadata_path = os.path.splitext(MODEL_PATH)[0] + '_metadata.json'
        if os.path.exists(metadata_path):
            try:
                with open(metadata_path, 'r', encoding='utf-8') as f:
                    loaded_metadata = json.load(f)
            except (OSError, ValueError) as e:
                # ValueError covers both invalid JSON and undecodable bytes
                print(f"⚠️ Could not read model metadata: {e}")
            else:
                if isinstance(loaded_metadata, dict):
                    model_metadata = loaded_metadata
                    print(f"📋 Model metadata loaded: {model_metadata.get('description', 'N/A')}")
                else:
                    print("⚠️ Model metadata is not a JSON object; using defaults")
    except Exception as e:
        print(f"❌ Error loading model: {e}")
        traceback.print_exc()
        model = None
def validate_input_features(input_data):
    """
    Validate and normalise the raw feature values of a prediction request.

    Every name in ``EXPECTED_FEATURES`` must be present, numeric and inside
    its ``FEATURE_RANGES`` interval. Coded features (gender, cholesterol,
    gluc, smoke, alco, active) must be whole numbers. Diastolic pressure
    must be lower than systolic pressure, matching the dashboard's cleaning
    rule. BMI is derived from height and weight and range-checked as well.

    Args:
        input_data: a dict, or anything ``dict()`` accepts, mapping feature
            names to values.

    Returns:
        tuple: ``(is_valid, message, validated_data)``. ``validated_data``
        is a dict of converted values plus ``bmi`` on success, else ``None``.
    """
    integer_features = ('gender', 'cholesterol', 'gluc', 'smoke', 'alco', 'active')
    try:
        # Convert input to dictionary if it's not already
        data = input_data if isinstance(input_data, dict) else dict(input_data)

        # Check for missing features
        missing_features = [feat for feat in EXPECTED_FEATURES if feat not in data]
        if missing_features:
            return False, f"Missing features: {missing_features}", None

        # Extra features are tolerated but reported
        extra_features = [feat for feat in data if feat not in EXPECTED_FEATURES]
        if extra_features:
            print(f"⚠️ Warning: Extra features provided: {extra_features}")

        # Validate each feature
        validated_data = {}
        for feature in EXPECTED_FEATURES:
            value = data[feature]
            try:
                number = float(value)
            except (ValueError, TypeError):
                return False, f"Invalid value for {feature}: {value} (should be numeric)", None

            if feature in integer_features:
                # Reject 1.9, NaN or inf instead of silently truncating to a
                # different category code
                if not number.is_integer():
                    return False, f"Invalid value for {feature}: {value} (should be an integer)", None
                validated_data[feature] = int(number)
            else:
                validated_data[feature] = number

            # Check value ranges (bounds inclusive)
            if feature in FEATURE_RANGES:
                min_val, max_val = FEATURE_RANGES[feature]
                if not (min_val <= validated_data[feature] <= max_val):
                    return False, f"{feature} value {validated_data[feature]} out of range ({min_val}-{max_val})", None

        # Diastolic pressure can never reach systolic pressure
        if 'ap_hi' in validated_data and 'ap_lo' in validated_data:
            if validated_data['ap_lo'] >= validated_data['ap_hi']:
                return False, (
                    f"ap_lo value {validated_data['ap_lo']} must be lower than "
                    f"ap_hi value {validated_data['ap_hi']}"
                ), None

        # Calculate BMI if height and weight are available
        if 'height' in validated_data and 'weight' in validated_data:
            height_m = validated_data['height'] / 100  # convert cm to meters
            bmi_value = round(validated_data['weight'] / (height_m ** 2), 2)
            validated_data['bmi'] = bmi_value
            # Validate BMI range
            if 'bmi' in FEATURE_RANGES:
                min_val, max_val = FEATURE_RANGES['bmi']
                if not (min_val <= bmi_value <= max_val):
                    return False, f"Calculated BMI value {bmi_value} out of range ({min_val}-{max_val})", None

        return True, "Validation passed", validated_data
    except Exception as e:
        return False, f"Validation error: {str(e)}", None
def preprocess_input(input_data):
    """
    Turn validated input into a one-row DataFrame laid out for the model.

    The column list is ``expected_features`` when a model has provided one,
    otherwise ``EXPECTED_FEATURES`` with ``bmi`` appended if it is not
    already there. Columns missing from the input are filled in: ``bmi``
    from height and weight when both exist, coded features with 1, anything
    else with 0.0.

    Returns:
        pandas.DataFrame: one row, columns in model order.

    Raises:
        Exception: any error is logged and re-raised unchanged.
    """
    coded_features = ('gender', 'cholesterol', 'gluc', 'smoke', 'alco', 'active')
    try:
        # Create DataFrame with a single row
        df = pd.DataFrame([input_data])

        # Determine which features the model expects
        if expected_features is not None:
            model_features = expected_features
        elif 'bmi' not in EXPECTED_FEATURES:
            model_features = EXPECTED_FEATURES + ['bmi']
        else:
            model_features = EXPECTED_FEATURES

        # Fill in anything the input lacks (should not happen after
        # validation has passed)
        for feature in model_features:
            if feature in df.columns:
                continue
            print(f"⚠️ Warning: Feature '{feature}' not in input data, adding default value")
            can_derive_bmi = 'height' in df.columns and 'weight' in df.columns
            if feature == 'bmi' and can_derive_bmi:
                height_m = df['height'].iloc[0] / 100
                df['bmi'] = df['weight'].iloc[0] / (height_m ** 2)
            elif feature in coded_features:
                df[feature] = 1
            else:
                df[feature] = 0.0

        # Column order as expected by the model
        return df[model_features]
    except Exception as e:
        print(f"❌ Error preprocessing input: {e}")
        raise
@app.route('/')
def home():
    """Serve the prediction web interface (the ``index.html`` template)."""
    page_template = 'index.html'
    return render_template(page_template)
@app.route('/api/model_info', methods=['GET'])
def model_info():
    """
    Return a JSON description of the loaded model.

    Tries to load the model on demand. Responds with 503 when no model is
    available and with 500 when building the description fails.
    """
    try:
        # Lazy load, then re-check: load_model() leaves model as None on failure
        if model is None:
            load_model()
        if model is None:
            unavailable = {
                'status': 'error',
                'message': 'Model not loaded. Please run training script first.'
            }
            return jsonify(unavailable), 503

        description = {
            'description': model_metadata.get('description', 'CardioAI Prediction Model'),
            'version': model_metadata.get('model_version', '1.0'),
            'creation_date': model_metadata.get('creation_date', 'N/A'),
            'features': expected_features or EXPECTED_FEATURES,
            'feature_ranges': FEATURE_RANGES
        }
        return jsonify({'status': 'success', 'model': description})
    except Exception as e:
        failure = {
            'status': 'error',
            'message': f'Error getting model info: {str(e)}'
        }
        return jsonify(failure), 500
@app.route('/predict_cardio', methods=['POST'])
def predict_cardio():
    """
    API endpoint for cardiovascular disease prediction.

    Accepts the 11 feature values as a JSON body or as form fields.

    Returns:
        JSON with the predicted class, the probability of disease, a risk
        level and a recommendation. Status is 400 for missing, malformed or
        invalid input, 503 when no model is available and 500 for any other
        failure.
    """
    try:
        # Load model if not already loaded
        if model is None:
            load_model()
        if model is None:
            return jsonify({
                'status': 'error',
                'message': 'Model not available. Please run training script first.'
            }), 503

        # Get input data. silent=True makes a malformed JSON body come back
        # as None, so it is answered with the 400 below rather than raising
        # into the generic 500 handler.
        if request.is_json:
            input_data = request.get_json(silent=True)
        else:
            # Try form data
            input_data = request.form.to_dict()
        if not input_data:
            return jsonify({
                'status': 'error',
                'message': 'No input data provided. Please provide 11 feature values.'
            }), 400

        # Validate input
        is_valid, error_msg, validated_data = validate_input_features(input_data)
        if not is_valid:
            return jsonify({
                'status': 'error',
                'message': error_msg,
                'expected_features': EXPECTED_FEATURES,
                'feature_ranges': FEATURE_RANGES
            }), 400

        # Preprocess input
        input_df = preprocess_input(validated_data)

        # Make prediction
        try:
            # Per-class probabilities for the single input row
            probability = model.predict_proba(input_df)[0]
            # Class prediction (0: no disease, 1: disease)
            prediction = model.predict(input_df)[0]

            # Risk level from the probability of class 1 (disease)
            risk_probability = float(probability[1])
            risk_percentage = risk_probability * 100
            if risk_probability < 0.3:
                risk_level = "Low"
                recommendation = "Continue healthy lifestyle."
            elif risk_probability < 0.7:
                risk_level = "Medium"
                recommendation = "Consider regular health check-ups."
            else:
                risk_level = "High"
                recommendation = "Consult a healthcare professional."

            # Prepare response
            response = {
                'status': 'success',
                'prediction': int(prediction),
                'probability': risk_probability,
                'probability_percentage': risk_percentage,
                'risk_level': risk_level,
                'recommendation': recommendation,
                'input_features': validated_data,
                'timestamp': datetime.now().isoformat()
            }
            print(f"✅ Prediction made: {prediction}, Probability: {risk_probability:.4f}")
            return jsonify(response)
        except Exception as e:
            print(f"❌ Prediction error: {e}")
            traceback.print_exc()
            return jsonify({
                'status': 'error',
                'message': f'Prediction error: {str(e)}'
            }), 500
    except Exception as e:
        print(f"❌ API error: {e}")
        traceback.print_exc()
        return jsonify({
            'status': 'error',
            'message': f'Internal server error: {str(e)}'
        }), 500
@app.route('/health', methods=['GET'])
def health_check():
    """
    Health check endpoint.

    Reports the service name, whether a model is currently loaded and the
    current timestamp. Responds with 500 if building the report fails.
    """
    try:
        if model is not None:
            model_status = 'loaded'
        else:
            model_status = 'not_loaded'
        return jsonify({
            'status': 'healthy',
            'service': 'CardioAI Prediction API',
            'model_status': model_status,
            'timestamp': datetime.now().isoformat()
        })
    except Exception as e:
        problem = {'status': 'unhealthy', 'error': str(e)}
        return jsonify(problem), 500
def _json_error(message, status_code):
    """Build the JSON error body shared by the HTTP error handlers."""
    return jsonify({'status': 'error', 'message': message}), status_code


@app.errorhandler(404)
def not_found(error):
    """Answer unknown URLs with a JSON 404."""
    return _json_error('Endpoint not found', 404)


@app.errorhandler(405)
def method_not_allowed(error):
    """Answer unsupported HTTP methods with a JSON 405."""
    return _json_error('Method not allowed', 405)


@app.errorhandler(500)
def internal_error(error):
    """Answer unhandled server errors with a JSON 500."""
    return _json_error('Internal server error', 500)
def init_app():
    """
    Initialize the Flask application.

    Prints a banner, loads the model and lists the available endpoints
    together with the resulting model status.
    """
    rule = "=" * 60
    print(rule)
    print("🚀 CardioAI - Cardiovascular Disease Prediction API")
    print(rule)

    # Load model at startup
    load_model()

    print("\n📡 API Endpoints:")
    endpoint_lines = (
        " GET / - Prediction web interface",
        " GET /api/model_info - Get model information",
        " POST /predict_cardio - Make prediction",
        " GET /health - Health check",
    )
    for line in endpoint_lines:
        print(line)

    status_text = 'Loaded' if model is not None else 'Not loaded'
    print(f"\n📊 Model status: {status_text}")
# Initialize the app in both cases: when run directly and when imported by a
# WSGI server.
init_app()

if __name__ == '__main__':
    # The interactive debugger lets anyone who can reach the port execute
    # code, and the server listens on all interfaces. Debug mode is therefore
    # off unless explicitly requested through FLASK_DEBUG=1/true/yes.
    debug_enabled = os.getenv('FLASK_DEBUG', '').strip().lower() in ('1', 'true', 'yes')

    # Run the Flask app
    print(f"\n🌍 Starting server on http://127.0.0.1:8080")
    print(" Press Ctrl+C to stop\n")
    app.run(
        host='0.0.0.0',
        port=8080,
        debug=debug_enabled,
        threaded=True
    )

View File

@@ -0,0 +1,889 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>CardioAI - Cardiovascular Disease Risk Assessment</title>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css">
<link href="https://fonts.googleapis.com/css2?family=Poppins:wght@300;400;500;600;700&family=Roboto:wght@300;400;500&display=swap" rel="stylesheet">
<style>
:root {
--primary-color: #2c3e50;
--secondary-color: #3498db;
--success-color: #27ae60;
--warning-color: #f39c12;
--danger-color: #e74c3c;
--light-color: #ecf0f1;
--dark-color: #2c3e50;
--border-radius: 10px;
--box-shadow: 0 5px 15px rgba(0, 0, 0, 0.1);
--transition: all 0.3s ease;
}
* {
margin: 0;
padding: 0;
box-sizing: border-box;
}
body {
font-family: 'Roboto', sans-serif;
line-height: 1.6;
color: #333;
background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
min-height: 100vh;
padding: 20px;
}
.container {
max-width: 1200px;
margin: 0 auto;
padding: 20px;
}
.header {
text-align: center;
margin-bottom: 40px;
padding: 30px;
background: white;
border-radius: var(--border-radius);
box-shadow: var(--box-shadow);
}
.logo {
display: flex;
align-items: center;
justify-content: center;
margin-bottom: 20px;
}
.logo i {
font-size: 48px;
color: var(--danger-color);
margin-right: 15px;
}
.logo h1 {
font-family: 'Poppins', sans-serif;
font-size: 36px;
color: var(--primary-color);
font-weight: 700;
}
.subtitle {
font-size: 18px;
color: #666;
margin-top: 10px;
}
.content {
display: grid;
grid-template-columns: 1fr 1fr;
gap: 30px;
margin-bottom: 40px;
}
@media (max-width: 992px) {
.content {
grid-template-columns: 1fr;
}
}
.card {
background: white;
border-radius: var(--border-radius);
box-shadow: var(--box-shadow);
padding: 30px;
transition: var(--transition);
}
.card:hover {
box-shadow: 0 10px 25px rgba(0, 0, 0, 0.15);
}
.card h2 {
font-family: 'Poppins', sans-serif;
font-size: 24px;
color: var(--primary-color);
margin-bottom: 20px;
padding-bottom: 15px;
border-bottom: 2px solid var(--light-color);
}
.card h2 i {
margin-right: 10px;
color: var(--secondary-color);
}
.form-group {
margin-bottom: 20px;
}
.form-group label {
display: block;
margin-bottom: 8px;
font-weight: 500;
color: var(--dark-color);
}
.form-group input, .form-group select {
width: 100%;
padding: 12px 15px;
border: 2px solid #ddd;
border-radius: 8px;
font-size: 16px;
transition: var(--transition);
background-color: #f8f9fa;
}
.form-group input:focus, .form-group select:focus {
border-color: var(--secondary-color);
outline: none;
background-color: white;
box-shadow: 0 0 0 3px rgba(52, 152, 219, 0.2);
}
.input-with-unit {
position: relative;
}
.input-with-unit input {
padding-right: 50px;
}
.unit {
position: absolute;
right: 15px;
top: 50%;
transform: translateY(-50%);
color: #777;
font-weight: 500;
}
.form-row {
display: grid;
grid-template-columns: 1fr 1fr;
gap: 20px;
}
.form-help {
font-size: 14px;
color: #666;
margin-top: 5px;
}
.btn {
display: inline-block;
padding: 14px 30px;
background: var(--secondary-color);
color: white;
border: none;
border-radius: 8px;
font-size: 18px;
font-weight: 600;
cursor: pointer;
transition: var(--transition);
width: 100%;
margin-top: 10px;
}
.btn:hover {
background: #2980b9;
transform: translateY(-2px);
box-shadow: 0 5px 15px rgba(41, 128, 185, 0.3);
}
.btn:active {
transform: translateY(0);
}
.btn i {
margin-right: 10px;
}
.result-container {
display: none;
}
.result-card {
text-align: center;
padding: 30px;
}
.risk-level {
font-size: 32px;
font-weight: 700;
margin-bottom: 20px;
padding: 15px;
border-radius: var(--border-radius);
}
.risk-low {
background-color: rgba(39, 174, 96, 0.1);
color: var(--success-color);
border: 2px solid var(--success-color);
}
.risk-medium {
background-color: rgba(243, 156, 18, 0.1);
color: var(--warning-color);
border: 2px solid var(--warning-color);
}
.risk-high {
background-color: rgba(231, 76, 60, 0.1);
color: var(--danger-color);
border: 2px solid var(--danger-color);
}
.probability-circle {
width: 200px;
height: 200px;
border-radius: 50%;
margin: 0 auto 30px;
display: flex;
flex-direction: column;
justify-content: center;
align-items: center;
position: relative;
background: conic-gradient(
var(--secondary-color) 0% var(--percentage),
#f0f0f0 var(--percentage) 100%
);
}
.probability-circle::before {
content: '';
position: absolute;
width: 160px;
height: 160px;
background: white;
border-radius: 50%;
}
.probability-value {
font-size: 48px;
font-weight: 700;
position: relative;
z-index: 1;
color: var(--primary-color);
}
.probability-label {
font-size: 18px;
position: relative;
z-index: 1;
color: #666;
}
.prediction-result {
font-size: 24px;
font-weight: 600;
margin: 20px 0;
padding: 15px;
border-radius: var(--border-radius);
background-color: #f8f9fa;
}
.recommendation {
background-color: #f0f8ff;
padding: 20px;
border-radius: var(--border-radius);
margin-top: 20px;
border-left: 4px solid var(--secondary-color);
}
.recommendation h3 {
color: var(--primary-color);
margin-bottom: 10px;
}
.details {
margin-top: 30px;
text-align: left;
}
.details h3 {
color: var(--primary-color);
margin-bottom: 15px;
}
.details-grid {
display: grid;
grid-template-columns: repeat(auto-fill, minmax(200px, 1fr));
gap: 15px;
}
.detail-item {
background-color: #f8f9fa;
padding: 15px;
border-radius: 8px;
}
.detail-label {
font-weight: 600;
color: #666;
font-size: 14px;
}
.detail-value {
font-weight: 700;
color: var(--primary-color);
font-size: 18px;
margin-top: 5px;
}
.loading {
display: none;
text-align: center;
padding: 30px;
}
.loading-spinner {
width: 50px;
height: 50px;
border: 5px solid #f3f3f3;
border-top: 5px solid var(--secondary-color);
border-radius: 50%;
animation: spin 1s linear infinite;
margin: 0 auto 20px;
}
@keyframes spin {
0% { transform: rotate(0deg); }
100% { transform: rotate(360deg); }
}
.error {
display: none;
background-color: rgba(231, 76, 60, 0.1);
border: 1px solid var(--danger-color);
color: var(--danger-color);
padding: 15px;
border-radius: 8px;
margin-top: 20px;
}
.footer {
text-align: center;
padding: 20px;
color: #666;
font-size: 14px;
margin-top: 40px;
border-top: 1px solid #eee;
}
.info-box {
background-color: #e8f4fc;
border-left: 4px solid var(--secondary-color);
padding: 15px;
margin-bottom: 25px;
border-radius: 0 8px 8px 0;
}
.info-box p {
margin: 0;
}
.model-status {
display: flex;
align-items: center;
justify-content: center;
margin-bottom: 20px;
padding: 10px;
border-radius: 8px;
background-color: #f8f9fa;
}
.status-indicator {
width: 12px;
height: 12px;
border-radius: 50%;
margin-right: 10px;
}
.status-connected {
background-color: var(--success-color);
animation: pulse 2s infinite;
}
@keyframes pulse {
0% { opacity: 1; }
50% { opacity: 0.5; }
100% { opacity: 1; }
}
</style>
</head>
<body>
<div class="container">
<header class="header">
<div class="logo">
<i class="fas fa-heartbeat"></i>
<h1>CardioAI</h1>
</div>
<p class="subtitle">Cardiovascular Disease Risk Assessment System</p>
<p>Enter your health information below to assess your risk of cardiovascular disease</p>
</header>
<div class="model-status" id="modelStatus">
<div class="status-indicator status-connected"></div>
<span>Connected to CardioAI Prediction Engine</span>
</div>
<div class="content">
<div class="card">
<h2><i class="fas fa-user-circle"></i> Patient Information</h2>
<div class="info-box">
<p><i class="fas fa-info-circle"></i> Please provide accurate health information for the most reliable risk assessment.</p>
</div>
<form id="predictionForm">
<div class="form-row">
<div class="form-group">
<label for="age_years"><i class="fas fa-birthday-cake"></i> Age (years)</label>
<div class="input-with-unit">
<input type="number" id="age_years" name="age_years" min="20" max="100" step="1" value="45" required>
<span class="unit">years</span>
</div>
<div class="form-help">Enter age between 20-100 years</div>
</div>
<div class="form-group">
<label for="gender"><i class="fas fa-venus-mars"></i> Gender</label>
<select id="gender" name="gender" required>
<option value="1">Female</option>
<option value="2" selected>Male</option>
</select>
<div class="form-help">1: Female, 2: Male</div>
</div>
</div>
<div class="form-row">
<div class="form-group">
<label for="height"><i class="fas fa-arrows-alt-v"></i> Height</label>
<div class="input-with-unit">
<input type="number" id="height" name="height" min="100" max="250" step="0.1" value="170" required>
<span class="unit">cm</span>
</div>
<div class="form-help">Height in centimeters (100-250 cm)</div>
</div>
<div class="form-group">
<label for="weight"><i class="fas fa-weight"></i> Weight</label>
<div class="input-with-unit">
<input type="number" id="weight" name="weight" min="30" max="200" step="0.1" value="70" required>
<span class="unit">kg</span>
</div>
<div class="form-help">Weight in kilograms (30-200 kg)</div>
</div>
</div>
<div class="form-row">
<div class="form-group">
<label for="ap_hi"><i class="fas fa-heart"></i> Systolic BP</label>
<div class="input-with-unit">
<input type="number" id="ap_hi" name="ap_hi" min="90" max="250" step="1" value="120" required>
<span class="unit">mmHg</span>
</div>
<div class="form-help">Systolic blood pressure (90-250 mmHg)</div>
</div>
<div class="form-group">
<label for="ap_lo"><i class="fas fa-heartbeat"></i> Diastolic BP</label>
<div class="input-with-unit">
<input type="number" id="ap_lo" name="ap_lo" min="60" max="150" step="1" value="80" required>
<span class="unit">mmHg</span>
</div>
<div class="form-help">Diastolic blood pressure (60-150 mmHg)</div>
</div>
</div>
<div class="form-row">
<div class="form-group">
<label for="cholesterol"><i class="fas fa-vial"></i> Cholesterol</label>
<select id="cholesterol" name="cholesterol" required>
<option value="1">Normal</option>
<option value="2" selected>Above Normal</option>
<option value="3">Well Above Normal</option>
</select>
<div class="form-help">1: Normal, 2: Above Normal, 3: Well Above Normal</div>
</div>
<div class="form-group">
<label for="gluc"><i class="fas fa-tint"></i> Glucose</label>
<select id="gluc" name="gluc" required>
<option value="1">Normal</option>
<option value="2" selected>Above Normal</option>
<option value="3">Well Above Normal</option>
</select>
<div class="form-help">1: Normal, 2: Above Normal, 3: Well Above Normal</div>
</div>
</div>
<div class="form-row">
<div class="form-group">
<label for="smoke"><i class="fas fa-smoking"></i> Smoking</label>
<select id="smoke" name="smoke" required>
<option value="0" selected>No</option>
<option value="1">Yes</option>
</select>
<div class="form-help">0: No, 1: Yes</div>
</div>
<div class="form-group">
<label for="alco"><i class="fas fa-wine-glass-alt"></i> Alcohol</label>
<select id="alco" name="alco" required>
<option value="0" selected>No</option>
<option value="1">Yes</option>
</select>
<div class="form-help">0: No, 1: Yes</div>
</div>
</div>
<div class="form-group">
<label for="active"><i class="fas fa-running"></i> Physical Activity</label>
<select id="active" name="active" required>
<option value="0">No</option>
<option value="1" selected>Yes</option>
</select>
<div class="form-help">0: No, 1: Yes</div>
</div>
<button type="submit" class="btn" id="predictBtn">
<i class="fas fa-brain"></i> Assess Cardiovascular Risk
</button>
</form>
<div class="loading" id="loading">
<div class="loading-spinner"></div>
<p>Analyzing health data and calculating risk...</p>
</div>
<div class="error" id="errorMessage"></div>
</div>
<div class="card result-container" id="resultContainer">
<h2><i class="fas fa-chart-line"></i> Risk Assessment Results</h2>
<div class="result-card">
<div class="prediction-result" id="predictionResult">
<!-- Prediction result will be shown here -->
</div>
<div class="risk-level" id="riskLevel">
<!-- Risk level will be shown here -->
</div>
<div class="probability-circle" id="probabilityCircle">
<div class="probability-value" id="probabilityValue">0%</div>
<div class="probability-label">Risk Probability</div>
</div>
<div class="recommendation" id="recommendation">
<!-- Recommendation will be shown here -->
</div>
<div class="details">
<h3><i class="fas fa-info-circle"></i> Input Summary</h3>
<div class="details-grid" id="inputSummary">
<!-- Input summary will be shown here -->
</div>
</div>
<button class="btn" id="newAssessmentBtn" style="margin-top: 30px; background-color: var(--primary-color);">
<i class="fas fa-redo"></i> New Assessment
</button>
</div>
</div>
</div>
<div class="footer">
<p><i class="fas fa-shield-alt"></i> CardioAI - Cardiovascular Disease Intelligent Assistant System</p>
<p>This tool provides a risk assessment based on machine learning models and should not replace professional medical advice.</p>
<p>© 2026 CardioAI. All rights reserved.</p>
</div>
</div>
<script>
// DOM Elements
// Cached references to the elements this script reads and updates; every id
// below is declared in the page markup (form, loading box, error box and the
// individual parts of the results card).
const predictionForm = document.getElementById('predictionForm');
const resultContainer = document.getElementById('resultContainer');
const loadingElement = document.getElementById('loading');
const errorElement = document.getElementById('errorMessage');
const probabilityCircle = document.getElementById('probabilityCircle');
const probabilityValue = document.getElementById('probabilityValue');
const riskLevel = document.getElementById('riskLevel');
const predictionResult = document.getElementById('predictionResult');
const recommendation = document.getElementById('recommendation');
const inputSummary = document.getElementById('inputSummary');
const newAssessmentBtn = document.getElementById('newAssessmentBtn');
const predictBtn = document.getElementById('predictBtn');
// Model status check
/**
 * Probe the backend's /api/model_info endpoint.
 *
 * Resolves to true when the request completes with an OK status, and to
 * false when the status is not OK or the request itself fails. The outcome
 * is logged to the console; the returned promise never rejects.
 */
async function checkModelStatus() {
    let reply;
    try {
        reply = await fetch('/api/model_info');
    } catch (error) {
        console.error('❌ Error checking model status:', error);
        return false;
    }
    if (!reply.ok) {
        console.warn('⚠️ Model may not be loaded');
        return false;
    }
    console.log('✅ Model is ready');
    return true;
}
// Format feature values for display
/**
 * Turn a raw feature name/value pair into display text.
 *
 * @param {string} feature - Feature key as used in the prediction payload.
 * @param {*} value - Raw value for that feature.
 * @returns {{label: string, value: *}} Human-readable label plus the value:
 *   coded features are mapped to their text (falling back to the raw value
 *   when the code is unknown), measured features get a unit suffix, and
 *   anything else is passed through unchanged.
 */
function formatFeatureValue(feature, value) {
    const displayNames = {
        'age_years': 'Age',
        'gender': 'Gender',
        'height': 'Height',
        'weight': 'Weight',
        'ap_hi': 'Systolic BP',
        'ap_lo': 'Diastolic BP',
        'cholesterol': 'Cholesterol',
        'gluc': 'Glucose',
        'smoke': 'Smoking',
        'alco': 'Alcohol',
        'active': 'Physical Activity'
    };
    const codedValues = {
        'gender': { '1': 'Female', '2': 'Male' },
        'cholesterol': { '1': 'Normal', '2': 'Above Normal', '3': 'Well Above Normal' },
        'gluc': { '1': 'Normal', '2': 'Above Normal', '3': 'Well Above Normal' },
        'smoke': { '0': 'No', '1': 'Yes' },
        'alco': { '0': 'No', '1': 'Yes' },
        'active': { '0': 'No', '1': 'Yes' }
    };
    // Unit appended to measured (non-coded) features.
    const unitSuffix = {
        'height': 'cm',
        'weight': 'kg',
        'ap_hi': 'mmHg',
        'ap_lo': 'mmHg',
        'age_years': 'years'
    };

    const label = displayNames[feature] || feature;

    if (feature in codedValues) {
        return { label, value: codedValues[feature][value] || value };
    }
    // Own-property check so only the exact keys above get a suffix.
    if (Object.prototype.hasOwnProperty.call(unitSuffix, feature)) {
        return { label, value: `${value} ${unitSuffix[feature]}` };
    }
    return { label, value: value };
}
// Show loading state
/**
 * Switch the form into its "busy" state: hide any previous error, reveal the
 * loading box, and disable the submit button with a spinner label.
 */
function showLoading() {
    errorElement.style.display = 'none';
    loadingElement.style.display = 'block';

    const busyLabel = '<i class="fas fa-spinner fa-spin"></i> Processing...';
    predictBtn.disabled = true;
    predictBtn.innerHTML = busyLabel;
}
// Hide loading state
/**
 * Leave the "busy" state: hide the loading box and restore the submit
 * button's original label and enabled state.
 */
function hideLoading() {
    const idleLabel = '<i class="fas fa-brain"></i> Assess Cardiovascular Risk';
    predictBtn.innerHTML = idleLabel;
    predictBtn.disabled = false;
    loadingElement.style.display = 'none';
}
// Show error message
/**
 * Display `message` as plain text in the error box and end the loading state.
 *
 * @param {string} message - Text to show to the user.
 */
function showError(message) {
    const box = errorElement;
    box.textContent = message;
    box.style.display = 'block';
    hideLoading();
}
// Display results
/**
 * Render a prediction response into the results card and reveal it.
 *
 * Reads `probability`, `risk_level`, `prediction`, `recommendation` and
 * `input_features` from `data`. Any `risk_level` other than 'Low' or
 * 'Medium' is shown as high risk.
 *
 * @param {Object} data - Parsed JSON body of a successful prediction request.
 */
function displayResults(data) {
    // Response text is interpolated into innerHTML below; escape it so that
    // markup inside a server-supplied string is shown literally, not parsed.
    const htmlEntities = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };
    const escapeHtml = (text) => String(text).replace(/[&<>"']/g, (ch) => htmlEntities[ch]);

    // Update probability circle
    const percentage = (data.probability * 100).toFixed(1);
    probabilityCircle.style.setProperty('--percentage', `${percentage}%`);
    probabilityValue.textContent = `${percentage}%`;

    // Update risk level
    let riskClass = '';
    let riskText = '';
    if (data.risk_level === 'Low') {
        riskClass = 'risk-low';
        riskText = 'Low Risk';
    } else if (data.risk_level === 'Medium') {
        riskClass = 'risk-medium';
        riskText = 'Medium Risk';
    } else {
        riskClass = 'risk-high';
        riskText = 'High Risk';
    }
    riskLevel.className = `risk-level ${riskClass}`;
    riskLevel.innerHTML = `<i class="fas fa-exclamation-circle"></i> ${riskText}`;

    // Update prediction result
    if (data.prediction === 1) {
        predictionResult.innerHTML = `
            <i class="fas fa-exclamation-triangle"></i>
            <span style="color: ${data.risk_level === 'High' ? '#e74c3c' : '#f39c12'}">
                Cardiovascular Disease Risk Detected
            </span>
        `;
    } else {
        predictionResult.innerHTML = `
            <i class="fas fa-check-circle"></i>
            <span style="color: #27ae60">
                No Significant Cardiovascular Disease Risk Detected
            </span>
        `;
    }

    // Update recommendation
    recommendation.innerHTML = `
        <h3><i class="fas fa-stethoscope"></i> Recommendation</h3>
        <p>${escapeHtml(data.recommendation)}</p>
        <p style="margin-top: 10px; font-size: 14px; color: #666;">
            <i class="fas fa-lightbulb"></i> Probability of cardiovascular disease: ${percentage}%
        </p>
    `;

    // Update input summary (tolerate a response without input_features)
    const summaryHTML = Object.entries(data.input_features || {})
        .map(([feature, value]) => {
            const formatted = formatFeatureValue(feature, value);
            return `
                <div class="detail-item">
                    <div class="detail-label">${escapeHtml(formatted.label)}</div>
                    <div class="detail-value">${escapeHtml(formatted.value)}</div>
                </div>
            `;
        })
        .join('');
    inputSummary.innerHTML = summaryHTML;

    // Show result container
    resultContainer.style.display = 'block';
    // Scroll to results
    resultContainer.scrollIntoView({ behavior: 'smooth', block: 'nearest' });
}
// Form submission handler
// Validates the form, posts its values as JSON to /predict_cardio and shows
// either the prediction or an error. Network failures, unreadable (non-JSON)
// replies and application-level errors are reported separately so the user
// is not told "network error" when the server actually answered.
predictionForm.addEventListener('submit', async function(event) {
    event.preventDefault();

    // Validate form
    if (!predictionForm.checkValidity()) {
        predictionForm.reportValidity();
        return;
    }

    // Collect form data
    const formData = new FormData(predictionForm);
    const data = Object.fromEntries(formData.entries());

    // FormData yields strings; convert the fields that are sent as numbers.
    const numericFields = ['age_years', 'height', 'weight', 'ap_hi', 'ap_lo',
        'cholesterol', 'gluc', 'smoke', 'alco', 'active', 'gender'];
    numericFields.forEach(field => {
        if (data[field] !== undefined) {
            data[field] = parseFloat(data[field]);
        }
    });

    showLoading();
    try {
        // Send prediction request
        let response;
        try {
            response = await fetch('/predict_cardio', {
                method: 'POST',
                headers: {
                    'Content-Type': 'application/json',
                },
                body: JSON.stringify(data)
            });
        } catch (error) {
            showError(`Network error: ${error.message}. Please check if the server is running.`);
            return;
        }

        // The server answered; a body that is not JSON is a server-side
        // problem, not a connectivity one.
        let result;
        try {
            result = await response.json();
        } catch (parseError) {
            showError(`Server returned an unreadable response (HTTP ${response.status}).`);
            return;
        }

        if (response.ok && result && result.status === 'success') {
            displayResults(result);
        } else {
            showError((result && result.message) || 'Error making prediction');
        }
    } catch (error) {
        // Anything else (e.g. a failure while rendering the result).
        showError(`Unexpected error: ${error.message}`);
    } finally {
        hideLoading();
    }
});
// New assessment button
// Hides the results card, restores the form's default values and brings the
// form back into view.
newAssessmentBtn.addEventListener('click', () => {
    predictionForm.reset();
    resultContainer.style.display = 'none';
    // Scroll back to form
    const scrollOptions = { behavior: 'smooth', block: 'nearest' };
    predictionForm.scrollIntoView(scrollOptions);
});
// Initialize with example data and check model status
// On page load: probe the model endpoint, reflect a failed probe in the
// status banner (which otherwise always reads "Connected"), and add a
// "Load Example Data" button to the form.
document.addEventListener('DOMContentLoaded', async function() {
    console.log('🚀 CardioAI Web Interface Initialized');

    // Check model status
    const modelReady = await checkModelStatus();
    if (!modelReady) {
        console.warn('Model may not be ready. Please ensure the training script has been run.');
        // The banner markup is static and shows a green "connected" dot;
        // update it so the page does not claim a connection it lacks.
        const statusBox = document.getElementById('modelStatus');
        if (statusBox) {
            const dot = statusBox.querySelector('.status-indicator');
            const caption = statusBox.querySelector('span');
            if (dot) {
                dot.classList.remove('status-connected');
                dot.style.backgroundColor = 'var(--danger-color)';
            }
            if (caption) {
                caption.textContent = 'CardioAI Prediction Engine unavailable';
            }
        }
    }

    // Set up example data button (optional)
    const exampleValues = {
        age_years: 55,
        gender: 2,
        height: 175.5,
        weight: 85.2,
        ap_hi: 140,
        ap_lo: 90,
        cholesterol: 3,
        gluc: 2,
        smoke: 1,
        alco: 1,
        active: 0
    };
    const exampleBtn = document.createElement('button');
    exampleBtn.type = 'button'; // must not submit the form
    exampleBtn.className = 'btn';
    exampleBtn.style.backgroundColor = 'var(--warning-color)';
    exampleBtn.style.marginTop = '10px';
    exampleBtn.innerHTML = '<i class="fas fa-vial"></i> Load Example Data';
    exampleBtn.onclick = function() {
        for (const [fieldId, fieldValue] of Object.entries(exampleValues)) {
            document.getElementById(fieldId).value = fieldValue;
        }
    };
    predictionForm.appendChild(exampleBtn);
});
// Input validation helper
/**
 * Clamp every number input to its min/max attributes once the user commits
 * a value.
 *
 * Clamping happens on `change` (value committed) rather than on `input`
 * (every keystroke): clamping per keystroke rewrites partially typed numbers
 * — e.g. the "4" of "45" would immediately become the minimum of 20 — which
 * makes values impossible to type.
 */
function setupInputValidation() {
    const inputs = document.querySelectorAll('input[type="number"]');
    inputs.forEach(input => {
        input.addEventListener('change', function() {
            const min = parseFloat(this.min);
            const max = parseFloat(this.max);
            const value = parseFloat(this.value);
            // Only clamp when both bounds and the value are real numbers.
            if (isNaN(min) || isNaN(max) || isNaN(value)) {
                return;
            }
            if (value < min) {
                this.value = min;
            } else if (value > max) {
                this.value = max;
            }
        });
    });
}
// Initialize input validation
setupInputValidation();
</script>
</body>
</html>

View File

@@ -0,0 +1,348 @@
#!/usr/bin/env python3
"""
CardioAI - Model Training and Saving Script
Train XGBoost classifier for cardiovascular disease prediction and save the model.
"""
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import xgboost as xgb
from xgboost import XGBClassifier
import joblib
# Constants
# NOTE(review): both paths are absolute and point into one user's home
# directory, so the script cannot run unchanged elsewhere — consider deriving
# them from this file's location or from configuration.
# Excel workbook read by load_and_preprocess_data().
DATA_PATH = "/Users/anthony/ai_lesson/ai_zhangzhongshan/心血管疾病.xlsx"
# Destination of the trained pipeline written by save_model().
# NOTE(review): the directory component " sad_test01" begins with a space —
# confirm this matches the real directory name and is not a typo.
MODEL_PATH = "/Users/anthony/PycharmProjects/ sad_test01/aicodes/module2_predictor/cardio_predictor_model.pkl"
def load_and_preprocess_data():
    """
    Load and preprocess cardiovascular disease data.

    Reads the workbook at DATA_PATH, standardizes column names, derives
    ``age_years`` (from ``age`` in days), ``bmi`` and ``bmi_category``, and
    drops rows whose blood pressure readings are implausible
    (diastolic >= systolic, systolic outside 90-250, diastolic outside 60-150).

    Returns:
        The cleaned DataFrame. Missing required columns are only reported with
        a warning; the frame is returned regardless.
    """
    print("📊 Step 1: Loading data...")
    # Load data
    df = pd.read_excel(DATA_PATH, engine='openpyxl')
    print(f" Original data shape: {df.shape}")

    # Standardize column names (similar to module1). Keys are matched as
    # case-insensitive substrings of the source column name, in this order.
    column_mapping = {
        'age(天)': 'age',
        'cholestero1': 'cholesterol',
        'cholesterol': 'cholesterol',
        'gluc': 'gluc',
        'ap_hi': 'ap_hi',
        'ap_lo': 'ap_lo',
        'cardio': 'cardio',
        'gender': 'gender',
        'height': 'height',
        'weight': 'weight',
        'smoke': 'smoke',
        'alco': 'alco',
        'active': 'active',
        'id': 'id'
    }
    # Work out all renames first, then apply them in one call. str() keeps
    # non-string column labels (e.g. numeric headers) from crashing .lower().
    rename_map = {}
    for col in df.columns:
        col_lower = str(col).lower()
        for key, value in column_mapping.items():
            if key.lower() in col_lower:
                if col != value:
                    rename_map[col] = value
                break
    if rename_map:
        df = df.rename(columns=rename_map)

    print("📊 Step 2: Performing feature engineering...")
    # 1. Convert age (in days) to years (age_years)
    if 'age' in df.columns:
        df['age_years'] = np.round(df['age'] / 365.25).astype(int)

    # 2. Calculate BMI (weight in kg, height in cm)
    if 'height' in df.columns and 'weight' in df.columns:
        df['bmi'] = (df['weight'] / ((df['height'] / 100) ** 2)).round(2)

    # 3. Handle blood pressure outliers
    if 'ap_hi' in df.columns and 'ap_lo' in df.columns:
        initial_count = len(df)
        plausible = (
            (df['ap_lo'] < df['ap_hi'])
            & (df['ap_hi'] >= 90) & (df['ap_hi'] <= 250)
            & (df['ap_lo'] >= 60) & (df['ap_lo'] <= 150)
        )
        # .copy() so the column assignment below writes to an independent
        # frame rather than to a slice of the unfiltered data.
        df = df[plausible].copy()
        print(f" Removed {initial_count - len(df)} blood pressure outliers")

    # 4. Cholesterol and glucose stay numeric; they are encoded later in the
    #    preprocessing pipeline.

    # 5. BMI categories: 0 underweight (<18.5), 1 normal (<25),
    #    2 overweight (<30), 3 obese (everything else, including missing BMI).
    if 'bmi' in df.columns:
        bmi = df['bmi']
        df['bmi_category'] = np.select(
            [bmi < 18.5, bmi < 25, bmi < 30],
            [0, 1, 2],
            default=3,
        )

    # Check required columns
    required_cols = ['age_years', 'gender', 'height', 'weight', 'ap_hi', 'ap_lo',
                     'cholesterol', 'gluc', 'smoke', 'alco', 'active', 'cardio']
    missing_cols = [col for col in required_cols if col not in df.columns]
    if missing_cols:
        print(f"⚠️ Warning: Missing columns: {missing_cols}")
    return df
def prepare_features_target(df):
    """
    Prepare features (X) and target (y) for model training.

    Args:
        df: Preprocessed DataFrame. It is not modified.

    Returns:
        A tuple ``(X, y, available_features)`` where ``X`` is a copy of the
        available feature columns, ``y`` is a copy of the ``cardio`` column
        (``None`` when that column is absent), and ``available_features`` is
        the list of column names in ``X``.
    """
    print("📊 Step 3: Preparing features and target...")
    # The 11 base features of the cardiovascular dataset
    feature_cols = [
        'age_years',    # Age in years (converted from days)
        'gender',       # Gender (1: female, 2: male)
        'height',       # Height in cm
        'weight',       # Weight in kg
        'ap_hi',        # Systolic blood pressure
        'ap_lo',        # Diastolic blood pressure
        'cholesterol',  # Cholesterol level (1: normal, 2: above normal, 3: well above normal)
        'gluc',         # Glucose level (1: normal, 2: above normal, 3: well above normal)
        'smoke',        # Smoking (0: no, 1: yes)
        'alco',         # Alcohol intake (0: no, 1: yes)
        'active'        # Physical activity (0: no, 1: yes)
    ]
    # Check which features are available
    available_features = [col for col in feature_cols if col in df.columns]
    print(f" Available features: {available_features}")

    # BMI, when present, is used in addition to height and weight.
    if 'bmi' in df.columns and 'bmi' not in available_features:
        available_features.append('bmi')

    # Selecting by name already leaves out 'id' and the raw 'age' (days)
    # column, so no separate drop is needed.
    X = df[available_features].copy()
    y = df['cardio'].copy() if 'cardio' in df.columns else None
    print(f" Features shape: {X.shape}")
    print(f" Target shape: {y.shape if y is not None else 'N/A'}")
    return X, y, available_features
def create_preprocessing_pipeline(feature_cols):
    """
    Build the ColumnTransformer used in front of the classifier.

    Numeric columns present in ``feature_cols`` are standardized and
    categorical columns present in ``feature_cols`` are one-hot encoded
    (first level dropped); any other column is discarded.

    Args:
        feature_cols: Names of the feature columns that will be fed to the model.

    Returns:
        An unfitted ColumnTransformer.
    """
    print("📊 Step 4: Creating preprocessing pipeline...")

    # Candidate columns per kind; only those actually in feature_cols are kept.
    numeric_candidates = ('age_years', 'height', 'weight', 'ap_hi', 'ap_lo', 'bmi')
    categorical_candidates = ('gender', 'cholesterol', 'gluc', 'smoke', 'alco', 'active')

    numeric_features = [name for name in numeric_candidates if name in feature_cols]
    categorical_features = [name for name in categorical_candidates if name in feature_cols]
    print(f" Numeric features: {numeric_features}")
    print(f" Categorical features: {categorical_features}")

    scaling_step = ('num', StandardScaler(), numeric_features)
    encoder = OneHotEncoder(drop='first', sparse_output=False, handle_unknown='ignore')
    encoding_step = ('cat', encoder, categorical_features)

    # remainder='drop': columns not listed above never reach the classifier.
    return ColumnTransformer(
        transformers=[scaling_step, encoding_step],
        remainder='drop',
    )
def train_model(X, y, preprocessor):
    """
    Train XGBoost classifier with preprocessing pipeline.

    Splits the data 80/20 (stratified on ``y``, fixed seed), fits a pipeline of
    ``preprocessor`` followed by an XGBClassifier on the training part, and
    prints accuracy, a classification report, the confusion matrix and the
    top feature importances for the held-out part.

    Args:
        X: Feature DataFrame.
        y: Binary target aligned with ``X``.
        preprocessor: Unfitted preprocessing transformer placed in front of
            the classifier.

    Returns:
        The fitted Pipeline.
    """
    print("📊 Step 5: Training XGBoost model...")
    # Split data
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y
    )
    print(f" Training set: {X_train.shape}")
    print(f" Test set: {X_test.shape}")

    # Create full pipeline. `use_label_encoder` is intentionally not passed:
    # it is deprecated in XGBoost and ignored by current releases.
    pipeline = Pipeline([
        ('preprocessor', preprocessor),
        ('classifier', XGBClassifier(
            n_estimators=100,
            max_depth=5,
            learning_rate=0.1,
            subsample=0.8,
            colsample_bytree=0.8,
            random_state=42,
            eval_metric='logloss',
        ))
    ])
    # Train the model
    pipeline.fit(X_train, y_train)

    # Evaluate the model
    print("📊 Step 6: Evaluating model performance...")
    y_pred = pipeline.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print(f" Accuracy: {accuracy:.4f}")
    print("\n Classification Report:")
    print(classification_report(y_test, y_pred, target_names=['No Disease', 'Disease']))

    # Confusion matrix (binary: 2x2)
    cm = confusion_matrix(y_test, y_pred)
    print(f" Confusion Matrix:")
    print(f" [[{cm[0,0]:4d} {cm[0,1]:4d}]")
    print(f" [{cm[1,0]:4d} {cm[1,1]:4d}]]")

    # Feature importance is reporting only; a failure here must not discard
    # the trained model, hence the broad best-effort handler.
    try:
        fitted_preprocessor = pipeline.named_steps['preprocessor']
        # transformers_[0] is the numeric step, transformers_[1] the categorical one.
        num_features = fitted_preprocessor.transformers_[0][2]
        cat_transformer = fitted_preprocessor.transformers_[1][1]
        cat_features = fitted_preprocessor.transformers_[1][2]

        feature_names = list(num_features)
        if hasattr(cat_transformer, 'get_feature_names_out'):
            # Names of the one-hot encoded output columns
            feature_names.extend(cat_transformer.get_feature_names_out(cat_features))
        else:
            # Fallback: just use categorical feature names
            feature_names.extend(cat_features)

        importances = pipeline.named_steps['classifier'].feature_importances_
        importance_df = pd.DataFrame({
            'feature': feature_names[:len(importances)],
            'importance': importances
        }).sort_values('importance', ascending=False)
        print("\n Top 10 Feature Importances:")
        print(importance_df.head(10).to_string(index=False))
    except Exception as e:
        print(f" Could not extract feature importance: {e}")
    return pipeline
def save_model(pipeline, model_path):
    """
    Save the trained pipeline to disk, plus a JSON metadata file next to it.

    The metadata file is named ``<model file stem>_metadata.json`` in the same
    directory as the model. Deriving it from the stem (instead of replacing
    the text '.pkl') guarantees it can never be the model file itself, even
    when ``model_path`` has a different or no extension.

    Args:
        pipeline: Fitted pipeline whose 'preprocessor' step exposes
            ``transformers_`` (numeric step first, categorical step second).
        model_path: Destination of the serialized pipeline.
    """
    import json
    from pathlib import Path

    print("📊 Step 7: Saving model...")
    # Save the model
    joblib.dump(pipeline, model_path)
    print(f" Model saved to: {model_path}")

    # Also save metadata about expected features
    fitted_preprocessor = pipeline.named_steps['preprocessor']
    metadata = {
        'model_version': '1.0',
        'description': 'CardioAI Cardiovascular Disease Prediction Model',
        'features': list(fitted_preprocessor.transformers_[0][2]) +
                    list(fitted_preprocessor.transformers_[1][2]),
        'creation_date': pd.Timestamp.now().strftime('%Y-%m-%d %H:%M:%S')
    }
    model_file = Path(model_path)
    metadata_path = model_file.with_name(f"{model_file.stem}_metadata.json")
    with open(metadata_path, 'w', encoding='utf-8') as f:
        json.dump(metadata, f, indent=2)
    print(f" Metadata saved to: {metadata_path}")
def main():
    """
    Main training workflow.

    Runs loading, feature preparation, pipeline construction, training and
    saving in order.

    Returns:
        0 on success, 1 on any failure (including a missing target column),
        suitable for use as the process exit status.
    """
    print("=" * 60)
    print("🚀 CardioAI - Model Training Script")
    print("=" * 60)
    try:
        # Load and preprocess data
        df = load_and_preprocess_data()
        # Prepare features and target
        X, y, feature_cols = prepare_features_target(df)
        if y is None:
            print("❌ Error: Target column 'cardio' not found in data!")
            # Report failure to the caller; a bare return would exit with status 0.
            return 1
        # Create preprocessing pipeline
        preprocessor = create_preprocessing_pipeline(feature_cols)
        # Train model
        pipeline = train_model(X, y, preprocessor)
        # Save model
        save_model(pipeline, MODEL_PATH)
        print("\n✅ Model training completed successfully!")
        print("\n📋 Model Summary:")
        print(f" - Model type: XGBoost Classifier")
        print(f" - Features used: {len(feature_cols)}")
        print(f" - Training samples: {len(X)}")
        print(f" - Model saved to: {MODEL_PATH}")
    except Exception as e:
        # Top-level boundary: print the full traceback and signal failure.
        print(f"\n❌ Error during model training: {e}")
        import traceback
        traceback.print_exc()
        return 1
    return 0
if __name__ == "__main__":
    # Propagate main()'s return value as the process exit status. SystemExit
    # is raised directly because the `exit` helper is installed by the `site`
    # module for interactive use and is not guaranteed to exist in every run.
    raise SystemExit(main())

View File

@@ -0,0 +1,886 @@
<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>CardioAI - 心血管健康语音助手</title>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css">
<link href="https://fonts.googleapis.com/css2?family=Poppins:wght@300;400;500;600;700&family=Roboto:wght@300;400;500&display=swap" rel="stylesheet">
<style>
:root {
--primary-color: #2c3e50;
--secondary-color: #3498db;
--accent-color: #9b59b6;
--success-color: #27ae60;
--warning-color: #f39c12;
--danger-color: #e74c3c;
--light-color: #ecf0f1;
--dark-color: #2c3e50;
--border-radius: 12px;
--box-shadow: 0 5px 20px rgba(0, 0, 0, 0.1);
--transition: all 0.3s ease;
}
* {
margin: 0;
padding: 0;
box-sizing: border-box;
}
body {
font-family: 'Roboto', sans-serif;
line-height: 1.6;
color: #333;
background: linear-gradient(135deg, #f5f7fa 0%, #e4e8f0 100%);
min-height: 100vh;
padding: 20px;
}
.container {
max-width: 1200px;
margin: 0 auto;
padding: 20px;
}
.header {
text-align: center;
margin-bottom: 40px;
padding: 30px;
background: white;
border-radius: var(--border-radius);
box-shadow: var(--box-shadow);
position: relative;
overflow: hidden;
}
.header::before {
content: '';
position: absolute;
top: 0;
left: 0;
right: 0;
height: 5px;
background: linear-gradient(90deg, var(--primary-color), var(--accent-color), var(--secondary-color));
}
.logo {
display: flex;
align-items: center;
justify-content: center;
margin-bottom: 20px;
}
.logo i {
font-size: 48px;
color: var(--danger-color);
margin-right: 15px;
}
.logo h1 {
font-family: 'Poppins', sans-serif;
font-size: 36px;
color: var(--primary-color);
font-weight: 700;
}
.subtitle {
font-size: 18px;
color: #666;
margin-top: 10px;
}
.description {
max-width: 800px;
margin: 20px auto 0;
padding: 15px;
background-color: #f8f9fa;
border-radius: 8px;
font-size: 15px;
color: #555;
line-height: 1.8;
}
.content {
display: grid;
grid-template-columns: 1fr 1fr;
gap: 30px;
margin-bottom: 40px;
}
@media (max-width: 992px) {
.content {
grid-template-columns: 1fr;
}
}
.card {
background: white;
border-radius: var(--border-radius);
box-shadow: var(--box-shadow);
padding: 30px;
transition: var(--transition);
position: relative;
overflow: hidden;
}
.card::before {
content: '';
position: absolute;
top: 0;
left: 0;
right: 0;
height: 4px;
background: linear-gradient(90deg, var(--secondary-color), var(--accent-color));
}
.card:hover {
box-shadow: 0 10px 30px rgba(0, 0, 0, 0.15);
transform: translateY(-5px);
}
.card h2 {
font-family: 'Poppins', sans-serif;
font-size: 24px;
color: var(--primary-color);
margin-bottom: 20px;
padding-bottom: 15px;
border-bottom: 2px solid var(--light-color);
display: flex;
align-items: center;
}
.card h2 i {
margin-right: 10px;
color: var(--secondary-color);
}
.form-group {
margin-bottom: 25px;
}
.form-group label {
display: block;
margin-bottom: 8px;
font-weight: 500;
color: var(--dark-color);
font-size: 16px;
}
.form-group textarea {
width: 100%;
min-height: 120px;
padding: 15px;
border: 2px solid #ddd;
border-radius: 8px;
font-size: 16px;
font-family: 'Roboto', sans-serif;
resize: vertical;
transition: var(--transition);
background-color: #f8f9fa;
}
.form-group textarea:focus {
border-color: var(--secondary-color);
outline: none;
background-color: white;
box-shadow: 0 0 0 3px rgba(52, 152, 219, 0.2);
}
.examples {
display: grid;
grid-template-columns: repeat(auto-fill, minmax(200px, 1fr));
gap: 10px;
margin-top: 15px;
}
.example-btn {
padding: 10px 15px;
background-color: #f8f9fa;
border: 1px solid #ddd;
border-radius: 6px;
font-size: 14px;
cursor: pointer;
transition: var(--transition);
text-align: left;
}
.example-btn:hover {
background-color: var(--secondary-color);
color: white;
border-color: var(--secondary-color);
}
.btn {
display: inline-block;
padding: 14px 30px;
background: var(--secondary-color);
color: white;
border: none;
border-radius: 8px;
font-size: 18px;
font-weight: 600;
cursor: pointer;
transition: var(--transition);
width: 100%;
margin-top: 10px;
}
.btn:hover {
background: #2980b9;
transform: translateY(-2px);
box-shadow: 0 5px 15px rgba(41, 128, 185, 0.3);
}
.btn:active {
transform: translateY(0);
}
.btn:disabled {
background-color: #95a5a6;
cursor: not-allowed;
transform: none;
box-shadow: none;
}
.btn i {
margin-right: 10px;
}
.result-container {
display: none;
}
.answer-text {
background-color: #f8f9fa;
padding: 20px;
border-radius: 8px;
margin-bottom: 25px;
line-height: 1.8;
font-size: 16px;
max-height: 300px;
overflow-y: auto;
}
.answer-text h3 {
color: var(--primary-color);
margin-bottom: 10px;
font-size: 18px;
}
.audio-controls {
background-color: #f0f8ff;
padding: 20px;
border-radius: 8px;
margin-bottom: 25px;
border-left: 4px solid var(--secondary-color);
}
.audio-controls h3 {
color: var(--primary-color);
margin-bottom: 15px;
font-size: 18px;
display: flex;
align-items: center;
}
.audio-controls h3 i {
margin-right: 10px;
color: var(--accent-color);
}
.audio-player {
width: 100%;
margin-top: 10px;
}
.audio-player audio {
width: 100%;
}
.audio-info {
display: flex;
justify-content: space-between;
margin-top: 10px;
font-size: 14px;
color: #666;
}
.loading {
display: none;
text-align: center;
padding: 40px;
}
.loading-spinner {
width: 60px;
height: 60px;
border: 5px solid #f3f3f3;
border-top: 5px solid var(--secondary-color);
border-radius: 50%;
animation: spin 1s linear infinite;
margin: 0 auto 20px;
}
@keyframes spin {
0% { transform: rotate(0deg); }
100% { transform: rotate(360deg); }
}
.loading p {
font-size: 16px;
color: #666;
margin-top: 10px;
}
.error {
display: none;
background-color: rgba(231, 76, 60, 0.1);
border: 1px solid var(--danger-color);
color: var(--danger-color);
padding: 15px;
border-radius: 8px;
margin-top: 20px;
}
.status-bar {
display: flex;
align-items: center;
justify-content: space-between;
padding: 15px;
background-color: #f8f9fa;
border-radius: 8px;
margin-bottom: 25px;
}
.status-item {
display: flex;
align-items: center;
}
.status-indicator {
width: 12px;
height: 12px;
border-radius: 50%;
margin-right: 10px;
}
.status-connected {
background-color: var(--success-color);
animation: pulse 2s infinite;
}
.status-disconnected {
background-color: var(--danger-color);
}
@keyframes pulse {
0% { opacity: 1; }
50% { opacity: 0.5; }
100% { opacity: 1; }
}
.status-text {
font-size: 14px;
color: #666;
}
.footer {
text-align: center;
padding: 20px;
color: #666;
font-size: 14px;
margin-top: 40px;
border-top: 1px solid #eee;
}
.system-info {
background-color: #f8f9fa;
padding: 15px;
border-radius: 8px;
margin-top: 20px;
font-size: 13px;
color: #777;
}
.system-info p {
margin: 5px 0;
}
.feature-list {
display: grid;
grid-template-columns: repeat(auto-fill, minmax(250px, 1fr));
gap: 15px;
margin-top: 20px;
}
.feature-item {
background-color: #f8f9fa;
padding: 15px;
border-radius: 8px;
display: flex;
align-items: center;
}
.feature-item i {
color: var(--secondary-color);
margin-right: 10px;
font-size: 18px;
}
</style>
</head>
<body>
<div class="container">
<header class="header">
<div class="logo">
<i class="fas fa-microphone-alt"></i>
<h1>CardioAI 语音助手</h1>
</div>
<p class="subtitle">心血管健康智能问答与语音交互系统</p>
<div class="description">
<p>这是一个基于DeepSeek大模型和CosyVoice语音合成技术的心血管健康语音助手。您可以提问任何关于心血管健康的问题，系统将提供专业的文字回答，并转换为语音播放。</p>
</div>
</header>
<div class="status-bar" id="statusBar">
<div class="status-item">
<div class="status-indicator status-connected" id="llmStatus"></div>
<span class="status-text" id="llmStatusText">DeepSeek LLM: 连接中...</span>
</div>
<div class="status-item">
<div class="status-indicator status-connected" id="ttsStatus"></div>
<span class="status-text" id="ttsStatusText">CosyVoice TTS: 连接中...</span>
</div>
</div>
<div class="content">
<div class="card">
<h2><i class="fas fa-question-circle"></i> 提问心血管健康问题</h2>
<form id="questionForm">
<div class="form-group">
<label for="question"><i class="fas fa-comment-medical"></i> 请输入您的问题:</label>
<textarea id="question" name="question" placeholder="例如:如何预防高血压?心脏病早期有哪些症状?哪些食物对心脏健康有益?..." required></textarea>
</div>
<div class="form-group">
<label><i class="fas fa-lightbulb"></i> 示例问题:</label>
<div class="examples">
<button type="button" class="example-btn" onclick="setExample(0)">如何预防高血压?</button>
<button type="button" class="example-btn" onclick="setExample(1)">心脏病早期有哪些症状?</button>
<button type="button" class="example-btn" onclick="setExample(2)">哪些食物对心脏健康有益?</button>
<button type="button" class="example-btn" onclick="setExample(3)">如何控制胆固醇水平?</button>
</div>
</div>
<button type="submit" class="btn" id="submitBtn">
<i class="fas fa-brain"></i> 获取专业回答
</button>
</form>
<div class="loading" id="loading">
<div class="loading-spinner"></div>
<p>正在生成专业回答并合成语音,请稍候...</p>
</div>
<div class="error" id="errorMessage"></div>
<div class="system-info">
<p><i class="fas fa-info-circle"></i> 系统提示：回答由AI生成，仅供参考。如有医疗问题，请咨询专业医生。</p>
</div>
</div>
<div class="card result-container" id="resultContainer">
<h2><i class="fas fa-comment-medical"></i> 回答结果</h2>
<div class="answer-text" id="answerText">
<!-- 文字回答将显示在这里 -->
</div>
<div class="audio-controls">
<h3><i class="fas fa-volume-up"></i> 语音播报</h3>
<div class="audio-player">
<audio controls id="audioPlayer" autoplay>
<source id="audioSource" type="audio/mp3">
您的浏览器不支持音频播放。
</audio>
</div>
<div class="audio-info">
<span id="audioStatus">准备播放...</span>
<span id="audioTime">00:00</span>
</div>
</div>
<div class="feature-list">
<div class="feature-item">
<i class="fas fa-user-md"></i>
<span>专业心血管健康顾问</span>
</div>
<div class="feature-item">
<i class="fas fa-robot"></i>
<span>基于DeepSeek大模型</span>
</div>
<div class="feature-item">
<i class="fas fa-microphone-alt"></i>
<span>CosyVoice语音合成</span>
</div>
<div class="feature-item">
<i class="fas fa-headphones-alt"></i>
<span>实时音频播放</span>
</div>
</div>
<button class="btn" id="newQuestionBtn" style="margin-top: 30px; background-color: var(--primary-color);">
<i class="fas fa-redo"></i> 提出新问题
</button>
</div>
</div>
<div class="footer">
<p><i class="fas fa-shield-alt"></i> CardioAI - 心血管疾病智能辅助系统</p>
<p>本工具提供基于AI的心血管健康咨询服务，仅供参考，不能替代专业医疗建议。</p>
<p>© 2026 CardioAI. 所有权利保留。</p>
</div>
</div>
<script>
// Cached references to the DOM nodes this script reads or updates.
// Lookups run in the same order as the ids are listed.

// Question form and its feedback elements
const [questionForm, questionInput, submitBtn, loadingElement, errorElement] =
    ['questionForm', 'question', 'submitBtn', 'loading', 'errorMessage']
        .map((id) => document.getElementById(id));

// Result card: answer text, audio player and its status labels
const [resultContainer, answerText, audioPlayer, audioSource, audioStatus, audioTime, newQuestionBtn] =
    ['resultContainer', 'answerText', 'audioPlayer', 'audioSource', 'audioStatus', 'audioTime', 'newQuestionBtn']
        .map((id) => document.getElementById(id));

// Service status indicators (dot + label) for the LLM and the TTS backend
const [llmStatus, llmStatusText, ttsStatus, ttsStatusText] =
    ['llmStatus', 'llmStatusText', 'ttsStatus', 'ttsStatusText']
        .map((id) => document.getElementById(id));
// Example questions, addressed by index through setExample().
// The example buttons visible in this template use indices 0-3 only.
const exampleQuestions = [
"如何预防高血压?请提供具体的饮食和生活方式建议。",
"心脏病早期有哪些症状?哪些人群需要特别注意?",
"哪些食物对心脏健康有益?请推荐一些心脏友好的食谱。",
"如何控制胆固醇水平?除了药物,还有哪些自然方法?",
"心血管疾病患者适合进行哪些运动?运动时需要注意什么?",
"什么是冠心病?它的主要危险因素有哪些?",
"如何区分心绞痛和心肌梗死?出现相关症状应该怎么办?",
"糖尿病患者如何预防心血管并发症?",
"血压多少算正常?不同年龄段的血压标准有差异吗?",
"长期压力对心血管健康有什么影响?如何有效管理压力?"
];
// Copy the example question at `index` into the question box and focus it.
// Indices outside the exampleQuestions array are ignored.
function setExample(index) {
    const inRange = index >= 0 && index < exampleQuestions.length;
    if (!inRange) {
        return;
    }
    const chosen = exampleQuestions[index];
    questionInput.value = chosen;
    questionInput.focus();
}
// Query /api/health and mirror the result in the status indicators.
// Resolves to the parsed health payload, or to null when the check fails
// (in which case both indicators are switched to the "check failed" state).
async function checkSystemStatus() {
    const CONNECTED = 'status-indicator status-connected';
    const DISCONNECTED = 'status-indicator status-disconnected';
    try {
        const response = await fetch('/api/health');
        if (!response.ok) {
            throw new Error('健康检查失败');
        }
        const data = await response.json();

        // DeepSeek LLM indicator
        if (!data.llm_initialized) {
            llmStatus.className = DISCONNECTED;
            const llmKeyMissing = data.missing_config && data.missing_config.deepseek;
            if (llmKeyMissing) {
                llmStatusText.textContent = 'DeepSeek LLM: 需要API密钥';
                llmStatusText.title = '请配置.env文件中的DEEPSEEK_API_KEY1';
            } else {
                llmStatusText.textContent = 'DeepSeek LLM: 未连接';
            }
        } else {
            llmStatus.className = CONNECTED;
            llmStatusText.textContent = 'DeepSeek LLM: 已连接';
        }

        // CosyVoice TTS indicator
        if (!data.dashscope_initialized) {
            ttsStatus.className = DISCONNECTED;
            const ttsKeyMissing = data.missing_config && data.missing_config.dashscope;
            if (ttsKeyMissing) {
                ttsStatusText.textContent = 'CosyVoice TTS: 需要API密钥';
                ttsStatusText.title = '请配置.env文件中的DASHSCOPE_API_KEY';
            } else {
                ttsStatusText.textContent = 'CosyVoice TTS: 未连接';
            }
        } else {
            ttsStatus.className = CONNECTED;
            ttsStatusText.textContent = 'CosyVoice TTS: 已连接';
        }

        // Configuration banner: shown only while the backend reports missing setup
        if (data.setup_required) {
            showConfigWarning(data.setup_instructions);
        } else {
            hideConfigWarning();
        }
        return data;
    } catch (error) {
        console.error('检查系统状态时出错:', error);
        llmStatus.className = DISCONNECTED;
        llmStatusText.textContent = 'DeepSeek LLM: 检查失败';
        ttsStatus.className = DISCONNECTED;
        ttsStatusText.textContent = 'CosyVoice TTS: 检查失败';
        return null;
    }
}
// Enter the "request in flight" state: show the spinner, lock the submit
// button with a busy label, and hide any previous error message.
function showLoading() {
    const busyLabel = '<i class="fas fa-spinner fa-spin"></i> 处理中...';
    loadingElement.style.display = 'block';
    submitBtn.disabled = true;
    submitBtn.innerHTML = busyLabel;
    errorElement.style.display = 'none';
}
// Leave the "request in flight" state: hide the spinner and restore the
// submit button to its enabled, default-labelled form.
function hideLoading() {
    const idleLabel = '<i class="fas fa-brain"></i> 获取专业回答';
    loadingElement.style.display = 'none';
    submitBtn.disabled = false;
    submitBtn.innerHTML = idleLabel;
}
// Show `message` in the error box (as plain text) and end the loading state.
function showError(message) {
    errorElement.textContent = message;
    Object.assign(errorElement.style, { display: 'block' });
    hideLoading();
}
// Show (creating it on first use) the configuration banner with `message`.
//
// The banner markup is static; `message` comes from the health endpoint and is
// written with textContent, so it is never parsed as HTML. Re-showing restores
// the `flex` layout the banner was built with, which keeps the close button
// aligned to the right of the text.
function showConfigWarning(message) {
    let configWarning = document.getElementById('configWarning');
    if (!configWarning) {
        configWarning = document.createElement('div');
        configWarning.id = 'configWarning';
        configWarning.className = 'config-warning';
        configWarning.style.cssText = `
            background-color: #fff3cd;
            border: 1px solid #ffeaa7;
            color: #856404;
            padding: 15px;
            border-radius: 8px;
            margin-bottom: 20px;
            display: flex;
            align-items: center;
            justify-content: space-between;
        `;

        const warningContent = document.createElement('div');
        warningContent.style.flex = '1';
        // Static template only; the dynamic message is filled in below.
        warningContent.innerHTML = `
            <strong><i class="fas fa-exclamation-triangle"></i> 配置提示</strong>
            <p data-role="config-message" style="margin: 8px 0 0 0; font-size: 14px;"></p>
            <p style="margin: 5px 0 0 0; font-size: 13px;">
                请复制 <code>.env.example</code> 为 <code>.env</code> 并填入您的API密钥。
            </p>
        `;

        const closeBtn = document.createElement('button');
        closeBtn.innerHTML = '<i class="fas fa-times"></i>';
        closeBtn.style.cssText = `
            background: none;
            border: none;
            color: #856404;
            cursor: pointer;
            font-size: 16px;
            padding: 5px 10px;
        `;
        closeBtn.onclick = () => configWarning.style.display = 'none';

        configWarning.appendChild(warningContent);
        configWarning.appendChild(closeBtn);

        // Insert right after the status bar; fall back to the top of the page
        // when the status bar is not present.
        const statusBar = document.getElementById('statusBar');
        if (statusBar && statusBar.parentNode) {
            statusBar.parentNode.insertBefore(configWarning, statusBar.nextSibling);
        } else {
            document.body.insertBefore(configWarning, document.body.firstChild);
        }
    }

    configWarning.style.display = 'flex';
    const messageNode = configWarning.querySelector('[data-role="config-message"]');
    if (messageNode) {
        messageNode.textContent = message == null ? '' : String(message);
    }
}
// Hide the configuration banner if it has been created; otherwise do nothing.
function hideConfigWarning() {
    const banner = document.getElementById('configWarning');
    if (!banner) {
        return;
    }
    banner.style.display = 'none';
}
// Refresh the "mm:ss / mm:ss" label from the player's position and length.
//
// The label is left untouched until the duration is a finite number: the media
// element reports NaN before metadata is known and may report Infinity for
// media of unknown length, which would otherwise render as "Infinity:NaN".
function updateAudioTime() {
    const { currentTime, duration } = audioPlayer;
    if (!Number.isFinite(duration)) {
        return;
    }
    // Format a number of seconds as zero-padded minutes:seconds.
    const asClock = (seconds) => {
        const minutes = Math.floor(seconds / 60);
        const rest = Math.floor(seconds % 60);
        return `${minutes.toString().padStart(2, '0')}:${rest.toString().padStart(2, '0')}`;
    };
    audioTime.textContent = `${asClock(currentTime)} / ${asClock(duration)}`;
}
// Render an /api/ask success payload: the text answer plus, when present,
// the synthesized audio.
//
// `data.text_answer` is model output and is treated as untrusted text: it is
// written with textContent so characters such as "<" are shown literally and
// can never inject markup. When the payload carries no audio, any audio left
// over from a previous answer is unloaded so the player cannot replay it.
function displayResults(data) {
    // --- Text answer ---
    const heading = document.createElement('h3');
    heading.innerHTML = '<i class="fas fa-comment-medical"></i> 专业回答:';

    const body = document.createElement('div');
    body.style.whiteSpace = 'pre-wrap';
    body.style.lineHeight = '1.8';
    body.textContent = data.text_answer == null ? '' : String(data.text_answer);

    answerText.innerHTML = '';
    answerText.appendChild(heading);
    answerText.appendChild(body);

    // --- Audio ---
    if (data.audio_base64) {
        // Register listeners before loading so no early event is missed.
        audioPlayer.oncanplay = function() {
            audioStatus.textContent = '准备播放...';
        };
        audioPlayer.onplay = function() {
            audioStatus.textContent = '正在播放...';
        };
        audioPlayer.onpause = function() {
            audioStatus.textContent = '已暂停';
        };
        audioPlayer.onended = function() {
            audioStatus.textContent = '播放完成';
        };
        audioPlayer.ontimeupdate = updateAudioTime;

        // Feed the base64 payload to the player as a data URL.
        audioSource.src = `data:audio/mp3;base64,${data.audio_base64}`;
        audioPlayer.load();

        // Browsers may block autoplay; fall back to asking for a click.
        audioPlayer.play().catch(function(error) {
            console.log('自动播放被阻止:', error);
            audioStatus.textContent = '点击播放按钮开始播放';
        });
    } else {
        // Detach listeners first so the pause below cannot overwrite the status text.
        audioPlayer.oncanplay = null;
        audioPlayer.onplay = null;
        audioPlayer.onpause = null;
        audioPlayer.onended = null;
        audioPlayer.ontimeupdate = null;
        audioPlayer.pause();
        audioSource.removeAttribute('src');
        audioPlayer.load();
        audioTime.textContent = '00:00';
        audioStatus.textContent = '语音合成失败,仅显示文字回答';
    }

    // Reveal the result card and bring it into view.
    resultContainer.style.display = 'block';
    resultContainer.scrollIntoView({ behavior: 'smooth', block: 'nearest' });
}
// Submit handler: validate the question, POST it to /api/ask as JSON, then
// either render the answer or show an error. The loading state is always
// cleared when the request finishes.
questionForm.addEventListener('submit', async function(event) {
    event.preventDefault();

    // Let the browser report built-in validation problems first.
    if (!questionForm.checkValidity()) {
        questionForm.reportValidity();
        return;
    }

    const question = questionInput.value.trim();
    if (!question) {
        showError('请输入问题内容');
        return;
    }

    // Turn a non-2xx response into a human-readable error message.
    const describeFailure = async (response) => {
        const contentType = response.headers.get('content-type');
        const isJson = contentType && contentType.includes('application/json');
        if (isJson) {
            const errorData = await response.json();
            return errorData.message || `服务器错误 (${response.status})`;
        }
        const errorText = await response.text();
        // An HTML body is a framework error page, not a useful message.
        const looksLikeHtml = errorText.includes('<html') || errorText.includes('<!DOCTYPE');
        if (looksLikeHtml) {
            return `服务器返回了HTML错误页面 (${response.status}),请检查后端配置`;
        }
        return `服务器错误: ${errorText.substring(0, 100)}... (${response.status})`;
    };

    showLoading();
    try {
        const response = await fetch('/api/ask', {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
            },
            body: JSON.stringify({ question: question })
        });

        if (!response.ok) {
            throw new Error(await describeFailure(response));
        }

        const result = await response.json();
        if (result.status !== 'success') {
            showError(result.message || '获取回答时出错');
        } else {
            displayResults(result);
        }
    } catch (error) {
        showError(`请求失败: ${error.message}。请检查服务器是否运行正常或API密钥是否正确配置。`);
        console.error('详细错误:', error);
    } finally {
        hideLoading();
    }
});
// "New question" button: stop any answer audio that is still playing, hide
// the result card, clear the input and return focus to the form.
newQuestionBtn.addEventListener('click', function() {
    // Without this the hidden card would keep playing the previous answer.
    audioPlayer.pause();
    resultContainer.style.display = 'none';
    questionInput.value = '';
    questionInput.focus();
    // Scroll back to the form
    questionForm.scrollIntoView({ behavior: 'smooth', block: 'nearest' });
});
// Page start-up: run one health check, focus the question box, then keep
// re-checking the backend status on a fixed interval.
document.addEventListener('DOMContentLoaded', async function() {
    const STATUS_POLL_INTERVAL_MS = 30000; // 30 seconds between health checks
    console.log('🎤 CardioAI语音助手界面已初始化');
    await checkSystemStatus();
    questionInput.focus();
    setInterval(checkSystemStatus, STATUS_POLL_INTERVAL_MS);
});
</script>
</body>
</html>

View File

@@ -0,0 +1,333 @@
#!/usr/bin/env python3
"""
CardioAI - 语音助手模块
基于Deepseek和CosyVoice的心血管健康问答语音助手
"""
import os
import base64
from flask import Flask, request, jsonify, render_template
from langchain_openai import ChatOpenAI
from dotenv import load_dotenv
import dashscope
from dashscope.audio.tts_v2 import SpeechSynthesizer, AudioFormat, ResultCallback
import json
import traceback
# Create the Flask application; templates are looked up in the 'templates' folder
app = Flask(__name__, template_folder='templates')
# Path of the .env file, taken from the ENV_PATH environment variable when set.
# NOTE(review): the fallback is an absolute path on a developer machine (and it
# contains a space), not a project-relative .env — confirm the intended default.
ENV_PATH = os.getenv('ENV_PATH', '/Users/anthony/PycharmProjects/ sad_test01/.env')
def load_environment_variables():
    """Load the .env file and check that the required API keys are set.

    The file at ENV_PATH is loaded when it exists; otherwise python-dotenv's
    default lookup is used. Progress and problems are reported with print().

    Returns:
        True when both DEEPSEEK_API_KEY1 and DASHSCOPE_API_KEY are set,
        False when one is missing or loading raised an exception.
    """
    required_vars = ('DEEPSEEK_API_KEY1', 'DASHSCOPE_API_KEY')
    try:
        if not os.path.exists(ENV_PATH):
            print(f"⚠️ 环境变量文件不存在: {ENV_PATH},尝试从默认位置加载")
            load_dotenv()  # fall back to the default .env lookup
        else:
            print(f"📋 从 {ENV_PATH} 加载环境变量")
            load_dotenv(dotenv_path=ENV_PATH)

        # Collect every required variable that is unset or empty
        missing_vars = [name for name in required_vars if not os.getenv(name)]
        if not missing_vars:
            print("✅ 环境变量加载成功")
            key_labels = (
                (" DeepSeek API密钥", 'DEEPSEEK_API_KEY1'),
                (" DashScope API密钥", 'DASHSCOPE_API_KEY'),
            )
            for label, name in key_labels:
                state = '已设置' if os.getenv(name) else '未设置'
                print(f"{label}: {state}")
            print(f" DeepSeek基础URL: {os.getenv('base_url1', '默认: https://api.deepseek.com/v1')}")
            return True

        print(f"❌ 缺少必要的环境变量: {missing_vars}")
        help_lines = (
            "⚠️ 请在环境变量文件中设置以下变量:",
            " - DEEPSEEK_API_KEY1: DeepSeek API密钥",
            " - DASHSCOPE_API_KEY: DashScope (阿里云) API密钥",
            " - base_url1: DeepSeek API基础URL (可选,默认: https://api.deepseek.com/v1)",
        )
        for line in help_lines:
            print(line)
        return False
    except Exception as e:
        print(f"❌ 加载环境变量时出错: {e}")
        traceback.print_exc()
        return False
def initialize_llm():
    """Build the DeepSeek chat client through the OpenAI-compatible interface.

    Reads DEEPSEEK_API_KEY1 (required) and base_url1 (optional, defaults to
    https://api.deepseek.com/v1) from the environment.

    Returns:
        A ChatOpenAI instance, or None when the API key is missing or the
        client could not be created (the error is printed with a traceback).
    """
    try:
        api_key = os.getenv('DEEPSEEK_API_KEY1')
        base_url = os.getenv('base_url1', 'https://api.deepseek.com/v1')
        if not api_key:
            raise ValueError("DEEPSEEK_API_KEY1环境变量未设置")

        client_options = {
            'base_url': base_url,
            'api_key': api_key,
            'model': "deepseek-chat",
            'temperature': 0.7,
            'max_tokens': 1000,
        }
        client = ChatOpenAI(**client_options)
        print("✅ DeepSeek LLM初始化成功")
        return client
    except Exception as e:
        print(f"❌ 初始化DeepSeek LLM时出错: {e}")
        traceback.print_exc()
        return None
def initialize_tts():
    """Configure the DashScope SDK with the API key from the environment.

    Sets dashscope.api_key from DASHSCOPE_API_KEY. A missing key or any other
    failure is printed with a traceback; nothing is raised to the caller.
    """
    try:
        key = os.getenv('DASHSCOPE_API_KEY')
        if key:
            dashscope.api_key = key
            print("✅ CosyVoice语音合成初始化成功")
        else:
            raise ValueError("DASHSCOPE_API_KEY环境变量未设置")
    except Exception as e:
        print(f"❌ 初始化语音合成时出错: {e}")
        traceback.print_exc()
def get_config_status():
    """Report which configuration values are currently available.

    Returns:
        A dict with a 'deepseek' section (api_key_set, base_url_set, status),
        a 'dashscope' section (api_key_set, status) and 'env_file_exists',
        which tells whether the file at ENV_PATH exists. Each status is
        'configured' when the key is set, 'missing_api_key' otherwise.
    """
    deepseek_key = os.getenv('DEEPSEEK_API_KEY1')
    dashscope_key = os.getenv('DASHSCOPE_API_KEY')

    deepseek_section = {
        'api_key_set': bool(deepseek_key),
        'base_url_set': bool(os.getenv('base_url1')),
        'status': 'configured' if deepseek_key else 'missing_api_key',
    }
    dashscope_section = {
        'api_key_set': bool(dashscope_key),
        'status': 'configured' if dashscope_key else 'missing_api_key',
    }
    return {
        'deepseek': deepseek_section,
        'dashscope': dashscope_section,
        'env_file_exists': os.path.exists(ENV_PATH),
    }
def get_system_prompt():
"""Return the fixed system prompt sent to the LLM with every question.

The prompt (in Chinese) casts the model as a cardiovascular health advisor
and lists five expectations for its answers.
"""
return """你是一名专业的心血管健康顾问,拥有丰富的医学知识和临床经验。你的任务是:
1. **专业准确**:基于最新的医学研究和临床指南提供准确信息
2. **通俗易懂**:用通俗易懂的语言解释医学术语和概念
3. **个性化建议**:根据用户的具体情况提供个性化建议
4. **预防为主**:强调心血管疾病的预防和早期干预
5. **安全提醒**:明确指出哪些情况需要立即就医
请保持回答的专业性、准确性和实用性,同时要富有同理心和耐心。"""
def _split_text_for_tts(text, max_length, min_length=100):
    """Split *text* into chunks of at most *max_length* characters.

    A chunk is closed at a sentence terminator once it holds more than
    *min_length* characters; a run with no usable terminator is cut at
    *max_length*. Chunks are stripped and whitespace-only chunks are dropped.
    """
    # Full-width 。!? are written as escapes so the set survives re-encoding;
    # ASCII . ! ? follow.
    terminators = "\u3002\uff01\uff1f.!?"
    chunks = []
    current = []
    for char in text:
        current.append(char)
        at_sentence_end = char in terminators and len(current) > min_length
        if at_sentence_end or len(current) >= max_length:
            chunks.append("".join(current))
            current = []
    if current:
        chunks.append("".join(current))
    return [chunk.strip() for chunk in chunks if chunk.strip()]


def synthesize_speech(text):
    """Synthesize *text* with CosyVoice and return the MP3 audio as base64.

    Text longer than the per-call limit is split at sentence boundaries and
    the audio of the pieces is concatenated in order.

    Args:
        text: The text to speak.

    Returns:
        The base64-encoded MP3 data as a str, or None when the text is empty
        or synthesis fails (the error is printed with a traceback).
    """
    max_text_length = 2000  # per-call text limit for CosyVoice

    def synthesize_chunk(chunk):
        # The SDK requires a freshly constructed SpeechSynthesizer before
        # every call(), so one instance must not be reused across chunks.
        synthesizer = SpeechSynthesizer(
            model="cosyvoice-v2",
            voice="longxiaochun_v2",
            format=AudioFormat.MP3_22050HZ_MONO_256KBPS,
            speech_rate=1.0,
            pitch_rate=1.0,
            volume=50
        )
        audio = synthesizer.call(chunk)
        if not audio:
            raise RuntimeError("语音合成未返回音频数据")
        return audio

    try:
        if not text or not text.strip():
            raise ValueError("文本内容为空")
        print(f"🔊 开始语音合成,文本长度: {len(text)} 字符")

        if len(text) > max_text_length:
            print(f"⚠️ 文本长度超过{max_text_length}字符,将进行分段处理")
            segments = _split_text_for_tts(text, max_text_length)
            audio_parts = []
            for position, segment in enumerate(segments, start=1):
                print(f" 合成第 {position}/{len(segments)} 段,长度: {len(segment)} 字符")
                audio_parts.append(synthesize_chunk(segment))
            audio_data = b"".join(audio_parts)
        else:
            audio_data = synthesize_chunk(text.strip())

        print(f"✅ 语音合成完成,音频大小: {len(audio_data)} 字节")
        return base64.b64encode(audio_data).decode('utf-8')
    except Exception as e:
        print(f"❌ 语音合成失败: {e}")
        traceback.print_exc()
        return None
# Module-level LLM client: assigned by init_app() and, while still None,
# re-initialized on demand inside ask_question()
llm = None
@app.route('/')
def home():
    """Serve the voice-assistant page rendered from voice_index.html."""
    page = render_template('voice_index.html')
    return page
@app.route('/api/health', methods=['GET'])
def health_check():
    """Report service readiness and configuration state as JSON.

    The payload contains the overall 'status' ('healthy' or 'degraded'),
    whether the LLM and the TTS key are initialized, the detailed
    configuration status, which API keys are missing, and a setup hint.
    """
    config_status = get_config_status()
    deepseek_key_missing = not config_status['deepseek']['api_key_set']
    dashscope_key_missing = not config_status['dashscope']['api_key_set']
    setup_required = deepseek_key_missing or dashscope_key_missing

    llm_ready = llm is not None
    # Truthiness, as in init_app(): an empty key string is not a usable key.
    tts_ready = bool(dashscope.api_key)
    overall_healthy = llm_ready and tts_ready

    return jsonify({
        'status': 'healthy' if overall_healthy else 'degraded',
        'service': 'CardioAI Voice Assistant',
        'llm_initialized': llm_ready,
        'dashscope_initialized': tts_ready,
        'config_status': config_status,
        'missing_config': {
            'deepseek': deepseek_key_missing,
            'dashscope': dashscope_key_missing
        },
        'setup_required': setup_required,
        'setup_instructions': '请配置.env文件中的API密钥' if setup_required else '配置完成'
    })
@app.route('/api/ask', methods=['POST'])
def ask_question():
    """Answer a user question with text and, when possible, synthesized speech.

    Accepts the question as JSON ({"question": "..."}) or as a form field.

    Returns (JSON):
        200: status 'success', with text_answer and audio_base64
             (audio_base64 is None when speech synthesis failed).
        400: the question is missing, empty, not a string, or the JSON body
             is malformed or not an object.
        503: the language model could not be initialized.
        500: any other failure while processing the question.
    """
    global llm
    try:
        # Read the question defensively: bad client input must produce a 400,
        # not an exception that ends up as a 500.
        if request.is_json:
            payload = request.get_json(silent=True)
            raw_question = payload.get('question') if isinstance(payload, dict) else None
        else:
            raw_question = request.form.get('question')
        question = raw_question.strip() if isinstance(raw_question, str) else ''

        if not question:
            return jsonify({
                'status': 'error',
                'message': '请提供问题内容'
            }), 400
        print(f"🤔 用户提问: {question[:100]}...")

        # Make sure the LLM is available, retrying initialization once.
        if llm is None:
            print("⚠️ LLM未初始化尝试重新初始化")
            llm = initialize_llm()
            if llm is None:
                return jsonify({
                    'status': 'error',
                    'message': '语言模型未初始化,请检查配置'
                }), 503

        # System prompt first, then the user's question.
        messages = [
            {"role": "system", "content": get_system_prompt()},
            {"role": "user", "content": question}
        ]

        print("🧠 正在生成回答...")
        response = llm.invoke(messages)
        text_answer = response.content if hasattr(response, 'content') else str(response)
        print(f"✅ 回答生成完成,长度: {len(text_answer)} 字符")

        # Speech is best-effort: a failed synthesis still returns the text.
        audio_base64 = synthesize_speech(text_answer)
        if audio_base64 is None:
            print("⚠️ 语音合成失败,仅返回文本回答")
            return jsonify({
                'status': 'success',
                'text_answer': text_answer,
                'audio_base64': None,
                'message': '语音合成失败,仅返回文本回答'
            })

        return jsonify({
            'status': 'success',
            'text_answer': text_answer,
            'audio_base64': audio_base64,
            'audio_format': 'mp3',
            'audio_sample_rate': '22050Hz'
        })
    except Exception as e:
        print(f"❌ 处理问题时出错: {e}")
        traceback.print_exc()
        return jsonify({
            'status': 'error',
            'message': f'处理问题时出错: {str(e)}'
        }), 500
def init_app():
    """Initialize the service: load configuration, then set up LLM and TTS.

    Prints a banner, the available API endpoints and the resulting
    initialization state. Stores the LLM client in the module-level `llm`.
    """
    global llm

    banner_rule = "=" * 60
    print(banner_rule)
    print("🎤 CardioAI - 心血管健康语音助手")
    print(banner_rule)

    # Configuration problems are reported but do not stop start-up
    env_loaded = load_environment_variables()
    if not env_loaded:
        print("⚠️ 环境变量加载失败,某些功能可能无法使用")

    llm = initialize_llm()
    initialize_tts()

    endpoint_lines = (
        "\n📡 API端点:",
        " GET / - 语音助手界面",
        " GET /api/health - 健康检查",
        " POST /api/ask - 提问并获取语音回答",
    )
    for line in endpoint_lines:
        print(line)

    llm_state = '已初始化' if llm is not None else '未初始化'
    print(f"\n🧠 LLM状态: {llm_state}")
    tts_state = '已初始化' if dashscope.api_key else '未初始化'
    print(f"🔊 语音合成: {tts_state}")
if __name__ == '__main__':
    # Run directly: initialize, then start Flask's built-in server.
    init_app()

    # The interactive debugger allows arbitrary code execution, and the server
    # listens on all interfaces, so debug mode is off unless explicitly
    # requested through FLASK_DEBUG (1/true/yes/on).
    debug_enabled = os.getenv('FLASK_DEBUG', '').strip().lower() in ('1', 'true', 'yes', 'on')

    print(f"\n🌍 启动服务器: http://127.0.0.1:5002")
    print(" 按 Ctrl+C 停止\n")
    app.run(
        host='0.0.0.0',
        port=5002,
        debug=debug_enabled,
        threaded=True
    )
else:
    # Imported by a WSGI server: initialize at import time.
    init_app()

17
aicodes/requirements.txt Normal file
View File

@@ -0,0 +1,17 @@
# CardioAI 项目依赖包
# 使用 conda 环境: cardioenv
# 安装命令: pip install -r requirements.txt
pandas
openpyxl
numpy
scikit-learn
xgboost
joblib
streamlit
plotly
Flask
python-dotenv
langchain-openai
dashscope
requests

20
aicodes/test_data.py Normal file
View File

@@ -0,0 +1,20 @@
#!/usr/bin/env python3
import pandas as pd
import numpy as np
# Workbook inspected by this script
DATA_PATH = "/Users/anthony/ai_lesson/ai_zhangzhongshan/心血管疾病.xlsx"

try:
    df = pd.read_excel(DATA_PATH, engine='openpyxl')
    print("Data loaded successfully!")
    print(f"Shape: {df.shape}")

    # Each section is a heading plus a summary computed only when printed
    report_sections = (
        ("\nColumns:", lambda: df.columns.tolist()),
        ("\nFirst few rows:", lambda: df.head()),
        ("\nData types:", lambda: df.dtypes),
        ("\nMissing values:", lambda: df.isnull().sum()),
    )
    for heading, summarize in report_sections:
        print(heading)
        print(summarize())
except Exception as e:
    print(f"Error: {e}")

View File

@@ -0,0 +1,274 @@
# 语音合成CosyVoice Python SDK
https://help.aliyun.com/zh/model-studio/cosyvoice-python-sdk#915a935d871ak
```python
dashscope.api_key = "your-api-key"  # 请替换为自己的API Key,切勿在文档或代码中提交真实密钥
```
### **流式调用**
在同一个语音合成任务中分多次提交文本,并通过回调的方式实时获取合成结果。
**说明**
- 流式输入时可多次调用`streaming_call`按顺序提交文本片段。服务端接收文本片段后自动进行分句:
- 完整语句立即合成
- 不完整语句缓存至完整后合成
调用 `streaming_complete` 时,服务端会强制合成所有已接收但未处理的文本片段(包括未完成的句子)。
- 发送文本片段的间隔不得超过23秒否则触发“request timeout after 23 seconds”异常。
若无待发送文本,需及时调用 `streaming_complete`结束任务。
> 服务端强制设定23秒超时机制客户端无法修改该配置。
![image](https://help-static-aliyun-doc.aliyuncs.com/assets/img/zh-CN/0165103671/CAEQVRiBgMDb7PahrBkiIDVkNjEwOTMxYjEwOTRmOWFhMmI1OTRiY2Q3ZDgzZmE54709861_20241015153444.149.svg)
1. 实例化SpeechSynthesizer类
实例化[SpeechSynthesizer类](https://help.aliyun.com/zh/model-studio/cosyvoice-python-sdk#d6bc1f133f871)绑定[请求参数](https://help.aliyun.com/zh/model-studio/cosyvoice-python-sdk#2fe363ace1l4k)和[回调接口ResultCallback](https://help.aliyun.com/zh/model-studio/cosyvoice-python-sdk#85d698b9f9g8s)。
2. 流式传输
多次调用[SpeechSynthesizer类](https://help.aliyun.com/zh/model-studio/cosyvoice-python-sdk#d6bc1f133f871)的`streaming_call`方法分片提交待合成文本,将待合成文本分段发送至服务端。
在发送文本的过程中,服务端会通过[回调接口ResultCallback](https://help.aliyun.com/zh/model-studio/cosyvoice-python-sdk#85d698b9f9g8s)的`on_data`方法,将合成结果实时返回给客户端。
每次调用`streaming_call`方法发送的文本片段(即`text`)长度不得超过2000字符,累计发送的文本总长度不得超过20万字符。
3. 结束处理
调用[SpeechSynthesizer类](https://help.aliyun.com/zh/model-studio/cosyvoice-python-sdk#d6bc1f133f871)的`streaming_complete`方法结束语音合成。
该方法会阻塞当前线程,直到[回调接口ResultCallback](https://help.aliyun.com/zh/model-studio/cosyvoice-python-sdk#85d698b9f9g8s)的`on_complete`或者`on_error`回调触发后才会释放线程阻塞。
请务必确保调用该方法,否则可能会导致结尾部分的文本无法成功转换为语音。
~~~python
# coding=utf-8
#
# pyaudio安装说明
# 如果是macOS操作系统执行如下命令
# brew install portaudio
# pip install pyaudio
# 如果是Debian/Ubuntu操作系统执行如下命令
# sudo apt-get install python-pyaudio python3-pyaudio
# 或者
# pip install pyaudio
# 如果是CentOS操作系统执行如下命令
# sudo yum install -y portaudio portaudio-devel && pip install pyaudio
# 如果是Microsoft Windows执行如下命令
# python -m pip install pyaudio
import time
import pyaudio
import dashscope
from dashscope.api_entities.dashscope_response import SpeechSynthesisResponse
from dashscope.audio.tts_v2 import *
from datetime import datetime
def get_timestamp():
now = datetime.now()
formatted_timestamp = now.strftime("[%Y-%m-%d %H:%M:%S.%f]")
return formatted_timestamp
# 若没有将API Key配置到环境变量中需将your-api-key替换为自己的API Key
# dashscope.api_key = "your-api-key"
# 模型
model = "cosyvoice-v2"
# 音色
voice = "longxiaochun_v2"
# 定义回调接口
class Callback(ResultCallback):
_player = None
_stream = None
def on_open(self):
print("连接建立:" + get_timestamp())
self._player = pyaudio.PyAudio()
self._stream = self._player.open(
format=pyaudio.paInt16, channels=1, rate=22050, output=True
)
def on_complete(self):
print("语音合成完成,所有合成结果已被接收:" + get_timestamp())
def on_error(self, message: str):
print(f"语音合成出现异常:{message}")
def on_close(self):
print("连接关闭:" + get_timestamp())
# 停止播放器
self._stream.stop_stream()
self._stream.close()
self._player.terminate()
def on_event(self, message):
pass
def on_data(self, data: bytes) -> None:
print(get_timestamp() + " 二进制音频长度为:" + str(len(data)))
self._stream.write(data)
callback = Callback()
test_text = [
"流式文本语音合成SDK",
"可以将输入的文本",
"合成为语音二进制数据,",
"相比于非流式语音合成,",
"流式合成的优势在于实时性",
"更强。用户在输入文本的同时",
"可以听到接近同步的语音输出,",
"极大地提升了交互体验,",
"减少了用户等待时间。",
"适用于调用大规模",
"语言模型LLM以",
"流式输入文本的方式",
"进行语音合成的场景。",
]
# 实例化SpeechSynthesizer并在构造方法中传入模型model、音色voice等请求参数
synthesizer = SpeechSynthesizer(
model=model,
voice=voice,
format=AudioFormat.PCM_22050HZ_MONO_16BIT,
callback=callback,
)
# 流式发送待合成文本。在回调接口的on_data方法中实时获取二进制音频
for text in test_text:
synthesizer.streaming_call(text)
time.sleep(0.1)
# 结束流式语音合成
synthesizer.streaming_complete()
# 首次发送文本时需建立 WebSocket 连接,因此首包延迟会包含连接建立的耗时
print('[Metric] requestId为{},首包延迟为:{}毫秒'.format(
synthesizer.get_last_request_id(),
synthesizer.get_first_package_delay()))
~~~
## **请求参数**
请求参数通过[SpeechSynthesizer类](https://help.aliyun.com/zh/model-studio/cosyvoice-python-sdk#d6bc1f133f871)的构造方法进行设置。
| **参数** | **类型** | **默认值** | **是否必须** | **说明** |
| ---------------------- | -------------- | ---------- | ------------ | ------------------------------------------------------------ |
| model | str | - | 是 | 指定模型。不同版本的模型编码方式一致,但使用时须确保模型(`model`)与音色(`voice`)匹配:每个版本的模型只能使用本版本的默认音色或专属音色。 |
| voice | str | - | 是 | 指定语音合成所使用的音色。支持默认音色和专属音色:**默认音色**:参见**音色列表**章节。**专属音色**:通过[声音复刻](https://help.aliyun.com/zh/model-studio/voice-replica-1/)功能定制。使用复刻音色时,请确保声音复刻与语音合成使用同一账号。详细操作步骤请参见[CosyVoice声音复刻API](https://help.aliyun.com/zh/model-studio/cosyvoice-clone-api#da30eeebc4uwk)。**⚠️ 使用声音复刻系列模型合成语音时,仅能使用该模型复刻生成的专属音色,不能使用默认音色。****⚠️ 使用专属音色合成语音时,语音合成模型(**`**model**`**)必须与声音复刻模型(**`**target_model**`**)相同。** |
| format | enum | 因音色而异 | 否 | 指定音频编码格式及采样率。若未指定`format`则合成音频采样率为22.05kHz格式为mp3。**说明**默认采样率代表当前音色的最佳采样率缺省条件下默认按照该采样率输出同时支持降采样或升采样。可指定的音频编码格式及采样率如下所有模型均支持的音频编码格式及采样率AudioFormat.WAV_8000HZ_MONO_16BIT代表音频格式为wav采样率为8kHzAudioFormat.WAV_16000HZ_MONO_16BIT代表音频格式为wav采样率为16kHzAudioFormat.WAV_22050HZ_MONO_16BIT代表音频格式为wav采样率为22.05kHzAudioFormat.WAV_24000HZ_MONO_16BIT代表音频格式为wav采样率为24kHzAudioFormat.WAV_44100HZ_MONO_16BIT代表音频格式为wav采样率为44.1kHzAudioFormat.WAV_48000HZ_MONO_16BIT代表音频格式为wav采样率为48kHzAudioFormat.MP3_8000HZ_MONO_128KBPS代表音频格式为mp3采样率为8kHzAudioFormat.MP3_16000HZ_MONO_128KBPS代表音频格式为mp3采样率为16kHzAudioFormat.MP3_22050HZ_MONO_256KBPS代表音频格式为mp3采样率为22.05kHzAudioFormat.MP3_24000HZ_MONO_256KBPS代表音频格式为mp3采样率为24kHzAudioFormat.MP3_44100HZ_MONO_256KBPS代表音频格式为mp3采样率为44.1kHzAudioFormat.MP3_48000HZ_MONO_256KBPS代表音频格式为mp3采样率为48kHzAudioFormat.PCM_8000HZ_MONO_16BIT代表音频格式为pcm采样率为8kHzAudioFormat.PCM_16000HZ_MONO_16BIT代表音频格式为pcm采样率为16kHzAudioFormat.PCM_22050HZ_MONO_16BIT代表音频格式为pcm采样率为22.05kHzAudioFormat.PCM_24000HZ_MONO_16BIT代表音频格式为pcm采样率为24kHzAudioFormat.PCM_44100HZ_MONO_16BIT代表音频格式为pcm采样率为44.1kHzAudioFormat.PCM_48000HZ_MONO_16BIT代表音频格式为pcm采样率为48kHz除`cosyvoice-v1`其他模型支持的音频编码格式及采样率音频格式为opus时支持通过`bit_rate`参数调整码率。仅对1.24.0及之后版本的DashScope适用。AudioFormat.OGG_OPUS_8KHZ_MONO_32KBPS代表音频格式为opus采样率为8kHz码率为32kbpsAudioFormat.OGG_OPUS_16KHZ_MONO_16KBPS代表音频格式为opus采样率为16kHz码率为16kbpsAudioFormat.OGG_OPUS_16KHZ_MONO_32KBPS代表音频格式为opus采样率为16kHz码率为32kbpsAudioFormat.OGG_OPUS_16KHZ_MONO_64KBPS代表音频格式为opus采样率为16kHz码率为64kbpsAudioFormat.OGG_OPUS_24KHZ_MONO_16KBPS代表音频格式为opus采样率为24kHz码率为16kbpsAudioFormat.OGG_OPUS_24KHZ_MONO_32KBPS代表音频格式为opus采样率为24kHz码率为32kbpsAudioFormat.OGG_OPUS_24KHZ_MONO_64KBPS代表音频格式为opus采样率为24kHz码率为64kbpsAudioFormat.OGG_OPUS_48KHZ_MONO_16KBPS代表音频格式为opus采样率为48kHz码率为16kbpsAudioFormat.OGG_OPUS_48KHZ_MONO_32KBPS代表音频格式为opus采样率为48kHz码率为32kbpsAudioFormat.OGG_OPUS_48KHZ_MONO_64KBPS代表音频格式为opus采样率为48kHz码率为64kbps |
| volume | int | 50 | 否 | 合成音频的音量,取值范围:0~100。**重要**:该字段在不同版本的DashScope SDK中名称不同:1.20.10及以后版本的SDK为`volume`;1.20.10以前版本的SDK为`volumn`。 |
| speech_rate | float | 1.0 | 否 | 合成音频的语速取值范围0.5~2。0.5表示默认语速的0.5倍速。1表示默认语速。默认语速是指模型默认输出的合成语速语速会因音色不同而略有不同。约每秒钟4个字。2表示默认语速的2倍速。 |
| pitch_rate | float | 1.0 | 否 | 合成音频的语调取值范围0.5~2。 |
| bit_rate | int | 32 | 否 | 指定音频的[码率](https://opus-codec.org/)取值范围6~510kbps。码率越大音质越好音频文件体积越大。仅在音频格式`format`为opus时可用。`cosyvoice-v1`模型不支持该参数。**说明**`bit_rate`需要通过`additional_params`参数进行设置: `synthesizer = SpeechSynthesizer(model="cosyvoice-v2", voice="longxiaochun_v2", format=AudioFormat.OGG_OPUS_16KHZ_MONO_16KBPS, additional_params={"bit_rate": 32})` |
| word_timestamp_enabled | bool | False | 否 | 是否开启字级别时间戳默认关闭。仅cosyvoice-v2支持该功能。时间戳结果仅能通过回调接口获取**说明**`word_timestamp_enabled`需要通过`additional_params`参数进行设置: `synthesizer = SpeechSynthesizer(model="cosyvoice-v2", voice="longxiaochun_v2", callback=callback, # 时间戳结果仅能通过回调接口获取 additional_params={'word_timestamp_enabled': True})`**点击查看完整示例代码** |
| seed | int | 0 | 否 | 生成时使用的随机数种子使合成的效果产生变化。默认值0。取值范围0~65535。cosyvoice-v1不支持该功能。 |
| language_hints | list[str] | - | 否 | 合成文本语言提示,可选值为 `zh`(中文)或 `en`英文列表中仅第一个语言生效。仅cosyvoice-v3、cosyvoice-v3-plus支持该功能。此设置会影响阿拉伯数字等内容的读法。例如当合成“123”时若设置为`zh`,则读作“一百二十三”;而`en`则会读作“one hundred and twenty-three”。如果不设置系统会根据文本内容自动判断并应用相应的合成规则。 |
| instruction | String | - | 否 | 设置提示词。仅cosyvoice-v3、cosyvoice-v3-plus支持该功能。目前仅支持设置情感。格式`你说话的情感是<情感值>。`”(注意,结尾一定不要遗漏句号,使用时将“`<情感值>`”替换为具体的情感值,例如替换为`neutral`)。示例:“`你说话的情感是neutral。`”支持的情感值:`neutral``fearful``angry``sad``surprised``happy``disgusted`。 |
| callback | ResultCallback | - | 否 | [回调接口ResultCallback](https://help.aliyun.com/zh/model-studio/cosyvoice-python-sdk#85d698b9f9g8s). |
## **关键接口**
### `SpeechSynthesizer`类
`SpeechSynthesizer`通过“`from dashscope.audio.tts_v2 import *`”方式引入,提供语音合成的关键接口。
| **方法** | **参数** | **返回值** | **描述** |
| ------------------------------------------------------------ | ------------------------------------------------------------ | ---------------------------------------------------------- | ------------------------------------------------------------ |
| `def call(self, text: str, timeout_millis=None)` | `text`:待合成文本`timeout_millis`阻塞线程的超时时间单位为毫秒不设置或值为0时不生效 | 没有指定`ResultCallback`时返回二进制音频数据否则返回None | 将整段文本(无论是纯文本还是包含[SSML](https://help.aliyun.com/zh/model-studio/introduction-to-cosyvoice-ssml-markup-language)的文本)转换为语音。在创建`SpeechSynthesizer`实例时,存在以下两种情况:没有指定`ResultCallback``call`方法会阻塞当前线程直到语音合成完成并返回二进制音频数据。使用方法请参见[同步调用](https://help.aliyun.com/zh/model-studio/cosyvoice-python-sdk#8341058094tc3)。指定了`ResultCallback``call`方法会立刻返回None并通过[回调接口ResultCallback](https://help.aliyun.com/zh/model-studio/cosyvoice-python-sdk#85d698b9f9g8s)的`on_data`方法返回语音合成的结果。使用方法请参见[异步调用](https://help.aliyun.com/zh/model-studio/cosyvoice-python-sdk#cc2a504f344s2)。**重要**每次调用`call`方法前,需要重新初始化`SpeechSynthesizer`实例。 |
| `def streaming_call(self, text: str)` | `text`:待合成文本片段 | 无 | 流式发送待合成文本不支持包含SSML的文本。您可以多次调用该接口将待合成文本分多次发送给服务端。合成结果通过[回调接口ResultCallback](https://help.aliyun.com/zh/model-studio/cosyvoice-python-sdk#85d698b9f9g8s)的`on_data`方法获取。使用方法请参见[流式调用](https://help.aliyun.com/zh/model-studio/cosyvoice-python-sdk#ba023aacfbr84)。 |
| `def streaming_complete(self, complete_timeout_millis=600000)` | `complete_timeout_millis`:等待时间,单位为毫秒 | 无 | 结束流式语音合成。该方法阻塞当前线程N毫秒(具体时长由`complete_timeout_millis`决定),直到任务结束。如果`complete_timeout_millis`设置为0,则无限期等待。默认情况下,如果等待时间超过10分钟,则停止等待。使用方法请参见[流式调用](https://help.aliyun.com/zh/model-studio/cosyvoice-python-sdk#ba023aacfbr84)。**重要**:在[流式调用](https://help.aliyun.com/zh/model-studio/cosyvoice-python-sdk#ba023aacfbr84)时,请务必确保调用该方法,否则可能会出现合成语音缺失的问题。 |
| `def get_last_request_id(self)` | 无 | 上一个任务的request_id | 获取上一个任务的request_id。 |
| `def get_first_package_delay(self)` | 无 | 首包延迟 | 获取首包延迟一般在500ms左右。首包延迟是开始发送文本和接收第一个音频包之间的时间单位为毫秒。在任务完成后使用。首次发送文本时需建立 WebSocket 连接,因此首包延迟会包含连接建立的耗时。 |
| `def get_response(self)` | 无 | 最后一次报文 | 获取最后一次报文为JSON格式的数据可以用于获取task-failed报错。 |
### **回调接口(**`ResultCallback`**)**
[异步调用](https://help.aliyun.com/zh/model-studio/cosyvoice-python-sdk#cc2a504f344s2)或[流式调用](https://help.aliyun.com/zh/model-studio/cosyvoice-python-sdk#ba023aacfbr84)时,服务端会通过回调的方式,将关键流程信息和数据返回给客户端。您需要实现回调方法,处理服务端返回的信息或者数据。
通过“`from dashscope.audio.tts_v2 import *`”方式引入。
点击查看示例
| **方法** | **参数** | **返回值** | **描述** |
| ------------------------------------------- | ---------------------------------- | ---------- | ------------------------------------------------------------ |
| `def on_open(self) -> None` | 无 | 无 | 当和服务端建立连接完成后,该方法立刻被回调。 |
| `def on_event( self, message: str) -> None` | `message`:服务端返回的信息 | 无 | 当服务有回复时会被回调。`message`为JSON字符串解析可获取Task ID`task_id`参数)、本次请求中计费的有效字符数(`characters`参数)等信息。 |
| `def on_complete(self) -> None` | 无 | 无 | 当所有合成数据全部返回(语音合成完成)后被回调。 |
| `def on_error(self, message) -> None` | `message`:异常信息 | 无 | 发生异常时该方法被回调。 |
| `def on_data(self, data: bytes) -> None` | `data`:服务器返回的二进制音频数据 | 无 | 当服务器有合成音频返回时被回调。您可以将二进制音频数据合成为一个完整的音频文件后使用播放器播放,也可以通过支持流式播放的播放器实时播放。**重要**流式语音合成中对于mp3/opus等压缩格式音频分段传输需使用流式播放器不可逐帧播放避免解码失败。支持流式播放的播放器ffmpeg、pyaudio (Python)、AudioFormat (Java)、MediaSource (Javascript)等。将音频数据合成完整的音频文件时应以追加模式写入同一文件。流式语音合成的wav/mp3 格式音频仅首帧包含头信息,后续帧为纯音频数据。 |
| `def on_close(self) -> None` | 无 | 无 | 当服务已经关闭连接后被回调。 |
## **响应结果**
服务器返回二进制音频数据:
- [同步调用](https://help.aliyun.com/zh/model-studio/cosyvoice-python-sdk#8341058094tc3):对[SpeechSynthesizer类](https://help.aliyun.com/zh/model-studio/cosyvoice-python-sdk#d6bc1f133f871)的`call`方法返回的二进制音频数据进行处理。
- [异步调用](https://help.aliyun.com/zh/model-studio/cosyvoice-python-sdk#cc2a504f344s2)或[流式调用](https://help.aliyun.com/zh/model-studio/cosyvoice-python-sdk#ba023aacfbr84):对[回调接口ResultCallback](https://help.aliyun.com/zh/model-studio/cosyvoice-python-sdk#85d698b9f9g8s)的`on_data`方法的参数bytes类型数据进行处理。
## **错误码**
如遇报错问题,请参见[错误信息](https://help.aliyun.com/zh/model-studio/error-code)进行排查。
若问题仍未解决,请加入[开发者群](https://github.com/aliyun/alibabacloud-bailian-speech-demo)反馈遇到的问题并提供Request ID以便进一步排查问题。
## **音色列表**
当前默认支持的音色如下表所示。若您需要更加个性化的音色,可通过声音复刻功能免费定制专属音色,详情请参见[使用复刻的音色进行语音合成](https://help.aliyun.com/zh/model-studio/cosyvoice-clone-api#b6d3449fb336v)。
进行语音合成时,`model`参数必须与所选音色对应,否则调用将失败。
待合成文本text应使用与所选音色语种一致的语言否则可能出现发音错误或不自然
cosyvoice-v3
cosyvoice-v2
cosyvoice-v1
| **音色** | **音频试听(右键保存音频)** | **voice参数** | **适用场景** | **语言** |
| -------- | ---------------------------- | ------------- | ------------------------------------------------------------ | --------------- |
| 龙婉 | | longwan | 语音助手、导航播报、聊天数字人 | 中文普通话 |
| 龙橙 | | longcheng | 语音助手、导航播报、聊天数字人 | 中文普通话 |
| 龙华 | | longhua | 语音助手、导航播报、聊天数字人 | 中文普通话 |
| 龙小淳 | | longxiaochun | 语音助手、导航播报、聊天数字人 | 中文普通话+英文 |
| 龙小夏 | | longxiaoxia | 语音助手、聊天数字人 | 中文普通话 |
| 龙小诚 | | longxiaocheng | 语音助手、导航播报、聊天数字人 | 中文普通话+英文 |
| 龙小白 | | longxiaobai | 聊天数字人、有声书、语音助手 | 中文普通话 |
| 龙老铁 | | longlaotie | 新闻播报、有声书、语音助手、直播带货、导航播报 | 中文东北口音 |
| 龙书 | | longshu | 有声书、语音助手、导航播报、新闻播报、智能客服 | 中文普通话 |
| 龙硕 | | longshuo | 语音助手、导航播报、新闻播报、客服催收 | 中文普通话 |
| 龙婧 | | longjing | 语音助手、导航播报、新闻播报、客服催收 | 中文普通话 |
| 龙妙 | | longmiao | 客服催收、导航播报、有声书、语音助手 | 中文普通话 |
| 龙悦 | | longyue | 语音助手、诗词朗诵、有声书朗读、导航播报、新闻播报、客服催收 | 中文普通话 |
| 龙媛 | | longyuan | 有声书、语音助手、聊天数字人 | 中文普通话 |
| 龙飞 | | longfei | 会议播报、新闻播报、有声书 | 中文普通话 |
| 龙杰力豆 | | longjielidou | 新闻播报、有声书、聊天助手 | 中文普通话+英文 |
| 龙彤 | | longtong | 有声书、导航播报、聊天数字人 | 中文普通话 |
| 龙祥 | | longxiang | 新闻播报、有声书、导航播报 | 中文普通话 |
| Stella | | loongstella | 语音助手、直播带货、导航播报、客服催收、有声书 | 中文普通话+英文 |
| Bella | | loongbella | 语音助手、客服催收、新闻播报、导航播报 | 中文普通话 |

View File

@@ -1,43 +0,0 @@
#!/usr/bin/env python3
"""
Simple calculator program for two numbers.
Supports addition, subtraction, multiplication, and division.
"""
def calculator():
    """Read two numbers and an operator from the user and print the result.

    Supports +, -, * and /. Division by zero, an unknown operator and
    non-numeric input are each reported with a message instead of raising.
    """
    print("Simple Calculator")
    print("Operations: + (addition), - (subtraction), * (multiplication), / (division)")
    try:
        # Collect the operands and the operator
        num1 = float(input("Enter first number: "))
        num2 = float(input("Enter second number: "))
        operation = input("Enter operation (+, -, *, /): ").strip()

        # Division by zero is reported before any arithmetic is attempted
        if operation == '/' and num2 == 0:
            print("Error: Division by zero!")
            return

        # Map each operator symbol to a deferred computation
        compute = {
            '+': lambda: num1 + num2,
            '-': lambda: num1 - num2,
            '*': lambda: num1 * num2,
            '/': lambda: num1 / num2,
        }.get(operation)
        if compute is None:
            print("Invalid operation! Please use +, -, *, or /.")
            return

        result = compute()
        print(f"{num1} {operation} {num2} = {result}")
    except ValueError:
        print("Invalid input! Please enter numeric values.")
    except Exception as e:
        print(f"An error occurred: {e}")
# Run the calculator only when this file is executed as a script
if __name__ == "__main__":
calculator()