Update code

A
2026-04-02 19:52:38 +08:00 · 2026-04-02 17:16:19 +08:00
29 changed files with 3157 additions and 0 deletions
--- a/.claude/settings.local.json
+++ b/.claude/settings.local.json
@@ -0,0 +1,13 @@
+{
+  "permissions": {
+    "allow": [
+      "Bash(py:*)",
+      "Bash(python math.py)",
+      "Bash(git remote:*)",
+      "Bash(git config:*)",
+      "Bash(git add:*)",
+      "Bash(git commit:*)",
+      "Bash(git push:*)"
+    ]
+  }
+}
--- a/.idea/AIcode.iml
+++ b/.idea/AIcode.iml
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<module type="PYTHON_MODULE" version="4">
+  <component name="NewModuleRootManager">
+    <content url="file://$MODULE_DIR$" />
+    <orderEntry type="jdk" jdkName="D:\software\anaconda" jdkType="Python SDK" />
+    <orderEntry type="sourceFolder" forTests="false" />
+  </component>
+</module>
--- a/.idea/inspectionProfiles/profiles_settings.xml
+++ b/.idea/inspectionProfiles/profiles_settings.xml
@@ -0,0 +1,6 @@
+<component name="InspectionProjectProfileManager">
+  <settings>
+    <option name="USE_PROJECT_PROFILE" value="false" />
+    <version value="1.0" />
+  </settings>
+</component>
--- a/.idea/modules.xml
+++ b/.idea/modules.xml
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/.idea/AIcode.iml" filepath="$PROJECT_DIR$/.idea/AIcode.iml" />
+    </modules>
+  </component>
+</project>
--- a/.idea/workspace.xml
+++ b/.idea/workspace.xml
@@ -0,0 +1,46 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ChangeListManager">
+    <list default="true" id="c2fd381e-9b12-4d32-b4e8-24d694013b53" name="更改" comment="" />
+    <option name="SHOW_DIALOG" value="false" />
+    <option name="HIGHLIGHT_CONFLICTS" value="true" />
+    <option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
+    <option name="LAST_RESOLUTION" value="IGNORE" />
+  </component>
+  <component name="ProjectColorInfo"><![CDATA[{
+  "associatedIndex": 7
+}]]></component>
+  <component name="ProjectId" id="3BmcLYQM9mcuqt9z3i5B1kZMOfO" />
+  <component name="ProjectViewState">
+    <option name="hideEmptyMiddlePackages" value="true" />
+    <option name="showLibraryContents" value="true" />
+  </component>
+  <component name="PropertiesComponent"><![CDATA[{
+  "keyToString": {
+    "ModuleVcsDetector.initialDetectionPerformed": "true",
+    "RunOnceActivity.ShowReadmeOnStart": "true",
+    "nodejs_package_manager_path": "npm",
+    "settings.editor.selected.configurable": "preferences.lookFeel",
+    "vue.rearranger.settings.migration": "true"
+  }
+}]]></component>
+  <component name="SharedIndexes">
+    <attachedChunks>
+      <set>
+        <option value="bundled-js-predefined-d6986cc7102b-6a121458b545-JavaScript-PY-251.25410.159" />
+        <option value="bundled-python-sdk-e0ed3721d81e-36ea0e71a18c-com.jetbrains.pycharm.pro.sharedIndexes.bundled-PY-251.25410.159" />
+      </set>
+    </attachedChunks>
+  </component>
+  <component name="TaskManager">
+    <task active="true" id="Default" summary="默认任务">
+      <changelist id="c2fd381e-9b12-4d32-b4e8-24d694013b53" name="更改" comment="" />
+      <created>1775097630560</created>
+      <option name="number" value="Default" />
+      <option name="presentableId" value="Default" />
+      <updated>1775097630560</updated>
+      <workItem from="1775097631689" duration="2000" />
+    </task>
+    <servers />
+  </component>
+</project>
--- a/test/.claude/settings.local.json
+++ b/test/.claude/settings.local.json
@@ -0,0 +1,15 @@
+{
+  "permissions": {
+    "allow": [
+      "Bash(python -c \"import pandas as pd; df = pd.read_excel\\('data/心血管疾病.xlsx', nrows=5\\); print\\('Columns:', df.columns.tolist\\(\\)\\); print\\('Data types:', df.dtypes\\); print\\('Sample data:'\\); print\\(df.head\\(\\)\\)\")",
+      "Bash(\"D:\\\\software\\\\anaconda\\\\Scripts\\\\conda.exe\" run:*)",
+      "Bash(\"D:\\\\software\\\\anaconda\\\\envs\\\\cardioenv\\\\python.exe\" module1_dashboard/test_data.py)",
+      "Bash(\"D:\\\\software\\\\anaconda\\\\envs\\\\cardioenv\\\\python.exe\" -m streamlit run module1_dashboard/cardio_dashboard.py --help)",
+      "Bash(\"D:\\\\software\\\\anaconda\\\\envs\\\\cardioenv\\\\python.exe\" -c \"import pandas as pd; df = pd.read_excel\\('data/心血管疾病.xlsx', nrows=10\\); print\\('Columns:', df.columns.tolist\\(\\)\\); print\\('\\\\nData types:'\\); print\\(df.dtypes\\); print\\('\\\\nSample data:'\\); print\\(df[['age', 'gender', 'height', 'weight', 'ap_hi', 'ap_lo', 'cholesterol', 'gluc', 'smoke', 'alco', 'active', 'cardio']].head\\(\\)\\)\")",
+      "Bash(\"D:\\\\software\\\\anaconda\\\\envs\\\\cardioenv\\\\python.exe\" -m py_compile module2_predictor/train_and_save.py)",
+      "Bash(\"D:\\\\software\\\\anaconda\\\\envs\\\\cardioenv\\\\python.exe\" module2_predictor/train_and_save.py)",
+      "Bash(\"D:\\\\software\\\\anaconda\\\\envs\\\\cardioenv\\\\python.exe\" -m py_compile module2_predictor/app.py)",
+      "Bash(\"D:\\\\software\\\\anaconda\\\\envs\\\\cardioenv\\\\python.exe\" module2_predictor/test_api.py)"
+    ]
+  }
+}
--- a/test/.env
+++ b/test/.env
@@ -0,0 +1,36 @@
+# CardioAI Configuration
+# Environment variables for the cardiovascular disease intelligent assistant system
+
+# Data file path (relative to project root)
+DATA_PATH=./data/心血管疾病.xlsx
+
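+# Flask server configuration
+# WARNING: FLASK_DEBUG=True together with FLASK_HOST=0.0.0.0 exposes the
+# interactive debugger on every network interface. Use these values for local
+# development only; set FLASK_DEBUG=False (or bind to 127.0.0.1) elsewhere.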
+FLASK_APP=module2_predictor/app.py
+FLASK_ENV=development
+FLASK_DEBUG=True
+FLASK_HOST=0.0.0.0
+FLASK_PORT=5000
+
+# Streamlit dashboard configuration
+STREAMLIT_PORT=8501
+STREAMLIT_THEME=light
+
+# AI Model API Keys (replace with your actual keys)
+# DeepSeek API (from dashscope)
+DASHSCOPE_API_KEY=your_dashscope_api_key_here
+
+# OpenAI API (for langchain-openai, if used)
+OPENAI_API_KEY=your_openai_api_key_here
+
+# Voice assistant configuration
+VOICE_ASSISTANT_PORT=5001
+VOICE_ASSISTANT_HOST=0.0.0.0
+
+# Model file paths (will be generated during training)
+MODEL_PATH=./module2_predictor/models/xgb_model.pkl
+SCALER_PATH=./module2_predictor/models/scaler.pkl
+ENCODER_PATH=./module2_predictor/models/encoder.pkl
+
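+# Feature configuration
+# NOTE: these names (trestbps, chol, thalach, cp, ...) do not match the columns
+# of data/心血管疾病.xlsx documented in README.md (age, gender, height, weight,
+# ap_hi, ap_lo, cholesterol, gluc, smoke, alco, active). Confirm whether any
+# module still reads these two variables before relying on them.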
+NUMERICAL_FEATURES=age,trestbps,chol,thalach,oldpeak
+CATEGORICAL_FEATURES=sex,cp,fbs,restecg,exang,slope,ca,thal
--- a/test/.idea/.gitignore
+++ b/test/.idea/.gitignore
@@ -0,0 +1,8 @@
+# 默认忽略的文件
+/shelf/
+/workspace.xml
+# 基于编辑器的 HTTP 客户端请求
+/httpRequests/
+# Datasource local storage ignored files
+/dataSources/
+/dataSources.local.xml
--- a/test/.idea/inspectionProfiles/profiles_settings.xml
+++ b/test/.idea/inspectionProfiles/profiles_settings.xml
@@ -0,0 +1,6 @@
+<component name="InspectionProjectProfileManager">
+  <settings>
+    <option name="USE_PROJECT_PROFILE" value="false" />
+    <version value="1.0" />
+  </settings>
+</component>
--- a/test/.idea/misc.xml
+++ b/test/.idea/misc.xml
@@ -0,0 +1,4 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectRootManager" version="2" project-jdk-name="cardioenv" project-jdk-type="Python SDK" />
+</project>
--- a/test/.idea/modules.xml
+++ b/test/.idea/modules.xml
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/.idea/test.iml" filepath="$PROJECT_DIR$/.idea/test.iml" />
+    </modules>
+  </component>
+</project>
--- a/test/.idea/test.iml
+++ b/test/.idea/test.iml
@@ -0,0 +1,16 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<module type="PYTHON_MODULE" version="4">
+  <component name="NewModuleRootManager">
+    <content url="file://$MODULE_DIR$" />
+    <orderEntry type="jdk" jdkName="cardioenv" jdkType="Python SDK" />
+    <orderEntry type="sourceFolder" forTests="false" />
+  </component>
+  <component name="TemplatesService">
+    <option name="TEMPLATE_CONFIGURATION" value="Jinja2" />
+    <option name="TEMPLATE_FOLDERS">
+      <list>
+        <option value="$MODULE_DIR$/module2_predictor/templates" />
+      </list>
+    </option>
+  </component>
+</module>
--- a/test/.idea/vcs.xml
+++ b/test/.idea/vcs.xml
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="VcsDirectoryMappings">
+    <mapping directory="$PROJECT_DIR$/.." vcs="Git" />
+  </component>
+</project>
--- a/test/README.md
+++ b/test/README.md
@@ -0,0 +1,493 @@
+# CardioAI - 心血管疾病智能辅助系统
+
+## 项目概述
+
+CardioAI是一个多模块应用系统，集成了数据可视化、机器学习预测和AI语音问答功能，用于心血管疾病的智能辅助分析和诊断。
+
+### 系统模块
+
+1. **Module 1: 数据可视化仪表板** (Streamlit) - 本模块
+   - 数据清洗与特征工程
+   - 交互式数据筛选
+   - 可视化分析图表
+
+2. **Module 2: 机器学习预测器** (Flask + XGBoost)
+   - 心血管疾病风险预测模型
+   - RESTful API接口
+   - 实时预测服务
+
+3. **Module 3: AI语音助手** (DeepSeek + CosyVoice)
+   - 自然语言问答
+   - 语音交互界面
+   - 疾病知识查询
+
+## Module 1: 数据可视化仪表板
+
+### 功能特性
+
+- ✅ **数据加载与清洗**: 自动处理异常值和缺失数据
+- ✅ **特征工程**: 年龄转换、BMI计算、类别编码
+- ✅ **交互式筛选**: 侧边栏多维度数据筛选
+- ✅ **可视化分析**: Plotly交互式图表
+- ✅ **性能优化**: 使用缓存加速数据加载
+
+### 数据处理流程
+
+1. **数据加载**: 从Excel文件加载原始数据
+2. **年龄转换**: 将天数转换为年数（四舍五入）
+3. **BMI计算**: `BMI = 体重(kg) / (身高(m)^2)`
+4. **异常值处理**:
+   - 删除舒张压 ≥ 收缩压的记录
+   - 删除收缩压不在[90, 250] mmHg范围的记录
+   - 删除舒张压不在[60, 150] mmHg范围的记录
+5. **类别转换**:
+   - 胆固醇水平: 1=正常, 2=高于正常, 3=极高
+   - 血糖水平: 1=正常, 2=高于正常, 3=极高
+   - 性别: 1=女性, 2=男性
+   - BMI分类: <18.5=偏瘦, 18.5-24.9=正常, 25-29.9=超重, ≥30=肥胖
+
+### 快速开始
+
+#### 1. 环境配置
+
+```bash
+# 创建并激活conda虚拟环境
+conda create -n cardioenv python=3.10
+conda activate cardioenv
+
+# 安装依赖包
+pip install -r requirements.txt
+```
+
+#### 2. 数据准备
+
+确保数据文件位于正确路径：
+```
+项目根目录/
+├── data/
+│   └── 心血管疾病.xlsx
+└── module1_dashboard/
+    └── cardio_dashboard.py
+```
+
+#### 3. 启动仪表板
+
+```bash
+# 进入项目根目录
+cd D:\Project\PythonProject\AIcode\test
+
+# 激活conda环境
+conda activate cardioenv
+
+# 启动Streamlit应用程序
+streamlit run module1_dashboard/cardio_dashboard.py
+```
+
+或者使用conda直接运行：
+
+```bash
+"D:\software\anaconda\Scripts\conda.exe" run -n cardioenv streamlit run module1_dashboard/cardio_dashboard.py
+```
+
+#### 4. 访问应用
+
+打开浏览器，访问: [http://localhost:8501](http://localhost:8501)
+
+### 界面说明
+
+#### 侧边栏筛选器
+- **年龄范围**: 滑动选择器，筛选指定年龄范围的记录
+- **性别**: 多选框，选择要分析的性别（女性/男性）
+- **心血管疾病状态**: 多选框，选择疾病状态（有/无）
+- **BMI分类**: 多选框，选择BMI分类（偏瘦/正常/超重/肥胖）
+- **胆固醇水平**: 多选框，选择胆固醇水平
+- **血糖水平**: 多选框，选择血糖水平
+
+#### 主界面区域
+
+1. **关键指标面板**
+   - 筛选后记录数
+   - 心血管疾病风险率
+   - 平均年龄
+   - 平均BMI
+
+2. **数据可视化图表**
+   - 年龄分布与心血管疾病关系直方图
+   - BMI分类对心血管疾病影响的堆叠柱状图
+   - 血压关系散点图
+   - 胆固醇水平分布饼图
+   - 血糖水平分布饼图
+
+3. **数据预览**
+   - 数据摘要（形状、类型、缺失值）
+   - 原始数据表格（可自定义显示的列）
+
+### 配置文件说明
+
+`.env` 文件包含以下配置项：
+
+```ini
+# 数据文件路径
+DATA_PATH=./data/心血管疾病.xlsx
+
+# Flask服务器配置
+FLASK_APP=module2_predictor/app.py
+FLASK_ENV=development
+
+# Streamlit配置
+STREAMLIT_PORT=8501
+
+# AI模型API密钥（需要替换为实际值）
+DASHSCOPE_API_KEY=your_dashscope_api_key_here
+OPENAI_API_KEY=your_openai_api_key_here
+
+# 模型文件路径
+MODEL_PATH=./module2_predictor/models/xgb_model.pkl
+```
+
+### 依赖包说明
+
+详细依赖见 `requirements.txt`：
+
+- **数据处理**: pandas, numpy, openpyxl
+- **机器学习**: scikit-learn, xgboost, joblib
+- **可视化**: streamlit, plotly
+- **Web服务**: Flask
+- **环境管理**: python-dotenv
+- **AI集成**: langchain-openai, dashscope, requests
+
+## Module 2: 机器学习预测器
+
+### 功能特性
+
+- ✅ **模型训练**: XGBoost分类器，准确率约73%
+- ✅ **特征工程**: 年龄转换、BMI计算、异常值处理
+- ✅ **RESTful API**: Flask提供预测接口
+- ✅ **前端界面**: 交互式Web表单，实时预测
+- ✅ **模型持久化**: Joblib保存完整Pipeline
+
+### 模型训练
+
+#### 1. 训练模型（一次性）
+```bash
+# 进入项目根目录
+cd D:\Project\PythonProject\AIcode\test
+
+# 激活conda环境
+conda activate cardioenv
+
+# 运行训练脚本
+python module2_predictor/train_and_save.py
+```
+
+训练脚本将：
+1. 加载和清洗数据（与Module 1相同）
+2. 特征工程：年龄转换、BMI计算
+3. 构建机器学习Pipeline（StandardScaler + OneHotEncoder + XGBoost）
+4. 训练模型并评估性能
+5. 保存模型到 `module2_predictor/models/cardio_predictor_model.pkl`
+
+#### 2. 模型特征
+- **连续特征**: age_years, bmi, ap_hi, ap_lo
+- **分类特征**: gender, cholesterol, gluc
+- **二元特征**: smoke, alco, active
+
+**Top 5 重要特征**:
+1. 收缩压 (ap_hi)
+2. 极高胆固醇 (cholesterol_3)
+3. 年龄 (age_years)
+4. 舒张压 (ap_lo)
+5. 极高血糖 (gluc_3)
+
+### Flask API服务
+
+#### 1. 启动API服务
+```bash
+# 进入项目根目录
+cd D:\Project\PythonProject\AIcode\test
+
+# 激活conda环境
+conda activate cardioenv
+
+# 方法1: 直接运行Python脚本
+python module2_predictor/app.py
+
+# 方法2: 使用Flask CLI
+set FLASK_APP=module2_predictor/app.py
+flask run --host=0.0.0.0 --port=5000
+
+# 方法3: 使用conda直接运行
+"D:\software\anaconda\Scripts\conda.exe" run -n cardioenv python module2_predictor/app.py
+```
+
+#### 2. API端点
+
+| 端点 | 方法 | 描述 |
+|------|------|------|
+| `/` | GET | 前端预测界面 |
+| `/predict_cardio` | POST | 预测接口（接收JSON） |
+| `/health` | GET | 健康检查 |
+| `/model_info` | GET | 模型信息 |
+
+#### 3. 预测接口示例
+
+**请求**:
+```bash
+curl -X POST http://localhost:5000/predict_cardio \
+  -H "Content-Type: application/json" \
+  -d '{
+    "age": 20228,
+    "gender": 1,
+    "height": 156,
+    "weight": 85,
+    "ap_hi": 140,
+    "ap_lo": 90,
+    "cholesterol": 1,
+    "gluc": 1,
+    "smoke": 0,
+    "alco": 0,
+    "active": 1
+  }'
+```
+
+**响应**:
+```json
+{
+  "success": true,
+  "prediction": 1,
+  "probability": 0.85,
+  "risk_level": "高危",
+  "message": "预测成功",
+  "features": {
+    "age_years": 55,
+    "bmi": 34.9,
+    "ap_hi": 140,
+    "ap_lo": 90,
+    "gender": 1,
+    "cholesterol": 1,
+    "gluc": 1,
+    "smoke": 0,
+    "alco": 0,
+    "active": 1
+  }
+}
+```
+
+### 前端界面
+
+访问 `http://localhost:5000` 使用预测界面：
+
+1. **输入表单**: 11个特征字段，包含验证和示例数据
+2. **实时预测**: 点击"开始预测"获取风险评估
+3. **结果展示**: 风险等级、概率、健康建议
+4. **示例数据**: 提供低、中、高风险示例数据
+
+### 项目结构
+
+```
+module2_predictor/
+├── app.py                    # Flask应用主程序
+├── train_and_save.py         # 模型训练脚本（一次性）
+├── test_api.py               # API测试脚本
+├── templates/
+│   └── index.html           # 前端界面模板
+└── models/                  # 模型文件目录（训练后生成）
+    ├── cardio_predictor_model.pkl
+    └── feature_info.txt
+```
+
+### 测试验证
+
+#### 1. 测试模型加载
+```bash
+python module2_predictor/test_api.py
+```
+
+#### 2. 测试API服务
+1. 启动Flask应用：`python module2_predictor/app.py`
+2. 打开浏览器访问：`http://localhost:5000`
+3. 使用示例数据测试预测功能
+4. 检查健康状态：`http://localhost:5000/health`
+
+#### 3. 验证预测准确性
+- 测试集准确率：约73%
+- 特征重要性符合医学常识
+- 风险等级划分合理
+
+### 配置说明
+
+#### 模型参数
+- **算法**: XGBoost Classifier
+- **树数量**: 100
+- **最大深度**: 5
+- **学习率**: 0.1
+- **子采样率**: 0.8
+- **随机种子**: 42
+
+#### 特征预处理
+- **连续特征**: StandardScaler标准化
+- **分类特征**: OneHotEncoder独热编码
+- **二元特征**: 直接使用（0/1）
+
+### 性能指标
+
+| 指标 | 训练集 | 测试集 |
+|------|--------|--------|
+| 准确率 | 74.21% | 73.14% |
+| 特征数量 | 10个 | 10个 |
+| 模型大小 | ~1.2 MB | ~1.2 MB |
+
+### 注意事项
+
+1. **模型更新**: 当数据变化时，重新运行训练脚本
+2. **输入验证**: API对输入数据有严格的范围验证
+3. **血压合理性**: 自动拒绝舒张压≥收缩压的输入
+4. **错误处理**: 详细的错误信息和日志记录
+5. **性能**: 单次预测时间 < 100ms
+
+### 常见问题（Module 2：机器学习预测器）
+
+#### Q1: 模型训练失败
+**症状**: 训练脚本报错或无法保存模型
+**解决**:
+1. 检查数据文件路径是否正确
+2. 确保有足够的磁盘空间
+3. 检查Python依赖包是否完整安装
+
+#### Q2: Flask应用无法启动
+**症状**: 启动时出现导入错误或模型加载失败
+**解决**:
+1. 检查conda环境是否激活
+2. 确保模型文件存在：`module2_predictor/models/cardio_predictor_model.pkl`
+3. 检查端口5000是否被占用
+
+#### Q3: 预测结果不合理
+**症状**: 预测概率总是0或1，或与预期不符
+**解决**:
+1. 检查输入数据是否在合理范围内
+2. 验证特征预处理是否正确
+3. 确保模型训练时使用了正确的特征
+
+#### Q4: 前端界面无法访问
+**症状**: 浏览器显示连接错误
+**解决**:
+1. 确认Flask应用正在运行
+2. 检查防火墙设置，允许端口5000
+3. 尝试访问 `http://localhost:5000/health` 检查服务状态
+
+### 下一步开发
+
+1. **模型优化**: 尝试其他算法（LightGBM, CatBoost）和超参数调优
+2. **特征扩展**: 添加更多临床特征（家族史、药物治疗等）
+3. **API增强**: 添加批量预测、模型版本管理
+4. **监控告警**: 添加性能监控和异常告警
+5. **部署优化**: Docker容器化，云平台部署
+
+---
+
+### 常见问题（Module 1：数据可视化仪表板）
+
+#### Q1: 数据加载失败
+**症状**: 应用程序无法启动，提示文件找不到或格式错误
+**解决**:
+1. 检查 `data/心血管疾病.xlsx` 文件是否存在
+2. 确认文件格式为Excel 2007+ (.xlsx)
+3. 检查文件编码，确保不是二进制损坏
+
+#### Q2: 图表显示异常
+**症状**: 图表不显示或显示错误
+**解决**:
+1. 检查Plotly是否正确安装：`pip install plotly`
+2. 确保数据经过正确清洗，没有无限值或NaN
+3. 检查筛选条件是否过于严格导致无数据
+
+#### Q3: 应用程序运行缓慢
+**症状**: 页面响应慢，筛选操作延迟
+**解决**:
+1. 利用 `@st.cache_data` 装饰器的缓存功能
+2. 减少一次性加载的数据量
+3. 优化图表复杂度，减少数据点数量
+
+#### Q4: 中文显示乱码
+**症状**: 中文文字显示为乱码
+**解决**:
+1. 确保系统支持中文字体
+2. Streamlit默认支持UTF-8编码，检查源代码文件保存为UTF-8
+3. 在Windows系统上，设置控制台编码为UTF-8
+
+### 开发说明
+
+#### 项目结构
+```
+D:\Project\PythonProject\AIcode\test\
+├── data\                   # 数据文件目录
+│   └── 心血管疾病.xlsx
+├── module1_dashboard\     # 数据可视化模块
+│   ├── cardio_dashboard.py
+│   └── test_data.py
+├── module2_predictor\     # 机器学习预测模块
+│   └── templates\
+├── module3_voice_assistant\ # AI语音助手模块
+│   └── templates\
+├── requirements.txt       # Python依赖包
+├── .env                  # 环境配置
+└── README.md            # 项目文档
+```
+
+#### 代码结构
+- `load_and_process_data()`: 数据加载和清洗主函数，使用 `@st.cache_data` 缓存
+- `create_filters()`: 创建侧边栏筛选器组件
+- `apply_filters()`: 应用筛选条件到数据框
+- `display_metrics()`: 显示关键指标卡片
+- `create_visualizations()`: 创建所有可视化图表
+- `display_data_preview()`: 显示数据预览和摘要
+
+#### 扩展开发
+
+1. **添加新图表**
+   ```python
+   def create_new_chart(df):
+       fig = px.scatter(df, x='column1', y='column2', color='cardio_str')
+       st.plotly_chart(fig, use_container_width=True)
+   ```
+
+2. **添加新筛选器**
+   ```python
+   # 在create_filters函数中添加
+   new_filter = st.sidebar.selectbox("新筛选器", options=['选项1', '选项2'])
+   ```
+
+3. **自定义样式**
+   ```python
+   st.markdown("""
+   <style>
+       .custom-class { color: blue; }
+   </style>
+   """, unsafe_allow_html=True)
+   ```
+
+### 性能优化建议
+
+1. **数据缓存**: 所有数据处理函数使用 `@st.cache_data` 装饰器
+2. **增量加载**: 对于大型数据集，考虑分页或懒加载
+3. **图表优化**: 使用采样或聚合减少数据点数量
+4. **异步处理**: 长时间操作使用异步函数避免阻塞UI
+
+### 下一步计划
+
+1. **Module 2开发**: 实现XGBoost预测模型和Flask API
+2. **Module 3开发**: 集成DeepSeek和CosyVoice语音助手
+3. **功能增强**: 添加数据导出、报告生成功能
+4. **部署优化**: Docker容器化，云平台部署
+
+### 技术支持
+
+- **问题反馈**: 检查GitHub Issues或联系开发团队
+- **文档更新**: 随着功能迭代保持文档同步
+- **版本管理**: 使用Git进行版本控制，定期发布稳定版本
+
+---
+
+**版权所有 © 2024 CardioAI项目组**  
+**版本**: 1.0.0  
+**最后更新**: 2026-04-02
--- a/test/data/心血管疾病.xlsx
+++ b/test/data/心血管疾病.xlsx
--- a/test/module1_dashboard/pycache/cardio_dashboard.cpython-310.pyc
+++ b/test/module1_dashboard/pycache/cardio_dashboard.cpython-310.pyc
--- a/test/module1_dashboard/cardio_dashboard.py
+++ b/test/module1_dashboard/cardio_dashboard.py
@@ -0,0 +1,581 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+CardioAI - 心血管疾病智能辅助系统
+数据可视化仪表板模块
+
+功能：
+1. 数据加载与清洗
+2. 特征工程（年龄转换、BMI计算、类别转换）
+3. 交互式数据筛选
+4. 可视化分析（Plotly图表）
+"""
+
+import streamlit as st
+import pandas as pd
+import numpy as np
+import plotly.express as px
+import plotly.graph_objects as go
+from pathlib import Path
+import sys
+import os
+
+# Page configuration. NOTE(review): Streamlit has historically required
+# set_page_config to be the first Streamlit call in a script, so keep it above
+# every other st.* usage - confirm against the installed Streamlit version.
+st.set_page_config(
+    page_title="CardioAI - 心血管疾病分析仪表板",
+    page_icon="❤️",
+    layout="wide",
+    initial_sidebar_state="expanded"
+)
+
+# Append the project root (the parent of this file's directory) to sys.path
+# so that sibling modules of the project can be imported.
+project_root = Path(__file__).parent.parent
+sys.path.append(str(project_root))
+
+# Inject custom CSS: header styles, the metric card and the button colours.
+# (This only adds style rules; it does not configure fonts or encodings.)
+st.markdown("""
+<style>
+    .main-header {
+        font-size: 2.5rem;
+        color: #e63946;
+        text-align: center;
+        margin-bottom: 2rem;
+        font-weight: bold;
+    }
+    .sub-header {
+        font-size: 1.5rem;
+        color: #457b9d;
+        margin-top: 1.5rem;
+        margin-bottom: 1rem;
+        font-weight: bold;
+    }
+    .metric-card {
+        background-color: #f1faee;
+        padding: 1.5rem;
+        border-radius: 10px;
+        border-left: 5px solid #1d3557;
+        margin-bottom: 1rem;
+    }
+    .stButton>button {
+        background-color: #1d3557;
+        color: white;
+        border: none;
+        padding: 0.5rem 1rem;
+        border-radius: 5px;
+    }
+</style>
+""", unsafe_allow_html=True)
+
+# Path of the Excel data file, resolved relative to this source file
+# (<project root>/data/心血管疾病.xlsx).
+DATA_PATH = Path(__file__).parent.parent / "data" / "心血管疾病.xlsx"
+
+@st.cache_data(show_spinner="正在加载和清洗数据...")
+def load_and_process_data():
+    """
+    加载Excel数据并进行清洗和特征工程
+
+    返回:
+        pd.DataFrame: 处理后的数据框
+    """
+    try:
+        # 加载数据
+        st.info(f"正在从 {DATA_PATH} 加载数据...")
+        df = pd.read_excel(DATA_PATH)
+
+        # 检查必要列是否存在
+        required_columns = ['age', 'gender', 'height', 'weight', 'ap_hi', 'ap_lo',
+                           'cholesterol', 'gluc', 'cardio']
+        missing_columns = [col for col in required_columns if col not in df.columns]
+        if missing_columns:
+            st.error(f"数据文件中缺少必要列: {missing_columns}")
+            return pd.DataFrame()
+
+        # 创建数据副本
+        df_processed = df.copy()
+
+        # 1. 年龄转换：从天转换为年（四舍五入）
+        df_processed['age_years'] = (df_processed['age'] / 365.25).round().astype(int)
+
+        # 2. 计算BMI: BMI = weight(kg) / (height(m)^2)
+        # 注意：height数据单位为厘米，需要转换为米
+        df_processed['bmi'] = df_processed['weight'] / ((df_processed['height'] / 100) ** 2)
+        df_processed['bmi'] = df_processed['bmi'].round(2)
+
+        # 3. 异常值处理
+        # 删除舒张压 >= 收缩压的记录
+        invalid_bp = df_processed['ap_lo'] >= df_processed['ap_hi']
+        if invalid_bp.any():
+            st.warning(f"删除 {invalid_bp.sum()} 条舒张压 >= 收缩压的异常记录")
+            df_processed = df_processed[~invalid_bp].copy()
+
+        # 删除血压极端异常值
+        # 收缩压 ∈ [90, 250], 舒张压 ∈ [60, 150]
+        bp_outliers = ~((df_processed['ap_hi'] >= 90) & (df_processed['ap_hi'] <= 250) &
+                       (df_processed['ap_lo'] >= 60) & (df_processed['ap_lo'] <= 150))
+        if bp_outliers.any():
+            st.warning(f"删除 {bp_outliers.sum()} 条血压极端异常值记录")
+            df_processed = df_processed[~bp_outliers].copy()
+
+        # 4. 类别转换
+        # cholesterol转换
+        cholesterol_map = {
+            1: "正常",
+            2: "高于正常",
+            3: "极高"
+        }
+        df_processed['cholesterol_str'] = df_processed['cholesterol'].map(cholesterol_map)
+
+        # gluc转换
+        gluc_map = {
+            1: "正常",
+            2: "高于正常",
+            3: "极高"
+        }
+        df_processed['gluc_str'] = df_processed['gluc'].map(gluc_map)
+
+        # gender转换
+        gender_map = {
+            1: "女性",
+            2: "男性"
+        }
+        df_processed['gender_str'] = df_processed['gender'].map(gender_map)
+
+        # cardio转换
+        cardio_map = {
+            0: "无心血管疾病",
+            1: "有心血管疾病"
+        }
+        df_processed['cardio_str'] = df_processed['cardio'].map(cardio_map)
+
+        # 5. BMI分类
+        def categorize_bmi(bmi):
+            if bmi < 18.5:
+                return "偏瘦"
+            elif 18.5 <= bmi < 24.9:
+                return "正常"
+            elif 25 <= bmi < 29.9:
+                return "超重"
+            else:
+                return "肥胖"
+
+        df_processed['bmi_category'] = df_processed['bmi'].apply(categorize_bmi)
+
+        # 记录处理后的数据信息
+        st.success(f"数据加载和清洗完成！共处理 {len(df_processed)} 条记录")
+        st.info(f"原始数据: {len(df)} 条记录, 清洗后: {len(df_processed)} 条记录")
+
+        return df_processed
+
+    except Exception as e:
+        st.error(f"数据加载失败: {str(e)}")
+        return pd.DataFrame()
+
+def create_filters(df):
+    """
+    创建侧边栏筛选器
+
+    参数:
+        df: 处理后的数据框
+
+    返回:
+        dict: 筛选条件字典
+    """
+    st.sidebar.markdown("## 🔍 数据筛选")
+
+    # 年龄范围筛选
+    min_age = int(df['age_years'].min())
+    max_age = int(df['age_years'].max())
+    age_range = st.sidebar.slider(
+        "选择年龄范围:",
+        min_value=min_age,
+        max_value=max_age,
+        value=(min_age, max_age),
+        help="筛选指定年龄范围内的记录"
+    )
+
+    # 性别筛选
+    gender_options = df['gender_str'].unique().tolist()
+    selected_genders = st.sidebar.multiselect(
+        "选择性别:",
+        options=gender_options,
+        default=gender_options,
+        help="选择要分析的性别"
+    )
+
+    # 心血管疾病状态筛选
+    cardio_options = df['cardio_str'].unique().tolist()
+    selected_cardio = st.sidebar.multiselect(
+        "选择心血管疾病状态:",
+        options=cardio_options,
+        default=cardio_options,
+        help="选择要分析的心血管疾病状态"
+    )
+
+    # BMI分类筛选
+    bmi_options = df['bmi_category'].unique().tolist()
+    selected_bmi = st.sidebar.multiselect(
+        "选择BMI分类:",
+        options=bmi_options,
+        default=bmi_options,
+        help="选择要分析的BMI分类"
+    )
+
+    # 胆固醇水平筛选
+    cholesterol_options = df['cholesterol_str'].unique().tolist()
+    selected_cholesterol = st.sidebar.multiselect(
+        "选择胆固醇水平:",
+        options=cholesterol_options,
+        default=cholesterol_options,
+        help="选择要分析的胆固醇水平"
+    )
+
+    # 血糖水平筛选
+    gluc_options = df['gluc_str'].unique().tolist()
+    selected_gluc = st.sidebar.multiselect(
+        "选择血糖水平:",
+        options=gluc_options,
+        default=gluc_options,
+        help="选择要分析的血糖水平"
+    )
+
+    return {
+        'age_range': age_range,
+        'genders': selected_genders,
+        'cardio': selected_cardio,
+        'bmi_categories': selected_bmi,
+        'cholesterol': selected_cholesterol,
+        'gluc': selected_gluc
+    }
+
+def apply_filters(df, filters):
+    """
+    应用筛选条件到数据框
+
+    参数:
+        df: 原始数据框
+        filters: 筛选条件字典
+
+    返回:
+        pd.DataFrame: 筛选后的数据框
+    """
+    filtered_df = df.copy()
+
+    # 应用年龄筛选
+    filtered_df = filtered_df[
+        (filtered_df['age_years'] >= filters['age_range'][0]) &
+        (filtered_df['age_years'] <= filters['age_range'][1])
+    ]
+
+    # 应用性别筛选
+    if filters['genders']:
+        filtered_df = filtered_df[filtered_df['gender_str'].isin(filters['genders'])]
+
+    # 应用心血管疾病筛选
+    if filters['cardio']:
+        filtered_df = filtered_df[filtered_df['cardio_str'].isin(filters['cardio'])]
+
+    # 应用BMI分类筛选
+    if filters['bmi_categories']:
+        filtered_df = filtered_df[filtered_df['bmi_category'].isin(filters['bmi_categories'])]
+
+    # 应用胆固醇筛选
+    if filters['cholesterol']:
+        filtered_df = filtered_df[filtered_df['cholesterol_str'].isin(filters['cholesterol'])]
+
+    # 应用血糖筛选
+    if filters['gluc']:
+        filtered_df = filtered_df[filtered_df['gluc_str'].isin(filters['gluc'])]
+
+    return filtered_df
+
+def display_metrics(filtered_df, original_df):
+    """
+    显示关键指标
+
+    参数:
+        filtered_df: 筛选后的数据框
+        original_df: 原始数据框
+    """
+    col1, col2, col3, col4 = st.columns(4)
+
+    with col1:
+        st.metric(
+            label="筛选后记录数",
+            value=f"{len(filtered_df):,}",
+            delta=f"{len(filtered_df) - len(original_df):+,}"
+        )
+
+    with col2:
+        # 心血管疾病风险率
+        if len(filtered_df) > 0:
+            risk_rate = (filtered_df['cardio'].sum() / len(filtered_df) * 100).round(2)
+            st.metric(
+                label="心血管疾病风险率",
+                value=f"{risk_rate}%",
+                help="当前筛选条件下心血管疾病患者比例"
+            )
+        else:
+            st.metric(label="心血管疾病风险率", value="N/A")
+
+    with col3:
+        # 平均年龄
+        if len(filtered_df) > 0:
+            avg_age = filtered_df['age_years'].mean().round(1)
+            st.metric(
+                label="平均年龄",
+                value=f"{avg_age} 岁",
+                help="当前筛选条件下的平均年龄"
+            )
+        else:
+            st.metric(label="平均年龄", value="N/A")
+
+    with col4:
+        # 平均BMI
+        if len(filtered_df) > 0:
+            avg_bmi = filtered_df['bmi'].mean().round(1)
+            st.metric(
+                label="平均BMI",
+                value=str(avg_bmi),
+                help="当前筛选条件下的平均身体质量指数"
+            )
+        else:
+            st.metric(label="平均BMI", value="N/A")
+
+def create_visualizations(df):
+    """
+    创建可视化图表
+
+    参数:
+        df: 要可视化的数据框
+    """
+    if len(df) == 0:
+        st.warning("没有可用的数据进行可视化")
+        return
+
+    st.markdown("## 📊 数据可视化分析")
+
+    # 图1: 年龄分布直方图（按心血管疾病状态区分）
+    col1, col2 = st.columns(2)
+
+    with col1:
+        st.markdown("### 年龄分布分析")
+        fig_age = px.histogram(
+            df,
+            x='age_years',
+            color='cardio_str',
+            nbins=30,
+            barmode='overlay',
+            opacity=0.7,
+            labels={
+                'age_years': '年龄（岁）',
+                'cardio_str': '心血管疾病状态',
+                'count': '人数'
+            },
+            title="年龄分布与心血管疾病关系",
+            color_discrete_map={
+                "有心血管疾病": "#e63946",
+                "无心血管疾病": "#457b9d"
+            }
+        )
+        fig_age.update_layout(
+            legend_title="疾病状态",
+            hovermode='x unified'
+        )
+        st.plotly_chart(fig_age, use_container_width=True)
+
+    with col2:
+        st.markdown("### BMI分类与心血管疾病关系")
+        # 创建交叉表
+        bmi_cardio_cross = pd.crosstab(
+            df['bmi_category'],
+            df['cardio_str'],
+            normalize='index'
+        ).reset_index()
+
+        # 转换为长格式
+        bmi_cardio_long = bmi_cardio_cross.melt(
+            id_vars='bmi_category',
+            var_name='cardio_status',
+            value_name='proportion'
+        )
+
+        fig_bmi = px.bar(
+            bmi_cardio_long,
+            x='bmi_category',
+            y='proportion',
+            color='cardio_status',
+            barmode='stack',
+            labels={
+                'bmi_category': 'BMI分类',
+                'proportion': '比例',
+                'cardio_status': '心血管疾病状态'
+            },
+            title="BMI分类对心血管疾病的影响",
+            color_discrete_map={
+                "有心血管疾病": "#e63946",
+                "无心血管疾病": "#457b9d"
+            }
+        )
+        fig_bmi.update_layout(
+            yaxis_tickformat='.1%',
+            legend_title="疾病状态"
+        )
+        st.plotly_chart(fig_bmi, use_container_width=True)
+
+    # 图3: 血压关系散点图
+    st.markdown("### 血压关系分析")
+    fig_bp = px.scatter(
+        df,
+        x='ap_hi',
+        y='ap_lo',
+        color='cardio_str',
+        size='bmi',
+        hover_data=['age_years', 'gender_str', 'cholesterol_str'],
+        labels={
+            'ap_hi': '收缩压 (mmHg)',
+            'ap_lo': '舒张压 (mmHg)',
+            'cardio_str': '心血管疾病状态',
+            'bmi': 'BMI'
+        },
+        title="血压关系散点图",
+        color_discrete_map={
+            "有心血管疾病": "#e63946",
+            "无心血管疾病": "#457b9d"
+        }
+    )
+    fig_bp.update_layout(legend_title="疾病状态")
+    st.plotly_chart(fig_bp, use_container_width=True)
+
+    # 图4: 胆固醇和血糖水平分析
+    col3, col4 = st.columns(2)
+
+    with col3:
+        st.markdown("### 胆固醇水平分布")
+        cholesterol_counts = df['cholesterol_str'].value_counts().reset_index()
+        cholesterol_counts.columns = ['cholesterol_level', 'count']
+
+        fig_chol = px.pie(
+            cholesterol_counts,
+            values='count',
+            names='cholesterol_level',
+            title="胆固醇水平分布",
+            color_discrete_sequence=px.colors.sequential.RdBu
+        )
+        fig_chol.update_traces(textposition='inside', textinfo='percent+label')
+        st.plotly_chart(fig_chol, use_container_width=True)
+
+    with col4:
+        st.markdown("### 血糖水平分布")
+        gluc_counts = df['gluc_str'].value_counts().reset_index()
+        gluc_counts.columns = ['gluc_level', 'count']
+
+        fig_gluc = px.pie(
+            gluc_counts,
+            values='count',
+            names='gluc_level',
+            title="血糖水平分布",
+            color_discrete_sequence=px.colors.sequential.Blues
+        )
+        fig_gluc.update_traces(textposition='inside', textinfo='percent+label')
+        st.plotly_chart(fig_gluc, use_container_width=True)
+
+def display_data_preview(df):
+    """
+    显示数据预览
+
+    参数:
+        df: 要预览的数据框
+    """
+    st.markdown("## 📋 数据预览")
+
+    # 显示数据摘要
+    with st.expander("数据摘要", expanded=False):
+        col1, col2 = st.columns(2)
+        with col1:
+            st.write("**数据形状:**", df.shape)
+            st.write("**数据类型:**")
+            st.write(df.dtypes.astype(str).reset_index().rename(columns={0: '类型', 'index': '列名'}))
+
+        with col2:
+            st.write("**缺失值统计:**")
+            missing = df.isnull().sum().reset_index()
+            missing.columns = ['列名', '缺失值数量']
+            missing = missing[missing['缺失值数量'] > 0]
+            if len(missing) > 0:
+                st.write(missing)
+            else:
+                st.write("无缺失值")
+
+    # 显示数据表格
+    with st.expander("查看原始数据", expanded=False):
+        # 选择要显示的列
+        available_columns = df.columns.tolist()
+        default_columns = ['age_years', 'gender_str', 'bmi', 'bmi_category',
+                          'ap_hi', 'ap_lo', 'cholesterol_str', 'gluc_str', 'cardio_str']
+
+        selected_columns = st.multiselect(
+            "选择要显示的列:",
+            options=available_columns,
+            default=default_columns
+        )
+
+        if selected_columns:
+            display_df = df[selected_columns].copy()
+            st.dataframe(display_df.head(100), use_container_width=True)
+            st.caption(f"显示前 100 行（共 {len(df)} 行）")
+        else:
+            st.info("请选择要显示的列")
+
+def main():
+    """
+    主函数
+    """
+    # 标题
+    st.markdown('<h1 class="main-header">❤️ CardioAI - 心血管疾病智能分析仪表板</h1>', unsafe_allow_html=True)
+    st.markdown("---")
+
+    # 加载数据
+    with st.spinner("正在加载数据，请稍候..."):
+        df = load_and_process_data()
+
+    if df.empty:
+        st.error("数据加载失败，请检查数据文件路径和格式")
+        return
+
+    # 创建筛选器
+    filters = create_filters(df)
+
+    # 应用筛选
+    filtered_df = apply_filters(df, filters)
+
+    # 显示关键指标
+    st.markdown("## 📈 关键指标")
+    display_metrics(filtered_df, df)
+
+    # 显示数据预览
+    display_data_preview(filtered_df)
+
+    # 创建可视化图表
+    create_visualizations(filtered_df)
+
+    # 侧边栏信息
+    st.sidebar.markdown("---")
+    st.sidebar.markdown("## ℹ️ 使用说明")
+    st.sidebar.info("""
+    1. 使用左侧筛选器选择要分析的数据子集
+    2. 查看上方的关键指标了解数据概况
+    3. 探索下方的可视化图表分析趋势和关系
+    4. 点击数据预览查看详细数据
+    """)
+
+    st.sidebar.markdown("## 📊 数据说明")
+    st.sidebar.info("""
+    - **年龄**: 原始数据为天数，已转换为岁数
+    - **BMI**: 身体质量指数，计算公式：体重(kg)/身高(m)²
+    - **血压**: 收缩压(ap_hi)和舒张压(ap_lo)
+    - **胆固醇/血糖**: 1=正常, 2=高于正常, 3=极高
+    - **心血管疾病**: 0=无, 1=有
+    """)
+
+if __name__ == "__main__":
+    main()
--- a/test/module1_dashboard/test_data.py
+++ b/test/module1_dashboard/test_data.py
@@ -0,0 +1,84 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+测试数据加载和处理的脚本
+"""
+
+import sys
+import os
+
+# 添加父目录到路径
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+# 导入数据处理函数
+from cardio_dashboard import load_and_process_data
+
+def test_data_loading():
+    """
+    Smoke-test the data loading and cleaning step.
+
+    Loads the data through ``load_and_process_data`` and checks that the
+    result is non-empty, contains the derived columns the dashboard uses,
+    has no record with ``ap_lo >= ap_hi``, and keeps blood pressure within
+    90-250 (``ap_hi``) and 60-150 (``ap_lo``). Progress and a short data
+    summary are printed along the way.
+
+    Returns:
+        bool: True when every check passes, False on the first failed check
+        or when any exception is raised (the traceback is printed).
+    """
+    print("开始测试数据加载和清洗...")
+
+    try:
+        # Load the data
+        df = load_and_process_data()
+
+        if df.empty:
+            print("❌ 数据加载失败：返回空数据框")
+            return False
+
+        print(f"✅ 数据加载成功！共 {len(df)} 条记录")
+
+        # Check that the required columns exist
+        required_columns = ['age_years', 'bmi', 'bmi_category', 'cholesterol_str',
+                          'gluc_str', 'gender_str', 'cardio_str', 'ap_hi', 'ap_lo']
+
+        missing_columns = [col for col in required_columns if col not in df.columns]
+        if missing_columns:
+            print(f"❌ 缺少必要的列: {missing_columns}")
+            return False
+
+        print("✅ 所有必要的列都存在")
+
+        # Print value ranges of the numeric columns (informational only, nothing is asserted here)
+        print("\n数据摘要:")
+        print(f"- 年龄范围: {df['age_years'].min()} ~ {df['age_years'].max()} 岁")
+        print(f"- BMI范围: {df['bmi'].min():.1f} ~ {df['bmi'].max():.1f}")
+        print(f"- 收缩压范围: {df['ap_hi'].min()} ~ {df['ap_hi'].max()} mmHg")
+        print(f"- 舒张压范围: {df['ap_lo'].min()} ~ {df['ap_lo'].max()} mmHg")
+
+        # Print the distribution of the categorical columns (informational only)
+        print("\n类别分布:")
+        print(f"- 性别: {df['gender_str'].value_counts().to_dict()}")
+        print(f"- 心血管疾病: {df['cardio_str'].value_counts().to_dict()}")
+        print(f"- BMI分类: {df['bmi_category'].value_counts().to_dict()}")
+        print(f"- 胆固醇水平: {df['cholesterol_str'].value_counts().to_dict()}")
+        print(f"- 血糖水平: {df['gluc_str'].value_counts().to_dict()}")
+
+        # Outlier handling: diastolic pressure must be strictly below systolic
+        invalid_bp = df['ap_lo'] >= df['ap_hi']
+        if invalid_bp.any():
+            print(f"❌ 仍然存在舒张压 >= 收缩压的记录: {invalid_bp.sum()} 条")
+            return False
+        else:
+            print("✅ 已成功删除舒张压 >= 收缩压的记录")
+
+        # Blood pressure must lie inside the accepted ranges
+        bp_in_range = ((df['ap_hi'] >= 90) & (df['ap_hi'] <= 250) &
+                      (df['ap_lo'] >= 60) & (df['ap_lo'] <= 150))
+        if not bp_in_range.all():
+            print(f"❌ 仍然存在血压异常值: {(~bp_in_range).sum()} 条")
+            return False
+        else:
+            print("✅ 所有血压值都在正常范围内")
+
+        print("\n🎉 所有测试通过！")
+        return True
+
+    except Exception as e:
+        # Any unexpected error counts as a failed test; show where it came from
+        print(f"❌ 测试过程中发生错误: {str(e)}")
+        import traceback
+        traceback.print_exc()
+        return False
+
+# Exit code 0 when all checks pass, 1 otherwise.
+if __name__ == "__main__":
+    success = test_data_loading()
+    sys.exit(0 if success else 1)
--- a/test/module2_predictor/pycache/app.cpython-310.pyc
+++ b/test/module2_predictor/pycache/app.cpython-310.pyc
--- a/test/module2_predictor/pycache/train_and_save.cpython-310.pyc
+++ b/test/module2_predictor/pycache/train_and_save.cpython-310.pyc
--- a/test/module2_predictor/app.py
+++ b/test/module2_predictor/app.py
@@ -0,0 +1,396 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+CardioAI - 心血管疾病预测API服务
+
+功能：
+1. 加载预训练的机器学习模型
+2. 提供RESTful API接口
+3. 接收原始特征值并返回预测结果
+4. 提供Web前端界面
+
+启动方式：
+conda activate cardioenv
+python app.py
+或
+flask run
+"""
+
+from flask import Flask, request, jsonify, render_template, send_from_directory
+import pandas as pd
+import numpy as np
+import joblib
+import logging
+from pathlib import Path
+import sys
+import os
+import traceback
+
+# Make the parent of this module's directory importable
+project_root = Path(__file__).parent.parent
+sys.path.append(str(project_root))
+
+# Logging configuration
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+)
+logger = logging.getLogger(__name__)
+
+# Flask application
+app = Flask(__name__)
+app.config['JSON_AS_ASCII'] = False  # keep non-ASCII (Chinese) text unescaped in JSON responses
+
+# Module-level state filled in by load_model():
+# the loaded bundle, its feature name list, and the prediction pipeline
+model_data = None
+feature_names = None
+pipeline = None
+
+def load_model():
+    """
+    Load the pre-trained model bundle from disk into the module globals.
+
+    The bundle is read with joblib from ``models/cardio_predictor_model.pkl``
+    next to this file. It must support ``bundle['pipeline']``; the
+    ``'feature_names'`` and ``'model_version'`` entries are optional.
+
+    The globals ``model_data``, ``feature_names`` and ``pipeline`` are only
+    assigned once the whole bundle has been read successfully, so a failed
+    load never leaves them partially updated (e.g. ``model_data`` set while
+    ``pipeline`` is still None).
+
+    Returns:
+        bool: True on success; False on any failure, which is logged
+        together with its traceback.
+    """
+    global model_data, feature_names, pipeline
+
+    try:
+        # Location of the model file
+        model_dir = Path(__file__).parent / "models"
+        model_path = model_dir / "cardio_predictor_model.pkl"
+
+        if not model_path.exists():
+            logger.error(f"模型文件不存在: {model_path}")
+            raise FileNotFoundError(f"模型文件不存在: {model_path}")
+
+        # NOTE(security): joblib.load unpickles the file and can therefore
+        # execute arbitrary code; the model file must come from a trusted source.
+        logger.info(f"正在加载模型: {model_path}")
+        loaded_data = joblib.load(model_path)
+
+        # Read everything into locals first; a missing 'pipeline' entry
+        # raises here, before any global has been touched.
+        loaded_pipeline = loaded_data['pipeline']
+        loaded_feature_names = loaded_data.get('feature_names', [])
+
+        # Publish the fully loaded bundle
+        model_data = loaded_data
+        feature_names = loaded_feature_names
+        pipeline = loaded_pipeline
+
+        logger.info(f"模型加载成功！版本: {model_data.get('model_version', '未知')}")
+        logger.info(f"特征数量: {len(feature_names)}")
+        logger.info(f"特征列表: {feature_names}")
+
+        return True
+
+    except Exception as e:
+        logger.error(f"模型加载失败: {str(e)}")
+        logger.error(traceback.format_exc())
+        return False
+
+def preprocess_input(input_data):
+    """
+    Turn one raw input record into the single-row feature frame fed to the model.
+
+    Args:
+        input_data: dict of raw feature values. Must provide ``age`` (in days)
+            or ``age_years``, and either ``height`` (cm) plus ``weight`` (kg)
+            or a ready-made ``bmi``, in addition to the remaining model features.
+
+    Returns:
+        pd.DataFrame: one row holding the model features in a fixed column order.
+
+    Raises:
+        ValueError: when the age or BMI inputs are missing, or when any model
+            feature is absent. Every failure is logged before it is re-raised.
+    """
+    # Column order expected by the model
+    model_features = ['age_years', 'bmi', 'ap_hi', 'ap_lo',
+                      'gender', 'cholesterol', 'gluc',
+                      'smoke', 'alco', 'active']
+
+    try:
+        frame = pd.DataFrame([input_data])
+        columns = frame.columns
+
+        # 1. Age: days -> whole years (rounded); an explicit age_years is used as-is
+        if 'age' in columns:
+            rounded_years = (frame['age'] / 365.25).round()
+            frame['age_years'] = rounded_years.astype(int)
+        elif 'age_years' in columns:
+            frame['age_years'] = frame['age_years'].astype(int)
+        else:
+            raise ValueError("输入数据中必须包含'age'或'age_years'字段")
+
+        # 2. BMI = weight(kg) / height(m)^2, rounded to 2 decimals; an explicit bmi is used as-is
+        if 'height' in columns and 'weight' in columns:
+            height_m = frame['height'] / 100
+            frame['bmi'] = (frame['weight'] / (height_m ** 2)).round(2)
+        elif 'bmi' in columns:
+            frame['bmi'] = frame['bmi'].astype(float)
+        else:
+            raise ValueError("输入数据中必须包含'height'和'weight'字段或'bmi'字段")
+
+        # 3. Every model feature has to be present by now
+        absent = [name for name in model_features if name not in frame.columns]
+        if absent:
+            raise ValueError(f"缺少必要特征: {absent}")
+
+        # 4. Keep only the model features, in model order
+        processed_df = frame[model_features].copy()
+
+        logger.debug(f"预处理后的特征数据框:\n{processed_df}")
+        return processed_df
+
+    except Exception as e:
+        logger.error(f"数据预处理失败: {str(e)}")
+        raise
+
+def validate_input(input_data):
+    """
+    Validate the raw feature dictionary of a prediction request.
+
+    Required fields whose value is not already an int/float are converted to
+    ``float`` in place, so after a successful call every required field of
+    ``input_data`` holds a number. Each field is then checked against its
+    allowed range; categorical and flag fields (gender, cholesterol, gluc,
+    smoke, alco, active) must additionally be whole numbers. Finally the
+    diastolic pressure must be strictly below the systolic pressure.
+
+    Args:
+        input_data: dict of raw feature values (modified in place as described).
+
+    Returns:
+        tuple: ``(is_valid, message)`` where ``message`` explains the first
+        problem found, or confirms that the input is valid.
+    """
+    try:
+        # Anything but a JSON object cannot carry named fields
+        if not isinstance(input_data, dict):
+            return False, "输入数据必须是JSON对象"
+
+        # Required fields
+        required_fields = ['age', 'gender', 'height', 'weight',
+                          'ap_hi', 'ap_lo', 'cholesterol', 'gluc',
+                          'smoke', 'alco', 'active']
+
+        missing_fields = [f for f in required_fields if f not in input_data]
+        if missing_fields:
+            return False, f"缺少必需字段: {missing_fields}"
+
+        # Coerce non-numeric values (e.g. numeric strings) to float
+        for field in required_fields:
+            value = input_data[field]
+            if not isinstance(value, (int, float)):
+                try:
+                    input_data[field] = float(value)
+                except (TypeError, ValueError):
+                    # TypeError covers None / lists / objects, ValueError covers bad strings
+                    return False, f"字段'{field}'必须为数值类型，当前值: {value}"
+
+        # (field, minimum, maximum, must be a whole number)
+        validations = [
+            ('age', 0, 365*150, False),      # age in days: 0-150 years
+            ('gender', 1, 2, True),          # 1 or 2
+            ('height', 100, 250, False),     # cm
+            ('weight', 20, 300, False),      # kg
+            ('ap_hi', 50, 300, False),       # systolic pressure
+            ('ap_lo', 30, 200, False),       # diastolic pressure
+            ('cholesterol', 1, 3, True),     # level 1-3
+            ('gluc', 1, 3, True),            # level 1-3
+            ('smoke', 0, 1, True),           # 0 or 1
+            ('alco', 0, 1, True),            # 0 or 1
+            ('active', 0, 1, True)           # 0 or 1
+        ]
+
+        for field, min_val, max_val, integer_only in validations:
+            value = input_data[field]
+            # NaN fails this comparison as well and is rejected here
+            if not (min_val <= value <= max_val):
+                return False, f"字段'{field}'的值{value}超出有效范围[{min_val}, {max_val}]"
+            # Codes such as gender=1.5 or smoke=0.5 have no meaning
+            if integer_only and value != int(value):
+                return False, f"字段'{field}'的值{value}必须为整数"
+
+        # Blood pressure plausibility
+        if input_data['ap_lo'] >= input_data['ap_hi']:
+            return False, "舒张压不能高于或等于收缩压"
+
+        return True, "输入数据有效"
+
+    except Exception as e:
+        return False, f"输入数据验证失败: {str(e)}"
+
+@app.route('/')
+def index():
+    """Home page: renders the ``index.html`` template (the web front end)."""
+    return render_template('index.html')
+
+@app.route('/predict_cardio', methods=['POST'])
+def predict_cardio():
+    """
+    Cardiovascular disease prediction endpoint.
+
+    Request body (JSON object):
+    {
+        "age": 20228,        # age in days
+        "gender": 1,         # 1 = female, 2 = male
+        "height": 156,       # cm
+        "weight": 85,        # kg
+        "ap_hi": 140,        # systolic pressure (mmHg)
+        "ap_lo": 90,         # diastolic pressure (mmHg)
+        "cholesterol": 1,    # 1 = normal, 2 = above normal, 3 = well above normal
+        "gluc": 1,           # 1 = normal, 2 = above normal, 3 = well above normal
+        "smoke": 0,          # 0 = no, 1 = yes
+        "alco": 0,           # 0 = no, 1 = yes
+        "active": 1          # physical activity: 0 = no, 1 = yes
+    }
+
+    Successful response (JSON, HTTP 200):
+    {
+        "success": true,
+        "prediction": 1,
+        "probability": 0.85,
+        "risk_level": "高危",
+        "message": "预测成功",
+        "features": {
+            "age_years": 55,
+            "bmi": 34.9,
+            ... the remaining processed features
+        }
+    }
+
+    Error responses carry ``"success": false`` and a ``"message"``:
+    503 when no model is loaded, 400 for a non-JSON, malformed or invalid
+    request body, and 500 for unexpected failures during prediction.
+    """
+    try:
+        # The model has to be available
+        if pipeline is None:
+            return jsonify({
+                "success": False,
+                "message": "模型未加载，请等待或联系管理员"
+            }), 503
+
+        # The request has to declare a JSON body
+        if not request.is_json:
+            return jsonify({
+                "success": False,
+                "message": "请求必须是JSON格式"
+            }), 400
+
+        # silent=True yields None for an unparsable body instead of raising
+        # BadRequest, which the broad handler below would report as a 500.
+        input_data = request.get_json(silent=True)
+        if not isinstance(input_data, dict):
+            return jsonify({
+                "success": False,
+                "message": "请求体必须是有效的JSON对象"
+            }), 400
+
+        logger.info(f"收到预测请求: {input_data}")
+
+        # Validate the input (also coerces numeric strings in place)
+        is_valid, error_message = validate_input(input_data)
+        if not is_valid:
+            return jsonify({
+                "success": False,
+                "message": error_message
+            }), 400
+
+        # Derive the model features
+        processed_df = preprocess_input(input_data)
+
+        # Predict the class and the probability of class 1
+        prediction = pipeline.predict(processed_df)[0]
+        probability = pipeline.predict_proba(processed_df)[0][1]
+
+        # Map the probability to a risk level: <0.3 low, <0.6 medium, otherwise high
+        if probability < 0.3:
+            risk_level = "低危"
+        elif probability < 0.6:
+            risk_level = "中危"
+        else:
+            risk_level = "高危"
+
+        # Build the response; values are cast to plain Python types so they serialize as JSON
+        response_data = {
+            "success": True,
+            "prediction": int(prediction),
+            "probability": float(round(probability, 4)),
+            "risk_level": risk_level,
+            "message": "预测成功",
+            "features": {
+                "age_years": int(processed_df['age_years'].iloc[0]),
+                "bmi": float(round(processed_df['bmi'].iloc[0], 2)),
+                "ap_hi": int(processed_df['ap_hi'].iloc[0]),
+                "ap_lo": int(processed_df['ap_lo'].iloc[0]),
+                "gender": int(processed_df['gender'].iloc[0]),
+                "cholesterol": int(processed_df['cholesterol'].iloc[0]),
+                "gluc": int(processed_df['gluc'].iloc[0]),
+                "smoke": int(processed_df['smoke'].iloc[0]),
+                "alco": int(processed_df['alco'].iloc[0]),
+                "active": int(processed_df['active'].iloc[0])
+            }
+        }
+
+        logger.info(f"预测结果: {response_data}")
+        return jsonify(response_data), 200
+
+    except Exception as e:
+        error_msg = f"预测过程中发生错误: {str(e)}"
+        logger.error(error_msg)
+        logger.error(traceback.format_exc())
+        return jsonify({
+            "success": False,
+            "message": error_msg
+        }), 500
+
+@app.route('/health', methods=['GET'])
+def health_check():
+    """
+    Health-check endpoint.
+
+    Answers 503 "unhealthy" while no pipeline is loaded. Otherwise one fixed
+    sample record is pushed through preprocessing and prediction as a smoke
+    test and the endpoint answers 200 with the model version and feature
+    count. Any exception raised on the way yields a 500 "unhealthy" response.
+    """
+    try:
+        if pipeline is None:
+            not_loaded = {
+                "status": "unhealthy",
+                "message": "模型未加载"
+            }
+            return jsonify(not_loaded), 503
+
+        # Fixed sample used to exercise the whole prediction path
+        sample_record = dict(
+            age=20228,
+            gender=1,
+            height=156,
+            weight=85,
+            ap_hi=140,
+            ap_lo=90,
+            cholesterol=1,
+            gluc=1,
+            smoke=0,
+            alco=0,
+            active=1,
+        )
+
+        # Only success matters here; the prediction itself is discarded
+        pipeline.predict(preprocess_input(sample_record))
+
+        version = model_data.get('model_version', '未知')
+        feature_count = len(feature_names) if feature_names else 0
+        healthy = {
+            "status": "healthy",
+            "model_version": version,
+            "features": feature_count,
+            "message": "模型服务运行正常"
+        }
+        return jsonify(healthy), 200
+
+    except Exception as e:
+        failure = {
+            "status": "unhealthy",
+            "message": f"健康检查失败: {str(e)}"
+        }
+        return jsonify(failure), 500
+
+@app.route('/model_info', methods=['GET'])
+def model_info():
+    """
+    Return metadata about the loaded model.
+
+    Answers 200 with the model version, description, feature count and
+    feature list, or 503 when no model bundle has been loaded.
+    """
+    if model_data is not None:
+        info = {
+            "success": True,
+            "model_version": model_data.get('model_version', '未知'),
+            "description": model_data.get('description', 'CardioAI心血管疾病预测模型'),
+            "feature_count": len(feature_names) if feature_names else 0,
+            "features": feature_names if feature_names else []
+        }
+        return jsonify(info), 200
+
+    # No bundle available
+    unavailable = {
+        "success": False,
+        "message": "模型未加载"
+    }
+    return jsonify(unavailable), 503
+
+# 模型加载标志
+_model_loaded = False
+
+@app.before_request
+def ensure_model_loaded():
+    """
+    Make sure the model is loaded before a request is handled.
+
+    Covers start-up paths that do not go through the ``__main__`` block
+    (e.g. ``flask run``) by loading the model lazily on the first request.
+    When the pipeline is already present, because the ``__main__`` block
+    loaded it, the flag is simply set and the model is not loaded a second
+    time. A failed load is logged and retried on the next request.
+    """
+    global _model_loaded
+
+    if _model_loaded:
+        return
+
+    # Already loaded elsewhere (script start-up): just remember that
+    if pipeline is not None:
+        _model_loaded = True
+        return
+
+    logger.info("首次请求，正在加载模型...")
+    success = load_model()
+    if success:
+        _model_loaded = True
+        logger.info("模型加载完成")
+    else:
+        logger.error("模型加载失败")
+
+if __name__ == '__main__':
+    # Load the model up front so a broken model file stops the launch immediately
+    success = load_model()
+    if not success:
+        logger.error("启动失败: 模型加载失败")
+        sys.exit(1)
+
+    # Debug mode turns on Werkzeug's interactive debugger, which lets anyone
+    # who can reach the server execute arbitrary code. It is therefore opt-in
+    # (FLASK_DEBUG=1) and, when enabled, the server only listens on loopback.
+    debug_enabled = os.environ.get('FLASK_DEBUG', '').strip().lower() in ('1', 'true', 'yes', 'on')
+    host = '127.0.0.1' if debug_enabled else '0.0.0.0'
+
+    # Start the Flask application
+    logger.info("启动CardioAI预测API服务...")
+    logger.info("访问 http://localhost:5000 使用预测界面")
+    logger.info("API文档:")
+    logger.info("  GET  /              - 前端界面")
+    logger.info("  POST /predict_cardio - 预测接口")
+    logger.info("  GET  /health       - 健康检查")
+    logger.info("  GET  /model_info   - 模型信息")
+
+    app.run(host=host, port=5000, debug=debug_enabled)
--- a/test/module2_predictor/models/cardio_predictor_model.pkl
+++ b/test/module2_predictor/models/cardio_predictor_model.pkl
--- a/test/module2_predictor/models/feature_info.txt
+++ b/test/module2_predictor/models/feature_info.txt
@@ -0,0 +1,27 @@
+CardioAI模型特征信息
+==================================================
+
+特征列表（按输入顺序）:
+ 1. age_years
+ 2. bmi
+ 3. ap_hi
+ 4. ap_lo
+ 5. gender
+ 6. cholesterol
+ 7. gluc
+ 8. smoke
+ 9. alco
+10. active
+
+
+特征说明:
+- age_years: 年龄（岁），由原始天数转换而来
+- bmi: 身体质量指数，计算公式：体重(kg) / (身高(m)^2)
+- ap_hi: 收缩压（mmHg）
+- ap_lo: 舒张压（mmHg）
+- gender: 性别（1=女性，2=男性）
+- cholesterol: 胆固醇水平（1=正常，2=高于正常，3=极高）
+- gluc: 血糖水平（1=正常，2=高于正常，3=极高）
+- smoke: 吸烟（0=否，1=是）
+- alco: 饮酒（0=否，1=是）
+- active: 体育活动（0=否，1=是）
--- a/test/module2_predictor/templates/index.html
+++ b/test/module2_predictor/templates/index.html
@@ -0,0 +1,858 @@
+<!DOCTYPE html>
+<html lang="zh-CN">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>CardioAI - 心血管疾病风险预测</title>
+    <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/css/bootstrap.min.css">
+    <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css">
+    <style>
+        :root {
+            --primary-color: #e63946;
+            --secondary-color: #457b9d;
+            --success-color: #2a9d8f;
+            --warning-color: #e9c46a;
+            --danger-color: #e63946;
+            --light-color: #f1faee;
+            --dark-color: #1d3557;
+        }
+
+        body {
+            font-family: 'Microsoft YaHei', 'Segoe UI', sans-serif;
+            background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
+            min-height: 100vh;
+            padding-bottom: 50px;
+        }
+
+        .navbar {
+            background: linear-gradient(to right, var(--dark-color), var(--secondary-color));
+            box-shadow: 0 4px 12px rgba(0, 0, 0, 0.1);
+        }
+
+        .navbar-brand {
+            font-weight: bold;
+            font-size: 1.5rem;
+            color: white !important;
+        }
+
+        .card {
+            border: none;
+            border-radius: 15px;
+            box-shadow: 0 10px 30px rgba(0, 0, 0, 0.1);
+            transition: transform 0.3s ease;
+            margin-bottom: 20px;
+        }
+
+        .card:hover {
+            transform: translateY(-5px);
+        }
+
+        .card-header {
+            background: linear-gradient(to right, var(--secondary-color), var(--dark-color));
+            color: white;
+            border-radius: 15px 15px 0 0 !important;
+            font-weight: bold;
+            padding: 15px 20px;
+        }
+
+        .form-control, .form-select {
+            border-radius: 8px;
+            border: 1px solid #ddd;
+            padding: 10px 15px;
+            transition: all 0.3s;
+        }
+
+        .form-control:focus, .form-select:focus {
+            border-color: var(--secondary-color);
+            box-shadow: 0 0 0 0.25rem rgba(69, 123, 157, 0.25);
+        }
+
+        .btn-primary {
+            background: linear-gradient(to right, var(--primary-color), var(--secondary-color));
+            border: none;
+            border-radius: 8px;
+            padding: 12px 30px;
+            font-weight: bold;
+            transition: all 0.3s;
+        }
+
+        .btn-primary:hover {
+            transform: translateY(-2px);
+            box-shadow: 0 5px 15px rgba(230, 57, 70, 0.3);
+        }
+
+        .btn-secondary {
+            background: linear-gradient(to right, var(--dark-color), #2c3e50);
+            border: none;
+            border-radius: 8px;
+            padding: 12px 30px;
+            font-weight: bold;
+            transition: all 0.3s;
+        }
+
+        .btn-secondary:hover {
+            transform: translateY(-2px);
+            box-shadow: 0 5px 15px rgba(29, 53, 87, 0.3);
+        }
+
+        .result-card {
+            border-left: 5px solid var(--secondary-color);
+        }
+
+        .risk-low {
+            color: var(--success-color);
+            font-weight: bold;
+        }
+
+        .risk-medium {
+            color: var(--warning-color);
+            font-weight: bold;
+        }
+
+        .risk-high {
+            color: var(--danger-color);
+            font-weight: bold;
+        }
+
+        .feature-value {
+            background-color: var(--light-color);
+            padding: 5px 10px;
+            border-radius: 5px;
+            font-family: monospace;
+            font-weight: bold;
+        }
+
+        .loading {
+            display: none;
+            text-align: center;
+            padding: 20px;
+        }
+
+        .spinner {
+            width: 3rem;
+            height: 3rem;
+            border-width: 0.3em;
+        }
+
+        .alert {
+            border-radius: 10px;
+            border: none;
+        }
+
+        .feature-group {
+            margin-bottom: 15px;
+        }
+
+        .feature-label {
+            font-weight: 600;
+            margin-bottom: 5px;
+            color: var(--dark-color);
+        }
+
+        .help-text {
+            font-size: 0.85rem;
+            color: #6c757d;
+            margin-top: 3px;
+        }
+
+        footer {
+            background-color: var(--dark-color);
+            color: white;
+            padding: 20px 0;
+            margin-top: 40px;
+            border-radius: 15px 15px 0 0;
+        }
+
+        .heart-icon {
+            color: var(--primary-color);
+            animation: heartbeat 1.5s infinite;
+        }
+
+        @keyframes heartbeat {
+            0% { transform: scale(1); }
+            5% { transform: scale(1.1); }
+            10% { transform: scale(1); }
+            15% { transform: scale(1.1); }
+            20% { transform: scale(1); }
+            100% { transform: scale(1); }
+        }
+
+        .tooltip-inner {
+            max-width: 300px;
+            text-align: left;
+        }
+    </style>
+</head>
+<body>
+    <!-- 导航栏 -->
+    <nav class="navbar navbar-expand-lg navbar-dark">
+        <div class="container">
+            <a class="navbar-brand" href="#">
+                <i class="fas fa-heartbeat me-2 heart-icon"></i>
+                CardioAI - 心血管疾病风险预测系统
+            </a>
+            <button class="navbar-toggler" type="button" data-bs-toggle="collapse" data-bs-target="#navbarNav">
+                <span class="navbar-toggler-icon"></span>
+            </button>
+            <div class="collapse navbar-collapse" id="navbarNav">
+                <ul class="navbar-nav ms-auto">
+                    <li class="nav-item">
+                        <a class="nav-link active" href="#"><i class="fas fa-home me-1"></i> 首页</a>
+                    </li>
+                    <li class="nav-item">
+                        <a class="nav-link" href="/health" target="_blank"><i class="fas fa-heart me-1"></i> 服务状态</a>
+                    </li>
+                    <li class="nav-item">
+                        <a class="nav-link" href="/model_info" target="_blank"><i class="fas fa-info-circle me-1"></i> 模型信息</a>
+                    </li>
+                </ul>
+            </div>
+        </div>
+    </nav>
+
+    <!-- 主内容区 -->
+    <div class="container mt-4">
+        <div class="row">
+            <!-- 左侧：输入表单 -->
+            <div class="col-lg-6">
+                <div class="card">
+                    <div class="card-header">
+                        <i class="fas fa-clipboard-list me-2"></i> 患者基本信息输入
+                    </div>
+                    <div class="card-body">
+                        <form id="predictionForm">
+                            <!-- 年龄 -->
+                            <div class="feature-group">
+                                <label class="feature-label" for="age">
+                                    <i class="fas fa-birthday-cake me-1"></i> 年龄（天）
+                                </label>
+                                <input type="number" class="form-control" id="age" name="age"
+                                       placeholder="请输入年龄（天数）" min="0" max="36500" required
+                                       data-bs-toggle="tooltip" data-bs-placement="top"
+                                       title="输入年龄，单位为天。例如：55岁 = 55 × 365 = 20075天">
+                                <div class="help-text">示例：55岁 ≈ 20075天</div>
+                            </div>
+
+                            <!-- 性别 -->
+                            <div class="feature-group">
+                                <label class="feature-label" for="gender">
+                                    <i class="fas fa-venus-mars me-1"></i> 性别
+                                </label>
+                                <select class="form-select" id="gender" name="gender" required>
+                                    <option value="">请选择性别</option>
+                                    <option value="1">女性</option>
+                                    <option value="2">男性</option>
+                                </select>
+                                <div class="help-text">1=女性，2=男性</div>
+                            </div>
+
+                            <div class="row">
+                                <!-- 身高 -->
+                                <div class="col-md-6">
+                                    <div class="feature-group">
+                                        <label class="feature-label" for="height">
+                                            <i class="fas fa-ruler-vertical me-1"></i> 身高（cm）
+                                        </label>
+                                        <input type="number" class="form-control" id="height" name="height"
+                                               placeholder="身高（厘米）" min="100" max="250" required>
+                                        <div class="help-text">范围：100-250 cm</div>
+                                    </div>
+                                </div>
+
+                                <!-- 体重 -->
+                                <div class="col-md-6">
+                                    <div class="feature-group">
+                                        <label class="feature-label" for="weight">
+                                            <i class="fas fa-weight me-1"></i> 体重（kg）
+                                        </label>
+                                        <input type="number" class="form-control" id="weight" name="weight"
+                                               placeholder="体重（千克）" min="20" max="300" required>
+                                        <div class="help-text">范围：20-300 kg</div>
+                                    </div>
+                                </div>
+                            </div>
+
+                            <div class="row">
+                                <!-- 收缩压 -->
+                                <div class="col-md-6">
+                                    <div class="feature-group">
+                                        <label class="feature-label" for="ap_hi">
+                                            <i class="fas fa-tachometer-alt me-1"></i> 收缩压（mmHg）
+                                        </label>
+                                        <input type="number" class="form-control" id="ap_hi" name="ap_hi"
+                                               placeholder="收缩压" min="50" max="300" required>
+                                        <div class="help-text">范围：50-300 mmHg</div>
+                                    </div>
+                                </div>
+
+                                <!-- 舒张压 -->
+                                <div class="col-md-6">
+                                    <div class="feature-group">
+                                        <label class="feature-label" for="ap_lo">
+                                            <i class="fas fa-tachometer-alt me-1"></i> 舒张压（mmHg）
+                                        </label>
+                                        <input type="number" class="form-control" id="ap_lo" name="ap_lo"
+                                               placeholder="舒张压" min="30" max="200" required>
+                                        <div class="help-text">范围：30-200 mmHg</div>
+                                    </div>
+                                </div>
+                            </div>
+
+                            <!-- 胆固醇水平 -->
+                            <div class="feature-group">
+                                <label class="feature-label" for="cholesterol">
+                                    <i class="fas fa-vial me-1"></i> 胆固醇水平
+                                </label>
+                                <select class="form-select" id="cholesterol" name="cholesterol" required>
+                                    <option value="">请选择胆固醇水平</option>
+                                    <option value="1">正常</option>
+                                    <option value="2">高于正常</option>
+                                    <option value="3">极高</option>
+                                </select>
+                                <div class="help-text">1=正常，2=高于正常，3=极高</div>
+                            </div>
+
+                            <!-- 血糖水平 -->
+                            <div class="feature-group">
+                                <label class="feature-label" for="gluc">
+                                    <i class="fas fa-vial me-1"></i> 血糖水平
+                                </label>
+                                <select class="form-select" id="gluc" name="gluc" required>
+                                    <option value="">请选择血糖水平</option>
+                                    <option value="1">正常</option>
+                                    <option value="2">高于正常</option>
+                                    <option value="3">极高</option>
+                                </select>
+                                <div class="help-text">1=正常，2=高于正常，3=极高</div>
+                            </div>
+
+                            <!-- 生活方式 -->
+                            <div class="row">
+                                <div class="col-md-4">
+                                    <div class="feature-group">
+                                        <label class="feature-label" for="smoke">
+                                            <i class="fas fa-smoking me-1"></i> 吸烟
+                                        </label>
+                                        <select class="form-select" id="smoke" name="smoke" required>
+                                            <option value="0">否</option>
+                                            <option value="1">是</option>
+                                        </select>
+                                    </div>
+                                </div>
+
+                                <div class="col-md-4">
+                                    <div class="feature-group">
+                                        <label class="feature-label" for="alco">
+                                            <i class="fas fa-wine-glass-alt me-1"></i> 饮酒
+                                        </label>
+                                        <select class="form-select" id="alco" name="alco" required>
+                                            <option value="0">否</option>
+                                            <option value="1">是</option>
+                                        </select>
+                                    </div>
+                                </div>
+
+                                <div class="col-md-4">
+                                    <div class="feature-group">
+                                        <label class="feature-label" for="active">
+                                            <i class="fas fa-running me-1"></i> 体育活动
+                                        </label>
+                                        <select class="form-select" id="active" name="active" required>
+                                            <option value="0">否</option>
+                                            <option value="1">是</option>
+                                        </select>
+                                    </div>
+                                </div>
+                            </div>
+
+                            <!-- 按钮组 -->
+                            <div class="d-grid gap-2 d-md-flex justify-content-md-end mt-4">
+                                <button type="button" class="btn btn-secondary me-md-2" id="btnReset">
+                                    <i class="fas fa-redo me-1"></i> 重置表单
+                                </button>
+                                <button type="submit" class="btn btn-primary" id="btnPredict">
+                                    <i class="fas fa-stethoscope me-1"></i> 开始预测
+                                </button>
+                            </div>
+                        </form>
+
+                        <!-- 加载动画 -->
+                        <div class="loading mt-4" id="loading">
+                            <div class="spinner-border text-primary spinner" role="status">
+                                <span class="visually-hidden">加载中...</span>
+                            </div>
+                            <p class="mt-3">正在分析数据，请稍候...</p>
+                        </div>
+                    </div>
+                </div>
+
+                <!-- 示例数据卡片 -->
+                <div class="card">
+                    <div class="card-header">
+                        <i class="fas fa-lightbulb me-2"></i> 示例数据
+                    </div>
+                    <div class="card-body">
+                        <p class="card-text">点击下方按钮填充示例数据：</p>
+                        <div class="d-grid gap-2">
+                            <button type="button" class="btn btn-outline-primary" id="btnExampleLow">
+                                <i class="fas fa-user-check me-1"></i> 低风险示例
+                            </button>
+                            <button type="button" class="btn btn-outline-warning" id="btnExampleMedium">
+                                <i class="fas fa-user me-1"></i> 中风险示例
+                            </button>
+                            <button type="button" class="btn btn-outline-danger" id="btnExampleHigh">
+                                <i class="fas fa-user-injured me-1"></i> 高风险示例
+                            </button>
+                        </div>
+                    </div>
+                </div>
+            </div>
+
+            <!-- 右侧：结果显示 -->
+            <div class="col-lg-6">
+                <div class="card result-card">
+                    <div class="card-header">
+                        <i class="fas fa-chart-line me-2"></i> 预测结果分析
+                    </div>
+                    <div class="card-body">
+                        <div id="resultPlaceholder" class="text-center">
+                            <i class="fas fa-chart-bar fa-4x text-muted mb-3"></i>
+                            <h5 class="text-muted">等待预测结果</h5>
+                            <p class="text-muted">填写左侧表单并点击"开始预测"按钮，系统将分析您的心血管疾病风险。</p>
+                        </div>
+
+                        <div id="resultContent" style="display: none;">
+                            <!-- 风险等级 -->
+                            <div class="alert" id="riskAlert">
+                                <h4 class="alert-heading" id="riskTitle"></h4>
+                                <p id="riskDescription"></p>
+                                <hr>
+                                <p class="mb-0" id="riskRecommendation"></p>
+                            </div>
+
+                            <!-- 预测结果详情 -->
+                            <div class="mt-4">
+                                <h5><i class="fas fa-info-circle me-2"></i> 预测详情</h5>
+                                <table class="table table-borderless">
+                                    <tr>
+                                        <th width="40%">预测结果：</th>
+                                        <td><span class="badge bg-primary" id="predictionResult"></span></td>
+                                    </tr>
+                                    <tr>
+                                        <th>患病概率：</th>
+                                        <td><span class="feature-value" id="probabilityValue"></span></td>
+                                    </tr>
+                                    <tr>
+                                        <th>风险等级：</th>
+                                        <td><span id="riskLevel"></span></td>
+                                    </tr>
+                                    <tr>
+                                        <th>处理后的年龄：</th>
+                                        <td><span class="feature-value" id="ageYears"></span> 岁</td>
+                                    </tr>
+                                    <tr>
+                                        <th>身体质量指数（BMI）：</th>
+                                        <td><span class="feature-value" id="bmiValue"></span></td>
+                                    </tr>
+                                </table>
+                            </div>
+
+                            <!-- 特征总结 -->
+                            <div class="mt-4">
+                                <h5><i class="fas fa-list-ul me-2"></i> 输入特征总结</h5>
+                                <div class="row" id="featureSummary">
+                                    <!-- 特征将通过JavaScript动态填充 -->
+                                </div>
+                            </div>
+
+                            <!-- 行动建议 -->
+                            <div class="alert alert-info mt-4">
+                                <h5><i class="fas fa-hands-helping me-2"></i> 健康建议</h5>
+                                <ul id="healthAdvice">
+                                    <!-- 建议将通过JavaScript动态填充 -->
+                                </ul>
+                            </div>
+                        </div>
+                    </div>
+                </div>
+
+                <!-- 系统信息 -->
+                <div class="card">
+                    <div class="card-header">
+                        <i class="fas fa-cogs me-2"></i> 系统信息
+                    </div>
+                    <div class="card-body">
+                        <div class="row">
+                            <div class="col-md-6">
+                                <p><strong><i class="fas fa-server me-2"></i> 服务状态：</strong>
+                                    <span class="badge bg-success" id="serviceStatus">正常</span>
+                                </p>
+                                <p><strong><i class="fas fa-brain me-2"></i> 预测模型：</strong>
+                                    <span id="modelName">CardioAI XGBoost</span>
+                                </p>
+                            </div>
+                            <div class="col-md-6">
+                                <p><strong><i class="fas fa-history me-2"></i> 响应时间：</strong>
+                                    <span id="responseTime">--</span> ms
+                                </p>
+                                <p><strong><i class="fas fa-calendar-alt me-2"></i> 最后更新：</strong>
+                                    <span id="lastUpdate">2024-04-02</span>
+                                </p>
+                            </div>
+                        </div>
+                        <div class="d-grid gap-2 d-md-flex justify-content-md-end mt-2">
+                            <button class="btn btn-sm btn-outline-secondary" id="btnRefreshStatus">
+                                <i class="fas fa-sync-alt me-1"></i> 刷新状态
+                            </button>
+                        </div>
+                    </div>
+                </div>
+            </div>
+        </div>
+    </div>
+
+    <!-- 页脚 -->
+    <footer>
+        <div class="container text-center">
+            <p class="mb-2">
+                <i class="fas fa-heartbeat me-2 heart-icon"></i>
+                CardioAI - 心血管疾病智能辅助系统 v1.0
+            </p>
+            <p class="small mb-0">
+                本系统基于机器学习模型提供风险评估，结果仅供参考，不能替代专业医疗诊断。
+                <br>
+                如有健康问题，请及时咨询专业医生。
+            </p>
+        </div>
+    </footer>
+
+    <!-- Bootstrap JavaScript -->
+    <script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/js/bootstrap.bundle.min.js"></script>
+
+    <!-- 自定义JavaScript -->
+    <script>
+        // 页面加载完成后初始化
+        document.addEventListener('DOMContentLoaded', function() {
+            // 初始化工具提示
+            const tooltipTriggerList = [].slice.call(document.querySelectorAll('[data-bs-toggle="tooltip"]'));
+            tooltipTriggerList.map(function (tooltipTriggerEl) {
+                return new bootstrap.Tooltip(tooltipTriggerEl);
+            });
+
+            // 检查服务状态
+            checkServiceStatus();
+
+            // 绑定事件
+            document.getElementById('predictionForm').addEventListener('submit', handlePrediction);
+            document.getElementById('btnReset').addEventListener('click', resetForm);
+            document.getElementById('btnRefreshStatus').addEventListener('click', checkServiceStatus);
+
+            // 示例数据按钮
+            document.getElementById('btnExampleLow').addEventListener('click', () => fillExampleData('low'));
+            document.getElementById('btnExampleMedium').addEventListener('click', () => fillExampleData('medium'));
+            document.getElementById('btnExampleHigh').addEventListener('click', () => fillExampleData('high'));
+
+            // 初始填充低风险示例
+            setTimeout(() => fillExampleData('low'), 500);
+        });
+
+        // 检查服务状态
+        async function checkServiceStatus() {
+            try {
+                const response = await fetch('/health');
+                const data = await response.json();
+
+                if (data.status === 'healthy') {
+                    document.getElementById('serviceStatus').className = 'badge bg-success';
+                    document.getElementById('serviceStatus').textContent = '正常';
+                    document.getElementById('modelName').textContent = data.model_version || 'CardioAI XGBoost';
+                } else {
+                    document.getElementById('serviceStatus').className = 'badge bg-danger';
+                    document.getElementById('serviceStatus').textContent = '异常';
+                }
+            } catch (error) {
+                console.error('服务状态检查失败:', error);
+                document.getElementById('serviceStatus').className = 'badge bg-danger';
+                document.getElementById('serviceStatus').textContent = '连接失败';
+            }
+        }
+
+        // 处理预测表单提交
+        async function handlePrediction(event) {
+            event.preventDefault();
+
+            // 显示加载动画
+            document.getElementById('loading').style.display = 'block';
+            document.getElementById('btnPredict').disabled = true;
+
+            // 收集表单数据
+            const formData = {
+                age: parseInt(document.getElementById('age').value),
+                gender: parseInt(document.getElementById('gender').value),
+                height: parseInt(document.getElementById('height').value),
+                weight: parseInt(document.getElementById('weight').value),
+                ap_hi: parseInt(document.getElementById('ap_hi').value),
+                ap_lo: parseInt(document.getElementById('ap_lo').value),
+                cholesterol: parseInt(document.getElementById('cholesterol').value),
+                gluc: parseInt(document.getElementById('gluc').value),
+                smoke: parseInt(document.getElementById('smoke').value),
+                alco: parseInt(document.getElementById('alco').value),
+                active: parseInt(document.getElementById('active').value)
+            };
+
+            // 验证血压
+            if (formData.ap_lo >= formData.ap_hi) {
+                alert('错误：舒张压不能高于或等于收缩压');
+                document.getElementById('loading').style.display = 'none';
+                document.getElementById('btnPredict').disabled = false;
+                return;
+            }
+
+            try {
+                const startTime = Date.now();
+
+                // 发送预测请求
+                const response = await fetch('/predict_cardio', {
+                    method: 'POST',
+                    headers: {
+                        'Content-Type': 'application/json',
+                    },
+                    body: JSON.stringify(formData)
+                });
+
+                const responseTime = Date.now() - startTime;
+                document.getElementById('responseTime').textContent = responseTime;
+
+                const data = await response.json();
+
+                // 隐藏加载动画
+                document.getElementById('loading').style.display = 'none';
+                document.getElementById('btnPredict').disabled = false;
+
+                if (data.success) {
+                    // 显示结果
+                    displayPredictionResult(data);
+                } else {
+                    alert('预测失败：' + data.message);
+                }
+
+            } catch (error) {
+                console.error('预测请求失败:', error);
+                document.getElementById('loading').style.display = 'none';
+                document.getElementById('btnPredict').disabled = false;
+                alert('网络请求失败，请检查服务器状态');
+            }
+        }
+
+        // Render a prediction payload (reads data.prediction, data.probability, data.risk_level and data.features) into the result panel.
+        function displayPredictionResult(data) {
+            // Hide the placeholder and reveal the result container.
+            document.getElementById('resultPlaceholder').style.display = 'none';
+            document.getElementById('resultContent').style.display = 'block';
+
+            // Prediction badge: exactly 1 is shown as "at risk", anything else as "no risk".
+            const predictionText = data.prediction === 1 ? '有心血管疾病风险' : '无心血管疾病风险';
+            document.getElementById('predictionResult').textContent = predictionText;
+
+            // Probability, shown as a percentage with one decimal place.
+            const probabilityPercent = (data.probability * 100).toFixed(1);
+            document.getElementById('probabilityValue').textContent = `${probabilityPercent}%`;
+
+            // Risk level: pick the CSS class and icon; any value other than '低危' / '中危' falls through to the high-risk styling.
+            let riskClass = '';
+            let riskIcon = '';
+            if (data.risk_level === '低危') {
+                riskClass = 'risk-low';
+                riskIcon = 'fa-smile';
+            } else if (data.risk_level === '中危') {
+                riskClass = 'risk-medium';
+                riskIcon = 'fa-meh';
+            } else {
+                riskClass = 'risk-high';
+                riskIcon = 'fa-frown';
+            }
+
+            document.getElementById('riskLevel').innerHTML =
+                `<i class="fas ${riskIcon} me-1"></i><span class="${riskClass}">${data.risk_level}</span>`;
+
+            // Risk alert box: its class and entire inner markup are replaced per risk level (same fall-through to high risk as above).
+            const alertElement = document.getElementById('riskAlert');
+            if (data.risk_level === '低危') {
+                alertElement.className = 'alert alert-success';
+                alertElement.innerHTML = `
+                    <h4 class="alert-heading"><i class="fas fa-thumbs-up me-2"></i> 低风险</h4>
+                    <p>根据模型分析，您当前的心血管疾病风险较低。继续保持健康的生活方式！</p>
+                    <hr>
+                    <p class="mb-0">建议定期进行健康检查，维持当前的健康状态。</p>
+                `;
+            } else if (data.risk_level === '中危') {
+                alertElement.className = 'alert alert-warning';
+                alertElement.innerHTML = `
+                    <h4 class="alert-heading"><i class="fas fa-exclamation-triangle me-2"></i> 中风险</h4>
+                    <p>根据模型分析，您有一定的心血管疾病风险，建议关注相关健康指标。</p>
+                    <hr>
+                    <p class="mb-0">建议改善生活方式，并考虑进行更详细的医学检查。</p>
+                `;
+            } else {
+                alertElement.className = 'alert alert-danger';
+                alertElement.innerHTML = `
+                    <h4 class="alert-heading"><i class="fas fa-exclamation-circle me-2"></i> 高风险</h4>
+                    <p>根据模型分析，您的心血管疾病风险较高，建议尽快咨询专业医生。</p>
+                    <hr>
+                    <p class="mb-0">请及时就医，进行全面的心血管健康评估。</p>
+                `;
+            }
+
+            // Derived features taken from the payload (age in years, BMI).
+            document.getElementById('ageYears').textContent = data.features.age_years;
+            document.getElementById('bmiValue').textContent = data.features.bmi;
+
+            // Feature summary: two columns built from data.features; gender 1 is labelled female, any other value male.
+            const featureSummary = document.getElementById('featureSummary');
+            featureSummary.innerHTML = `
+                <div class="col-md-6">
+                    <p><strong>年龄：</strong> ${data.features.age_years} 岁</p>
+                    <p><strong>性别：</strong> ${data.features.gender === 1 ? '女性' : '男性'}</p>
+                    <p><strong>BMI：</strong> ${data.features.bmi}</p>
+                    <p><strong>血压：</strong> ${data.features.ap_hi}/${data.features.ap_lo} mmHg</p>
+                </div>
+                <div class="col-md-6">
+                    <p><strong>胆固醇：</strong> ${getCholesterolText(data.features.cholesterol)}</p>
+                    <p><strong>血糖：</strong> ${getGlucText(data.features.gluc)}</p>
+                    <p><strong>吸烟：</strong> ${data.features.smoke === 1 ? '是' : '否'}</p>
+                    <p><strong>饮酒：</strong> ${data.features.alco === 1 ? '是' : '否'}</p>
+                    <p><strong>体育活动：</strong> ${data.features.active === 1 ? '是' : '否'}</p>
+                </div>
+            `;
+
+            // Health advice: one <li> per triggered rule below; a generic item is used when no rule fires.
+            const healthAdvice = document.getElementById('healthAdvice');
+            let adviceItems = [];
+
+            if (data.features.bmi > 25) {
+                adviceItems.push('<li>您的BMI偏高，建议控制体重，保持健康饮食</li>');
+            }
+
+            if (data.features.ap_hi > 140 || data.features.ap_lo > 90) {
+                adviceItems.push('<li>您的血压偏高，建议定期监测血压，减少盐分摄入</li>');
+            }
+
+            if (data.features.cholesterol > 1) {
+                adviceItems.push('<li>您的胆固醇水平偏高，建议减少高胆固醇食物摄入</li>');
+            }
+
+            if (data.features.gluc > 1) {
+                adviceItems.push('<li>您的血糖水平偏高，建议控制糖分摄入，定期监测血糖</li>');
+            }
+
+            if (data.features.smoke === 1) {
+                adviceItems.push('<li>吸烟是心血管疾病的重要风险因素，建议戒烟</li>');
+            }
+
+            if (data.features.active === 0) {
+                adviceItems.push('<li>缺乏体育活动，建议每周进行至少150分钟的中等强度运动</li>');
+            }
+
+            if (adviceItems.length === 0) {
+                adviceItems.push('<li>保持当前健康的生活方式，定期进行体检</li>');
+            }
+
+            healthAdvice.innerHTML = adviceItems.join('');
+        }
+
+        // 重置表单
+        function resetForm() {
+            document.getElementById('predictionForm').reset();
+            document.getElementById('resultPlaceholder').style.display = 'block';
+            document.getElementById('resultContent').style.display = 'none';
+        }
+
+        // 填充示例数据
+        function fillExampleData(type) {
+            let exampleData;
+
+            switch(type) {
+                case 'low':
+                    exampleData = {
+                        age: 18000,      // 约49岁
+                        gender: 1,        // 女性
+                        height: 165,
+                        weight: 60,
+                        ap_hi: 120,
+                        ap_lo: 80,
+                        cholesterol: 1,
+                        gluc: 1,
+                        smoke: 0,
+                        alco: 0,
+                        active: 1
+                    };
+                    break;
+
+                case 'medium':
+                    exampleData = {
+                        age: 25000,      // 约68岁
+                        gender: 2,        // 男性
+                        height: 170,
+                        weight: 80,
+                        ap_hi: 140,
+                        ap_lo: 90,
+                        cholesterol: 2,
+                        gluc: 1,
+                        smoke: 1,
+                        alco: 1,
+                        active: 0
+                    };
+                    break;
+
+                case 'high':
+                    exampleData = {
+                        age: 30000,      // 约82岁
+                        gender: 2,        // 男性
+                        height: 168,
+                        weight: 95,
+                        ap_hi: 160,
+                        ap_lo: 100,
+                        cholesterol: 3,
+                        gluc: 2,
+                        smoke: 1,
+                        alco: 1,
+                        active: 0
+                    };
+                    break;
+            }
+
+            // 填充表单
+            for (const [key, value] of Object.entries(exampleData)) {
+                const element = document.getElementById(key);
+                if (element) {
+                    element.value = value;
+                }
+            }
+
+            // 重置结果显示
+            document.getElementById('resultPlaceholder').style.display = 'block';
+            document.getElementById('resultContent').style.display = 'none';
+        }
+
+        // 辅助函数：获取胆固醇文本描述
+        function getCholesterolText(value) {
+            switch(value) {
+                case 1: return '正常';
+                case 2: return '高于正常';
+                case 3: return '极高';
+                default: return '未知';
+            }
+        }
+
+        // 辅助函数：获取血糖文本描述
+        function getGlucText(value) {
+            switch(value) {
+                case 1: return '正常';
+                case 2: return '高于正常';
+                case 3: return '极高';
+                default: return '未知';
+            }
+        }
+    </script>
+</body>
+</html>
--- a/test/module2_predictor/test_api.py
+++ b/test/module2_predictor/test_api.py
@@ -0,0 +1,172 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+CardioAI API测试脚本
+
+测试模型加载和预测功能
+"""
+
+import sys
+import os
+import json
+from pathlib import Path
+
+# 添加项目根目录到Python路径
+project_root = Path(__file__).parent.parent
+sys.path.append(str(project_root))
+
+# 导入Flask应用中的函数
+from app import load_model, preprocess_input
+
+def test_model_loading():
+    """测试模型加载"""
+    print("测试模型加载...")
+    try:
+        success = load_model()
+        if success:
+            print("✅ 模型加载成功")
+            return True
+        else:
+            print("❌ 模型加载失败")
+            return False
+    except Exception as e:
+        print(f"❌ 模型加载异常: {str(e)}")
+        return False
+
+def test_data_preprocessing():
+    """测试数据预处理"""
+    print("\n测试数据预处理...")
+
+    # 测试数据
+    test_data = {
+        "age": 20228,        # 约55岁
+        "gender": 1,         # 女性
+        "height": 156,       # 身高（cm）
+        "weight": 85,        # 体重（kg）
+        "ap_hi": 140,        # 收缩压（mmHg）
+        "ap_lo": 90,         # 舒张压（mmHg）
+        "cholesterol": 1,    # 胆固醇水平
+        "gluc": 1,           # 血糖水平
+        "smoke": 0,          # 吸烟
+        "alco": 0,           # 饮酒
+        "active": 1          # 体育活动
+    }
+
+    try:
+        processed_df = preprocess_input(test_data)
+        print(f"✅ 数据预处理成功")
+        print(f"   处理后的特征:")
+        for col in processed_df.columns:
+            print(f"     {col}: {processed_df[col].iloc[0]}")
+        return True
+    except Exception as e:
+        print(f"❌ 数据预处理失败: {str(e)}")
+        return False
+
+def test_prediction():
+    """测试预测功能"""
+    print("\n测试预测功能...")
+
+    # 需要导入pipeline
+    from app import pipeline
+
+    if pipeline is None:
+        print("❌ 模型未加载，无法测试预测")
+        return False
+
+    # 测试数据
+    test_data = {
+        "age": 20228,
+        "gender": 1,
+        "height": 156,
+        "weight": 85,
+        "ap_hi": 140,
+        "ap_lo": 90,
+        "cholesterol": 1,
+        "gluc": 1,
+        "smoke": 0,
+        "alco": 0,
+        "active": 1
+    }
+
+    try:
+        processed_df = preprocess_input(test_data)
+        prediction = pipeline.predict(processed_df)[0]
+        probability = pipeline.predict_proba(processed_df)[0][1]
+
+        print(f"✅ 预测成功")
+        print(f"   预测结果: {prediction} ({'有风险' if prediction == 1 else '无风险'})")
+        print(f"   患病概率: {probability:.4f} ({(probability*100):.1f}%)")
+
+        # 确定风险等级
+        if probability < 0.3:
+            risk_level = "低危"
+        elif probability < 0.6:
+            risk_level = "中危"
+        else:
+            risk_level = "高危"
+
+        print(f"   风险等级: {risk_level}")
+        return True
+    except Exception as e:
+        print(f"❌ 预测失败: {str(e)}")
+        import traceback
+        traceback.print_exc()
+        return False
+
+def test_api_endpoint():
+    """测试API端点（需要启动服务器）"""
+    print("\n测试API端点...")
+    print("注意：此测试需要Flask服务器正在运行")
+    print("请先启动Flask应用，然后运行此测试")
+
+    # 这里可以添加实际的HTTP请求测试
+    # 但为了简单起见，我们只是提示用户
+    print("使用以下命令启动服务器:")
+    print('  cd "D:\\Project\\PythonProject\\AIcode\\test"')
+    print('  "D:\\software\\anaconda\\envs\\cardioenv\\python.exe" module2_predictor/app.py')
+    print("\n然后使用curl或浏览器测试API:")
+    print('  curl -X POST http://localhost:5000/predict_cardio \\')
+    print('       -H "Content-Type: application/json" \\')
+    print('       -d \'{"age":20228,"gender":1,"height":156,"weight":85,"ap_hi":140,"ap_lo":90,"cholesterol":1,"gluc":1,"smoke":0,"alco":0,"active":1}\'')
+
+def main():
+    """主测试函数"""
+    print("=" * 60)
+    print("CardioAI API 测试")
+    print("=" * 60)
+
+    # 测试模型加载
+    model_loaded = test_model_loading()
+
+    if model_loaded:
+        # 测试数据预处理
+        preprocessing_ok = test_data_preprocessing()
+
+        # 测试预测功能
+        prediction_ok = test_prediction()
+
+        # 汇总结果
+        print("\n" + "=" * 60)
+        print("测试结果汇总:")
+        print(f"  模型加载: {'✅ 通过' if model_loaded else '❌ 失败'}")
+        print(f"  数据预处理: {'✅ 通过' if preprocessing_ok else '❌ 失败'}")
+        print(f"  预测功能: {'✅ 通过' if prediction_ok else '❌ 失败'}")
+
+        if model_loaded and preprocessing_ok and prediction_ok:
+            print("\n🎉 所有测试通过！")
+            print("Flask API可以正常运行。")
+            return True
+        else:
+            print("\n⚠️ 部分测试失败，请检查问题。")
+            return False
+    else:
+        print("\n❌ 模型加载失败，无法继续测试。")
+        return False
+
+    # 显示API测试说明
+    test_api_endpoint()
+
+if __name__ == "__main__":
+    success = main()
+    sys.exit(0 if success else 1)
--- a/test/module2_predictor/train_and_save.py
+++ b/test/module2_predictor/train_and_save.py
@@ -0,0 +1,331 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+CardioAI - 心血管疾病预测模型训练脚本
+
+功能：
+1. 加载和清洗数据（与模块1相同的流程）
+2. 特征工程：年龄转换、BMI计算、异常值处理
+3. 构建机器学习Pipeline
+4. 训练XGBoost分类器
+5. 保存完整Pipeline到文件
+
+注意：此脚本为一次性训练脚本，生成模型文件供Flask应用使用。
+"""
+
+import pandas as pd
+import numpy as np
+from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import StandardScaler, OneHotEncoder
+from sklearn.compose import ColumnTransformer
+from sklearn.pipeline import Pipeline
+from xgboost import XGBClassifier
+import joblib
+import warnings
+import sys
+import os
+from pathlib import Path
+
+# 忽略警告
+warnings.filterwarnings('ignore')
+
+# 添加项目根目录到Python路径
+project_root = Path(__file__).parent.parent
+sys.path.append(str(project_root))
+
+def load_and_preprocess_data():
+    """
+    加载数据并进行预处理（与模块1相同的清洗和特征工程）
+
+    返回：
+        pd.DataFrame: 预处理后的数据框
+    """
+    print("开始加载和预处理数据...")
+
+    # 数据文件路径
+    data_path = project_root / "data" / "心血管疾病.xlsx"
+
+    try:
+        # 加载数据
+        df = pd.read_excel(data_path)
+        print(f"原始数据形状: {df.shape}")
+
+        # 检查必要列
+        required_columns = ['id', 'age', 'gender', 'height', 'weight', 'ap_hi', 'ap_lo',
+                           'cholesterol', 'gluc', 'smoke', 'alco', 'active', 'cardio']
+        missing_columns = [col for col in required_columns if col not in df.columns]
+        if missing_columns:
+            raise ValueError(f"数据文件中缺少必要列: {missing_columns}")
+
+        # 创建数据副本
+        df_processed = df.copy()
+
+        # 1. 年龄转换：从天转换为年（四舍五入）
+        df_processed['age_years'] = (df_processed['age'] / 365.25).round().astype(int)
+
+        # 2. 计算BMI: BMI = weight(kg) / (height(m)^2)
+        df_processed['bmi'] = df_processed['weight'] / ((df_processed['height'] / 100) ** 2)
+        df_processed['bmi'] = df_processed['bmi'].round(2)
+
+        # 3. 异常值处理
+        # 删除舒张压 >= 收缩压的记录
+        invalid_bp = df_processed['ap_lo'] >= df_processed['ap_hi']
+        if invalid_bp.any():
+            print(f"删除 {invalid_bp.sum()} 条舒张压 >= 收缩压的异常记录")
+            df_processed = df_processed[~invalid_bp].copy()
+
+        # 删除血压极端异常值
+        # 收缩压 ∈ [90, 250], 舒张压 ∈ [60, 150]
+        bp_outliers = ~((df_processed['ap_hi'] >= 90) & (df_processed['ap_hi'] <= 250) &
+                       (df_processed['ap_lo'] >= 60) & (df_processed['ap_lo'] <= 150))
+        if bp_outliers.any():
+            print(f"删除 {bp_outliers.sum()} 条血压极端异常值记录")
+            df_processed = df_processed[~bp_outliers].copy()
+
+        # 4. 删除不需要的列
+        # 删除id和原始age字段（使用转换后的age_years）
+        df_processed = df_processed.drop(['id', 'age'], axis=1)
+
+        print(f"预处理后数据形状: {df_processed.shape}")
+        print("数据预处理完成！")
+
+        return df_processed
+
+    except Exception as e:
+        print(f"数据加载和预处理失败: {str(e)}")
+        raise
+
+def prepare_features_and_target(df):
+    """
+    准备特征矩阵X和目标向量y
+
+    参数：
+        df: 预处理后的数据框
+
+    返回：
+        X: 特征矩阵
+        y: 目标向量
+        feature_names: 特征名称列表
+    """
+    print("准备特征和目标变量...")
+
+    # 目标变量
+    y = df['cardio'].values
+
+    # 特征矩阵 - 删除目标变量
+    X = df.drop('cardio', axis=1)
+
+    print(f"特征矩阵形状: {X.shape}")
+    print(f"目标变量分布: 0={sum(y==0)}, 1={sum(y==1)}")
+
+    return X, y, X.columns.tolist()
+
+def build_pipeline():
+    """
+    构建机器学习Pipeline
+
+    返回：
+        Pipeline: 包含预处理和分类器的完整Pipeline
+    """
+    print("构建机器学习Pipeline...")
+
+    # 定义特征类型
+    # 连续特征：需要标准化
+    numerical_features = ['age_years', 'bmi', 'ap_hi', 'ap_lo']
+
+    # 分类特征：需要独热编码
+    categorical_features = ['gender', 'cholesterol', 'gluc']
+
+    # 二元特征：直接使用（不需要编码）
+    binary_features = ['smoke', 'alco', 'active']
+
+    # 所有特征顺序
+    all_features = numerical_features + categorical_features + binary_features
+
+    # 创建列转换器
+    preprocessor = ColumnTransformer(
+        transformers=[
+            ('num', StandardScaler(), numerical_features),
+            ('cat', OneHotEncoder(drop='first', sparse_output=False, handle_unknown='ignore'),
+             categorical_features),
+            # 二元特征直接通过（不进行变换）
+            ('binary', 'passthrough', binary_features)
+        ],
+        remainder='drop'  # 丢弃其他列
+    )
+
+    # 创建完整Pipeline
+    pipeline = Pipeline([
+        ('preprocessor', preprocessor),
+        ('classifier', XGBClassifier(
+            n_estimators=100,
+            max_depth=5,
+            learning_rate=0.1,
+            subsample=0.8,
+            colsample_bytree=0.8,
+            random_state=42,
+            eval_metric='logloss',
+            use_label_encoder=False
+        ))
+    ])
+
+    print("Pipeline构建完成！")
+    return pipeline, all_features
+
+def train_model(X, y, pipeline):
+    """
+    训练模型
+
+    参数：
+        X: 特征矩阵
+        y: 目标向量
+        pipeline: 机器学习Pipeline
+
+    返回：
+        训练好的Pipeline
+    """
+    print("开始训练模型...")
+
+    # 划分训练集和测试集
+    X_train, X_test, y_train, y_test = train_test_split(
+        X, y, test_size=0.2, random_state=42, stratify=y
+    )
+
+    print(f"训练集大小: {X_train.shape}")
+    print(f"测试集大小: {X_test.shape}")
+
+    # 训练模型
+    pipeline.fit(X_train, y_train)
+
+    # 评估模型
+    train_score = pipeline.score(X_train, y_train)
+    test_score = pipeline.score(X_test, y_test)
+
+    print(f"训练集准确率: {train_score:.4f}")
+    print(f"测试集准确率: {test_score:.4f}")
+
+    # 特征重要性（如果可用）
+    if hasattr(pipeline.named_steps['classifier'], 'feature_importances_'):
+        importances = pipeline.named_steps['classifier'].feature_importances_
+        print(f"特征重要性数量: {len(importances)}")
+
+        # 获取特征名称（需要从预处理器中提取）
+        preprocessor = pipeline.named_steps['preprocessor']
+
+        # 获取转换后的特征名称
+        feature_names = []
+
+        # 数值特征名称
+        feature_names.extend(preprocessor.transformers_[0][2])
+
+        # 分类特征名称（独热编码后）
+        if len(preprocessor.transformers_) > 1:
+            cat_encoder = preprocessor.transformers_[1][1]
+            if hasattr(cat_encoder, 'get_feature_names_out'):
+                cat_features = cat_encoder.get_feature_names_out(
+                    preprocessor.transformers_[1][2]
+                )
+                feature_names.extend(cat_features)
+
+        # 二元特征名称
+        if len(preprocessor.transformers_) > 2:
+            feature_names.extend(preprocessor.transformers_[2][2])
+
+        # 打印最重要的特征
+        if len(feature_names) == len(importances):
+            print("\nTop 10 特征重要性:")
+            indices = np.argsort(importances)[::-1]
+            for i in range(min(10, len(importances))):
+                print(f"  {feature_names[indices[i]]}: {importances[indices[i]]:.4f}")
+
+    return pipeline
+
+def save_pipeline(pipeline, all_features):
+    """
+    保存Pipeline到文件
+
+    参数：
+        pipeline: 训练好的Pipeline
+        all_features: 特征名称列表
+    """
+    print("保存模型和特征信息...")
+
+    # 创建模型保存目录
+    model_dir = Path(__file__).parent / "models"
+    model_dir.mkdir(exist_ok=True)
+
+    # 模型文件路径
+    model_path = model_dir / "cardio_predictor_model.pkl"
+
+    # 保存Pipeline对象
+    model_data = {
+        'pipeline': pipeline,
+        'feature_names': all_features,
+        'model_version': '1.0.0',
+        'description': 'CardioAI心血管疾病预测模型'
+    }
+
+    joblib.dump(model_data, model_path)
+    print(f"模型已保存到: {model_path}")
+
+    # 保存特征信息到单独文件（可选）
+    features_path = model_dir / "feature_info.txt"
+    with open(features_path, 'w', encoding='utf-8') as f:
+        f.write("CardioAI模型特征信息\n")
+        f.write("=" * 50 + "\n\n")
+        f.write("特征列表（按输入顺序）:\n")
+        for i, feature in enumerate(all_features, 1):
+            f.write(f"{i:2d}. {feature}\n")
+
+        f.write("\n\n特征说明:\n")
+        f.write("- age_years: 年龄（岁），由原始天数转换而来\n")
+        f.write("- bmi: 身体质量指数，计算公式：体重(kg) / (身高(m)^2)\n")
+        f.write("- ap_hi: 收缩压（mmHg）\n")
+        f.write("- ap_lo: 舒张压（mmHg）\n")
+        f.write("- gender: 性别（1=女性，2=男性）\n")
+        f.write("- cholesterol: 胆固醇水平（1=正常，2=高于正常，3=极高）\n")
+        f.write("- gluc: 血糖水平（1=正常，2=高于正常，3=极高）\n")
+        f.write("- smoke: 吸烟（0=否，1=是）\n")
+        f.write("- alco: 饮酒（0=否，1=是）\n")
+        f.write("- active: 体育活动（0=否，1=是）\n")
+
+    print(f"特征信息已保存到: {features_path}")
+
+    return model_path
+
+def main():
+    """主函数"""
+    print("=" * 60)
+    print("CardioAI - 心血管疾病预测模型训练")
+    print("=" * 60)
+
+    try:
+        # 1. 加载和预处理数据
+        df = load_and_preprocess_data()
+
+        # 2. 准备特征和目标
+        X, y, original_features = prepare_features_and_target(df)
+
+        # 3. 构建Pipeline
+        pipeline, all_features = build_pipeline()
+
+        # 4. 训练模型
+        trained_pipeline = train_model(X, y, pipeline)
+
+        # 5. 保存模型
+        model_path = save_pipeline(trained_pipeline, all_features)
+
+        print("\n" + "=" * 60)
+        print("模型训练完成！")
+        print(f"模型文件: {model_path}")
+        print("下一步：使用Flask应用部署模型")
+        print("=" * 60)
+
+    except Exception as e:
+        print(f"\n训练过程出现错误: {str(e)}")
+        import traceback
+        traceback.print_exc()
+        sys.exit(1)
+
+if __name__ == "__main__":
+    main()
--- a/test/requirements.txt
+++ b/test/requirements.txt
@@ -0,0 +1,33 @@
+# CardioAI - Cardiovascular Disease Intelligent Assistant System
+# Python dependencies for the multi-module application
+
+# Conda Environment Setup Instructions:
+# 1. Create a new conda environment named 'cardioenv' with Python 3.10:
+#    conda create -n cardioenv python=3.10
+# 2. Activate the environment:
+#    conda activate cardioenv
+# 3. Install dependencies from this file:
+#    pip install -r requirements.txt
+
+# Core data processing and machine learning
+pandas>=2.0.0
+openpyxl>=3.1.0
+numpy>=1.24.0
+scikit-learn>=1.3.0
+xgboost>=2.0.0
+joblib>=1.3.0
+
+# Data visualization and dashboard
+streamlit>=1.28.0
+plotly>=5.18.0
+
+# Web API and prediction server
+Flask>=3.0.0
+
+# Environment configuration
+python-dotenv>=1.0.0
+
+# AI and language model integration
+langchain-openai>=0.0.5
+dashscope>=1.14.0
+requests>=2.31.0
--- a/1
+++ b/1
--- a/1
+++ b/1
Author	SHA1	Message	Date
group04_tangzhuoyang	9c040b3dec	Update code	2026-04-02 19:52:38 +08:00
group04_tangzhuoyang	f77195a9d7	A	2026-04-02 17:16:19 +08:00