Compare commits
2 Commits
420bbac9d7
...
9c040b3dec
| Author | SHA1 | Date | |
|---|---|---|---|
| 9c040b3dec | |||
| f77195a9d7 |
13
.claude/settings.local.json
Normal file
13
.claude/settings.local.json
Normal file
@@ -0,0 +1,13 @@
|
||||
{
|
||||
"permissions": {
|
||||
"allow": [
|
||||
"Bash(py:*)",
|
||||
"Bash(python math.py)",
|
||||
"Bash(git remote:*)",
|
||||
"Bash(git config:*)",
|
||||
"Bash(git add:*)",
|
||||
"Bash(git commit:*)",
|
||||
"Bash(git push:*)"
|
||||
]
|
||||
}
|
||||
}
|
||||
8
.idea/AIcode.iml
generated
Normal file
8
.idea/AIcode.iml
generated
Normal file
@@ -0,0 +1,8 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<module type="PYTHON_MODULE" version="4">
|
||||
<component name="NewModuleRootManager">
|
||||
<content url="file://$MODULE_DIR$" />
|
||||
<orderEntry type="jdk" jdkName="D:\software\anaconda" jdkType="Python SDK" />
|
||||
<orderEntry type="sourceFolder" forTests="false" />
|
||||
</component>
|
||||
</module>
|
||||
6
.idea/inspectionProfiles/profiles_settings.xml
generated
Normal file
6
.idea/inspectionProfiles/profiles_settings.xml
generated
Normal file
@@ -0,0 +1,6 @@
|
||||
<component name="InspectionProjectProfileManager">
|
||||
<settings>
|
||||
<option name="USE_PROJECT_PROFILE" value="false" />
|
||||
<version value="1.0" />
|
||||
</settings>
|
||||
</component>
|
||||
8
.idea/modules.xml
generated
Normal file
8
.idea/modules.xml
generated
Normal file
@@ -0,0 +1,8 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="ProjectModuleManager">
|
||||
<modules>
|
||||
<module fileurl="file://$PROJECT_DIR$/.idea/AIcode.iml" filepath="$PROJECT_DIR$/.idea/AIcode.iml" />
|
||||
</modules>
|
||||
</component>
|
||||
</project>
|
||||
46
.idea/workspace.xml
generated
Normal file
46
.idea/workspace.xml
generated
Normal file
@@ -0,0 +1,46 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="ChangeListManager">
|
||||
<list default="true" id="c2fd381e-9b12-4d32-b4e8-24d694013b53" name="更改" comment="" />
|
||||
<option name="SHOW_DIALOG" value="false" />
|
||||
<option name="HIGHLIGHT_CONFLICTS" value="true" />
|
||||
<option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
|
||||
<option name="LAST_RESOLUTION" value="IGNORE" />
|
||||
</component>
|
||||
<component name="ProjectColorInfo"><![CDATA[{
|
||||
"associatedIndex": 7
|
||||
}]]></component>
|
||||
<component name="ProjectId" id="3BmcLYQM9mcuqt9z3i5B1kZMOfO" />
|
||||
<component name="ProjectViewState">
|
||||
<option name="hideEmptyMiddlePackages" value="true" />
|
||||
<option name="showLibraryContents" value="true" />
|
||||
</component>
|
||||
<component name="PropertiesComponent"><![CDATA[{
|
||||
"keyToString": {
|
||||
"ModuleVcsDetector.initialDetectionPerformed": "true",
|
||||
"RunOnceActivity.ShowReadmeOnStart": "true",
|
||||
"nodejs_package_manager_path": "npm",
|
||||
"settings.editor.selected.configurable": "preferences.lookFeel",
|
||||
"vue.rearranger.settings.migration": "true"
|
||||
}
|
||||
}]]></component>
|
||||
<component name="SharedIndexes">
|
||||
<attachedChunks>
|
||||
<set>
|
||||
<option value="bundled-js-predefined-d6986cc7102b-6a121458b545-JavaScript-PY-251.25410.159" />
|
||||
<option value="bundled-python-sdk-e0ed3721d81e-36ea0e71a18c-com.jetbrains.pycharm.pro.sharedIndexes.bundled-PY-251.25410.159" />
|
||||
</set>
|
||||
</attachedChunks>
|
||||
</component>
|
||||
<component name="TaskManager">
|
||||
<task active="true" id="Default" summary="默认任务">
|
||||
<changelist id="c2fd381e-9b12-4d32-b4e8-24d694013b53" name="更改" comment="" />
|
||||
<created>1775097630560</created>
|
||||
<option name="number" value="Default" />
|
||||
<option name="presentableId" value="Default" />
|
||||
<updated>1775097630560</updated>
|
||||
<workItem from="1775097631689" duration="2000" />
|
||||
</task>
|
||||
<servers />
|
||||
</component>
|
||||
</project>
|
||||
15
test/.claude/settings.local.json
Normal file
15
test/.claude/settings.local.json
Normal file
@@ -0,0 +1,15 @@
|
||||
{
|
||||
"permissions": {
|
||||
"allow": [
|
||||
"Bash(python -c \"import pandas as pd; df = pd.read_excel\\('data/心血管疾病.xlsx', nrows=5\\); print\\('Columns:', df.columns.tolist\\(\\)\\); print\\('Data types:', df.dtypes\\); print\\('Sample data:'\\); print\\(df.head\\(\\)\\)\")",
|
||||
"Bash(\"D:\\\\software\\\\anaconda\\\\Scripts\\\\conda.exe\" run:*)",
|
||||
"Bash(\"D:\\\\software\\\\anaconda\\\\envs\\\\cardioenv\\\\python.exe\" module1_dashboard/test_data.py)",
|
||||
"Bash(\"D:\\\\software\\\\anaconda\\\\envs\\\\cardioenv\\\\python.exe\" -m streamlit run module1_dashboard/cardio_dashboard.py --help)",
|
||||
"Bash(\"D:\\\\software\\\\anaconda\\\\envs\\\\cardioenv\\\\python.exe\" -c \"import pandas as pd; df = pd.read_excel\\('data/心血管疾病.xlsx', nrows=10\\); print\\('Columns:', df.columns.tolist\\(\\)\\); print\\('\\\\nData types:'\\); print\\(df.dtypes\\); print\\('\\\\nSample data:'\\); print\\(df[['age', 'gender', 'height', 'weight', 'ap_hi', 'ap_lo', 'cholesterol', 'gluc', 'smoke', 'alco', 'active', 'cardio']].head\\(\\)\\)\")",
|
||||
"Bash(\"D:\\\\software\\\\anaconda\\\\envs\\\\cardioenv\\\\python.exe\" -m py_compile module2_predictor/train_and_save.py)",
|
||||
"Bash(\"D:\\\\software\\\\anaconda\\\\envs\\\\cardioenv\\\\python.exe\" module2_predictor/train_and_save.py)",
|
||||
"Bash(\"D:\\\\software\\\\anaconda\\\\envs\\\\cardioenv\\\\python.exe\" -m py_compile module2_predictor/app.py)",
|
||||
"Bash(\"D:\\\\software\\\\anaconda\\\\envs\\\\cardioenv\\\\python.exe\" module2_predictor/test_api.py)"
|
||||
]
|
||||
}
|
||||
}
|
||||
36
test/.env
Normal file
36
test/.env
Normal file
@@ -0,0 +1,36 @@
|
||||
# CardioAI Configuration
|
||||
# Environment variables for the cardiovascular disease intelligent assistant system
|
||||
|
||||
# Data file path (relative to project root)
|
||||
DATA_PATH=./data/心血管疾病.xlsx
|
||||
|
||||
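# Flask server configuration (development only: FLASK_DEBUG=True combined with FLASK_HOST=0.0.0.0 exposes the interactive debugger to the whole network — turn debug off before making the port reachable)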
|
||||
FLASK_APP=module2_predictor/app.py
|
||||
FLASK_ENV=development
|
||||
FLASK_DEBUG=True
|
||||
FLASK_HOST=0.0.0.0
|
||||
FLASK_PORT=5000
|
||||
|
||||
# Streamlit dashboard configuration
|
||||
STREAMLIT_PORT=8501
|
||||
STREAMLIT_THEME=light
|
||||
|
||||
# AI Model API Keys (replace with your actual keys)
|
||||
# DeepSeek API (from dashscope)
|
||||
DASHSCOPE_API_KEY=your_dashscope_api_key_here
|
||||
|
||||
# OpenAI API (for langchain-openai, if used)
|
||||
OPENAI_API_KEY=your_openai_api_key_here
|
||||
|
||||
# Voice assistant configuration
|
||||
VOICE_ASSISTANT_PORT=5001
|
||||
VOICE_ASSISTANT_HOST=0.0.0.0
|
||||
|
||||
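# Model file paths (generated during training). NOTE: the README documents the trained model as module2_predictor/models/cardio_predictor_model.pkl — confirm which file names the code actually reads.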
|
||||
MODEL_PATH=./module2_predictor/models/xgb_model.pkl
|
||||
SCALER_PATH=./module2_predictor/models/scaler.pkl
|
||||
ENCODER_PATH=./module2_predictor/models/encoder.pkl
|
||||
|
||||
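# Feature configuration (NOTE: these names do not match the dataset columns used by the dashboard and predictor — age, gender, height, weight, ap_hi, ap_lo, cholesterol, gluc, smoke, alco, active — verify before relying on them)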
|
||||
NUMERICAL_FEATURES=age,trestbps,chol,thalach,oldpeak
|
||||
CATEGORICAL_FEATURES=sex,cp,fbs,restecg,exang,slope,ca,thal
|
||||
8
test/.idea/.gitignore
generated
vendored
Normal file
8
test/.idea/.gitignore
generated
vendored
Normal file
@@ -0,0 +1,8 @@
|
||||
# 默认忽略的文件
|
||||
/shelf/
|
||||
/workspace.xml
|
||||
# 基于编辑器的 HTTP 客户端请求
|
||||
/httpRequests/
|
||||
# Datasource local storage ignored files
|
||||
/dataSources/
|
||||
/dataSources.local.xml
|
||||
6
test/.idea/inspectionProfiles/profiles_settings.xml
generated
Normal file
6
test/.idea/inspectionProfiles/profiles_settings.xml
generated
Normal file
@@ -0,0 +1,6 @@
|
||||
<component name="InspectionProjectProfileManager">
|
||||
<settings>
|
||||
<option name="USE_PROJECT_PROFILE" value="false" />
|
||||
<version value="1.0" />
|
||||
</settings>
|
||||
</component>
|
||||
4
test/.idea/misc.xml
generated
Normal file
4
test/.idea/misc.xml
generated
Normal file
@@ -0,0 +1,4 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="ProjectRootManager" version="2" project-jdk-name="cardioenv" project-jdk-type="Python SDK" />
|
||||
</project>
|
||||
8
test/.idea/modules.xml
generated
Normal file
8
test/.idea/modules.xml
generated
Normal file
@@ -0,0 +1,8 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="ProjectModuleManager">
|
||||
<modules>
|
||||
<module fileurl="file://$PROJECT_DIR$/.idea/test.iml" filepath="$PROJECT_DIR$/.idea/test.iml" />
|
||||
</modules>
|
||||
</component>
|
||||
</project>
|
||||
16
test/.idea/test.iml
generated
Normal file
16
test/.idea/test.iml
generated
Normal file
@@ -0,0 +1,16 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<module type="PYTHON_MODULE" version="4">
|
||||
<component name="NewModuleRootManager">
|
||||
<content url="file://$MODULE_DIR$" />
|
||||
<orderEntry type="jdk" jdkName="cardioenv" jdkType="Python SDK" />
|
||||
<orderEntry type="sourceFolder" forTests="false" />
|
||||
</component>
|
||||
<component name="TemplatesService">
|
||||
<option name="TEMPLATE_CONFIGURATION" value="Jinja2" />
|
||||
<option name="TEMPLATE_FOLDERS">
|
||||
<list>
|
||||
<option value="$MODULE_DIR$/module2_predictor/templates" />
|
||||
</list>
|
||||
</option>
|
||||
</component>
|
||||
</module>
|
||||
6
test/.idea/vcs.xml
generated
Normal file
6
test/.idea/vcs.xml
generated
Normal file
@@ -0,0 +1,6 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="VcsDirectoryMappings">
|
||||
<mapping directory="$PROJECT_DIR$/.." vcs="Git" />
|
||||
</component>
|
||||
</project>
|
||||
493
test/README.md
Normal file
493
test/README.md
Normal file
@@ -0,0 +1,493 @@
|
||||
# CardioAI - 心血管疾病智能辅助系统
|
||||
|
||||
## 项目概述
|
||||
|
||||
CardioAI是一个多模块应用系统,集成了数据可视化、机器学习预测和AI语音问答功能,用于心血管疾病的智能辅助分析和诊断。
|
||||
|
||||
### 系统模块
|
||||
|
||||
1. **Module 1: 数据可视化仪表板** (Streamlit) - 本模块
|
||||
- 数据清洗与特征工程
|
||||
- 交互式数据筛选
|
||||
- 可视化分析图表
|
||||
|
||||
2. **Module 2: 机器学习预测器** (Flask + XGBoost)
|
||||
- 心血管疾病风险预测模型
|
||||
- RESTful API接口
|
||||
- 实时预测服务
|
||||
|
||||
3. **Module 3: AI语音助手** (DeepSeek + CosyVoice)
|
||||
- 自然语言问答
|
||||
- 语音交互界面
|
||||
- 疾病知识查询
|
||||
|
||||
## Module 1: 数据可视化仪表板
|
||||
|
||||
### 功能特性
|
||||
|
||||
- ✅ **数据加载与清洗**: 自动处理异常值和缺失数据
|
||||
- ✅ **特征工程**: 年龄转换、BMI计算、类别编码
|
||||
- ✅ **交互式筛选**: 侧边栏多维度数据筛选
|
||||
- ✅ **可视化分析**: Plotly交互式图表
|
||||
- ✅ **性能优化**: 使用缓存加速数据加载
|
||||
|
||||
### 数据处理流程
|
||||
|
||||
1. **数据加载**: 从Excel文件加载原始数据
|
||||
2. **年龄转换**: 将天数转换为年数(四舍五入)
|
||||
3. **BMI计算**: `BMI = 体重(kg) / (身高(m)^2)`
|
||||
4. **异常值处理**:
|
||||
- 删除舒张压 ≥ 收缩压的记录
|
||||
- 删除收缩压不在[90, 250] mmHg范围的记录
|
||||
- 删除舒张压不在[60, 150] mmHg范围的记录
|
||||
5. **类别转换**:
|
||||
- 胆固醇水平: 1=正常, 2=高于正常, 3=极高
|
||||
- 血糖水平: 1=正常, 2=高于正常, 3=极高
|
||||
- 性别: 1=女性, 2=男性
|
||||
- BMI分类: <18.5=偏瘦, 18.5≤BMI<25=正常, 25≤BMI<30=超重, ≥30=肥胖
|
||||
|
||||
### 快速开始
|
||||
|
||||
#### 1. 环境配置
|
||||
|
||||
```bash
|
||||
# 创建并激活conda虚拟环境
|
||||
conda create -n cardioenv python=3.10
|
||||
conda activate cardioenv
|
||||
|
||||
# 安装依赖包
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
#### 2. 数据准备
|
||||
|
||||
确保数据文件位于正确路径:
|
||||
```
|
||||
项目根目录/
|
||||
├── data/
|
||||
│ └── 心血管疾病.xlsx
|
||||
└── module1_dashboard/
|
||||
└── cardio_dashboard.py
|
||||
```
|
||||
|
||||
#### 3. 启动仪表板
|
||||
|
||||
```bash
|
||||
# 进入项目根目录
|
||||
cd D:\Project\PythonProject\AIcode\test
|
||||
|
||||
# 激活conda环境
|
||||
conda activate cardioenv
|
||||
|
||||
# 启动Streamlit应用程序
|
||||
streamlit run module1_dashboard/cardio_dashboard.py
|
||||
```
|
||||
|
||||
或者使用conda直接运行:
|
||||
|
||||
```bash
|
||||
"D:\software\anaconda\Scripts\conda.exe" run -n cardioenv streamlit run module1_dashboard/cardio_dashboard.py
|
||||
```
|
||||
|
||||
#### 4. 访问应用
|
||||
|
||||
打开浏览器,访问: [http://localhost:8501](http://localhost:8501)
|
||||
|
||||
### 界面说明
|
||||
|
||||
#### 侧边栏筛选器
|
||||
- **年龄范围**: 滑动选择器,筛选指定年龄范围的记录
|
||||
- **性别**: 多选框,选择要分析的性别(女性/男性)
|
||||
- **心血管疾病状态**: 多选框,选择疾病状态(有/无)
|
||||
- **BMI分类**: 多选框,选择BMI分类(偏瘦/正常/超重/肥胖)
|
||||
- **胆固醇水平**: 多选框,选择胆固醇水平
|
||||
- **血糖水平**: 多选框,选择血糖水平
|
||||
|
||||
#### 主界面区域
|
||||
|
||||
1. **关键指标面板**
|
||||
- 筛选后记录数
|
||||
- 心血管疾病风险率
|
||||
- 平均年龄
|
||||
- 平均BMI
|
||||
|
||||
2. **数据可视化图表**
|
||||
- 年龄分布与心血管疾病关系直方图
|
||||
- BMI分类对心血管疾病影响的堆叠柱状图
|
||||
- 血压关系散点图
|
||||
- 胆固醇水平分布饼图
|
||||
- 血糖水平分布饼图
|
||||
|
||||
3. **数据预览**
|
||||
- 数据摘要(形状、类型、缺失值)
|
||||
- 原始数据表格(可自定义显示的列)
|
||||
|
||||
### 配置文件说明
|
||||
|
||||
`.env` 文件包含以下配置项:
|
||||
|
||||
```ini
|
||||
# 数据文件路径
|
||||
DATA_PATH=./data/心血管疾病.xlsx
|
||||
|
||||
# Flask服务器配置
|
||||
FLASK_APP=module2_predictor/app.py
|
||||
FLASK_ENV=development
|
||||
|
||||
# Streamlit配置
|
||||
STREAMLIT_PORT=8501
|
||||
|
||||
# AI模型API密钥(需要替换为实际值)
|
||||
DASHSCOPE_API_KEY=your_dashscope_api_key_here
|
||||
OPENAI_API_KEY=your_openai_api_key_here
|
||||
|
||||
# 模型文件路径
|
||||
MODEL_PATH=./module2_predictor/models/xgb_model.pkl
|
||||
```
|
||||
|
||||
### 依赖包说明
|
||||
|
||||
详细依赖见 `requirements.txt`:
|
||||
|
||||
- **数据处理**: pandas, numpy, openpyxl
|
||||
- **机器学习**: scikit-learn, xgboost, joblib
|
||||
- **可视化**: streamlit, plotly
|
||||
- **Web服务**: Flask
|
||||
- **环境管理**: python-dotenv
|
||||
- **AI集成**: langchain-openai, dashscope, requests
|
||||
|
||||
## Module 2: 机器学习预测器
|
||||
|
||||
### 功能特性
|
||||
|
||||
- ✅ **模型训练**: XGBoost分类器,准确率约73%
|
||||
- ✅ **特征工程**: 年龄转换、BMI计算、异常值处理
|
||||
- ✅ **RESTful API**: Flask提供预测接口
|
||||
- ✅ **前端界面**: 交互式Web表单,实时预测
|
||||
- ✅ **模型持久化**: Joblib保存完整Pipeline
|
||||
|
||||
### 模型训练
|
||||
|
||||
#### 1. 训练模型(一次性)
|
||||
```bash
|
||||
# 进入项目根目录
|
||||
cd D:\Project\PythonProject\AIcode\test
|
||||
|
||||
# 激活conda环境
|
||||
conda activate cardioenv
|
||||
|
||||
# 运行训练脚本
|
||||
python module2_predictor/train_and_save.py
|
||||
```
|
||||
|
||||
训练脚本将:
|
||||
1. 加载和清洗数据(与Module 1相同)
|
||||
2. 特征工程:年龄转换、BMI计算
|
||||
3. 构建机器学习Pipeline(StandardScaler + OneHotEncoder + XGBoost)
|
||||
4. 训练模型并评估性能
|
||||
5. 保存模型到 `module2_predictor/models/cardio_predictor_model.pkl`
|
||||
|
||||
#### 2. 模型特征
|
||||
- **连续特征**: age_years, bmi, ap_hi, ap_lo
|
||||
- **分类特征**: gender, cholesterol, gluc
|
||||
- **二元特征**: smoke, alco, active
|
||||
|
||||
**Top 5 重要特征**:
|
||||
1. 收缩压 (ap_hi)
|
||||
2. 极高胆固醇 (cholesterol_3)
|
||||
3. 年龄 (age_years)
|
||||
4. 舒张压 (ap_lo)
|
||||
5. 极高血糖 (gluc_3)
|
||||
|
||||
### Flask API服务
|
||||
|
||||
#### 1. 启动API服务
|
||||
```bash
|
||||
# 进入项目根目录
|
||||
cd D:\Project\PythonProject\AIcode\test
|
||||
|
||||
# 激活conda环境
|
||||
conda activate cardioenv
|
||||
|
||||
# 方法1: 直接运行Python脚本
|
||||
python module2_predictor/app.py
|
||||
|
||||
# 方法2: 使用Flask CLI
|
||||
set FLASK_APP=module2_predictor/app.py
|
||||
flask run --host=0.0.0.0 --port=5000
|
||||
|
||||
# 方法3: 使用conda直接运行
|
||||
"D:\software\anaconda\Scripts\conda.exe" run -n cardioenv python module2_predictor/app.py
|
||||
```
|
||||
|
||||
#### 2. API端点
|
||||
|
||||
| 端点 | 方法 | 描述 |
|
||||
|------|------|------|
|
||||
| `/` | GET | 前端预测界面 |
|
||||
| `/predict_cardio` | POST | 预测接口(接收JSON) |
|
||||
| `/health` | GET | 健康检查 |
|
||||
| `/model_info` | GET | 模型信息 |
|
||||
|
||||
#### 3. 预测接口示例
|
||||
|
||||
**请求**:
|
||||
```bash
|
||||
curl -X POST http://localhost:5000/predict_cardio \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"age": 20228,
|
||||
"gender": 1,
|
||||
"height": 156,
|
||||
"weight": 85,
|
||||
"ap_hi": 140,
|
||||
"ap_lo": 90,
|
||||
"cholesterol": 1,
|
||||
"gluc": 1,
|
||||
"smoke": 0,
|
||||
"alco": 0,
|
||||
"active": 1
|
||||
}'
|
||||
```
|
||||
|
||||
**响应**:
|
||||
```json
|
||||
{
|
||||
"success": true,
|
||||
"prediction": 1,
|
||||
"probability": 0.85,
|
||||
"risk_level": "高危",
|
||||
"message": "预测成功",
|
||||
"features": {
|
||||
"age_years": 55,
|
||||
"bmi": 34.9,
|
||||
"ap_hi": 140,
|
||||
"ap_lo": 90,
|
||||
"gender": 1,
|
||||
"cholesterol": 1,
|
||||
"gluc": 1,
|
||||
"smoke": 0,
|
||||
"alco": 0,
|
||||
"active": 1
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 前端界面
|
||||
|
||||
访问 `http://localhost:5000` 使用预测界面:
|
||||
|
||||
1. **输入表单**: 11个特征字段,包含验证和示例数据
|
||||
2. **实时预测**: 点击"开始预测"获取风险评估
|
||||
3. **结果展示**: 风险等级、概率、健康建议
|
||||
4. **示例数据**: 提供低、中、高风险示例数据
|
||||
|
||||
### 项目结构
|
||||
|
||||
```
|
||||
module2_predictor/
|
||||
├── app.py # Flask应用主程序
|
||||
├── train_and_save.py # 模型训练脚本(一次性)
|
||||
├── test_api.py # API测试脚本
|
||||
├── templates/
|
||||
│ └── index.html # 前端界面模板
|
||||
└── models/ # 模型文件目录(训练后生成)
|
||||
├── cardio_predictor_model.pkl
|
||||
└── feature_info.txt
|
||||
```
|
||||
|
||||
### 测试验证
|
||||
|
||||
#### 1. 测试模型加载
|
||||
```bash
|
||||
python module2_predictor/test_api.py
|
||||
```
|
||||
|
||||
#### 2. 测试API服务
|
||||
1. 启动Flask应用:`python module2_predictor/app.py`
|
||||
2. 打开浏览器访问:`http://localhost:5000`
|
||||
3. 使用示例数据测试预测功能
|
||||
4. 检查健康状态:`http://localhost:5000/health`
|
||||
|
||||
#### 3. 验证预测准确性
|
||||
- 测试集准确率:约73%
|
||||
- 特征重要性符合医学常识
|
||||
- 风险等级划分合理
|
||||
|
||||
### 配置说明
|
||||
|
||||
#### 模型参数
|
||||
- **算法**: XGBoost Classifier
|
||||
- **树数量**: 100
|
||||
- **最大深度**: 5
|
||||
- **学习率**: 0.1
|
||||
- **子采样率**: 0.8
|
||||
- **随机种子**: 42
|
||||
|
||||
#### 特征预处理
|
||||
- **连续特征**: StandardScaler标准化
|
||||
- **分类特征**: OneHotEncoder独热编码
|
||||
- **二元特征**: 直接使用(0/1)
|
||||
|
||||
### 性能指标
|
||||
|
||||
| 指标 | 训练集 | 测试集 |
|
||||
|------|--------|--------|
|
||||
| 准确率 | 74.21% | 73.14% |
|
||||
| 特征数量 | 10个 | 10个 |
|
||||
| 模型大小 | ~1.2 MB | ~1.2 MB |
|
||||
|
||||
### 注意事项
|
||||
|
||||
1. **模型更新**: 当数据变化时,重新运行训练脚本
|
||||
2. **输入验证**: API对输入数据有严格的范围验证
|
||||
3. **血压合理性**: 自动拒绝舒张压≥收缩压的输入
|
||||
4. **错误处理**: 详细的错误信息和日志记录
|
||||
5. **性能**: 单次预测时间 < 100ms
|
||||
|
||||
### 常见问题
|
||||
|
||||
#### Q1: 模型训练失败
|
||||
**症状**: 训练脚本报错或无法保存模型
|
||||
**解决**:
|
||||
1. 检查数据文件路径是否正确
|
||||
2. 确保有足够的磁盘空间
|
||||
3. 检查Python依赖包是否完整安装
|
||||
|
||||
#### Q2: Flask应用无法启动
|
||||
**症状**: 启动时出现导入错误或模型加载失败
|
||||
**解决**:
|
||||
1. 检查conda环境是否激活
|
||||
2. 确保模型文件存在:`module2_predictor/models/cardio_predictor_model.pkl`
|
||||
3. 检查端口5000是否被占用
|
||||
|
||||
#### Q3: 预测结果不合理
|
||||
**症状**: 预测概率总是0或1,或与预期不符
|
||||
**解决**:
|
||||
1. 检查输入数据是否在合理范围内
|
||||
2. 验证特征预处理是否正确
|
||||
3. 确保模型训练时使用了正确的特征
|
||||
|
||||
#### Q4: 前端界面无法访问
|
||||
**症状**: 浏览器显示连接错误
|
||||
**解决**:
|
||||
1. 确认Flask应用正在运行
|
||||
2. 检查防火墙设置,允许端口5000
|
||||
3. 尝试访问 `http://localhost:5000/health` 检查服务状态
|
||||
|
||||
### 下一步开发
|
||||
|
||||
1. **模型优化**: 尝试其他算法(LightGBM, CatBoost)和超参数调优
|
||||
2. **特征扩展**: 添加更多临床特征(家族史、药物治疗等)
|
||||
3. **API增强**: 添加批量预测、模型版本管理
|
||||
4. **监控告警**: 添加性能监控和异常告警
|
||||
5. **部署优化**: Docker容器化,云平台部署
|
||||
|
||||
---
|
||||
|
||||
### 常见问题(Module 1 数据可视化仪表板)
|
||||
|
||||
#### Q1: 数据加载失败
|
||||
**症状**: 应用程序无法启动,提示文件找不到或格式错误
|
||||
**解决**:
|
||||
1. 检查 `data/心血管疾病.xlsx` 文件是否存在
|
||||
2. 确认文件格式为Excel 2007+ (.xlsx)
|
||||
3. 检查文件编码,确保不是二进制损坏
|
||||
|
||||
#### Q2: 图表显示异常
|
||||
**症状**: 图表不显示或显示错误
|
||||
**解决**:
|
||||
1. 检查Plotly是否正确安装:`pip install plotly`
|
||||
2. 确保数据经过正确清洗,没有无限值或NaN
|
||||
3. 检查筛选条件是否过于严格导致无数据
|
||||
|
||||
#### Q3: 应用程序运行缓慢
|
||||
**症状**: 页面响应慢,筛选操作延迟
|
||||
**解决**:
|
||||
1. 利用 `@st.cache_data` 装饰器的缓存功能
|
||||
2. 减少一次性加载的数据量
|
||||
3. 优化图表复杂度,减少数据点数量
|
||||
|
||||
#### Q4: 中文显示乱码
|
||||
**症状**: 中文文字显示为乱码
|
||||
**解决**:
|
||||
1. 确保系统支持中文字体
|
||||
2. Streamlit默认支持UTF-8编码,检查源代码文件保存为UTF-8
|
||||
3. 在Windows系统上,设置控制台编码为UTF-8
|
||||
|
||||
### 开发说明
|
||||
|
||||
#### 项目结构
|
||||
```
|
||||
D:\Project\PythonProject\AIcode\test\
|
||||
├── data\ # 数据文件目录
|
||||
│ └── 心血管疾病.xlsx
|
||||
├── module1_dashboard\ # 数据可视化模块
|
||||
│ ├── cardio_dashboard.py
|
||||
│ └── test_data.py
|
||||
├── module2_predictor\ # 机器学习预测模块
|
||||
│ └── templates\
|
||||
├── module3_voice_assistant\ # AI语音助手模块
|
||||
│ └── templates\
|
||||
├── requirements.txt # Python依赖包
|
||||
├── .env # 环境配置
|
||||
└── README.md # 项目文档
|
||||
```
|
||||
|
||||
#### 代码结构
|
||||
- `load_and_process_data()`: 数据加载和清洗主函数,使用 `@st.cache_data` 缓存
|
||||
- `create_filters()`: 创建侧边栏筛选器组件
|
||||
- `apply_filters()`: 应用筛选条件到数据框
|
||||
- `display_metrics()`: 显示关键指标卡片
|
||||
- `create_visualizations()`: 创建所有可视化图表
|
||||
- `display_data_preview()`: 显示数据预览和摘要
|
||||
|
||||
#### 扩展开发
|
||||
|
||||
1. **添加新图表**
|
||||
```python
|
||||
def create_new_chart(df):
|
||||
fig = px.scatter(df, x='column1', y='column2', color='cardio_str')
|
||||
st.plotly_chart(fig, use_container_width=True)
|
||||
```
|
||||
|
||||
2. **添加新筛选器**
|
||||
```python
|
||||
# 在create_filters函数中添加
|
||||
new_filter = st.sidebar.selectbox("新筛选器", options=['选项1', '选项2'])
|
||||
```
|
||||
|
||||
3. **自定义样式**
|
||||
```python
|
||||
st.markdown("""
|
||||
<style>
|
||||
.custom-class { color: blue; }
|
||||
</style>
|
||||
""", unsafe_allow_html=True)
|
||||
```
|
||||
|
||||
### 性能优化建议
|
||||
|
||||
1. **数据缓存**: 所有数据处理函数使用 `@st.cache_data` 装饰器
|
||||
2. **增量加载**: 对于大型数据集,考虑分页或懒加载
|
||||
3. **图表优化**: 使用采样或聚合减少数据点数量
|
||||
4. **异步处理**: 长时间操作使用异步函数避免阻塞UI
|
||||
|
||||
### 下一步计划
|
||||
|
||||
1. **Module 2开发**: 实现XGBoost预测模型和Flask API(已完成,详见上文 Module 2 章节)
|
||||
2. **Module 3开发**: 集成DeepSeek和CosyVoice语音助手
|
||||
3. **功能增强**: 添加数据导出、报告生成功能
|
||||
4. **部署优化**: Docker容器化,云平台部署
|
||||
|
||||
### 技术支持
|
||||
|
||||
- **问题反馈**: 检查GitHub Issues或联系开发团队
|
||||
- **文档更新**: 随着功能迭代保持文档同步
|
||||
- **版本管理**: 使用Git进行版本控制,定期发布稳定版本
|
||||
|
||||
---
|
||||
|
||||
**版权所有 © 2024 CardioAI项目组**
|
||||
**版本**: 1.0.0
|
||||
**最后更新**: 2024-04-02
|
||||
BIN
test/data/心血管疾病.xlsx
Normal file
BIN
test/data/心血管疾病.xlsx
Normal file
Binary file not shown.
Binary file not shown.
581
test/module1_dashboard/cardio_dashboard.py
Normal file
581
test/module1_dashboard/cardio_dashboard.py
Normal file
@@ -0,0 +1,581 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
CardioAI - 心血管疾病智能辅助系统
|
||||
数据可视化仪表板模块
|
||||
|
||||
功能:
|
||||
1. 数据加载与清洗
|
||||
2. 特征工程(年龄转换、BMI计算、类别转换)
|
||||
3. 交互式数据筛选
|
||||
4. 可视化分析(Plotly图表)
|
||||
"""
|
||||
|
||||
import streamlit as st
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import plotly.express as px
|
||||
import plotly.graph_objects as go
|
||||
from pathlib import Path
|
||||
import sys
|
||||
import os
|
||||
|
||||
# 设置页面配置
|
||||
st.set_page_config(
|
||||
page_title="CardioAI - 心血管疾病分析仪表板",
|
||||
page_icon="❤️",
|
||||
layout="wide",
|
||||
initial_sidebar_state="expanded"
|
||||
)
|
||||
|
||||
# 添加项目根目录到Python路径,确保可以导入其他模块
|
||||
project_root = Path(__file__).parent.parent
|
||||
sys.path.append(str(project_root))
|
||||
|
||||
# Inject custom CSS for the page headers, metric cards and buttons
|
||||
st.markdown("""
|
||||
<style>
|
||||
.main-header {
|
||||
font-size: 2.5rem;
|
||||
color: #e63946;
|
||||
text-align: center;
|
||||
margin-bottom: 2rem;
|
||||
font-weight: bold;
|
||||
}
|
||||
.sub-header {
|
||||
font-size: 1.5rem;
|
||||
color: #457b9d;
|
||||
margin-top: 1.5rem;
|
||||
margin-bottom: 1rem;
|
||||
font-weight: bold;
|
||||
}
|
||||
.metric-card {
|
||||
background-color: #f1faee;
|
||||
padding: 1.5rem;
|
||||
border-radius: 10px;
|
||||
border-left: 5px solid #1d3557;
|
||||
margin-bottom: 1rem;
|
||||
}
|
||||
.stButton>button {
|
||||
background-color: #1d3557;
|
||||
color: white;
|
||||
border: none;
|
||||
padding: 0.5rem 1rem;
|
||||
border-radius: 5px;
|
||||
}
|
||||
</style>
|
||||
""", unsafe_allow_html=True)
|
||||
|
||||
# 数据文件路径
|
||||
DATA_PATH = Path(__file__).parent.parent / "data" / "心血管疾病.xlsx"
|
||||
|
||||
@st.cache_data(show_spinner="正在加载和清洗数据...")
def load_and_process_data():
    """
    Load the Excel dataset at DATA_PATH, clean it and derive analysis features.

    Processing steps:
        1. Verify that all required columns are present.
        2. Derive ``age_years`` from ``age`` (days / 365.25, rounded to int).
        3. Derive ``bmi`` from ``weight`` (kg) and ``height`` (cm), rounded
           to 2 decimals.
        4. Drop rows where ``ap_lo >= ap_hi``, then rows whose ``ap_hi`` is
           outside [90, 250] or whose ``ap_lo`` is outside [60, 150].
        5. Map the coded ``cholesterol``, ``gluc``, ``gender`` and ``cardio``
           columns to display labels stored in ``*_str`` columns.
        6. Bucket ``bmi`` into ``bmi_category``.

    Returns:
        pd.DataFrame: the processed frame, or an empty DataFrame when required
        columns are missing or any exception is raised during processing.
    """
    try:
        # Load the raw data
        st.info(f"正在从 {DATA_PATH} 加载数据...")
        df = pd.read_excel(DATA_PATH)

        # Bail out early if the sheet lacks any column the pipeline needs
        required_columns = ['age', 'gender', 'height', 'weight', 'ap_hi', 'ap_lo',
                            'cholesterol', 'gluc', 'cardio']
        missing_columns = [col for col in required_columns if col not in df.columns]
        if missing_columns:
            st.error(f"数据文件中缺少必要列: {missing_columns}")
            return pd.DataFrame()

        # Work on a copy so the raw frame stays available for the final summary
        df_processed = df.copy()

        # 1. Age conversion: days -> years (rounded)
        df_processed['age_years'] = (df_processed['age'] / 365.25).round().astype(int)

        # 2. BMI = weight(kg) / height(m)^2; height is stored in centimetres
        df_processed['bmi'] = df_processed['weight'] / ((df_processed['height'] / 100) ** 2)
        df_processed['bmi'] = df_processed['bmi'].round(2)

        # 3. Outlier handling
        # Drop records whose diastolic pressure is >= systolic pressure
        invalid_bp = df_processed['ap_lo'] >= df_processed['ap_hi']
        if invalid_bp.any():
            st.warning(f"删除 {invalid_bp.sum()} 条舒张压 >= 收缩压的异常记录")
            df_processed = df_processed[~invalid_bp].copy()

        # Drop extreme blood-pressure values:
        # systolic must be in [90, 250], diastolic in [60, 150]
        bp_outliers = ~((df_processed['ap_hi'] >= 90) & (df_processed['ap_hi'] <= 250) &
                        (df_processed['ap_lo'] >= 60) & (df_processed['ap_lo'] <= 150))
        if bp_outliers.any():
            st.warning(f"删除 {bp_outliers.sum()} 条血压极端异常值记录")
            df_processed = df_processed[~bp_outliers].copy()

        # 4. Code -> label conversions
        cholesterol_map = {
            1: "正常",
            2: "高于正常",
            3: "极高"
        }
        df_processed['cholesterol_str'] = df_processed['cholesterol'].map(cholesterol_map)

        gluc_map = {
            1: "正常",
            2: "高于正常",
            3: "极高"
        }
        df_processed['gluc_str'] = df_processed['gluc'].map(gluc_map)

        gender_map = {
            1: "女性",
            2: "男性"
        }
        df_processed['gender_str'] = df_processed['gender'].map(gender_map)

        cardio_map = {
            0: "无心血管疾病",
            1: "有心血管疾病"
        }
        df_processed['cardio_str'] = df_processed['cardio'].map(cardio_map)

        # 5. BMI bucketing
        def categorize_bmi(bmi):
            # Thresholds are contiguous on purpose: the previous upper bounds
            # of 24.9 and 29.9 left gaps, so values such as 24.95 or 29.95
            # fell through to the final branch and were labelled obese.
            if bmi < 18.5:
                return "偏瘦"
            elif bmi < 25:
                return "正常"
            elif bmi < 30:
                return "超重"
            else:
                return "肥胖"

        df_processed['bmi_category'] = df_processed['bmi'].apply(categorize_bmi)

        # Report how many rows survived cleaning
        st.success(f"数据加载和清洗完成!共处理 {len(df_processed)} 条记录")
        st.info(f"原始数据: {len(df)} 条记录, 清洗后: {len(df_processed)} 条记录")

        return df_processed

    except Exception as e:
        # Any failure is reported in the UI and callers receive an empty frame
        st.error(f"数据加载失败: {str(e)}")
        return pd.DataFrame()
|
||||
|
||||
def create_filters(df):
    """
    Render the sidebar filter widgets and collect the user's selections.

    Args:
        df: processed DataFrame; the columns ``age_years``, ``gender_str``,
            ``cardio_str``, ``bmi_category``, ``cholesterol_str`` and
            ``gluc_str`` are read to build the widget options.

    Returns:
        dict: the slider value under ``'age_range'`` and the multiselect
        values under ``'genders'``, ``'cardio'``, ``'bmi_categories'``,
        ``'cholesterol'`` and ``'gluc'``.
    """
    sidebar = st.sidebar
    sidebar.markdown("## 🔍 数据筛选")

    # Age slider spans the full observed range and starts fully open
    youngest = int(df['age_years'].min())
    oldest = int(df['age_years'].max())
    selections = {
        'age_range': sidebar.slider(
            "选择年龄范围:",
            min_value=youngest,
            max_value=oldest,
            value=(youngest, oldest),
            help="筛选指定年龄范围内的记录"
        )
    }

    def pick_from_column(column, label, tooltip):
        # Multiselect over a column's distinct values, all preselected
        choices = df[column].unique().tolist()
        return sidebar.multiselect(
            label,
            options=choices,
            default=choices,
            help=tooltip
        )

    # (result key, source column, widget label, help text), in display order
    multiselect_specs = (
        ('genders', 'gender_str', "选择性别:", "选择要分析的性别"),
        ('cardio', 'cardio_str', "选择心血管疾病状态:", "选择要分析的心血管疾病状态"),
        ('bmi_categories', 'bmi_category', "选择BMI分类:", "选择要分析的BMI分类"),
        ('cholesterol', 'cholesterol_str', "选择胆固醇水平:", "选择要分析的胆固醇水平"),
        ('gluc', 'gluc_str', "选择血糖水平:", "选择要分析的血糖水平"),
    )
    for key, column, label, tooltip in multiselect_specs:
        selections[key] = pick_from_column(column, label, tooltip)

    return selections
|
||||
|
||||
def apply_filters(df, filters):
    """
    Return the rows of ``df`` that satisfy every active filter.

    Args:
        df: DataFrame with ``age_years``, ``gender_str``, ``cardio_str``,
            ``bmi_category``, ``cholesterol_str`` and ``gluc_str`` columns.
        filters: dict with ``'age_range'`` (indexable, [0]=min, [1]=max) and
            the list-valued keys ``'genders'``, ``'cardio'``,
            ``'bmi_categories'``, ``'cholesterol'`` and ``'gluc'``.

    Returns:
        pd.DataFrame: the matching rows (original index labels preserved).
        An empty list for a category means "do not filter on it".
    """
    lower_age = filters['age_range'][0]
    upper_age = filters['age_range'][1]

    # The age bounds are inclusive and always applied
    keep = (df['age_years'] >= lower_age) & (df['age_years'] <= upper_age)

    # (column in df, key in filters) pairs for the membership filters
    membership_rules = (
        ('gender_str', 'genders'),
        ('cardio_str', 'cardio'),
        ('bmi_category', 'bmi_categories'),
        ('cholesterol_str', 'cholesterol'),
        ('gluc_str', 'gluc'),
    )
    for column, key in membership_rules:
        allowed = filters[key]
        if allowed:
            keep = keep & df[column].isin(allowed)

    return df[keep]
|
||||
|
||||
def display_metrics(filtered_df, original_df):
    """
    Show four headline metrics side by side for the current selection.

    Args:
        filtered_df: DataFrame after filtering; ``cardio``, ``age_years`` and
            ``bmi`` columns are read when it has at least one row.
        original_df: unfiltered DataFrame, used only for the row-count delta.
    """
    count_col, risk_col, age_col, bmi_col = st.columns(4)
    row_count = len(filtered_df)
    has_rows = row_count > 0

    with count_col:
        # Delta is the signed difference from the unfiltered row count
        row_delta = row_count - len(original_df)
        st.metric(
            label="筛选后记录数",
            value=f"{row_count:,}",
            delta=f"{row_delta:+,}"
        )

    with risk_col:
        # Share of rows with cardio == 1, as a percentage
        if not has_rows:
            st.metric(label="心血管疾病风险率", value="N/A")
        else:
            risk_rate = (filtered_df['cardio'].sum() / row_count * 100).round(2)
            st.metric(
                label="心血管疾病风险率",
                value=f"{risk_rate}%",
                help="当前筛选条件下心血管疾病患者比例"
            )

    with age_col:
        # Mean age in years
        if not has_rows:
            st.metric(label="平均年龄", value="N/A")
        else:
            avg_age = filtered_df['age_years'].mean().round(1)
            st.metric(
                label="平均年龄",
                value=f"{avg_age} 岁",
                help="当前筛选条件下的平均年龄"
            )

    with bmi_col:
        # Mean BMI
        if not has_rows:
            st.metric(label="平均BMI", value="N/A")
        else:
            avg_bmi = filtered_df['bmi'].mean().round(1)
            st.metric(
                label="平均BMI",
                value=str(avg_bmi),
                help="当前筛选条件下的平均身体质量指数"
            )
|
||||
|
||||
def create_visualizations(df):
    """
    Render the chart section of the dashboard for the given data.

    Shows a warning and returns early when ``df`` has no rows. Otherwise it
    draws, in order: an age histogram split by cardio status, a stacked bar
    chart of cardio-status proportions per BMI category, a blood-pressure
    scatter plot, and two pie charts (cholesterol and glucose levels).

    Args:
        df: DataFrame to visualise. The code reads the columns ``age_years``,
            ``cardio_str``, ``bmi_category``, ``ap_hi``, ``ap_lo``, ``bmi``,
            ``gender_str``, ``cholesterol_str`` and ``gluc_str``.
    """
    if len(df) == 0:
        st.warning("没有可用的数据进行可视化")
        return

    def status_colors():
        # A fresh mapping per chart, so no figure shares a dict with another.
        return {
            "有心血管疾病": "#e63946",
            "无心血管疾病": "#457b9d"
        }

    def draw_level_pie(source_column, level_column, caption, palette):
        # Pie chart of value counts for one categorical column; the caption
        # is used both as the section heading and as the chart title.
        st.markdown(f"### {caption}")
        counts = df[source_column].value_counts().reset_index()
        counts.columns = [level_column, 'count']

        pie = px.pie(
            counts,
            values='count',
            names=level_column,
            title=caption,
            color_discrete_sequence=palette
        )
        pie.update_traces(textposition='inside', textinfo='percent+label')
        st.plotly_chart(pie, use_container_width=True)

    st.markdown("## 📊 数据可视化分析")

    # Charts 1 and 2 sit side by side.
    age_panel, bmi_panel = st.columns(2)

    with age_panel:
        st.markdown("### 年龄分布分析")
        age_labels = {
            'age_years': '年龄(岁)',
            'cardio_str': '心血管疾病状态',
            'count': '人数'
        }
        fig_age = px.histogram(
            df,
            x='age_years',
            color='cardio_str',
            nbins=30,
            barmode='overlay',
            opacity=0.7,
            labels=age_labels,
            title="年龄分布与心血管疾病关系",
            color_discrete_map=status_colors()
        )
        fig_age.update_layout(
            legend_title="疾病状态",
            hovermode='x unified'
        )
        st.plotly_chart(fig_age, use_container_width=True)

    with bmi_panel:
        st.markdown("### BMI分类与心血管疾病关系")
        # Row-normalised cross table: each BMI category sums to 1.
        share_table = pd.crosstab(
            df['bmi_category'],
            df['cardio_str'],
            normalize='index'
        )
        share_table = share_table.reset_index()

        # Long format: one row per (BMI category, cardio status) pair.
        share_long = share_table.melt(
            id_vars='bmi_category',
            var_name='cardio_status',
            value_name='proportion'
        )

        bmi_labels = {
            'bmi_category': 'BMI分类',
            'proportion': '比例',
            'cardio_status': '心血管疾病状态'
        }
        fig_bmi = px.bar(
            share_long,
            x='bmi_category',
            y='proportion',
            color='cardio_status',
            barmode='stack',
            labels=bmi_labels,
            title="BMI分类对心血管疾病的影响",
            color_discrete_map=status_colors()
        )
        fig_bmi.update_layout(
            yaxis_tickformat='.1%',
            legend_title="疾病状态"
        )
        st.plotly_chart(fig_bmi, use_container_width=True)

    # Chart 3: full-width blood-pressure scatter, marker size driven by BMI.
    st.markdown("### 血压关系分析")
    bp_labels = {
        'ap_hi': '收缩压 (mmHg)',
        'ap_lo': '舒张压 (mmHg)',
        'cardio_str': '心血管疾病状态',
        'bmi': 'BMI'
    }
    fig_bp = px.scatter(
        df,
        x='ap_hi',
        y='ap_lo',
        color='cardio_str',
        size='bmi',
        hover_data=['age_years', 'gender_str', 'cholesterol_str'],
        labels=bp_labels,
        title="血压关系散点图",
        color_discrete_map=status_colors()
    )
    fig_bp.update_layout(legend_title="疾病状态")
    st.plotly_chart(fig_bp, use_container_width=True)

    # Chart 4: cholesterol and glucose distributions side by side.
    chol_panel, gluc_panel = st.columns(2)

    with chol_panel:
        draw_level_pie(
            'cholesterol_str',
            'cholesterol_level',
            "胆固醇水平分布",
            px.colors.sequential.RdBu
        )

    with gluc_panel:
        draw_level_pie(
            'gluc_str',
            'gluc_level',
            "血糖水平分布",
            px.colors.sequential.Blues
        )
|
||||
|
||||
def display_data_preview(df):
    """
    Render the data-preview section: a summary expander and a table expander.

    The summary shows the frame's shape, per-column dtypes and the columns
    that contain missing values. The table expander lets the user pick
    columns and shows at most the first 100 rows of the selection.

    Args:
        df: DataFrame to preview.
    """
    preview_limit = 100

    st.markdown("## 📋 数据预览")

    # Summary: shape and dtypes on the left, missing-value counts on the right.
    with st.expander("数据摘要", expanded=False):
        col1, col2 = st.columns(2)
        with col1:
            st.write("**数据形状:**", df.shape)
            st.write("**数据类型:**")
            st.write(df.dtypes.astype(str).reset_index().rename(columns={0: '类型', 'index': '列名'}))

        with col2:
            st.write("**缺失值统计:**")
            missing = df.isnull().sum().reset_index()
            missing.columns = ['列名', '缺失值数量']
            # Only list columns that actually have missing values.
            missing = missing[missing['缺失值数量'] > 0]
            if len(missing) > 0:
                st.write(missing)
            else:
                st.write("无缺失值")

    # Raw data table with a user-selectable column set.
    with st.expander("查看原始数据", expanded=False):
        available_columns = df.columns.tolist()
        preferred_columns = ['age_years', 'gender_str', 'bmi', 'bmi_category',
                             'ap_hi', 'ap_lo', 'cholesterol_str', 'gluc_str', 'cardio_str']
        # st.multiselect rejects default values that are not among the
        # options, so keep only the preferred columns present in this frame.
        default_columns = [col for col in preferred_columns if col in available_columns]

        selected_columns = st.multiselect(
            "选择要显示的列:",
            options=available_columns,
            default=default_columns
        )

        if selected_columns:
            display_df = df[selected_columns].copy()
            st.dataframe(display_df.head(preview_limit), use_container_width=True)
            # Report the number of rows really shown; a filtered frame may
            # hold fewer rows than the preview limit.
            shown_rows = min(preview_limit, len(df))
            st.caption(f"显示前 {shown_rows} 行(共 {len(df)} 行)")
        else:
            st.info("请选择要显示的列")
|
||||
|
||||
def main():
    """
    Entry point of the dashboard page.

    Renders the title, loads the data set, builds and applies the filters,
    then shows the key metrics, the data preview and the charts, and finally
    fills the sidebar with usage notes and a description of the data.
    Stops after an error message when the loaded frame is empty.
    """
    # Page title followed by a horizontal rule.
    title_html = '<h1 class="main-header">❤️ CardioAI - 心血管疾病智能分析仪表板</h1>'
    st.markdown(title_html, unsafe_allow_html=True)
    st.markdown("---")

    # Load the data while a spinner is displayed.
    with st.spinner("正在加载数据,请稍候..."):
        df = load_and_process_data()

    if df.empty:
        st.error("数据加载失败,请检查数据文件路径和格式")
        return

    # Build the filter widgets, then narrow the data to the selection.
    selection = create_filters(df)
    subset = apply_filters(df, selection)

    # Key metrics compare the filtered subset against the full data set.
    st.markdown("## 📈 关键指标")
    display_metrics(subset, df)

    # Preview table and charts both work on the filtered subset.
    display_data_preview(subset)
    create_visualizations(subset)

    # Sidebar: usage instructions.
    usage_notes = """
    1. 使用左侧筛选器选择要分析的数据子集
    2. 查看上方的关键指标了解数据概况
    3. 探索下方的可视化图表分析趋势和关系
    4. 点击数据预览查看详细数据
    """
    st.sidebar.markdown("---")
    st.sidebar.markdown("## ℹ️ 使用说明")
    st.sidebar.info(usage_notes)

    # Sidebar: description of the data fields.
    data_notes = """
    - **年龄**: 原始数据为天数,已转换为岁数
    - **BMI**: 身体质量指数,计算公式:体重(kg)/身高(m)²
    - **血压**: 收缩压(ap_hi)和舒张压(ap_lo)
    - **胆固醇/血糖**: 1=正常, 2=高于正常, 3=极高
    - **心血管疾病**: 0=无, 1=有
    """
    st.sidebar.markdown("## 📊 数据说明")
    st.sidebar.info(data_notes)
|
||||
|
||||
# Run the dashboard only when this file is executed as the main module.
if __name__ == "__main__":
    main()
|
||||
84
test/module1_dashboard/test_data.py
Normal file
84
test/module1_dashboard/test_data.py
Normal file
@@ -0,0 +1,84 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
测试数据加载和处理的脚本
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
|
||||
# 添加父目录到路径
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
# 导入数据处理函数
|
||||
from cardio_dashboard import load_and_process_data
|
||||
|
||||
def test_data_loading():
    """
    Smoke-test the data loading and cleaning step of the dashboard.

    Loads the data through ``load_and_process_data``, checks that the
    expected columns exist, prints value ranges and category counts, and
    verifies the blood-pressure constraints (diastolic below systolic,
    ap_hi within 90-250, ap_lo within 60-150).

    Returns:
        bool: True when every check passes; False on the first failed check
        or when any exception is raised (the traceback is printed).
    """
    print("开始测试数据加载和清洗...")

    try:
        df = load_and_process_data()

        if df.empty:
            print("❌ 数据加载失败:返回空数据框")
            return False

        print(f"✅ 数据加载成功!共 {len(df)} 条记录")

        # Every column the dashboard relies on must be present.
        required_columns = ['age_years', 'bmi', 'bmi_category', 'cholesterol_str',
                            'gluc_str', 'gender_str', 'cardio_str', 'ap_hi', 'ap_lo']
        missing_columns = []
        for name in required_columns:
            if name not in df.columns:
                missing_columns.append(name)

        if missing_columns:
            print(f"❌ 缺少必要的列: {missing_columns}")
            return False

        print("✅ 所有必要的列都存在")

        # Numeric ranges, printed for manual inspection.
        age, bmi = df['age_years'], df['bmi']
        systolic, diastolic = df['ap_hi'], df['ap_lo']
        print("\n数据摘要:")
        print(f"- 年龄范围: {age.min()} ~ {age.max()} 岁")
        print(f"- BMI范围: {bmi.min():.1f} ~ {bmi.max():.1f}")
        print(f"- 收缩压范围: {systolic.min()} ~ {systolic.max()} mmHg")
        print(f"- 舒张压范围: {diastolic.min()} ~ {diastolic.max()} mmHg")

        # Category distributions of the derived label columns.
        print("\n类别分布:")
        print(f"- 性别: {df['gender_str'].value_counts().to_dict()}")
        print(f"- 心血管疾病: {df['cardio_str'].value_counts().to_dict()}")
        print(f"- BMI分类: {df['bmi_category'].value_counts().to_dict()}")
        print(f"- 胆固醇水平: {df['cholesterol_str'].value_counts().to_dict()}")
        print(f"- 血糖水平: {df['gluc_str'].value_counts().to_dict()}")

        # No row may have diastolic pressure at or above systolic pressure.
        inverted_bp = diastolic >= systolic
        if inverted_bp.any():
            print(f"❌ 仍然存在舒张压 >= 收缩压的记录: {inverted_bp.sum()} 条")
            return False
        print("✅ 已成功删除舒张压 >= 收缩压的记录")

        # Both pressures must lie inside their accepted ranges.
        systolic_ok = (systolic >= 90) & (systolic <= 250)
        diastolic_ok = (diastolic >= 60) & (diastolic <= 150)
        within_limits = systolic_ok & diastolic_ok
        if not within_limits.all():
            print(f"❌ 仍然存在血压异常值: {(~within_limits).sum()} 条")
            return False
        print("✅ 所有血压值都在正常范围内")

        print("\n🎉 所有测试通过!")
        return True

    except Exception as e:
        print(f"❌ 测试过程中发生错误: {str(e)}")
        import traceback
        traceback.print_exc()
        return False
|
||||
|
||||
# Script entry point: exit status is 0 when test_data_loading() returns True
# and 1 otherwise.
if __name__ == "__main__":
    success = test_data_loading()
    sys.exit(0 if success else 1)
|
||||
BIN
test/module2_predictor/__pycache__/app.cpython-310.pyc
Normal file
BIN
test/module2_predictor/__pycache__/app.cpython-310.pyc
Normal file
Binary file not shown.
Binary file not shown.
396
test/module2_predictor/app.py
Normal file
396
test/module2_predictor/app.py
Normal file
@@ -0,0 +1,396 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
CardioAI - 心血管疾病预测API服务
|
||||
|
||||
功能:
|
||||
1. 加载预训练的机器学习模型
|
||||
2. 提供RESTful API接口
|
||||
3. 接收原始特征值并返回预测结果
|
||||
4. 提供Web前端界面
|
||||
|
||||
启动方式:
|
||||
conda activate cardioenv
|
||||
python app.py
|
||||
或
|
||||
flask run
|
||||
"""
|
||||
|
||||
from flask import Flask, request, jsonify, render_template, send_from_directory
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import joblib
|
||||
import logging
|
||||
from pathlib import Path
|
||||
import sys
|
||||
import os
|
||||
import traceback
|
||||
|
||||
# Append the parent of this file's directory to the import path.
project_root = Path(__file__).parent.parent
sys.path.append(str(project_root))

# Logging configuration shared by the whole service.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# Create the Flask application.
app = Flask(__name__)
# Emit non-ASCII (Chinese) characters verbatim in JSON responses.
# Flask >= 2.2 exposes this on the JSON provider and Flask >= 2.3 ignores the
# old JSON_AS_ASCII config key, so prefer the provider when it exists.
if hasattr(app, 'json'):
    app.json.ensure_ascii = False
else:
    app.config['JSON_AS_ASCII'] = False

# Module-level state populated by load_model().
model_data = None
feature_names = None
pipeline = None
|
||||
|
||||
def load_model():
    """
    Load the pre-trained model bundle from ``models/cardio_predictor_model.pkl``.

    The bundle is read with ``joblib.load``; its ``'pipeline'`` entry is
    required and ``'feature_names'`` is optional (defaults to an empty list).
    The module globals ``model_data``, ``pipeline`` and ``feature_names`` are
    assigned together, and only after the whole bundle was read successfully,
    so a failed load never leaves them half-updated.

    Returns:
        bool: True when the model was loaded, False on any error (the error
        and its traceback are logged).
    """
    global model_data, feature_names, pipeline

    try:
        # The model file lives next to this module, under "models".
        model_dir = Path(__file__).parent / "models"
        model_path = model_dir / "cardio_predictor_model.pkl"

        if not model_path.exists():
            logger.error(f"模型文件不存在: {model_path}")
            raise FileNotFoundError(f"模型文件不存在: {model_path}")

        logger.info(f"正在加载模型: {model_path}")
        # NOTE(security): joblib.load unpickles the file, which can execute
        # arbitrary code. Only load model files from a trusted source.
        loaded_bundle = joblib.load(model_path)

        # Extract everything into locals first; a missing 'pipeline' key
        # raises here, before any global has been touched.
        loaded_pipeline = loaded_bundle['pipeline']
        loaded_features = loaded_bundle.get('feature_names', [])

        model_data = loaded_bundle
        pipeline = loaded_pipeline
        feature_names = loaded_features

        logger.info(f"模型加载成功!版本: {model_data.get('model_version', '未知')}")
        logger.info(f"特征数量: {len(feature_names)}")
        logger.info(f"特征列表: {feature_names}")

        return True

    except Exception as e:
        logger.error(f"模型加载失败: {str(e)}")
        logger.error(traceback.format_exc())
        return False
|
||||
|
||||
def preprocess_input(input_data):
    """
    Turn one raw input record into the single-row feature frame for the model.

    Derives ``age_years`` from ``age`` (days / 365.25, rounded) or takes a
    supplied ``age_years``; derives ``bmi`` from ``height`` (cm) and
    ``weight`` (kg) or takes a supplied ``bmi``; then selects the ten model
    features in a fixed order.

    Args:
        input_data: dict of raw feature values.

    Returns:
        pd.DataFrame: one row with the columns age_years, bmi, ap_hi, ap_lo,
        gender, cholesterol, gluc, smoke, alco, active.

    Raises:
        ValueError: when age, BMI inputs or any required feature is missing.
        Any exception is logged and re-raised.
    """
    try:
        frame = pd.DataFrame([input_data])
        supplied = set(frame.columns)

        # Step 1: age in whole years.
        if 'age' in supplied:
            years = frame['age'] / 365.25
            frame['age_years'] = years.round().astype(int)
        elif 'age_years' in supplied:
            # An already converted age is used as given.
            frame['age_years'] = frame['age_years'].astype(int)
        else:
            raise ValueError("输入数据中必须包含'age'或'age_years'字段")

        # Step 2: BMI = weight(kg) / height(m)^2, two decimals.
        if {'height', 'weight'} <= supplied:
            height_m = frame['height'] / 100
            frame['bmi'] = (frame['weight'] / (height_m ** 2)).round(2)
        elif 'bmi' in supplied:
            # A supplied BMI is used as given.
            frame['bmi'] = frame['bmi'].astype(float)
        else:
            raise ValueError("输入数据中必须包含'height'和'weight'字段或'bmi'字段")

        # Step 3: every model feature must now be present.
        required_features = ['age_years', 'bmi', 'ap_hi', 'ap_lo',
                             'gender', 'cholesterol', 'gluc',
                             'smoke', 'alco', 'active']
        missing_features = []
        for feature in required_features:
            if feature not in frame.columns:
                missing_features.append(feature)
        if missing_features:
            raise ValueError(f"缺少必要特征: {missing_features}")

        # Step 4: keep only the model features, in this fixed order.
        processed_df = frame[required_features].copy()

        logger.debug(f"预处理后的特征数据框:\n{processed_df}")
        return processed_df

    except Exception as e:
        logger.error(f"数据预处理失败: {str(e)}")
        raise
|
||||
|
||||
def validate_input(input_data):
    """
    Validate one raw prediction request.

    Checks that ``input_data`` is a dict containing all eleven raw fields,
    that every value is numeric (non-numeric values are converted with
    ``float`` and written back into ``input_data``), that each value lies in
    its allowed range, that the categorical fields hold whole numbers, and
    that diastolic pressure is below systolic pressure.

    Args:
        input_data: dict of raw feature values. It is modified in place:
            values that are not int/float are replaced by their float
            conversion.

    Returns:
        tuple: ``(is_valid, message)`` where ``message`` describes the first
        problem found, or confirms validity.
    """
    try:
        # A JSON array, string or null can never carry the named fields.
        if not isinstance(input_data, dict):
            return False, "输入数据必须是JSON对象"

        required_fields = ['age', 'gender', 'height', 'weight',
                           'ap_hi', 'ap_lo', 'cholesterol', 'gluc',
                           'smoke', 'alco', 'active']

        missing_fields = [f for f in required_fields if f not in input_data]
        if missing_fields:
            return False, f"缺少必需字段: {missing_fields}"

        # Coerce non-numeric values (e.g. numeric strings) to float.
        for field in required_fields:
            value = input_data[field]
            if not isinstance(value, (int, float)):
                try:
                    input_data[field] = float(value)
                except (TypeError, ValueError):
                    # TypeError covers None, lists and nested objects, so the
                    # caller is told which field is wrong in those cases too.
                    return False, f"字段'{field}'必须为数值类型,当前值: {value}"

        # (field, minimum, maximum), both bounds inclusive.
        validations = [
            ('age', 0, 365*150),      # age in days: 0-150 years
            ('gender', 1, 2),         # gender: 1 or 2
            ('height', 100, 250),     # height in cm
            ('weight', 20, 300),      # weight in kg
            ('ap_hi', 50, 300),       # systolic pressure
            ('ap_lo', 30, 200),       # diastolic pressure
            ('cholesterol', 1, 3),    # cholesterol level: 1-3
            ('gluc', 1, 3),           # glucose level: 1-3
            ('smoke', 0, 1),          # smoking: 0 or 1
            ('alco', 0, 1),           # alcohol: 0 or 1
            ('active', 0, 1)          # physical activity: 0 or 1
        ]
        # Coded fields: a fractional value such as 1.5 is inside the range
        # but is not a valid code, and would be truncated later by int().
        integer_fields = {'gender', 'cholesterol', 'gluc', 'smoke', 'alco', 'active'}

        for field, min_val, max_val in validations:
            value = input_data[field]
            # NaN fails this comparison and is rejected here as well.
            if not (min_val <= value <= max_val):
                return False, f"字段'{field}'的值{value}超出有效范围[{min_val}, {max_val}]"
            if field in integer_fields and not float(value).is_integer():
                return False, f"字段'{field}'的值{value}必须为整数"

        # Diastolic pressure must be strictly below systolic pressure.
        if input_data['ap_lo'] >= input_data['ap_hi']:
            return False, "舒张压不能高于或等于收缩压"

        return True, "输入数据有效"

    except Exception as e:
        return False, f"输入数据验证失败: {str(e)}"
|
||||
|
||||
@app.route('/')
def index():
    """Home page: render and return the ``index.html`` front-end template."""
    page = render_template('index.html')
    return page
|
||||
|
||||
@app.route('/predict_cardio', methods=['POST'])
def predict_cardio():
    """
    Cardiovascular disease prediction endpoint.

    Request body (JSON object):
        {
            "age": 20228,        age in days
            "gender": 1,         1 = female, 2 = male
            "height": 156,       height in cm
            "weight": 85,        weight in kg
            "ap_hi": 140,        systolic pressure (mmHg)
            "ap_lo": 90,         diastolic pressure (mmHg)
            "cholesterol": 1,    1 = normal, 2 = above normal, 3 = well above normal
            "gluc": 1,           1 = normal, 2 = above normal, 3 = well above normal
            "smoke": 0,          0 = no, 1 = yes
            "alco": 0,           0 = no, 1 = yes
            "active": 1          0 = no, 1 = yes
        }

    Successful response (JSON, HTTP 200):
        {
            "success": true,
            "prediction": 1,
            "probability": 0.85,
            "risk_level": "高危",
            "message": "预测成功",
            "features": { "age_years": 55, "bmi": 34.9, ... }
        }

    Error responses carry ``"success": false`` and a ``"message"``:
        503 when the model is not loaded, 400 for a non-JSON request, an
        unparsable or non-object JSON body, or failed validation, and 500
        for any unexpected error during preprocessing or prediction.
    """
    try:
        # The model must be available before anything else is attempted.
        if pipeline is None:
            return jsonify({
                "success": False,
                "message": "模型未加载,请等待或联系管理员"
            }), 503

        if not request.is_json:
            return jsonify({
                "success": False,
                "message": "请求必须是JSON格式"
            }), 400

        # silent=True returns None for a malformed body instead of raising;
        # the raised error would otherwise be caught by the handler below
        # and reported as a 500 although the client is at fault.
        input_data = request.get_json(silent=True)
        if not isinstance(input_data, dict):
            return jsonify({
                "success": False,
                "message": "请求体必须是有效的JSON对象"
            }), 400

        logger.info(f"收到预测请求: {input_data}")

        # Validate (this also converts numeric strings in place).
        is_valid, error_message = validate_input(input_data)
        if not is_valid:
            return jsonify({
                "success": False,
                "message": error_message
            }), 400

        # Build the model feature row.
        processed_df = preprocess_input(input_data)

        # Class prediction plus the probability of class 1.
        prediction = pipeline.predict(processed_df)[0]
        probability = pipeline.predict_proba(processed_df)[0][1]

        # Risk bands: < 0.3 low, < 0.6 medium, otherwise high.
        if probability < 0.3:
            risk_level = "低危"
        elif probability < 0.6:
            risk_level = "中危"
        else:
            risk_level = "高危"

        # Cast to built-in types so the values are JSON serialisable.
        response_data = {
            "success": True,
            "prediction": int(prediction),
            "probability": float(round(probability, 4)),
            "risk_level": risk_level,
            "message": "预测成功",
            "features": {
                "age_years": int(processed_df['age_years'].iloc[0]),
                "bmi": float(round(processed_df['bmi'].iloc[0], 2)),
                "ap_hi": int(processed_df['ap_hi'].iloc[0]),
                "ap_lo": int(processed_df['ap_lo'].iloc[0]),
                "gender": int(processed_df['gender'].iloc[0]),
                "cholesterol": int(processed_df['cholesterol'].iloc[0]),
                "gluc": int(processed_df['gluc'].iloc[0]),
                "smoke": int(processed_df['smoke'].iloc[0]),
                "alco": int(processed_df['alco'].iloc[0]),
                "active": int(processed_df['active'].iloc[0])
            }
        }

        logger.info(f"预测结果: {response_data}")
        return jsonify(response_data), 200

    except Exception as e:
        error_msg = f"预测过程中发生错误: {str(e)}"
        logger.error(error_msg)
        logger.error(traceback.format_exc())
        return jsonify({
            "success": False,
            "message": error_msg
        }), 500
|
||||
|
||||
@app.route('/health', methods=['GET'])
def health_check():
    """
    Health-check endpoint.

    Returns 503 when no pipeline is loaded. Otherwise runs one fixed sample
    record through preprocessing and ``pipeline.predict`` and reports 200
    with the model version and feature count; any exception yields 500.
    """
    try:
        if pipeline is None:
            not_loaded = {
                "status": "unhealthy",
                "message": "模型未加载"
            }
            return jsonify(not_loaded), 503

        # Fixed sample record used to exercise the model end to end.
        probe_record = dict(
            age=20228,
            gender=1,
            height=156,
            weight=85,
            ap_hi=140,
            ap_lo=90,
            cholesterol=1,
            gluc=1,
            smoke=0,
            alco=0,
            active=1,
        )

        # Only the absence of an exception matters; the result is discarded.
        pipeline.predict(preprocess_input(probe_record))

        healthy = {
            "status": "healthy",
            "model_version": model_data.get('model_version', '未知'),
            "features": len(feature_names) if feature_names else 0,
            "message": "模型服务运行正常"
        }
        return jsonify(healthy), 200

    except Exception as e:
        failure = {
            "status": "unhealthy",
            "message": f"健康检查失败: {str(e)}"
        }
        return jsonify(failure), 500
|
||||
|
||||
@app.route('/model_info', methods=['GET'])
def model_info():
    """
    Return metadata about the loaded model.

    Responds with 503 when no model bundle is loaded; otherwise 200 with the
    model version, description, feature count and feature list.
    """
    if model_data is None:
        unavailable = {
            "success": False,
            "message": "模型未加载"
        }
        return jsonify(unavailable), 503

    # An unset or empty feature list is reported as [] with a count of 0.
    known_features = feature_names if feature_names else []

    info = {
        "success": True,
        "model_version": model_data.get('model_version', '未知'),
        "description": model_data.get('description', 'CardioAI心血管疾病预测模型'),
        "feature_count": len(known_features),
        "features": known_features
    }
    return jsonify(info), 200
|
||||
|
||||
# Set to True once the model is known to be loaded.
_model_loaded = False


@app.before_request
def ensure_model_loaded():
    """
    Lazily load the model before a request is handled.

    Does nothing once the model is known to be loaded. If the pipeline was
    already loaded elsewhere (for example by the ``__main__`` start-up path),
    the flag is simply set instead of loading the model a second time.
    A failed load is logged and retried on the next request.

    Returns:
        None, so Flask continues with normal request handling.
    """
    global _model_loaded

    if _model_loaded:
        return

    if pipeline is not None:
        # Already loaded by another code path; avoid a redundant reload.
        _model_loaded = True
        return

    logger.info("首次请求,正在加载模型...")
    if load_model():
        _model_loaded = True
        logger.info("模型加载完成")
    else:
        logger.error("模型加载失败")
|
||||
|
||||
if __name__ == '__main__':
    # Load the model up front and refuse to start without it.
    success = load_model()
    if not success:
        logger.error("启动失败: 模型加载失败")
        sys.exit(1)

    # Start-up banner with the available endpoints.
    logger.info("启动CardioAI预测API服务...")
    logger.info("访问 http://localhost:5000 使用预测界面")
    logger.info("API文档:")
    logger.info(" GET / - 前端界面")
    logger.info(" POST /predict_cardio - 预测接口")
    logger.info(" GET /health - 健康检查")
    logger.info(" GET /model_info - 模型信息")

    # The server listens on all interfaces, so the interactive debugger must
    # not be on by default: it allows arbitrary code execution to anyone who
    # can reach the port. Opt in explicitly with FLASK_DEBUG=1 for local work.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').strip().lower() in ('1', 'true', 'yes', 'on')
    app.run(host='0.0.0.0', port=5000, debug=debug_enabled)
|
||||
BIN
test/module2_predictor/models/cardio_predictor_model.pkl
Normal file
BIN
test/module2_predictor/models/cardio_predictor_model.pkl
Normal file
Binary file not shown.
27
test/module2_predictor/models/feature_info.txt
Normal file
27
test/module2_predictor/models/feature_info.txt
Normal file
@@ -0,0 +1,27 @@
|
||||
CardioAI模型特征信息
|
||||
==================================================
|
||||
|
||||
特征列表(按输入顺序):
|
||||
1. age_years
|
||||
2. bmi
|
||||
3. ap_hi
|
||||
4. ap_lo
|
||||
5. gender
|
||||
6. cholesterol
|
||||
7. gluc
|
||||
8. smoke
|
||||
9. alco
|
||||
10. active
|
||||
|
||||
|
||||
特征说明:
|
||||
- age_years: 年龄(岁),由原始天数转换而来
|
||||
- bmi: 身体质量指数,计算公式:体重(kg) / (身高(m)^2)
|
||||
- ap_hi: 收缩压(mmHg)
|
||||
- ap_lo: 舒张压(mmHg)
|
||||
- gender: 性别(1=女性,2=男性)
|
||||
- cholesterol: 胆固醇水平(1=正常,2=高于正常,3=极高)
|
||||
- gluc: 血糖水平(1=正常,2=高于正常,3=极高)
|
||||
- smoke: 吸烟(0=否,1=是)
|
||||
- alco: 饮酒(0=否,1=是)
|
||||
- active: 体育活动(0=否,1=是)
|
||||
858
test/module2_predictor/templates/index.html
Normal file
858
test/module2_predictor/templates/index.html
Normal file
@@ -0,0 +1,858 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="zh-CN">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>CardioAI - 心血管疾病风险预测</title>
|
||||
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/css/bootstrap.min.css">
|
||||
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css">
|
||||
<style>
|
||||
:root {
|
||||
--primary-color: #e63946;
|
||||
--secondary-color: #457b9d;
|
||||
--success-color: #2a9d8f;
|
||||
--warning-color: #e9c46a;
|
||||
--danger-color: #e63946;
|
||||
--light-color: #f1faee;
|
||||
--dark-color: #1d3557;
|
||||
}
|
||||
|
||||
body {
|
||||
font-family: 'Microsoft YaHei', 'Segoe UI', sans-serif;
|
||||
background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
|
||||
min-height: 100vh;
|
||||
padding-bottom: 50px;
|
||||
}
|
||||
|
||||
.navbar {
|
||||
background: linear-gradient(to right, var(--dark-color), var(--secondary-color));
|
||||
box-shadow: 0 4px 12px rgba(0, 0, 0, 0.1);
|
||||
}
|
||||
|
||||
.navbar-brand {
|
||||
font-weight: bold;
|
||||
font-size: 1.5rem;
|
||||
color: white !important;
|
||||
}
|
||||
|
||||
.card {
|
||||
border: none;
|
||||
border-radius: 15px;
|
||||
box-shadow: 0 10px 30px rgba(0, 0, 0, 0.1);
|
||||
transition: transform 0.3s ease;
|
||||
margin-bottom: 20px;
|
||||
}
|
||||
|
||||
.card:hover {
|
||||
transform: translateY(-5px);
|
||||
}
|
||||
|
||||
.card-header {
|
||||
background: linear-gradient(to right, var(--secondary-color), var(--dark-color));
|
||||
color: white;
|
||||
border-radius: 15px 15px 0 0 !important;
|
||||
font-weight: bold;
|
||||
padding: 15px 20px;
|
||||
}
|
||||
|
||||
.form-control, .form-select {
|
||||
border-radius: 8px;
|
||||
border: 1px solid #ddd;
|
||||
padding: 10px 15px;
|
||||
transition: all 0.3s;
|
||||
}
|
||||
|
||||
.form-control:focus, .form-select:focus {
|
||||
border-color: var(--secondary-color);
|
||||
box-shadow: 0 0 0 0.25rem rgba(69, 123, 157, 0.25);
|
||||
}
|
||||
|
||||
.btn-primary {
|
||||
background: linear-gradient(to right, var(--primary-color), var(--secondary-color));
|
||||
border: none;
|
||||
border-radius: 8px;
|
||||
padding: 12px 30px;
|
||||
font-weight: bold;
|
||||
transition: all 0.3s;
|
||||
}
|
||||
|
||||
.btn-primary:hover {
|
||||
transform: translateY(-2px);
|
||||
box-shadow: 0 5px 15px rgba(230, 57, 70, 0.3);
|
||||
}
|
||||
|
||||
.btn-secondary {
|
||||
background: linear-gradient(to right, var(--dark-color), #2c3e50);
|
||||
border: none;
|
||||
border-radius: 8px;
|
||||
padding: 12px 30px;
|
||||
font-weight: bold;
|
||||
transition: all 0.3s;
|
||||
}
|
||||
|
||||
.btn-secondary:hover {
|
||||
transform: translateY(-2px);
|
||||
box-shadow: 0 5px 15px rgba(29, 53, 87, 0.3);
|
||||
}
|
||||
|
||||
.result-card {
|
||||
border-left: 5px solid var(--secondary-color);
|
||||
}
|
||||
|
||||
.risk-low {
|
||||
color: var(--success-color);
|
||||
font-weight: bold;
|
||||
}
|
||||
|
||||
.risk-medium {
|
||||
color: var(--warning-color);
|
||||
font-weight: bold;
|
||||
}
|
||||
|
||||
.risk-high {
|
||||
color: var(--danger-color);
|
||||
font-weight: bold;
|
||||
}
|
||||
|
||||
.feature-value {
|
||||
background-color: var(--light-color);
|
||||
padding: 5px 10px;
|
||||
border-radius: 5px;
|
||||
font-family: monospace;
|
||||
font-weight: bold;
|
||||
}
|
||||
|
||||
.loading {
|
||||
display: none;
|
||||
text-align: center;
|
||||
padding: 20px;
|
||||
}
|
||||
|
||||
.spinner {
|
||||
width: 3rem;
|
||||
height: 3rem;
|
||||
border-width: 0.3em;
|
||||
}
|
||||
|
||||
.alert {
|
||||
border-radius: 10px;
|
||||
border: none;
|
||||
}
|
||||
|
||||
.feature-group {
|
||||
margin-bottom: 15px;
|
||||
}
|
||||
|
||||
.feature-label {
|
||||
font-weight: 600;
|
||||
margin-bottom: 5px;
|
||||
color: var(--dark-color);
|
||||
}
|
||||
|
||||
.help-text {
|
||||
font-size: 0.85rem;
|
||||
color: #6c757d;
|
||||
margin-top: 3px;
|
||||
}
|
||||
|
||||
footer {
|
||||
background-color: var(--dark-color);
|
||||
color: white;
|
||||
padding: 20px 0;
|
||||
margin-top: 40px;
|
||||
border-radius: 15px 15px 0 0;
|
||||
}
|
||||
|
||||
.heart-icon {
|
||||
color: var(--primary-color);
|
||||
animation: heartbeat 1.5s infinite;
|
||||
}
|
||||
|
||||
@keyframes heartbeat {
|
||||
0% { transform: scale(1); }
|
||||
5% { transform: scale(1.1); }
|
||||
10% { transform: scale(1); }
|
||||
15% { transform: scale(1.1); }
|
||||
20% { transform: scale(1); }
|
||||
100% { transform: scale(1); }
|
||||
}
|
||||
|
||||
.tooltip-inner {
|
||||
max-width: 300px;
|
||||
text-align: left;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<!-- 导航栏 -->
|
||||
<nav class="navbar navbar-expand-lg navbar-dark">
|
||||
<div class="container">
|
||||
<a class="navbar-brand" href="#">
|
||||
<i class="fas fa-heartbeat me-2 heart-icon"></i>
|
||||
CardioAI - 心血管疾病风险预测系统
|
||||
</a>
|
||||
<button class="navbar-toggler" type="button" data-bs-toggle="collapse" data-bs-target="#navbarNav">
|
||||
<span class="navbar-toggler-icon"></span>
|
||||
</button>
|
||||
<div class="collapse navbar-collapse" id="navbarNav">
|
||||
<ul class="navbar-nav ms-auto">
|
||||
<li class="nav-item">
|
||||
<a class="nav-link active" href="#"><i class="fas fa-home me-1"></i> 首页</a>
|
||||
</li>
|
||||
<li class="nav-item">
|
||||
<a class="nav-link" href="/health" target="_blank"><i class="fas fa-heart me-1"></i> 服务状态</a>
|
||||
</li>
|
||||
<li class="nav-item">
|
||||
<a class="nav-link" href="/model_info" target="_blank"><i class="fas fa-info-circle me-1"></i> 模型信息</a>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
</nav>
|
||||
|
||||
<!-- 主内容区 -->
|
||||
<div class="container mt-4">
|
||||
<div class="row">
|
||||
<!-- 左侧:输入表单 -->
|
||||
<div class="col-lg-6">
|
||||
<div class="card">
|
||||
<div class="card-header">
|
||||
<i class="fas fa-clipboard-list me-2"></i> 患者基本信息输入
|
||||
</div>
|
||||
<div class="card-body">
|
||||
<form id="predictionForm">
|
||||
<!-- 年龄 -->
|
||||
<div class="feature-group">
|
||||
<label class="feature-label" for="age">
|
||||
<i class="fas fa-birthday-cake me-1"></i> 年龄(天)
|
||||
</label>
|
||||
<input type="number" class="form-control" id="age" name="age"
|
||||
placeholder="请输入年龄(天数)" min="0" max="36500" required
|
||||
data-bs-toggle="tooltip" data-bs-placement="top"
|
||||
title="输入年龄,单位为天。例如:55岁 = 55 × 365 = 20075天">
|
||||
<div class="help-text">示例:55岁 ≈ 20075天</div>
|
||||
</div>
|
||||
|
||||
<!-- 性别 -->
|
||||
<div class="feature-group">
|
||||
<label class="feature-label" for="gender">
|
||||
<i class="fas fa-venus-mars me-1"></i> 性别
|
||||
</label>
|
||||
<select class="form-select" id="gender" name="gender" required>
|
||||
<option value="">请选择性别</option>
|
||||
<option value="1">女性</option>
|
||||
<option value="2">男性</option>
|
||||
</select>
|
||||
<div class="help-text">1=女性,2=男性</div>
|
||||
</div>
|
||||
|
||||
<div class="row">
|
||||
<!-- 身高 -->
|
||||
<div class="col-md-6">
|
||||
<div class="feature-group">
|
||||
<label class="feature-label" for="height">
|
||||
<i class="fas fa-ruler-vertical me-1"></i> 身高(cm)
|
||||
</label>
|
||||
<input type="number" class="form-control" id="height" name="height"
|
||||
placeholder="身高(厘米)" min="100" max="250" required>
|
||||
<div class="help-text">范围:100-250 cm</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- 体重 -->
|
||||
<div class="col-md-6">
|
||||
<div class="feature-group">
|
||||
<label class="feature-label" for="weight">
|
||||
<i class="fas fa-weight me-1"></i> 体重(kg)
|
||||
</label>
|
||||
<input type="number" class="form-control" id="weight" name="weight"
|
||||
placeholder="体重(千克)" min="20" max="300" required>
|
||||
<div class="help-text">范围:20-300 kg</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="row">
|
||||
<!-- 收缩压 -->
|
||||
<div class="col-md-6">
|
||||
<div class="feature-group">
|
||||
<label class="feature-label" for="ap_hi">
|
||||
<i class="fas fa-tachometer-alt me-1"></i> 收缩压(mmHg)
|
||||
</label>
|
||||
<input type="number" class="form-control" id="ap_hi" name="ap_hi"
|
||||
placeholder="收缩压" min="50" max="300" required>
|
||||
<div class="help-text">范围:50-300 mmHg</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- 舒张压 -->
|
||||
<div class="col-md-6">
|
||||
<div class="feature-group">
|
||||
<label class="feature-label" for="ap_lo">
|
||||
<i class="fas fa-tachometer-alt me-1"></i> 舒张压(mmHg)
|
||||
</label>
|
||||
<input type="number" class="form-control" id="ap_lo" name="ap_lo"
|
||||
placeholder="舒张压" min="30" max="200" required>
|
||||
<div class="help-text">范围:30-200 mmHg</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- 胆固醇水平 -->
|
||||
<div class="feature-group">
|
||||
<label class="feature-label" for="cholesterol">
|
||||
<i class="fas fa-vial me-1"></i> 胆固醇水平
|
||||
</label>
|
||||
<select class="form-select" id="cholesterol" name="cholesterol" required>
|
||||
<option value="">请选择胆固醇水平</option>
|
||||
<option value="1">正常</option>
|
||||
<option value="2">高于正常</option>
|
||||
<option value="3">极高</option>
|
||||
</select>
|
||||
<div class="help-text">1=正常,2=高于正常,3=极高</div>
|
||||
</div>
|
||||
|
||||
<!-- 血糖水平 -->
|
||||
<div class="feature-group">
|
||||
<label class="feature-label" for="gluc">
|
||||
<i class="fas fa-vial me-1"></i> 血糖水平
|
||||
</label>
|
||||
<select class="form-select" id="gluc" name="gluc" required>
|
||||
<option value="">请选择血糖水平</option>
|
||||
<option value="1">正常</option>
|
||||
<option value="2">高于正常</option>
|
||||
<option value="3">极高</option>
|
||||
</select>
|
||||
<div class="help-text">1=正常,2=高于正常,3=极高</div>
|
||||
</div>
|
||||
|
||||
<!-- 生活方式 -->
|
||||
<div class="row">
|
||||
<div class="col-md-4">
|
||||
<div class="feature-group">
|
||||
<label class="feature-label" for="smoke">
|
||||
<i class="fas fa-smoking me-1"></i> 吸烟
|
||||
</label>
|
||||
<select class="form-select" id="smoke" name="smoke" required>
|
||||
<option value="0">否</option>
|
||||
<option value="1">是</option>
|
||||
</select>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="col-md-4">
|
||||
<div class="feature-group">
|
||||
<label class="feature-label" for="alco">
|
||||
<i class="fas fa-wine-glass-alt me-1"></i> 饮酒
|
||||
</label>
|
||||
<select class="form-select" id="alco" name="alco" required>
|
||||
<option value="0">否</option>
|
||||
<option value="1">是</option>
|
||||
</select>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="col-md-4">
|
||||
<div class="feature-group">
|
||||
<label class="feature-label" for="active">
|
||||
<i class="fas fa-running me-1"></i> 体育活动
|
||||
</label>
|
||||
<select class="form-select" id="active" name="active" required>
|
||||
<option value="0">否</option>
|
||||
<option value="1">是</option>
|
||||
</select>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- 按钮组 -->
|
||||
<div class="d-grid gap-2 d-md-flex justify-content-md-end mt-4">
|
||||
<button type="button" class="btn btn-secondary me-md-2" id="btnReset">
|
||||
<i class="fas fa-redo me-1"></i> 重置表单
|
||||
</button>
|
||||
<button type="submit" class="btn btn-primary" id="btnPredict">
|
||||
<i class="fas fa-stethoscope me-1"></i> 开始预测
|
||||
</button>
|
||||
</div>
|
||||
</form>
|
||||
|
||||
<!-- 加载动画 -->
|
||||
<div class="loading mt-4" id="loading">
|
||||
<div class="spinner-border text-primary spinner" role="status">
|
||||
<span class="visually-hidden">加载中...</span>
|
||||
</div>
|
||||
<p class="mt-3">正在分析数据,请稍候...</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- 示例数据卡片 -->
|
||||
<div class="card">
|
||||
<div class="card-header">
|
||||
<i class="fas fa-lightbulb me-2"></i> 示例数据
|
||||
</div>
|
||||
<div class="card-body">
|
||||
<p class="card-text">点击下方按钮填充示例数据:</p>
|
||||
<div class="d-grid gap-2">
|
||||
<button type="button" class="btn btn-outline-primary" id="btnExampleLow">
|
||||
<i class="fas fa-user-check me-1"></i> 低风险示例
|
||||
</button>
|
||||
<button type="button" class="btn btn-outline-warning" id="btnExampleMedium">
|
||||
<i class="fas fa-user me-1"></i> 中风险示例
|
||||
</button>
|
||||
<button type="button" class="btn btn-outline-danger" id="btnExampleHigh">
|
||||
<i class="fas fa-user-injured me-1"></i> 高风险示例
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- 右侧:结果显示 -->
|
||||
<div class="col-lg-6">
|
||||
<div class="card result-card">
|
||||
<div class="card-header">
|
||||
<i class="fas fa-chart-line me-2"></i> 预测结果分析
|
||||
</div>
|
||||
<div class="card-body">
|
||||
<div id="resultPlaceholder" class="text-center">
|
||||
<i class="fas fa-chart-bar fa-4x text-muted mb-3"></i>
|
||||
<h5 class="text-muted">等待预测结果</h5>
|
||||
<p class="text-muted">填写左侧表单并点击"开始预测"按钮,系统将分析您的心血管疾病风险。</p>
|
||||
</div>
|
||||
|
||||
<div id="resultContent" style="display: none;">
|
||||
<!-- 风险等级 -->
|
||||
<div class="alert" id="riskAlert">
|
||||
<h4 class="alert-heading" id="riskTitle"></h4>
|
||||
<p id="riskDescription"></p>
|
||||
<hr>
|
||||
<p class="mb-0" id="riskRecommendation"></p>
|
||||
</div>
|
||||
|
||||
<!-- 预测结果详情 -->
|
||||
<div class="mt-4">
|
||||
<h5><i class="fas fa-info-circle me-2"></i> 预测详情</h5>
|
||||
<table class="table table-borderless">
|
||||
<tr>
|
||||
<th width="40%">预测结果:</th>
|
||||
<td><span class="badge bg-primary" id="predictionResult"></span></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>患病概率:</th>
|
||||
<td><span class="feature-value" id="probabilityValue"></span></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>风险等级:</th>
|
||||
<td><span id="riskLevel"></span></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>处理后的年龄:</th>
|
||||
<td><span class="feature-value" id="ageYears"></span> 岁</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>身体质量指数(BMI):</th>
|
||||
<td><span class="feature-value" id="bmiValue"></span></td>
|
||||
</tr>
|
||||
</table>
|
||||
</div>
|
||||
|
||||
<!-- 特征总结 -->
|
||||
<div class="mt-4">
|
||||
<h5><i class="fas fa-list-ul me-2"></i> 输入特征总结</h5>
|
||||
<div class="row" id="featureSummary">
|
||||
<!-- 特征将通过JavaScript动态填充 -->
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- 行动建议 -->
|
||||
<div class="alert alert-info mt-4">
|
||||
<h5><i class="fas fa-hands-helping me-2"></i> 健康建议</h5>
|
||||
<ul id="healthAdvice">
|
||||
<!-- 建议将通过JavaScript动态填充 -->
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- 系统信息 -->
|
||||
<div class="card">
|
||||
<div class="card-header">
|
||||
<i class="fas fa-cogs me-2"></i> 系统信息
|
||||
</div>
|
||||
<div class="card-body">
|
||||
<div class="row">
|
||||
<div class="col-md-6">
|
||||
<p><strong><i class="fas fa-server me-2"></i> 服务状态:</strong>
|
||||
<span class="badge bg-success" id="serviceStatus">正常</span>
|
||||
</p>
|
||||
<p><strong><i class="fas fa-brain me-2"></i> 预测模型:</strong>
|
||||
<span id="modelName">CardioAI XGBoost</span>
|
||||
</p>
|
||||
</div>
|
||||
<div class="col-md-6">
|
||||
<p><strong><i class="fas fa-history me-2"></i> 响应时间:</strong>
|
||||
<span id="responseTime">--</span> ms
|
||||
</p>
|
||||
<p><strong><i class="fas fa-calendar-alt me-2"></i> 最后更新:</strong>
|
||||
<span id="lastUpdate">2024-04-02</span>
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
<div class="d-grid gap-2 d-md-flex justify-content-md-end mt-2">
|
||||
<button class="btn btn-sm btn-outline-secondary" id="btnRefreshStatus">
|
||||
<i class="fas fa-sync-alt me-1"></i> 刷新状态
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- 页脚 -->
|
||||
<footer>
|
||||
<div class="container text-center">
|
||||
<p class="mb-2">
|
||||
<i class="fas fa-heartbeat me-2 heart-icon"></i>
|
||||
CardioAI - 心血管疾病智能辅助系统 v1.0
|
||||
</p>
|
||||
<p class="small mb-0">
|
||||
本系统基于机器学习模型提供风险评估,结果仅供参考,不能替代专业医疗诊断。
|
||||
<br>
|
||||
如有健康问题,请及时咨询专业医生。
|
||||
</p>
|
||||
</div>
|
||||
</footer>
|
||||
|
||||
<!-- Bootstrap JavaScript -->
|
||||
<script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/js/bootstrap.bundle.min.js"></script>
|
||||
|
||||
<!-- 自定义JavaScript -->
|
||||
<script>
|
||||
// Initialise the page once the DOM has been parsed.
document.addEventListener('DOMContentLoaded', () => {
    // Activate a Bootstrap tooltip on every element that opts in.
    document.querySelectorAll('[data-bs-toggle="tooltip"]').forEach((trigger) => {
        new bootstrap.Tooltip(trigger);
    });

    // Show the current backend health in the system-info card.
    checkServiceStatus();

    // Small helper so each binding reads as "element id, event, handler".
    const bind = (elementId, eventName, handler) => {
        document.getElementById(elementId).addEventListener(eventName, handler);
    };

    bind('predictionForm', 'submit', handlePrediction);
    bind('btnReset', 'click', resetForm);
    bind('btnRefreshStatus', 'click', checkServiceStatus);

    // Example-data buttons: each one fills the form with a preset profile.
    const exampleButtons = [
        ['btnExampleLow', 'low'],
        ['btnExampleMedium', 'medium'],
        ['btnExampleHigh', 'high'],
    ];
    for (const [buttonId, level] of exampleButtons) {
        bind(buttonId, 'click', () => fillExampleData(level));
    }

    // Pre-fill the form with the low-risk example shortly after load.
    setTimeout(() => fillExampleData('low'), 500);
});
|
||||
|
||||
// Query the backend `/health` endpoint and reflect the answer in the
// "serviceStatus" badge. When the service reports `status === 'healthy'`
// the model name is refreshed from `model_version` as well. Any failure
// while fetching or parsing the response is logged and shown as a
// connection failure.
async function checkServiceStatus() {
    // Update the badge's colour class and label in one place.
    const showStatus = (badgeClass, label) => {
        const badge = document.getElementById('serviceStatus');
        badge.className = badgeClass;
        badge.textContent = label;
    };

    try {
        const reply = await fetch('/health');
        const health = await reply.json();

        if (health.status !== 'healthy') {
            showStatus('badge bg-danger', '异常');
            return;
        }

        showStatus('badge bg-success', '正常');
        // Fall back to the default name when the server sends no version.
        document.getElementById('modelName').textContent =
            health.model_version || 'CardioAI XGBoost';
    } catch (error) {
        console.error('服务状态检查失败:', error);
        showStatus('badge bg-danger', '连接失败');
    }
}
|
||||
|
||||
// Submit handler for the prediction form.
//
// Collects the eleven form fields as integers, rejects the submission when
// the diastolic pressure is not below the systolic pressure, POSTs the data
// as JSON to `/predict_cardio`, records the round-trip time, and either
// renders the result or alerts the user with the server's message.
async function handlePrediction(event) {
    event.preventDefault();

    // Toggle the spinner and the submit button together.
    const setBusy = (busy) => {
        document.getElementById('loading').style.display = busy ? 'block' : 'none';
        document.getElementById('btnPredict').disabled = busy;
    };
    setBusy(true);

    // Field ids double as the JSON keys sent to the server (order preserved).
    const fieldIds = [
        'age', 'gender', 'height', 'weight', 'ap_hi', 'ap_lo',
        'cholesterol', 'gluc', 'smoke', 'alco', 'active',
    ];
    const formData = {};
    for (const fieldId of fieldIds) {
        formData[fieldId] = parseInt(document.getElementById(fieldId).value);
    }

    // Blood-pressure sanity check: diastolic must be lower than systolic.
    if (formData.ap_lo >= formData.ap_hi) {
        alert('错误:舒张压不能高于或等于收缩压');
        setBusy(false);
        return;
    }

    try {
        const startedAt = Date.now();

        const response = await fetch('/predict_cardio', {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
            },
            body: JSON.stringify(formData),
        });

        // Elapsed time covers the request up to the response headers.
        const elapsedMs = Date.now() - startedAt;
        document.getElementById('responseTime').textContent = elapsedMs;

        const data = await response.json();

        // Hide the spinner before showing any result or alert.
        setBusy(false);

        if (!data.success) {
            alert('预测失败:' + data.message);
            return;
        }
        displayPredictionResult(data);
    } catch (error) {
        console.error('预测请求失败:', error);
        setBusy(false);
        alert('网络请求失败,请检查服务器状态');
    }
}
|
||||
|
||||
// Render a successful prediction response into the result panel.
//
// Reads `data.prediction`, `data.probability`, `data.risk_level` and the
// `data.features` object, then fills the badge, probability, risk banner,
// feature summary and health-advice list.
function displayPredictionResult(data) {
    const el = (id) => document.getElementById(id);

    // Swap the placeholder for the real result content.
    el('resultPlaceholder').style.display = 'none';
    el('resultContent').style.display = 'block';

    // Prediction badge and probability (one decimal place, as a percentage).
    el('predictionResult').textContent =
        data.prediction === 1 ? '有心血管疾病风险' : '无心血管疾病风险';
    el('probabilityValue').textContent = `${(data.probability * 100).toFixed(1)}%`;

    // Presentation per risk level. Any level other than the two listed in
    // the map is displayed with the high-risk styling.
    const highRiskView = {
        textClass: 'risk-high',
        icon: 'fa-frown',
        alertClass: 'alert alert-danger',
        headingIcon: 'fa-exclamation-circle',
        heading: '高风险',
        description: '根据模型分析,您的心血管疾病风险较高,建议尽快咨询专业医生。',
        recommendation: '请及时就医,进行全面的心血管健康评估。',
    };
    const riskViews = new Map([
        ['低危', {
            textClass: 'risk-low',
            icon: 'fa-smile',
            alertClass: 'alert alert-success',
            headingIcon: 'fa-thumbs-up',
            heading: '低风险',
            description: '根据模型分析,您当前的心血管疾病风险较低。继续保持健康的生活方式!',
            recommendation: '建议定期进行健康检查,维持当前的健康状态。',
        }],
        ['中危', {
            textClass: 'risk-medium',
            icon: 'fa-meh',
            alertClass: 'alert alert-warning',
            headingIcon: 'fa-exclamation-triangle',
            heading: '中风险',
            description: '根据模型分析,您有一定的心血管疾病风险,建议关注相关健康指标。',
            recommendation: '建议改善生活方式,并考虑进行更详细的医学检查。',
        }],
    ]);
    const view = riskViews.get(data.risk_level) || highRiskView;

    el('riskLevel').innerHTML =
        `<i class="fas ${view.icon} me-1"></i><span class="${view.textClass}">${data.risk_level}</span>`;

    // Risk banner: colour class plus heading / description / recommendation.
    const alertElement = el('riskAlert');
    alertElement.className = view.alertClass;
    alertElement.innerHTML = `
        <h4 class="alert-heading"><i class="fas ${view.headingIcon} me-2"></i> ${view.heading}</h4>
        <p>${view.description}</p>
        <hr>
        <p class="mb-0">${view.recommendation}</p>
    `;

    // Derived features echoed back by the server.
    const features = data.features;
    el('ageYears').textContent = features.age_years;
    el('bmiValue').textContent = features.bmi;

    // Two-column summary of the submitted features.
    const yesNo = (flag) => (flag === 1 ? '是' : '否');
    el('featureSummary').innerHTML = `
        <div class="col-md-6">
            <p><strong>年龄:</strong> ${features.age_years} 岁</p>
            <p><strong>性别:</strong> ${features.gender === 1 ? '女性' : '男性'}</p>
            <p><strong>BMI:</strong> ${features.bmi}</p>
            <p><strong>血压:</strong> ${features.ap_hi}/${features.ap_lo} mmHg</p>
        </div>
        <div class="col-md-6">
            <p><strong>胆固醇:</strong> ${getCholesterolText(features.cholesterol)}</p>
            <p><strong>血糖:</strong> ${getGlucText(features.gluc)}</p>
            <p><strong>吸烟:</strong> ${yesNo(features.smoke)}</p>
            <p><strong>饮酒:</strong> ${yesNo(features.alco)}</p>
            <p><strong>体育活动:</strong> ${yesNo(features.active)}</p>
        </div>
    `;

    // Health advice: each rule pairs a condition with the advice it triggers.
    const adviceRules = [
        [features.bmi > 25, '您的BMI偏高,建议控制体重,保持健康饮食'],
        [features.ap_hi > 140 || features.ap_lo > 90, '您的血压偏高,建议定期监测血压,减少盐分摄入'],
        [features.cholesterol > 1, '您的胆固醇水平偏高,建议减少高胆固醇食物摄入'],
        [features.gluc > 1, '您的血糖水平偏高,建议控制糖分摄入,定期监测血糖'],
        [features.smoke === 1, '吸烟是心血管疾病的重要风险因素,建议戒烟'],
        [features.active === 0, '缺乏体育活动,建议每周进行至少150分钟的中等强度运动'],
    ];
    const adviceItems = adviceRules
        .filter(([applies]) => applies)
        .map(([, text]) => `<li>${text}</li>`);

    // Nothing triggered: show a single generic encouragement instead.
    if (adviceItems.length === 0) {
        adviceItems.push('<li>保持当前健康的生活方式,定期进行体检</li>');
    }

    el('healthAdvice').innerHTML = adviceItems.join('');
}
|
||||
|
||||
// Reset the prediction form and return the result panel to its placeholder.
function resetForm() {
    document.getElementById('predictionForm').reset();

    // Element id -> CSS display value to apply (applied in this order).
    const panelDisplay = [
        ['resultPlaceholder', 'block'],
        ['resultContent', 'none'],
    ];
    for (const [panelId, display] of panelDisplay) {
        document.getElementById(panelId).style.display = display;
    }
}
|
||||
|
||||
// Fill the prediction form with one of the built-in example profiles and
// return the result panel to its placeholder state.
//
// `type` is one of 'low', 'medium' or 'high'. Any other value is ignored
// (a warning is logged and the form is left untouched) instead of throwing
// a TypeError on an undefined profile.
function fillExampleData(type) {
    // Keys match the ids of the form inputs. `age` is expressed in days.
    const examples = {
        low: {
            age: 18000,      // about 49 years
            gender: 1,       // female
            height: 165,
            weight: 60,
            ap_hi: 120,
            ap_lo: 80,
            cholesterol: 1,
            gluc: 1,
            smoke: 0,
            alco: 0,
            active: 1,
        },
        medium: {
            age: 25000,      // about 68 years
            gender: 2,       // male
            height: 170,
            weight: 80,
            ap_hi: 140,
            ap_lo: 90,
            cholesterol: 2,
            gluc: 1,
            smoke: 1,
            alco: 1,
            active: 0,
        },
        high: {
            age: 30000,      // about 82 years
            gender: 2,       // male
            height: 168,
            weight: 95,
            ap_hi: 160,
            ap_lo: 100,
            cholesterol: 3,
            gluc: 2,
            smoke: 1,
            alco: 1,
            active: 0,
        },
    };

    // Own-property check so inherited names such as "constructor" are not
    // mistaken for a profile.
    if (!Object.prototype.hasOwnProperty.call(examples, type)) {
        console.warn(`fillExampleData: unknown example type "${type}"`);
        return;
    }

    // Copy each value into the input with the matching id, if it exists.
    for (const [key, value] of Object.entries(examples[type])) {
        const element = document.getElementById(key);
        if (element) {
            element.value = value;
        }
    }

    // Any previously shown result no longer matches the form contents.
    document.getElementById('resultPlaceholder').style.display = 'block';
    document.getElementById('resultContent').style.display = 'none';
}
|
||||
|
||||
// Helper: map a cholesterol level code (1, 2 or 3) to its display label.
// Any other value, including numeric strings, yields the "unknown" label.
function getCholesterolText(value) {
    const labels = new Map([
        [1, '正常'],
        [2, '高于正常'],
        [3, '极高'],
    ]);
    return labels.has(value) ? labels.get(value) : '未知';
}
|
||||
|
||||
// Helper: map a glucose level code (1, 2 or 3) to its display label.
// Any other value, including numeric strings, yields the "unknown" label.
function getGlucText(value) {
    const codes = [1, 2, 3];
    const labels = ['正常', '高于正常', '极高'];

    // indexOf compares with strict equality, so '1' does not match 1.
    const position = codes.indexOf(value);
    if (position === -1) {
        return '未知';
    }
    return labels[position];
}
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
172
test/module2_predictor/test_api.py
Normal file
172
test/module2_predictor/test_api.py
Normal file
@@ -0,0 +1,172 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
CardioAI API测试脚本
|
||||
|
||||
测试模型加载和预测功能
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
# 添加项目根目录到Python路径
|
||||
project_root = Path(__file__).parent.parent
|
||||
sys.path.append(str(project_root))
|
||||
|
||||
# 导入Flask应用中的函数
|
||||
from app import load_model, preprocess_input
|
||||
|
||||
def test_model_loading():
    """Check that the prediction model can be loaded.

    Calls ``load_model()`` and prints a pass/fail line.

    Returns:
        bool: True when ``load_model()`` returns a truthy value; False when
        it returns a falsy value or raises an exception.
    """
    print("测试模型加载...")
    try:
        loaded = bool(load_model())
        print("✅ 模型加载成功" if loaded else "❌ 模型加载失败")
        return loaded
    except Exception as exc:
        print(f"❌ 模型加载异常: {str(exc)}")
        return False
|
||||
|
||||
def test_data_preprocessing():
    """Run ``preprocess_input`` on one sample record and print its columns.

    Returns:
        bool: True when preprocessing and printing complete; False when any
        exception is raised along the way.
    """
    print("\n测试数据预处理...")

    # One sample record, using the same keys the API accepts.
    sample = dict(
        age=20228,        # age in days, about 55 years
        gender=1,         # female
        height=156,       # height (cm)
        weight=85,        # weight (kg)
        ap_hi=140,        # systolic pressure (mmHg)
        ap_lo=90,         # diastolic pressure (mmHg)
        cholesterol=1,    # cholesterol level
        gluc=1,           # glucose level
        smoke=0,          # smoking
        alco=0,           # alcohol
        active=1,         # physical activity
    )

    try:
        frame = preprocess_input(sample)
        print(f"✅ 数据预处理成功")
        print(f" 处理后的特征:")
        # Show the first (only) row of every processed column.
        for column in frame.columns:
            print(f" {column}: {frame[column].iloc[0]}")
        return True
    except Exception as exc:
        print(f"❌ 数据预处理失败: {str(exc)}")
        return False
|
||||
|
||||
def test_prediction():
    """Run one prediction through the loaded pipeline and print the outcome.

    Returns:
        bool: True when the prediction succeeds; False when the pipeline has
        not been loaded or any step raises.
    """
    print("\n测试预测功能...")

    # Imported here, at call time, so the module attribute is read after
    # the model-loading step has had a chance to run.
    from app import pipeline

    if pipeline is None:
        print("❌ 模型未加载,无法测试预测")
        return False

    # Same sample record as the preprocessing check.
    sample = dict(
        age=20228,
        gender=1,
        height=156,
        weight=85,
        ap_hi=140,
        ap_lo=90,
        cholesterol=1,
        gluc=1,
        smoke=0,
        alco=0,
        active=1,
    )

    try:
        features = preprocess_input(sample)
        prediction = pipeline.predict(features)[0]
        # Second column of predict_proba is read as the positive-class probability.
        probability = pipeline.predict_proba(features)[0][1]

        verdict = '有风险' if prediction == 1 else '无风险'
        print(f"✅ 预测成功")
        print(f" 预测结果: {prediction} ({verdict})")
        print(f" 患病概率: {probability:.4f} ({(probability*100):.1f}%)")

        # Risk bands: below 0.3 is low, below 0.6 is medium, otherwise high.
        bands = ((0.3, "低危"), (0.6, "中危"))
        risk_level = next(
            (label for upper, label in bands if probability < upper),
            "高危",
        )

        print(f" 风险等级: {risk_level}")
        return True
    except Exception as exc:
        print(f"❌ 预测失败: {str(exc)}")
        import traceback
        traceback.print_exc()
        return False
|
||||
|
||||
def test_api_endpoint():
    """Print instructions for testing the HTTP endpoint by hand.

    No request is sent; the function only tells the user how to start the
    Flask server and which curl command to run against it.
    """
    # A real HTTP request test could be added here; to keep things simple
    # the user is only prompted.
    lines = (
        "\n测试API端点...",
        "注意:此测试需要Flask服务器正在运行",
        "请先启动Flask应用,然后运行此测试",
        "使用以下命令启动服务器:",
        ' cd "D:\\Project\\PythonProject\\AIcode\\test"',
        ' "D:\\software\\anaconda\\envs\\cardioenv\\python.exe" module2_predictor/app.py',
        "\n然后使用curl或浏览器测试API:",
        ' curl -X POST http://localhost:5000/predict_cardio \\',
        ' -H "Content-Type: application/json" \\',
        ' -d \'{"age":20228,"gender":1,"height":156,"weight":85,"ap_hi":140,"ap_lo":90,"cholesterol":1,"gluc":1,"smoke":0,"alco":0,"active":1}\'',
    )
    print("\n".join(lines))
|
||||
|
||||
def main():
    """Run the offline checks in order and print a summary.

    The model-loading check runs first; the preprocessing and prediction
    checks run only when the model loaded. The manual API-testing
    instructions are printed at the end in every case (previously that call
    sat after two ``return`` statements and could never execute).

    Returns:
        bool: True when every check that ran passed, False otherwise.
    """
    print("=" * 60)
    print("CardioAI API 测试")
    print("=" * 60)

    # Check that the model loads.
    model_loaded = test_model_loading()

    if model_loaded:
        # Check data preprocessing.
        preprocessing_ok = test_data_preprocessing()

        # Check the prediction path.
        prediction_ok = test_prediction()

        # Summarise the results.
        print("\n" + "=" * 60)
        print("测试结果汇总:")
        print(f" 模型加载: {'✅ 通过' if model_loaded else '❌ 失败'}")
        print(f" 数据预处理: {'✅ 通过' if preprocessing_ok else '❌ 失败'}")
        print(f" 预测功能: {'✅ 通过' if prediction_ok else '❌ 失败'}")

        all_passed = bool(preprocessing_ok and prediction_ok)
        if all_passed:
            print("\n🎉 所有测试通过!")
            print("Flask API可以正常运行。")
        else:
            print("\n⚠️ 部分测试失败,请检查问题。")
    else:
        print("\n❌ 模型加载失败,无法继续测试。")
        all_passed = False

    # Show the manual API-testing instructions before returning.
    test_api_endpoint()

    return all_passed
|
||||
|
||||
if __name__ == "__main__":
|
||||
success = main()
|
||||
sys.exit(0 if success else 1)
|
||||
331
test/module2_predictor/train_and_save.py
Normal file
331
test/module2_predictor/train_and_save.py
Normal file
@@ -0,0 +1,331 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
CardioAI - 心血管疾病预测模型训练脚本
|
||||
|
||||
功能:
|
||||
1. 加载和清洗数据(与模块1相同的流程)
|
||||
2. 特征工程:年龄转换、BMI计算、异常值处理
|
||||
3. 构建机器学习Pipeline
|
||||
4. 训练XGBoost分类器
|
||||
5. 保存完整Pipeline到文件
|
||||
|
||||
注意:此脚本为一次性训练脚本,生成模型文件供Flask应用使用。
|
||||
"""
|
||||
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from sklearn.model_selection import train_test_split
|
||||
from sklearn.preprocessing import StandardScaler, OneHotEncoder
|
||||
from sklearn.compose import ColumnTransformer
|
||||
from sklearn.pipeline import Pipeline
|
||||
from xgboost import XGBClassifier
|
||||
import joblib
|
||||
import warnings
|
||||
import sys
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
# 忽略警告
|
||||
warnings.filterwarnings('ignore')
|
||||
|
||||
# 添加项目根目录到Python路径
|
||||
project_root = Path(__file__).parent.parent
|
||||
sys.path.append(str(project_root))
|
||||
|
||||
def load_and_preprocess_data():
    """Load the raw spreadsheet and apply cleaning and feature engineering.

    Steps: verify the required columns, derive ``age_years`` from the
    day-based ``age`` column, compute ``bmi``, drop rows with inconsistent
    or out-of-range blood pressure, then drop the ``id`` and ``age`` columns.

    Returns:
        pd.DataFrame: the preprocessed data frame.

    Raises:
        ValueError: when a required column is missing from the file.
        Exception: any other error is printed and re-raised unchanged.
    """
    print("开始加载和预处理数据...")

    # Location of the source spreadsheet.
    data_path = project_root / "data" / "心血管疾病.xlsx"

    try:
        raw = pd.read_excel(data_path)
        print(f"原始数据形状: {raw.shape}")

        # Every column the rest of the script relies on must be present.
        expected = [
            'id', 'age', 'gender', 'height', 'weight', 'ap_hi', 'ap_lo',
            'cholesterol', 'gluc', 'smoke', 'alco', 'active', 'cardio',
        ]
        missing_columns = [name for name in expected if name not in raw.columns]
        if missing_columns:
            raise ValueError(f"数据文件中缺少必要列: {missing_columns}")

        # Work on a copy so the frame that was read is left untouched.
        cleaned = raw.copy()

        # 1. Age: days -> whole years (rounded).
        cleaned['age_years'] = (cleaned['age'] / 365.25).round().astype(int)

        # 2. BMI = weight(kg) / height(m)^2, rounded to two decimals.
        height_m = cleaned['height'] / 100
        cleaned['bmi'] = (cleaned['weight'] / height_m ** 2).round(2)

        # 3a. Drop rows where diastolic >= systolic.
        inverted = cleaned['ap_lo'] >= cleaned['ap_hi']
        if inverted.any():
            print(f"删除 {inverted.sum()} 条舒张压 >= 收缩压的异常记录")
            cleaned = cleaned[~inverted].copy()

        # 3b. Drop extreme readings: keep systolic in [90, 250] and
        #     diastolic in [60, 150] (both bounds inclusive).
        plausible = (
            cleaned['ap_hi'].between(90, 250)
            & cleaned['ap_lo'].between(60, 150)
        )
        extreme = ~plausible
        if extreme.any():
            print(f"删除 {extreme.sum()} 条血压极端异常值记录")
            cleaned = cleaned[~extreme].copy()

        # 4. The id and the raw day-based age are no longer needed.
        cleaned = cleaned.drop(columns=['id', 'age'])

        print(f"预处理后数据形状: {cleaned.shape}")
        print("数据预处理完成!")
        return cleaned

    except Exception as exc:
        print(f"数据加载和预处理失败: {str(exc)}")
        raise
|
||||
|
||||
def prepare_features_and_target(df):
    """Split a preprocessed frame into features and target.

    Args:
        df: preprocessed data frame containing a ``cardio`` column.

    Returns:
        tuple: ``(X, y, feature_names)`` where ``X`` is ``df`` without the
        ``cardio`` column, ``y`` holds the ``cardio`` values as an array, and
        ``feature_names`` is the list of column names of ``X``.
    """
    print("准备特征和目标变量...")

    # Everything except the label is a feature.
    X = df.drop(columns=['cardio'])
    y = df['cardio'].values

    negatives = (y == 0).sum()
    positives = (y == 1).sum()

    print(f"特征矩阵形状: {X.shape}")
    print(f"目标变量分布: 0={negatives}, 1={positives}")

    feature_names = list(X.columns)
    return X, y, feature_names
|
||||
|
||||
def build_pipeline():
    """Build the (unfitted) preprocessing + XGBoost pipeline.

    Continuous features are standardised, categorical features are one-hot
    encoded (first level dropped), binary features are passed through, and
    any other column is dropped before the classifier.

    Returns:
        tuple: ``(pipeline, all_features)`` where ``pipeline`` is the
        scikit-learn ``Pipeline`` and ``all_features`` lists the input
        column names in the order numerical, categorical, binary.
    """
    print("构建机器学习Pipeline...")

    # Continuous features: standardised.
    numerical_features = ['age_years', 'bmi', 'ap_hi', 'ap_lo']

    # Categorical features: one-hot encoded.
    categorical_features = ['gender', 'cholesterol', 'gluc']

    # Binary features: used as-is (no encoding needed).
    binary_features = ['smoke', 'alco', 'active']

    # Input feature order.
    all_features = numerical_features + categorical_features + binary_features

    # Column-wise preprocessing.
    preprocessor = ColumnTransformer(
        transformers=[
            ('num', StandardScaler(), numerical_features),
            ('cat', OneHotEncoder(drop='first', sparse_output=False, handle_unknown='ignore'),
             categorical_features),
            # Binary features go straight through, untransformed.
            ('binary', 'passthrough', binary_features)
        ],
        remainder='drop'  # discard every other column
    )

    # Full pipeline: preprocessing followed by the classifier.
    # `use_label_encoder` is intentionally not passed: the project requires
    # xgboost>=2.0.0, where that argument no longer exists and passing it
    # only triggers an "unused parameter" warning.
    pipeline = Pipeline([
        ('preprocessor', preprocessor),
        ('classifier', XGBClassifier(
            n_estimators=100,
            max_depth=5,
            learning_rate=0.1,
            subsample=0.8,
            colsample_bytree=0.8,
            random_state=42,
            eval_metric='logloss'
        ))
    ])

    print("Pipeline构建完成!")
    return pipeline, all_features
|
||||
|
||||
def train_model(X, y, pipeline):
    """Fit the pipeline on a stratified 80/20 split and report accuracy.

    Args:
        X: feature matrix.
        y: target vector.
        pipeline: the machine-learning Pipeline to fit.

    Returns:
        The same pipeline object, fitted on the training split.
    """
    print("开始训练模型...")

    # Hold out 20% for evaluation, preserving the class balance.
    split = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
    X_train, X_test, y_train, y_test = split

    print(f"训练集大小: {X_train.shape}")
    print(f"测试集大小: {X_test.shape}")

    pipeline.fit(X_train, y_train)

    # Accuracy on both splits.
    train_score = pipeline.score(X_train, y_train)
    test_score = pipeline.score(X_test, y_test)
    print(f"训练集准确率: {train_score:.4f}")
    print(f"测试集准确率: {test_score:.4f}")

    # Feature importances are optional: stop here when the classifier
    # does not expose them.
    classifier = pipeline.named_steps['classifier']
    if not hasattr(classifier, 'feature_importances_'):
        return pipeline

    importances = classifier.feature_importances_
    print(f"特征重要性数量: {len(importances)}")

    # Rebuild the transformed feature names from the fitted transformers,
    # which are read positionally: numeric, categorical, binary.
    fitted = pipeline.named_steps['preprocessor'].transformers_

    feature_names = list(fitted[0][2])

    if len(fitted) > 1:
        encoder, encoded_columns = fitted[1][1], fitted[1][2]
        if hasattr(encoder, 'get_feature_names_out'):
            feature_names.extend(encoder.get_feature_names_out(encoded_columns))

    if len(fitted) > 2:
        feature_names.extend(fitted[2][2])

    # Only print the ranking when names and importances line up one-to-one.
    if len(feature_names) == len(importances):
        print("\nTop 10 特征重要性:")
        ranking = np.argsort(importances)[::-1]
        for position in ranking[:10]:
            print(f" {feature_names[position]}: {importances[position]:.4f}")

    return pipeline
|
||||
|
||||
def save_pipeline(pipeline, all_features):
    """Persist the trained pipeline and a human-readable feature list.

    Writes ``models/cardio_predictor_model.pkl`` (a dict holding the
    pipeline, feature names, version and description) and
    ``models/feature_info.txt`` next to this script.

    Args:
        pipeline: the trained Pipeline.
        all_features: list of input feature names.

    Returns:
        Path: location of the saved model file.
    """
    print("保存模型和特征信息...")

    # Models live in a "models" directory beside this script.
    model_dir = Path(__file__).parent / "models"
    model_dir.mkdir(exist_ok=True)

    model_path = model_dir / "cardio_predictor_model.pkl"

    # Bundle the pipeline with its metadata in a single object.
    joblib.dump(
        {
            'pipeline': pipeline,
            'feature_names': all_features,
            'model_version': '1.0.0',
            'description': 'CardioAI心血管疾病预测模型',
        },
        model_path,
    )
    print(f"模型已保存到: {model_path}")

    # Optional companion file describing the features.
    features_path = model_dir / "feature_info.txt"

    header = [
        "CardioAI模型特征信息\n",
        "=" * 50 + "\n\n",
        "特征列表(按输入顺序):\n",
    ]
    numbered = [
        f"{index:2d}. {feature}\n"
        for index, feature in enumerate(all_features, 1)
    ]
    notes = [
        "\n\n特征说明:\n",
        "- age_years: 年龄(岁),由原始天数转换而来\n",
        "- bmi: 身体质量指数,计算公式:体重(kg) / (身高(m)^2)\n",
        "- ap_hi: 收缩压(mmHg)\n",
        "- ap_lo: 舒张压(mmHg)\n",
        "- gender: 性别(1=女性,2=男性)\n",
        "- cholesterol: 胆固醇水平(1=正常,2=高于正常,3=极高)\n",
        "- gluc: 血糖水平(1=正常,2=高于正常,3=极高)\n",
        "- smoke: 吸烟(0=否,1=是)\n",
        "- alco: 饮酒(0=否,1=是)\n",
        "- active: 体育活动(0=否,1=是)\n",
    ]

    with open(features_path, 'w', encoding='utf-8') as handle:
        handle.writelines(header + numbered + notes)

    print(f"特征信息已保存到: {features_path}")

    return model_path
|
||||
|
||||
def main():
    """Entry point: run the full training workflow end to end.

    Loads and preprocesses the data, builds and trains the pipeline, and
    saves it. On any exception the error and traceback are printed and the
    process exits with status 1.
    """
    banner = "=" * 60
    print(banner)
    print("CardioAI - 心血管疾病预测模型训练")
    print(banner)

    try:
        # 1. Load and preprocess the data.
        frame = load_and_preprocess_data()

        # 2. Split into features and target (the column list is not used here).
        X, y, _ = prepare_features_and_target(frame)

        # 3. Build the pipeline.
        pipeline, all_features = build_pipeline()

        # 4 + 5. Train the model, then save it.
        model_path = save_pipeline(train_model(X, y, pipeline), all_features)

        print("\n" + banner)
        print("模型训练完成!")
        print(f"模型文件: {model_path}")
        print("下一步:使用Flask应用部署模型")
        print(banner)

    except Exception as exc:
        print(f"\n训练过程出现错误: {str(exc)}")
        import traceback
        traceback.print_exc()
        sys.exit(1)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
33
test/requirements.txt
Normal file
33
test/requirements.txt
Normal file
@@ -0,0 +1,33 @@
|
||||
# CardioAI - Cardiovascular Disease Intelligent Assistant System
|
||||
# Python dependencies for the multi-module application
|
||||
|
||||
# Conda Environment Setup Instructions:
|
||||
# 1. Create a new conda environment named 'cardioenv' with Python 3.10:
|
||||
# conda create -n cardioenv python=3.10
|
||||
# 2. Activate the environment:
|
||||
# conda activate cardioenv
|
||||
# 3. Install dependencies from this file:
|
||||
# pip install -r requirements.txt
|
||||
|
||||
# Core data processing and machine learning
|
||||
pandas>=2.0.0
|
||||
openpyxl>=3.1.0
|
||||
numpy>=1.24.0
|
||||
scikit-learn>=1.3.0
|
||||
xgboost>=2.0.0
|
||||
joblib>=1.3.0
|
||||
|
||||
# Data visualization and dashboard
|
||||
streamlit>=1.28.0
|
||||
plotly>=5.18.0
|
||||
|
||||
# Web API and prediction server
|
||||
Flask>=3.0.0
|
||||
|
||||
# Environment configuration
|
||||
python-dotenv>=1.0.0
|
||||
|
||||
# AI and language model integration
|
||||
langchain-openai>=0.0.5
|
||||
dashscope>=1.14.0
|
||||
requests>=2.31.0
|
||||
1
zzs_test
Submodule
1
zzs_test
Submodule
Submodule zzs_test added at bf30e493dd
1
zzs_test4
Submodule
1
zzs_test4
Submodule
Submodule zzs_test4 added at 39b398e159
Reference in New Issue
Block a user