update code
This commit is contained in:
15
.claude/settings.local.json
Normal file
15
.claude/settings.local.json
Normal file
@@ -0,0 +1,15 @@
|
||||
{
|
||||
"permissions": {
|
||||
"allow": [
|
||||
"Bash(mkdir:*)",
|
||||
"Bash(cd ..:*)",
|
||||
"Bash(ls:*)",
|
||||
"Bash(echo 语法检查失败:*)",
|
||||
"Bash(head:*)",
|
||||
"Bash(cd module1_dashboard && python -m py_compile cardio_dashboard.py 2>&1)",
|
||||
"Bash(cd:*)",
|
||||
"Bash(python:*)",
|
||||
"Bash(chmod +x:*)"
|
||||
]
|
||||
}
|
||||
}
|
||||
8
.idea/.gitignore
generated
vendored
Normal file
8
.idea/.gitignore
generated
vendored
Normal file
@@ -0,0 +1,8 @@
|
||||
# Default ignored files
|
||||
/shelf/
|
||||
/workspace.xml
|
||||
# Editor-based HTTP Client requests
|
||||
/httpRequests/
|
||||
# Datasource local storage ignored files
|
||||
/dataSources/
|
||||
/dataSources.local.xml
|
||||
6
.idea/inspectionProfiles/profiles_settings.xml
generated
Normal file
6
.idea/inspectionProfiles/profiles_settings.xml
generated
Normal file
@@ -0,0 +1,6 @@
|
||||
<component name="InspectionProjectProfileManager">
|
||||
<settings>
|
||||
<option name="USE_PROJECT_PROFILE" value="false" />
|
||||
<version value="1.0" />
|
||||
</settings>
|
||||
</component>
|
||||
7
.idea/misc.xml
generated
Normal file
7
.idea/misc.xml
generated
Normal file
@@ -0,0 +1,7 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="Black">
|
||||
<option name="sdkName" value="cardioenv" />
|
||||
</component>
|
||||
<component name="ProjectRootManager" version="2" project-jdk-name="cardioenv" project-jdk-type="Python SDK" />
|
||||
</project>
|
||||
8
.idea/modules.xml
generated
Normal file
8
.idea/modules.xml
generated
Normal file
@@ -0,0 +1,8 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="ProjectModuleManager">
|
||||
<modules>
|
||||
<module fileurl="file://$PROJECT_DIR$/.idea/zion_agent.iml" filepath="$PROJECT_DIR$/.idea/zion_agent.iml" />
|
||||
</modules>
|
||||
</component>
|
||||
</project>
|
||||
7
.idea/vcs.xml
generated
Normal file
7
.idea/vcs.xml
generated
Normal file
@@ -0,0 +1,7 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="VcsDirectoryMappings">
|
||||
<mapping directory="" vcs="Git" />
|
||||
<mapping directory="$PROJECT_DIR$" vcs="Git" />
|
||||
</component>
|
||||
</project>
|
||||
15
.idea/zion_agent.iml
generated
Normal file
15
.idea/zion_agent.iml
generated
Normal file
@@ -0,0 +1,15 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<module type="PYTHON_MODULE" version="4">
|
||||
<component name="NewModuleRootManager">
|
||||
<content url="file://$MODULE_DIR$" />
|
||||
<orderEntry type="jdk" jdkName="cardioenv" jdkType="Python SDK" />
|
||||
<orderEntry type="sourceFolder" forTests="false" />
|
||||
</component>
|
||||
<component name="TemplatesService">
|
||||
<option name="TEMPLATE_FOLDERS">
|
||||
<list>
|
||||
<option value="$MODULE_DIR$/aicodes/module2_predictor/templates" />
|
||||
</list>
|
||||
</option>
|
||||
</component>
|
||||
</module>
|
||||
70
aicodes/README.md
Normal file
70
aicodes/README.md
Normal file
@@ -0,0 +1,70 @@
|
||||
# CardioAI - 心血管疾病智能辅助系统
|
||||
|
||||
## 项目概述
|
||||
本项目是一个多模块应用,集成了数据可视化(Streamlit)、机器学习预测(XGBoost/Flask)和AI语音问答(DeepSeek/cosyVoice)。
|
||||
|
||||
## 项目结构
|
||||
```
|
||||
F:\Project\PythonProject\zion_agent\
|
||||
├── .env # 环境配置文件
|
||||
├── data\ # 数据文件目录
|
||||
│ └── 心血管疾病.xlsx # 心血管疾病数据集
|
||||
├── aicodes\ # 源代码根目录
|
||||
│ ├── requirements.txt # 项目依赖包列表
|
||||
│ ├── data\ # 项目数据目录
|
||||
│ ├── module1_dashboard\ # 模块1: Streamlit数据可视化
|
||||
│ ├── module2_predictor\ # 模块2: Flask+XGBoost预测模型
|
||||
│ │ └── templates\ # Flask模板文件
|
||||
│ └── module3_voice_assistant\ # 模块3: AI语音问答系统
|
||||
│ └── templates\ # 语音问答模板文件
|
||||
```
|
||||
|
||||
## 环境配置
|
||||
|
||||
### 1. 创建Conda虚拟环境
|
||||
```bash
|
||||
conda create -n cardioenv python=3.10
|
||||
conda activate cardioenv
|
||||
```
|
||||
|
||||
### 2. 安装依赖包
|
||||
```bash
|
||||
cd aicodes
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
### 3. 配置环境变量
|
||||
在 `.env` 文件中配置必要的API密钥和参数:
|
||||
```
|
||||
# 示例配置
|
||||
OPENAI_API_KEY=your_openai_api_key_here
|
||||
DASHSCOPE_API_KEY=your_dashscope_api_key_here
|
||||
```
|
||||
|
||||
## 各模块说明
|
||||
|
||||
### Module 1: 数据可视化看板 (Streamlit)
|
||||
- 位置: `module1_dashboard/`
|
||||
- 功能: 心血管疾病数据可视化分析
|
||||
- 技术栈: Streamlit + Plotly + Pandas
|
||||
|
||||
### Module 2: 疾病预测模型 (Flask + XGBoost)
|
||||
- 位置: `module2_predictor/`
|
||||
- 功能: 心血管疾病风险预测
|
||||
- 技术栈: Flask + XGBoost + Scikit-learn
|
||||
|
||||
### Module 3: AI语音问答助手
|
||||
- 位置: `module3_voice_assistant/`
|
||||
- 功能: 基于语音的智能问答系统
|
||||
- 技术栈: DeepSeek API + cosyVoice + LangChain
|
||||
|
||||
## 下一步操作
|
||||
1. 将 `心血管疾病.xlsx` 数据文件放入 `data/` 目录
|
||||
2. 根据需要配置 `.env` 文件中的API密钥
|
||||
3. 开始开发各个模块的功能
|
||||
|
||||
## 开发说明
|
||||
- 请严格按照给定的文件路径进行开发
|
||||
- 数据文件路径: `F:\Project\PythonProject\zion_agent\data\心血管疾病.xlsx`
|
||||
- 配置文件路径: `F:\Project\PythonProject\zion_agent\.env`
|
||||
- 代码根目录: `F:\Project\PythonProject\zion_agent\aicodes`
|
||||
BIN
aicodes/data/心血管疾病.xlsx
Normal file
BIN
aicodes/data/心血管疾病.xlsx
Normal file
Binary file not shown.
115
aicodes/module1_dashboard/README.md
Normal file
115
aicodes/module1_dashboard/README.md
Normal file
@@ -0,0 +1,115 @@
|
||||
# Module 1: 心血管疾病数据可视化看板
|
||||
|
||||
## 概述
|
||||
这是一个基于Streamlit的交互式数据可视化应用程序,用于分析和可视化心血管疾病数据。
|
||||
|
||||
## 功能特性
|
||||
- 数据加载与清洗
|
||||
- 特征工程(年龄转换、BMI计算)
|
||||
- 异常值处理(血压数据)
|
||||
- 交互式数据筛选
|
||||
- 多种可视化图表
|
||||
- 数据导出功能
|
||||
|
||||
## 文件结构
|
||||
```
|
||||
module1_dashboard/
|
||||
├── cardio_dashboard.py # 主应用程序文件
|
||||
└── README.md # 本说明文件
|
||||
```
|
||||
|
||||
## 运行要求
|
||||
|
||||
### 1. 环境配置
|
||||
确保已创建并激活conda虚拟环境:
|
||||
```bash
|
||||
conda create -n cardioenv python=3.10
|
||||
conda activate cardioenv
|
||||
```
|
||||
|
||||
### 2. 安装依赖
|
||||
```bash
|
||||
cd aicodes
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
### 3. 准备数据文件
|
||||
将 `心血管疾病.xlsx` 文件放置在以下路径:
|
||||
```
|
||||
F:\Project\PythonProject\zion_agent\aicodes\data\心血管疾病.xlsx
|
||||
```
|
||||
|
||||
如果数据文件不存在,应用程序将使用示例数据进行演示。
|
||||
|
||||
## 运行应用程序
|
||||
|
||||
### 方式1: 使用Streamlit命令行
|
||||
```bash
|
||||
cd aicodes/module1_dashboard
|
||||
streamlit run cardio_dashboard.py
|
||||
```
|
||||
|
||||
### 方式2: 使用指定的Python解释器
|
||||
```bash
|
||||
"C:\Users\Yeraphael\.conda\envs\cardioenv\python.exe" -m streamlit run cardio_dashboard.py
|
||||
```
|
||||
|
||||
## 应用程序界面
|
||||
|
||||
### 侧边栏筛选器
|
||||
- **年龄范围滑块**: 筛选指定年龄范围的记录
|
||||
- **性别多选框**: 筛选指定性别的记录(1=男性,2=女性)
|
||||
- **心血管疾病状态**: 筛选疾病状态(0=无疾病,1=有疾病)
|
||||
|
||||
### 主页展示
|
||||
- **总记录数**: 筛选后的数据记录总数
|
||||
- **心血管疾病风险率**: 心血管疾病患者的比例
|
||||
- **平均年龄**: 筛选后数据的平均年龄
|
||||
|
||||
### 可视化图表
|
||||
1. **年龄分布直方图**: 按心血管疾病状态区分的年龄分布
|
||||
2. **BMI类别堆叠柱状图**: BMI类别对心血管疾病的影响
|
||||
3. **血压与年龄关系图**: 收缩压和舒张压与年龄的关系
|
||||
|
||||
### 数据导出
|
||||
可以从侧边栏下载筛选后的数据为CSV文件。
|
||||
|
||||
## 数据处理流程
|
||||
|
||||
### 1. 数据加载
|
||||
- 从Excel文件加载数据
|
||||
- 检查文件是否存在
|
||||
- 显示原始数据信息
|
||||
|
||||
### 2. 特征工程
|
||||
- 将age(天)转换为age_years(年)
|
||||
- 计算BMI: weight / (height/100)^2
|
||||
- 创建BMI类别: 偏瘦、正常、超重、肥胖
|
||||
|
||||
### 3. 数据清洗
|
||||
- 删除舒张压≥收缩压的记录
|
||||
- 删除血压极端异常值:
|
||||
- 收缩压: 90-250 mmHg
|
||||
- 舒张压: 60-150 mmHg
|
||||
|
||||
### 4. 类别转换
|
||||
- 胆固醇水平: 1=正常, 2=高于正常, 3=显著高于正常
|
||||
- 血糖水平: 1=正常, 2=高于正常, 3=显著高于正常
|
||||
|
||||
## 故障排除
|
||||
|
||||
### 常见问题
|
||||
1. **数据文件未找到**: 应用程序将使用示例数据进行演示
|
||||
2. **缺少必需列**: 应用程序会跳过相关处理步骤并显示警告
|
||||
3. **依赖包缺失**: 确保已安装requirements.txt中的所有包
|
||||
|
||||
### 日志查看
|
||||
Streamlit应用程序运行时会在终端显示日志信息,包括:
|
||||
- 数据加载状态
|
||||
- 数据处理步骤
|
||||
- 错误和警告信息
|
||||
|
||||
## 开发说明
|
||||
- 数据文件路径已硬编码为:`F:\Project\PythonProject\zion_agent\aicodes\data\心血管疾病.xlsx`
|
||||
- 如需更改数据路径,请修改`cardio_dashboard.py`中的`DATA_PATH`常量
|
||||
- 所有数据处理函数都使用了`@st.cache_data`装饰器以提高性能
|
||||
Binary file not shown.
774
aicodes/module1_dashboard/cardio_dashboard.py
Normal file
774
aicodes/module1_dashboard/cardio_dashboard.py
Normal file
@@ -0,0 +1,774 @@
|
||||
"""
|
||||
CardioAI - 心血管疾病数据可视化看板
|
||||
Streamlit应用程序,用于对心血管疾病数据进行清洗、特征工程和交互式可视化
|
||||
|
||||
数据路径: F:\\Project\\PythonProject\\zion_agent\\aicodes\\data\\心血管疾病.xlsx
|
||||
"""
|
||||
|
||||
import streamlit as st
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import plotly.express as px
|
||||
import plotly.graph_objects as go
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
# 设置页面配置
|
||||
st.set_page_config(
|
||||
page_title="CardioAI - 心血管疾病智能分析",
|
||||
page_icon="❤️",
|
||||
layout="wide",
|
||||
initial_sidebar_state="expanded"
|
||||
)
|
||||
|
||||
def standardize_column_names(df):
    """
    Rename DataFrame columns to the canonical English names used by the dashboard.

    Known Chinese headers (with or without a unit suffix) and any
    capitalisation of the English names are mapped to one of: age, gender,
    height, weight, ap_hi, ap_lo, cholesterol, gluc, cardio. Headers that are
    not recognised are kept, but lower-cased and with spaces, brackets and
    dashes collapsed into single underscores.

    Full-width parentheses (U+FF08 / U+FF09), which are common in Chinese
    spreadsheet headers, are treated the same as ASCII parentheses.

    Args:
        df: Input DataFrame. May be None or have zero rows.

    Returns:
        tuple: (copy of ``df`` with renamed columns, dict mapping each
        whitespace-stripped original header to its new name). A None or
        zero-row input is returned unchanged together with an empty dict.
    """
    if df is None or len(df) == 0:
        return df, {}

    # Lookup table. Keys are lower-case and use ASCII parentheses; every
    # header is normalised the same way before the lookup, so one entry covers
    # all capitalisations and both parenthesis styles.
    known_names = {
        '年龄': 'age', '年龄(天)': 'age', 'age': 'age',
        '性别': 'gender', '性别(1男2女)': 'gender', 'gender': 'gender',
        '身高': 'height', '身高(cm)': 'height', 'height': 'height',
        '体重': 'weight', '体重(kg)': 'weight', 'weight': 'weight',
        '收缩压': 'ap_hi', '收缩压(mmhg)': 'ap_hi', 'ap_hi': 'ap_hi', 'aphi': 'ap_hi',
        '舒张压': 'ap_lo', '舒张压(mmhg)': 'ap_lo', 'ap_lo': 'ap_lo', 'aplo': 'ap_lo',
        '胆固醇': 'cholesterol', '胆固醇水平': 'cholesterol', 'cholesterol': 'cholesterol',
        '血糖': 'gluc', '血糖水平': 'gluc', 'gluc': 'gluc',
        '心血管疾病': 'cardio', '是否患心血管疾病': 'cardio',
        '心血管疾病(0无1有)': 'cardio', 'cardio': 'cardio',
    }

    # Written as escapes so the full-width characters survive any tool that
    # normalises text to ASCII.
    fullwidth_to_ascii = str.maketrans({'\uff08': '(', '\uff09': ')'})

    # Characters replaced by '_' in headers that are not in the lookup table.
    separators = (' ', '(', ')', '【', '】', '[', ']', '{', '}', '-', '–', '—')

    column_mapping = {}
    new_columns = []
    for col in df.columns:
        original_col = str(col).strip()
        lookup_key = original_col.translate(fullwidth_to_ascii).lower()

        new_col = known_names.get(lookup_key)
        if new_col is None:
            # Unknown header: keep it, but in a tidy snake_case-like form.
            new_col = lookup_key
            for char in separators:
                new_col = new_col.replace(char, '_')
            while '__' in new_col:
                new_col = new_col.replace('__', '_')
            new_col = new_col.strip('_')

        column_mapping[original_col] = new_col
        new_columns.append(new_col)

    # Work on a copy so the caller's DataFrame keeps its original headers.
    df_std = df.copy()
    df_std.columns = new_columns
    return df_std, column_mapping
|
||||
|
||||
# Module-level constants

# Location of the dataset, resolved relative to this file
# (aicodes/module1_dashboard/ -> aicodes/data/) so the app works from any
# checkout location instead of only from one developer's F: drive.
# Kept as a str because callers pass it to os.path functions.
DATA_PATH = str(Path(__file__).resolve().parent.parent / "data" / "心血管疾病.xlsx")

# Display labels for the numeric cholesterol codes found in the dataset
CHOLESTEROL_MAP = {
    1: "正常",
    2: "高于正常",
    3: "显著高于正常"
}

# Display labels for the numeric glucose codes found in the dataset
GLUC_MAP = {
    1: "正常",
    2: "高于正常",
    3: "显著高于正常"
}

# Display labels for the BMI category keys assigned in process_features
BMI_CATEGORIES = {
    "underweight": "偏瘦 (<18.5)",
    "normal": "正常 (18.5-24.9)",
    "overweight": "超重 (25-29.9)",
    "obese": "肥胖 (≥30)"
}
|
||||
|
||||
@st.cache_data(ttl=3600)
def load_and_clean_data(data_path):
    """
    Load the cardiovascular dataset from an Excel file and standardise its column names.

    Besides loading, this renders diagnostic output in the Streamlit page:
    record count, original vs. standardised headers, missing key columns,
    dtypes, the first rows and summary statistics.

    Args:
        data_path: Path of the Excel (.xlsx) data file.

    Returns:
        pandas.DataFrame with standardised column names, or None when the
        file does not exist or cannot be read (an error is shown in the page).
    """
    try:
        if not os.path.exists(data_path):
            st.error(f"数据文件不存在: {data_path}")
            # Derive the hint from the path that was actually checked, so the
            # message cannot drift away from the configured location.
            st.info(
                f"请将 '{os.path.basename(data_path)}' 文件放置在: "
                f"{os.path.dirname(data_path)}"
            )
            return None

        df = pd.read_excel(data_path, engine='openpyxl')

        original_count = len(df)
        st.success(f"成功加载数据: {original_count:,} 条记录")
        st.info(f"检测到 {len(df.columns)} 个数据列")

        # Remember the headers as they appear in the file, for the
        # before/after comparison below.
        original_columns = list(df.columns)

        df, column_mapping = standardize_column_names(df)

        with st.expander("🔍 详细数据信息"):
            st.write(f"**数据形状**: {df.shape}")
            st.write("**原始列名**:", original_columns)
            st.write("**标准化后列名**:", list(df.columns))
            st.write("**列名映射**:", column_mapping)

            # Columns the rest of the dashboard relies on
            critical_columns = ['age', 'gender', 'height', 'weight', 'ap_hi',
                                'ap_lo', 'cholesterol', 'gluc', 'cardio']
            missing_columns = [col for col in critical_columns if col not in df.columns]

            if missing_columns:
                st.warning(f"缺失关键列: {missing_columns}")
                st.info("尝试在数据中查找类似列名...")
                # Suggest existing columns whose name contains the missing name
                for missing_col in missing_columns:
                    possible_matches = [c for c in df.columns if missing_col in str(c).lower()]
                    if possible_matches:
                        st.write(f" '{missing_col}' 可能对应: {possible_matches}")

            st.write("**数据类型**:")
            st.write(df.dtypes)

            st.write("**前5行数据**:")
            st.dataframe(df.head())

            st.write("**基本统计信息**:")
            numeric_cols = df.select_dtypes(include=[np.number]).columns
            if len(numeric_cols) > 0:
                st.dataframe(df[numeric_cols].describe().round(2))

        return df

    except Exception as e:
        # Best effort: report the problem in the page and let the caller fall
        # back to demo data instead of crashing the app.
        st.error(f"加载数据时出错: {str(e)}")
        st.exception(e)
        return None
|
||||
|
||||
@st.cache_data(ttl=3600)
def process_features(df, strict_cleaning=True):
    """
    Run feature engineering and blood-pressure cleaning on the loaded data.

    Steps (each one reports what it did in the Streamlit page):
      1. Report missing values and flag duplicate rows (``is_duplicate``).
      2. Derive ``age_years`` from ``age``, guessing whether ``age`` is in
         days or years from its value range.
      3. Compute ``bmi`` and ``bmi_category`` from ``weight`` and ``height``.
      4. Blood pressure: convert to mmHg when the values look like kPa, then
         either drop implausible rows (strict) or flag them in ``bp_anomaly``.
      5. Add display-label columns ``cholesterol_str``, ``gluc_str`` and
         ``bmi_category_str``.

    Args:
        df: DataFrame with standardised column names.
        strict_cleaning: When True, rows with implausible blood pressure are
            removed; when False they are kept and only flagged.

    Returns:
        pandas.DataFrame: the processed copy; an empty DataFrame when ``df``
        is None or has no rows.
    """
    if df is None or len(df) == 0:
        return pd.DataFrame()

    df_processed = df.copy()
    original_count = len(df_processed)
    step_counts = {"原始数据": original_count}

    st.info(f"开始数据处理,原始数据量: {original_count:,} 条记录")

    # ---- Missing values -------------------------------------------------
    missing_values = df_processed.isnull().sum()
    total_missing = missing_values.sum()
    if total_missing > 0:
        st.warning(f"发现缺失值: 总计 {total_missing:,} 个缺失值")
        with st.expander("查看缺失值详情"):
            missing_percentage = (missing_values / len(df_processed) * 100).round(2)
            missing_df = pd.DataFrame({
                '缺失数量': missing_values,
                '缺失百分比': missing_percentage
            })
            st.dataframe(missing_df[missing_df['缺失数量'] > 0])
    else:
        st.success("没有缺失值")

    # ---- Duplicate rows (flagged, not removed) ----------------------------
    duplicate_mask = df_processed.duplicated()
    duplicates = duplicate_mask.sum()
    if duplicates > 0:
        st.warning(f"发现重复记录: {duplicates:,} 条 ({duplicates/original_count*100:.1f}%)")
        df_processed['is_duplicate'] = duplicate_mask
    else:
        st.success("没有重复记录")

    # ---- 1. Age in years --------------------------------------------------
    if 'age' in df_processed.columns:
        age_min = df_processed['age'].min()
        age_max = df_processed['age'].max()
        age_mean = df_processed['age'].mean()

        st.write(f"age列统计: 最小值={age_min:.1f}, 最大值={age_max:.1f}, 平均值={age_mean:.1f}")

        # Guess the unit from the value range: anything above 150 cannot be
        # years. Out-of-range data (e.g. negatives) is treated as days.
        if age_max > 150 and age_min > 0:
            st.info(f"检测到年龄数据以天为单位(最大值={age_max:.0f}),转换为年")
            divisor, range_label = 365.25, "转换后年龄范围"
        elif age_max <= 150 and age_min >= 0:
            st.info(f"检测到年龄数据以年为单位(最大值={age_max:.0f}),直接使用")
            divisor, range_label = 1, "年龄范围"
        else:
            st.warning(f"年龄数据范围异常({age_min:.0f} 到 {age_max:.0f}),尝试按天处理")
            divisor, range_label = 365.25, "转换后年龄范围"

        df_processed['age_years'] = (df_processed['age'] / divisor).round().astype(int)
        age_years_min = df_processed['age_years'].min()
        age_years_max = df_processed['age_years'].max()
        st.write(f"{range_label}: {age_years_min} 到 {age_years_max} 岁")
    else:
        st.warning("数据中未找到 'age' 列")
        age_like_cols = [col for col in df_processed.columns if 'age' in str(col).lower()]
        if age_like_cols:
            st.info(f"找到可能的年龄列: {age_like_cols}")
        # Placeholder so downstream code can rely on the column existing
        df_processed['age_years'] = 0

    # ---- 2. BMI = weight[kg] / (height[cm] / 100)^2 -----------------------
    if all(col in df_processed.columns for col in ['weight', 'height']):
        height_min = df_processed['height'].min()
        height_max = df_processed['height'].max()
        weight_min = df_processed['weight'].min()
        weight_max = df_processed['weight'].max()

        st.write(f"身高范围: {height_min:.1f} 到 {height_max:.1f} cm")
        st.write(f"体重范围: {weight_min:.1f} 到 {weight_max:.1f} kg")

        if height_max > 300:
            st.warning(f"身高最大值 {height_max:.1f} cm 可能不是厘米单位")

        # Non-positive heights would produce inf / meaningless BMI values that
        # end up classified as "obese"; mask them so they become NaN and fall
        # into the 'unknown' category instead.
        height_m = df_processed['height'].where(df_processed['height'] > 0) / 100
        df_processed['bmi'] = (df_processed['weight'] / height_m ** 2).round(2)

        conditions = [
            df_processed['bmi'] < 18.5,
            (df_processed['bmi'] >= 18.5) & (df_processed['bmi'] < 25),
            (df_processed['bmi'] >= 25) & (df_processed['bmi'] < 30),
            df_processed['bmi'] >= 30
        ]
        choices = ['underweight', 'normal', 'overweight', 'obese']
        # NaN BMI matches no condition and therefore gets the default
        df_processed['bmi_category'] = np.select(conditions, choices, default='unknown')

        bmi_dist = df_processed['bmi_category'].value_counts()
        st.write("BMI类别分布:", bmi_dist.to_dict())
    else:
        st.warning("数据中未找到 'weight' 或 'height' 列,无法计算BMI")
        df_processed['bmi'] = 0
        df_processed['bmi_category'] = 'unknown'

    # ---- 3. Blood-pressure outliers ----------------------------------------
    if all(col in df_processed.columns for col in ['ap_hi', 'ap_lo']):
        ap_hi_min = df_processed['ap_hi'].min()
        ap_hi_max = df_processed['ap_hi'].max()
        ap_lo_min = df_processed['ap_lo'].min()
        ap_lo_max = df_processed['ap_lo'].max()

        # Heuristic: if every reading is below 50 the data is assumed to be
        # in kPa and is converted to mmHg (factor 7.5).
        if ap_hi_max < 50 and ap_lo_max < 50:
            st.warning(f"检测到血压数据可能以kPa为单位(收缩压范围: {ap_hi_min:.1f} 到 {ap_hi_max:.1f})")
            st.info("正在将血压数据从kPa转换为mmHg(乘以7.5)")

            df_processed['ap_hi'] = df_processed['ap_hi'] * 7.5
            df_processed['ap_lo'] = df_processed['ap_lo'] * 7.5

            ap_hi_min = df_processed['ap_hi'].min()
            ap_hi_max = df_processed['ap_hi'].max()
            ap_lo_min = df_processed['ap_lo'].min()
            ap_lo_max = df_processed['ap_lo'].max()

            st.write(f"转换后收缩压范围: {ap_hi_min:.0f} 到 {ap_hi_max:.0f} mmHg")
            st.write(f"转换后舒张压范围: {ap_lo_min:.0f} 到 {ap_lo_max:.0f} mmHg")
        else:
            st.write(f"收缩压范围: {ap_hi_min:.0f} 到 {ap_hi_max:.0f} mmHg")
            st.write(f"舒张压范围: {ap_lo_min:.0f} 到 {ap_lo_max:.0f} mmHg")

            if ap_hi_max > 300 or ap_lo_max > 200:
                st.warning(f"血压值异常高(收缩压最大={ap_hi_max:.0f} mmHg),请检查数据单位")

        # Report how many rows each plausibility rule would affect
        diastolic_ge_systolic = (df_processed['ap_lo'] >= df_processed['ap_hi']).sum()
        systolic_out_of_range = ((df_processed['ap_hi'] < 90) | (df_processed['ap_hi'] > 250)).sum()
        diastolic_out_of_range = ((df_processed['ap_lo'] < 60) | (df_processed['ap_lo'] > 150)).sum()

        st.write(f"舒张压≥收缩压的记录: {diastolic_ge_systolic:,} 条 ({diastolic_ge_systolic/original_count*100:.1f}%)")
        st.write(f"收缩压异常记录: {systolic_out_of_range:,} 条 ({systolic_out_of_range/original_count*100:.1f}%)")
        st.write(f"舒张压异常记录: {diastolic_out_of_range:,} 条 ({diastolic_out_of_range/original_count*100:.1f}%)")

        if strict_cleaning:
            # Strict mode: drop rows where diastolic >= systolic ...
            initial_count = len(df_processed)
            # .copy() so later column assignments act on an independent frame
            # rather than on a slice of the previous one.
            df_processed = df_processed[df_processed['ap_lo'] < df_processed['ap_hi']].copy()
            removed_count = initial_count - len(df_processed)
            if removed_count > 0:
                st.info(f"严格清洗: 删除了 {removed_count:,} 条舒张压≥收缩压的记录")
            step_counts["删除舒张压≥收缩压"] = len(df_processed)

            # ... then drop rows outside 90-250 / 60-150 mmHg
            initial_count = len(df_processed)
            df_processed = df_processed[
                (df_processed['ap_hi'] >= 90) & (df_processed['ap_hi'] <= 250) &
                (df_processed['ap_lo'] >= 60) & (df_processed['ap_lo'] <= 150)
            ].copy()
            removed_count = initial_count - len(df_processed)
            if removed_count > 0:
                st.info(f"严格清洗: 删除了 {removed_count:,} 条血压异常值记录")
            step_counts["删除血压异常值"] = len(df_processed)
        else:
            # Lenient mode: keep every row, only flag the implausible ones
            st.info("宽松清洗模式: 保留所有血压记录,仅标记异常")
            df_processed['bp_anomaly'] = np.where(
                (df_processed['ap_lo'] >= df_processed['ap_hi']) |
                (df_processed['ap_hi'] < 90) | (df_processed['ap_hi'] > 250) |
                (df_processed['ap_lo'] < 60) | (df_processed['ap_lo'] > 150),
                '异常', '正常'
            )
            anomaly_count = (df_processed['bp_anomaly'] == '异常').sum()
            st.write(f"血压异常标记: {anomaly_count:,} 条记录 ({anomaly_count/len(df_processed)*100:.1f}%)")
    else:
        st.warning("数据中未找到 'ap_hi' 或 'ap_lo' 列,跳过血压异常值处理")
        bp_cols = [col for col in df_processed.columns
                   if 'bp' in str(col).lower() or 'pressure' in str(col).lower()]
        if bp_cols:
            st.info(f"找到可能的血压列: {bp_cols}")

    # ---- 4. Category labels -------------------------------------------------
    if 'cholesterol' in df_processed.columns:
        cholesterol_values = df_processed['cholesterol'].unique()
        st.write(f"胆固醇唯一值: {sorted(cholesterol_values)}")
        # Codes outside the map become "未知"
        df_processed['cholesterol_str'] = df_processed['cholesterol'].map(CHOLESTEROL_MAP).fillna("未知")
    else:
        df_processed['cholesterol_str'] = "未知"

    if 'gluc' in df_processed.columns:
        gluc_values = df_processed['gluc'].unique()
        st.write(f"血糖唯一值: {sorted(gluc_values)}")
        df_processed['gluc_str'] = df_processed['gluc'].map(GLUC_MAP).fillna("未知")
    else:
        df_processed['gluc_str'] = "未知"

    # ---- 5. BMI category label ---------------------------------------------
    df_processed['bmi_category_str'] = df_processed['bmi_category'].map(BMI_CATEGORIES).fillna("未知")

    # ---- Summary -------------------------------------------------------------
    final_count = len(df_processed)
    removed_total = original_count - final_count
    retention_rate = final_count / original_count * 100 if original_count > 0 else 0

    st.success("数据处理完成:")
    st.write(f"- 原始记录数: {original_count:,}")
    st.write(f"- 处理后记录数: {final_count:,}")
    st.write(f"- 删除记录数: {removed_total:,} ({removed_total/original_count*100:.1f}%)")
    st.write(f"- 数据保留率: {retention_rate:.1f}%")

    with st.expander("查看详细处理步骤"):
        for step, count in step_counts.items():
            st.write(f"{step}: {count:,} 条记录")

    return df_processed
|
||||
|
||||
def create_visualizations(df_filtered):
    """
    Render the dashboard charts for the currently filtered data.

    Charts (each is skipped with a notice when its columns are missing):
      * age histogram split by cardiovascular-disease status,
      * stacked bar chart of disease share per BMI category,
      * scatter plots of systolic / diastolic pressure against age.

    Args:
        df_filtered: Processed and filtered DataFrame.
    """
    if df_filtered.empty:
        st.warning("没有可用的数据用于可视化")
        return

    # Plotly Express treats a numeric colour column as continuous in scatter
    # plots, which shows a colour bar and ignores color_discrete_map. Plot
    # from a copy whose 'cardio' column holds string labels so that blue/red
    # is applied consistently in every chart.
    if 'cardio' in df_filtered.columns:
        cardio_labels = df_filtered['cardio'].map({0: '0', 1: '1'})
        cardio_labels = cardio_labels.fillna(df_filtered['cardio'].astype(str))
        plot_df = df_filtered.assign(cardio=cardio_labels)
    else:
        plot_df = df_filtered
    cardio_colors = {'0': 'blue', '1': 'red'}

    col1, col2 = st.columns(2)

    with col1:
        # Age histogram, one overlay per disease status
        if 'age_years' in df_filtered.columns and 'cardio' in df_filtered.columns:
            fig1 = px.histogram(
                plot_df,
                x='age_years',
                color='cardio',
                nbins=30,
                title="年龄分布直方图(按心血管疾病状态)",
                labels={'age_years': '年龄(岁)', 'cardio': '心血管疾病', 'count': '人数'},
                color_discrete_map=cardio_colors,
                barmode='overlay',
                opacity=0.7
            )
            fig1.update_layout(
                xaxis_title="年龄(岁)",
                yaxis_title="人数",
                legend_title="心血管疾病",
                hovermode="x unified"
            )
            st.plotly_chart(fig1, use_container_width=True)
        else:
            st.info("缺少年龄或心血管疾病数据,无法生成年龄分布图")

    with col2:
        # Share of disease / no disease within each BMI category
        if all(col in df_filtered.columns for col in ['bmi_category_str', 'cardio']):
            cross_tab = pd.crosstab(
                df_filtered['bmi_category_str'],
                df_filtered['cardio'],
                normalize='index'
            ).round(4) * 100

            # When the sidebar filter leaves only one disease status, the
            # crosstab has a single column; add the missing one as 0 % so the
            # two series requested below always exist.
            cross_tab = cross_tab.reindex(columns=[0, 1], fill_value=0)
            cross_tab = cross_tab.rename(columns={0: '无疾病', 1: '有疾病'})

            fig2 = px.bar(
                cross_tab,
                x=cross_tab.index,
                y=['无疾病', '有疾病'],
                title="BMI类别对心血管疾病影响的堆叠柱状图",
                labels={'value': '百分比 (%)', 'variable': '疾病状态', 'index': 'BMI类别'},
                barmode='stack'
            )
            fig2.update_layout(
                xaxis_title="BMI类别",
                yaxis_title="百分比 (%)",
                legend_title="疾病状态"
            )
            st.plotly_chart(fig2, use_container_width=True)
        else:
            st.info("缺少BMI类别或心血管疾病数据,无法生成BMI影响图")

    # Blood pressure vs. age
    st.subheader("血压与年龄的关系")
    if all(col in df_filtered.columns for col in ['age_years', 'ap_hi', 'ap_lo', 'cardio']):
        col3, col4 = st.columns(2)

        with col3:
            fig3 = px.scatter(
                plot_df,
                x='age_years',
                y='ap_hi',
                color='cardio',
                title="收缩压与年龄的关系",
                labels={'age_years': '年龄(岁)', 'ap_hi': '收缩压 (mmHg)', 'cardio': '心血管疾病'},
                color_discrete_map=cardio_colors,
                opacity=0.6
            )
            fig3.update_layout(
                xaxis_title="年龄(岁)",
                yaxis_title="收缩压 (mmHg)",
                legend_title="心血管疾病"
            )
            st.plotly_chart(fig3, use_container_width=True)

        with col4:
            fig4 = px.scatter(
                plot_df,
                x='age_years',
                y='ap_lo',
                color='cardio',
                title="舒张压与年龄的关系",
                labels={'age_years': '年龄(岁)', 'ap_lo': '舒张压 (mmHg)', 'cardio': '心血管疾病'},
                color_discrete_map=cardio_colors,
                opacity=0.6
            )
            fig4.update_layout(
                xaxis_title="年龄(岁)",
                yaxis_title="舒张压 (mmHg)",
                legend_title="心血管疾病"
            )
            st.plotly_chart(fig4, use_container_width=True)
|
||||
|
||||
def main():
    """
    Entry point of the Streamlit dashboard.

    Loads the dataset (falling back to generated demo data when the file is
    unavailable), runs feature engineering with the cleaning mode chosen in
    the sidebar, applies the sidebar filters, and renders the overview
    metrics, the summary expander, the charts and the CSV download button.
    """
    st.title("❤️ CardioAI - 心血管疾病智能分析系统")
    st.markdown("---")

    # ---- Load data -----------------------------------------------------------
    with st.spinner("正在加载数据..."):
        df_raw = load_and_clean_data(DATA_PATH)

    if df_raw is None:
        # No usable data file: build a reproducible synthetic sample instead
        st.warning("使用示例数据进行演示(实际运行时请放置数据文件)")

        np.random.seed(42)
        n_samples = 1000
        example_data = {
            'age': np.random.randint(365*20, 365*80, n_samples),  # 20-80 years, in days
            'gender': np.random.choice([1, 2], n_samples),
            'height': np.random.normal(170, 10, n_samples).astype(int),
            'weight': np.random.normal(70, 15, n_samples).astype(int),
            'ap_hi': np.random.normal(120, 20, n_samples).astype(int),
            'ap_lo': np.random.normal(80, 15, n_samples).astype(int),
            'cholesterol': np.random.choice([1, 2, 3], n_samples, p=[0.7, 0.2, 0.1]),
            'gluc': np.random.choice([1, 2, 3], n_samples, p=[0.8, 0.15, 0.05]),
            'cardio': np.random.choice([0, 1], n_samples, p=[0.7, 0.3])
        }
        df_raw = pd.DataFrame(example_data)
        st.info("正在使用示例数据进行演示")

    # ---- Cleaning mode ---------------------------------------------------------
    st.sidebar.header("⚙️ 数据处理选项")

    strict_label = "严格模式(删除异常值)"
    cleaning_mode = st.sidebar.radio(
        "选择数据清洗模式",
        options=["宽松模式(保留所有数据)", strict_label],
        index=1,  # strict mode is the default
        help="宽松模式:标记异常但不删除;严格模式:删除血压异常值"
    )
    strict_cleaning = cleaning_mode == strict_label

    if strict_cleaning:
        st.sidebar.info("严格模式:将删除血压异常记录")
    else:
        st.sidebar.info("宽松模式:保留所有数据,仅标记异常")

    # ---- Feature engineering -----------------------------------------------------
    with st.spinner("正在进行特征工程和数据清洗..."):
        df_processed = process_features(df_raw, strict_cleaning=strict_cleaning)

    if df_processed.empty:
        st.error("数据处理后没有可用的记录")
        return

    # ---- Sidebar filters -----------------------------------------------------------
    st.sidebar.header("🔍 数据筛选器")

    if 'age_years' in df_processed.columns:
        age_min = int(df_processed['age_years'].min())
        age_max = int(df_processed['age_years'].max())
        if age_min < age_max:
            age_range = st.sidebar.slider(
                "选择年龄范围",
                min_value=age_min,
                max_value=age_max,
                value=(age_min, age_max),
                help="筛选指定年龄范围的记录"
            )
        else:
            # A slider needs min_value < max_value. When every record has the
            # same age (including the all-zero placeholder used when the age
            # column is missing) there is nothing to choose, so skip the widget.
            age_range = (age_min, age_max)
    else:
        age_range = (0, 100)

    if 'gender' in df_processed.columns:
        gender_options = sorted(df_processed['gender'].unique())
        selected_genders = st.sidebar.multiselect(
            "选择性别",
            options=gender_options,
            default=gender_options,
            format_func=lambda x: "男性" if x == 1 else "女性" if x == 2 else f"性别{x}",
            help="筛选指定性别的记录"
        )
    else:
        selected_genders = []

    if 'cardio' in df_processed.columns:
        cardio_options = sorted(df_processed['cardio'].unique())
        selected_cardio = st.sidebar.multiselect(
            "选择心血管疾病状态",
            options=cardio_options,
            default=cardio_options,
            format_func=lambda x: "无疾病" if x == 0 else "有疾病" if x == 1 else f"状态{x}",
            help="筛选指定疾病状态的记录"
        )
    else:
        selected_cardio = []

    # ---- Apply filters -------------------------------------------------------------
    df_filtered = df_processed.copy()

    if 'age_years' in df_filtered.columns:
        df_filtered = df_filtered[
            (df_filtered['age_years'] >= age_range[0]) &
            (df_filtered['age_years'] <= age_range[1])
        ]

    # An empty selection means "no restriction" for the two multiselects
    if selected_genders and 'gender' in df_filtered.columns:
        df_filtered = df_filtered[df_filtered['gender'].isin(selected_genders)]

    if selected_cardio and 'cardio' in df_filtered.columns:
        df_filtered = df_filtered[df_filtered['cardio'].isin(selected_cardio)]

    # ---- Overview metrics ------------------------------------------------------------
    st.header("📊 数据概览")

    col1, col2, col3 = st.columns(3)

    with col1:
        st.metric(
            label="总记录数",
            value=f"{len(df_filtered):,}",
            help="筛选后的数据记录总数"
        )

    with col2:
        # Guard against an empty selection, whose mean would render as "nan%"
        if 'cardio' in df_filtered.columns and not df_filtered.empty:
            cardio_rate = df_filtered['cardio'].mean() * 100
            st.metric(
                label="心血管疾病风险率",
                value=f"{cardio_rate:.1f}%",
                help="心血管疾病患者的比例"
            )
        else:
            st.metric("心血管疾病风险率", "N/A")

    with col3:
        if 'age_years' in df_filtered.columns and not df_filtered.empty:
            avg_age = df_filtered['age_years'].mean()
            st.metric(
                label="平均年龄",
                value=f"{avg_age:.1f} 岁",
                help="筛选后数据的平均年龄"
            )
        else:
            st.metric("平均年龄", "N/A")

    # ---- Summary of the filtered data ----------------------------------------------------
    with st.expander("📋 查看筛选后数据摘要"):
        st.write(f"数据形状: {df_filtered.shape}")

        if not df_filtered.empty:
            st.write("### 关键统计信息")

            stats_col1, stats_col2, stats_col3 = st.columns(3)

            with stats_col1:
                if 'age_years' in df_filtered.columns:
                    st.write("**年龄统计**")
                    st.write(f"- 最小值: {df_filtered['age_years'].min()} 岁")
                    st.write(f"- 最大值: {df_filtered['age_years'].max()} 岁")
                    st.write(f"- 平均值: {df_filtered['age_years'].mean():.1f} 岁")

            with stats_col2:
                if 'bmi' in df_filtered.columns:
                    st.write("**BMI统计**")
                    st.write(f"- 最小值: {df_filtered['bmi'].min():.1f}")
                    st.write(f"- 最大值: {df_filtered['bmi'].max():.1f}")
                    st.write(f"- 平均值: {df_filtered['bmi'].mean():.1f}")

            with stats_col3:
                if all(col in df_filtered.columns for col in ['ap_hi', 'ap_lo']):
                    st.write("**血压统计**")
                    st.write(f"- 平均收缩压: {df_filtered['ap_hi'].mean():.1f} mmHg")
                    st.write(f"- 平均舒张压: {df_filtered['ap_lo'].mean():.1f} mmHg")

            st.write("### 前10行数据")
            st.dataframe(df_filtered.head(10))

    # ---- Charts --------------------------------------------------------------------------
    st.header("📈 数据可视化")
    create_visualizations(df_filtered)

    # ---- CSV export ------------------------------------------------------------------------
    st.sidebar.markdown("---")
    st.sidebar.header("📥 数据导出")

    if not df_filtered.empty:
        csv_data = df_filtered.to_csv(index=False).encode('utf-8')

        st.sidebar.download_button(
            label="下载筛选后数据 (CSV)",
            data=csv_data,
            file_name="cardio_filtered_data.csv",
            mime="text/csv",
            help="下载当前筛选后的数据"
        )

    # ---- Footer ------------------------------------------------------------------------------
    st.markdown("---")
    st.markdown(
        """
        <div style='text-align: center; color: gray;'>
        <p>CardioAI - 心血管疾病智能辅助系统 | 版本 1.0 | 使用Streamlit构建</p>
        </div>
        """,
        unsafe_allow_html=True
    )
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
55
aicodes/module1_dashboard/run_dashboard.bat
Normal file
55
aicodes/module1_dashboard/run_dashboard.bat
Normal file
@@ -0,0 +1,55 @@
|
||||
@echo off
REM CardioAI Streamlit dashboard launcher (Windows).
REM
REM Steps: optionally activate the "cardioenv" conda environment, verify that
REM Python and the required packages are available, then start Streamlit.
REM Exits with code 1 on any failed check.

REM Run from the directory containing this script so that the relative path
REM to cardio_dashboard.py resolves regardless of where the script is invoked.
cd /d "%~dp0"

echo ================================================
echo CardioAI - 心血管疾病数据可视化看板
echo ================================================

REM Check whether conda is available.
REM NOTE: inside a parenthesized block %ERRORLEVEL% is expanded once, when the
REM whole block is parsed, so it cannot observe the result of a command that
REM runs inside the same block. "if errorlevel N" is evaluated at run time and
REM is therefore used for every check that lives inside parentheses.
where conda >nul 2>nul
if not errorlevel 1 (
    echo 检查Conda环境...
    conda info --envs | findstr cardioenv >nul
    if errorlevel 1 (
        echo 警告: 未找到cardioenv环境
        echo 请先创建环境: conda create -n cardioenv python=3.10
        pause
        exit /b 1
    )
    echo 激活cardioenv环境...
    call conda activate cardioenv
)

REM Check the Python interpreter.
echo 检查Python解释器...
python --version
if %ERRORLEVEL% neq 0 (
    echo 错误: 未找到Python
    pause
    exit /b 1
)

REM Check that the required packages can be imported.
echo 检查依赖包...
python -c "import streamlit, pandas, plotly, numpy, openpyxl" 2>nul
if %ERRORLEVEL% neq 0 (
    echo 错误: 缺少必要的依赖包
    echo 请运行: pip install -r requirements.txt
    pause
    exit /b 1
)

REM Start the Streamlit application.
echo 启动Streamlit应用程序...
echo 应用程序将在浏览器中打开...
echo 按Ctrl+C停止服务器
echo.

streamlit run cardio_dashboard.py

if %ERRORLEVEL% neq 0 (
    echo.
    echo 启动失败,请检查错误信息
    pause
    exit /b 1
)
|
||||
55
aicodes/module1_dashboard/run_dashboard.sh
Normal file
55
aicodes/module1_dashboard/run_dashboard.sh
Normal file
@@ -0,0 +1,55 @@
|
||||
#!/bin/bash

# CardioAI Streamlit dashboard launcher.
#
# Steps: optionally activate the "cardioenv" conda environment, verify that
# Python and the required packages are available, then start Streamlit.
# Exits with status 1 on any failed check.

# Run from the directory containing this script so that the relative path to
# cardio_dashboard.py resolves regardless of where the script is invoked.
cd "$(dirname "${BASH_SOURCE[0]}")" || exit 1

echo "================================================"
echo "CardioAI - 心血管疾病数据可视化看板"
echo "================================================"

# Check whether conda is available
if command -v conda &> /dev/null; then
    echo "检查Conda环境..."
    if conda info --envs | grep -q "cardioenv"; then
        echo "激活cardioenv环境..."
        # `conda activate` is a shell function that exists only after conda's
        # shell hook has been loaded. A non-interactive script does not get it
        # from the user's shell startup files, so load it explicitly first.
        eval "$(conda shell.bash hook)"
        conda activate cardioenv
    else
        echo "警告: 未找到cardioenv环境"
        echo "请先创建环境: conda create -n cardioenv python=3.10"
        read -p "按Enter键继续..."
        exit 1
    fi
fi

# Check the Python interpreter
echo "检查Python解释器..."
if ! python --version; then
    echo "错误: 未找到Python"
    read -p "按Enter键继续..."
    exit 1
fi

# Check that the required packages can be imported
echo "检查依赖包..."
if ! python -c "import streamlit, pandas, plotly, numpy, openpyxl" 2>/dev/null; then
    echo "错误: 缺少必要的依赖包"
    echo "请运行: pip install -r requirements.txt"
    read -p "按Enter键继续..."
    exit 1
fi

# Start the Streamlit application
echo "启动Streamlit应用程序..."
echo "应用程序将在浏览器中打开..."
echo "按Ctrl+C停止服务器"
echo ""

if ! streamlit run cardio_dashboard.py; then
    echo ""
    echo "启动失败,请检查错误信息"
    read -p "按Enter键继续..."
    exit 1
fi
|
||||
68
aicodes/module1_dashboard/test_import.py
Normal file
68
aicodes/module1_dashboard/test_import.py
Normal file
@@ -0,0 +1,68 @@
|
||||
#!/usr/bin/env python
"""
Smoke test for cardio_dashboard.py.

Verifies that the required third-party libraries import, that
cardio_dashboard.py parses as valid Python, and that it contains the expected
functions, the @st.cache_data decorator and the configured data path.

Exits with status 1 if an import fails, the file cannot be read or parsed, or
any of the content checks fails.
"""

import ast
import os
import sys

# Directory containing this script; cardio_dashboard.py is expected next to it.
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))

# Add the script directory to the Python path
sys.path.insert(0, SCRIPT_DIR)

# Set to True by any failed content check so the script can exit non-zero.
failed = False

try:
    # Check that the required libraries can be imported
    print("测试导入必要的库...")
    import pandas as pd
    import numpy as np
    import streamlit as st
    import plotly.express as px
    print("[OK] 所有必要的库导入成功")

    # Inspect the application module
    print("\n测试导入cardio_dashboard模块...")
    # cardio_dashboard.py issues Streamlit commands, so it is not imported
    # here; its source is read and parsed instead.

    # Resolve the file relative to this script rather than the current
    # working directory, so the test works from any directory.
    dashboard_path = os.path.join(SCRIPT_DIR, 'cardio_dashboard.py')
    with open(dashboard_path, 'r', encoding='utf-8') as f:
        content = f.read()

    # Parsing performs a real syntax check; a SyntaxError is reported by the
    # generic handler below.
    tree = ast.parse(content, filename=dashboard_path)

    # Collect every function defined anywhere in the file and compare exact
    # names, so that e.g. "main_helper" cannot satisfy the check for "main".
    defined_functions = {
        node.name
        for node in ast.walk(tree)
        if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef))
    }
    required_functions = ['load_and_clean_data', 'process_features', 'create_visualizations', 'main']
    missing_functions = [func for func in required_functions if func not in defined_functions]

    if missing_functions:
        # Uses the same ASCII "[ERROR]" marker as the other checks; a
        # non-ASCII symbol here may not be encodable on every console.
        print(f"[ERROR] 缺少函数: {missing_functions}")
        failed = True
    else:
        print("[OK] 所有必需函数都存在")

    # Check for the caching decorator
    if '@st.cache_data' in content:
        print("[OK] 使用了@st.cache_data装饰器")
    else:
        print("[ERROR] 未找到@st.cache_data装饰器")
        failed = True

    # Check the data path.
    # NOTE(review): this raw string contains doubled backslashes, so it only
    # matches a path written with escaped backslashes in the dashboard source
    # - confirm against cardio_dashboard.py.
    if r'F:\\Project\\PythonProject\\zion_agent\\data\\心血管疾病.xlsx' in content:
        print("[OK] 数据路径正确设置")
    else:
        print("[ERROR] 数据路径未正确设置")
        failed = True

    print("\n基本语法检查完成!")

except ImportError as e:
    print(f"[ERROR] 导入错误: {e}")
    print("请确保已安装requirements.txt中的所有依赖包")
    sys.exit(1)
except Exception as e:
    print(f"[ERROR] 错误: {e}")
    sys.exit(1)

# Do not print the success banner when any check above reported an error.
if failed:
    sys.exit(1)

print("\n" + "="*50)
print("测试完成! 应用程序可以通过以下命令运行:")
print("streamlit run cardio_dashboard.py")
print("="*50)
|
||||
20
aicodes/requirements.txt
Normal file
20
aicodes/requirements.txt
Normal file
@@ -0,0 +1,20 @@
|
||||
|
||||
# 核心数据科学库
|
||||
pandas>=2.0.0
|
||||
openpyxl>=3.1.0
|
||||
numpy>=1.24.0
|
||||
scikit-learn>=1.3.0
|
||||
xgboost>=1.7.0
|
||||
joblib>=1.3.0
|
||||
|
||||
# 可视化与Web框架
|
||||
streamlit>=1.28.0
|
||||
plotly>=5.18.0
|
||||
Flask>=2.3.0
|
||||
|
||||
# 环境配置与API
|
||||
python-dotenv>=1.0.0
|
||||
langchain-openai>=0.0.5
|
||||
dashscope>=1.14.0
|
||||
requests>=2.31.0
|
||||
|
||||
Reference in New Issue
Block a user