feat: 添加 Streamlit 心血管疾病数据可视化仪表板
- 实现数据加载和清洗函数 (带 @st.cache_data 装饰器)
- 年龄转换: 天 -> 年
- BMI 计算和分类
- 异常值处理: 血压范围过滤
- 类别转换: cholesterol, gluc, gender, cardio
- Streamlit 交互界面: 侧边栏筛选器
- 可视化图表: 年龄分布直方图、BMI与心血管疾病堆叠柱状图

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
297
ai_code/aicodes/module1_dashboard/cardio_dashboard.py
Normal file
297
ai_code/aicodes/module1_dashboard/cardio_dashboard.py
Normal file
@@ -0,0 +1,297 @@
|
||||
"""
|
||||
CardioAI 心血管疾病数据可视化仪表板
|
||||
使用 Streamlit 构建交互式数据分析和可视化应用
|
||||
"""
|
||||
|
||||
import streamlit as st
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import plotly.express as px
|
||||
|
||||
|
||||
# ============================================
# Data loading (cached)
# ============================================
@st.cache_data
def load_data(file_path: str) -> pd.DataFrame:
    """
    Read an Excel workbook into a DataFrame.

    Decorated with ``st.cache_data``, so the read goes through Streamlit's
    data cache rather than being a bare ``pd.read_excel`` call.

    Args:
        file_path: Path of the Excel file to read.

    Returns:
        The DataFrame produced by ``pd.read_excel(file_path)``.
    """
    return pd.read_excel(file_path)
|
||||
|
||||
|
||||
# ============================================
# Data cleaning (cached)
# ============================================
@st.cache_data
def clean_data(df: pd.DataFrame) -> pd.DataFrame:
    """
    Clean the raw records and derive the columns the dashboard relies on.

    Steps:
        1. Convert ``age`` (days) to whole years -> ``age_years``.
        2. Compute ``bmi`` as ``weight / (height / 100) ** 2``
           (presumably kg and cm -- confirm against the data source).
        3. Drop rows whose blood-pressure readings are implausible.
        4. Add descriptive ``*_category`` label columns.

    The input frame is not modified; a new frame is returned.

    Args:
        df: Raw records. Must provide the columns ``age``, ``weight``,
            ``height``, ``ap_hi``, ``ap_lo``, ``cholesterol``, ``gluc``,
            ``gender`` and ``cardio``.

    Returns:
        The rows that pass the blood-pressure checks, with the added columns
        ``age_years``, ``bmi``, ``cholesterol_category``, ``gluc_category``,
        ``bmi_category``, ``gender_category`` and ``cardio_category``.

    Raises:
        KeyError: If one of the required columns is missing.
    """
    # 1-2. Derived numeric columns. ``assign`` works on a copy, so the
    # caller's frame is left untouched.
    df_clean = df.assign(
        age_years=(df['age'] / 365).round().astype(int),
        bmi=df['weight'] / ((df['height'] / 100) ** 2),
    )

    # 3. Outlier handling as a single mask instead of three chained filters:
    #    - systolic must exceed diastolic,
    #    - systolic within [90, 250],
    #    - diastolic within [60, 150] (``between`` is inclusive on both ends).
    ap_hi = df_clean['ap_hi']
    ap_lo = df_clean['ap_lo']
    plausible = (
        (ap_hi > ap_lo)
        & ap_hi.between(90, 250)
        & ap_lo.between(60, 150)
    )
    # Explicit copy so the column assignments below write to an independent
    # frame rather than to a slice of the unfiltered one.
    df_clean = df_clean.loc[plausible].copy()

    # 4. Category labels.
    # 4.1 / 4.2 cholesterol and gluc share the same three-level coding.
    level_labels = {1: '正常', 2: '高于正常', 3: '远高于正常'}
    df_clean['cholesterol_category'] = df_clean['cholesterol'].map(level_labels)
    df_clean['gluc_category'] = df_clean['gluc'].map(level_labels)

    # 4.3 BMI bands, vectorised. ``np.select`` takes the first matching
    # condition, which reproduces an if/elif chain over the thresholds.
    # NOTE: a NaN BMI fails every comparison and so receives the default label.
    bmi = df_clean['bmi']
    df_clean['bmi_category'] = np.select(
        [bmi < 18.5, bmi < 24, bmi < 28],
        ['偏瘦', '正常', '超重'],
        default='肥胖',
    )

    # 4.4 gender code -> label
    df_clean['gender_category'] = df_clean['gender'].map({1: '女性', 2: '男性'})

    # 4.5 cardio flag -> label
    df_clean['cardio_category'] = df_clean['cardio'].map(
        {0: '无心血管疾病', 1: '有心血管疾病'}
    )

    return df_clean
|
||||
|
||||
|
||||
# ============================================
# Streamlit page configuration
# ============================================
# Issued at module level, so it runs ahead of every Streamlit UI call made
# inside main().
st.set_page_config(page_title="CardioAI 心血管疾病分析", page_icon="❤️", layout="wide")
|
||||
|
||||
|
||||
# ============================================
# Main program
# ============================================
# Colour used for each cardio label; shared by both charts.
_CARDIO_COLORS = {
    '无心血管疾病': '#2ecc71',
    '有心血管疾病': '#e74c3c',
}


def _default_data_path() -> str:
    """Return the workbook path, resolved relative to this script.

    The workbook is looked up at ``../data/心血管疾病.xlsx`` from the script's
    own directory, so the app does not depend on one machine's absolute path.
    """
    from pathlib import Path  # local import: only this helper needs it

    script_dir = Path(__file__).resolve().parent
    return str(script_dir.parent / "data" / "心血管疾病.xlsx")


def _apply_sidebar_filters(df_clean: pd.DataFrame) -> pd.DataFrame:
    """Render the sidebar filter widgets and return the rows they select.

    Args:
        df_clean: Non-empty cleaned frame with ``age_years``,
            ``gender_category`` and ``cardio_category`` columns.

    Returns:
        The subset of ``df_clean`` matching the chosen age range, genders and
        cardio states.
    """
    st.sidebar.header("🔍 筛选条件")

    # Age range slider, bounded by the ages present in the data.
    age_min = int(df_clean['age_years'].min())
    age_max = int(df_clean['age_years'].max())
    age_low, age_high = st.sidebar.slider(
        "年龄范围 (岁)",
        min_value=age_min,
        max_value=age_max,
        value=(age_min, age_max),
    )

    # Gender multiselect; everything is selected by default.
    gender_options = df_clean['gender_category'].unique().tolist()
    selected_genders = st.sidebar.multiselect(
        "性别",
        options=gender_options,
        default=gender_options,
    )

    # Cardio-state multiselect; everything is selected by default.
    cardio_options = df_clean['cardio_category'].unique().tolist()
    selected_cardios = st.sidebar.multiselect(
        "心血管疾病状态",
        options=cardio_options,
        default=cardio_options,
    )

    # ``between`` is inclusive on both ends, matching the slider semantics.
    selected = (
        df_clean['age_years'].between(age_low, age_high)
        & df_clean['gender_category'].isin(selected_genders)
        & df_clean['cardio_category'].isin(selected_cardios)
    )
    return df_clean[selected]


def _render_overview(df_filtered: pd.DataFrame) -> None:
    """Show the three headline metrics for the filtered rows."""
    st.header("📈 数据概览")

    total_count = len(df_filtered)
    disease_count = int((df_filtered['cardio'] == 1).sum())
    # Guard the division: the filters can legitimately select zero rows.
    risk_rate = (disease_count / total_count * 100) if total_count > 0 else 0

    col1, col2, col3 = st.columns(3)

    with col1:
        st.metric(label="筛选后记录数", value=f"{total_count:,}")

    with col2:
        st.metric(
            label="心血管疾病风险率",
            value=f"{risk_rate:.1f}%",
            delta=f"{disease_count} / {total_count}",
            # The ratio is context, not a change, so do not colour it as a
            # positive/negative delta.
            delta_color="off",
        )

    with col3:
        # The mean of an empty selection is NaN; show a placeholder instead.
        if total_count > 0:
            avg_age_text = f"{df_filtered['age_years'].mean():.1f} 岁"
        else:
            avg_age_text = "N/A"
        st.metric(label="平均年龄", value=avg_age_text)


def _render_charts(df_filtered: pd.DataFrame) -> None:
    """Draw the age histogram and the BMI/cardio bar chart in two tabs."""
    st.header("📊 可视化分析")

    if df_filtered.empty:
        # Nothing to plot; tell the user why the charts are missing.
        st.warning("⚠️ 当前筛选条件下没有数据,请调整筛选条件")
        return

    tab_age, tab_bmi = st.tabs(["年龄分布", "BMI与心血管疾病关系"])

    with tab_age:
        st.subheader("年龄分布直方图 (按心血管疾病状态区分)")

        fig_age = px.histogram(
            df_filtered,
            x='age_years',
            color='cardio_category',
            nbins=20,
            title="年龄分布 (按心血管疾病状态)",
            labels={
                'age_years': '年龄 (岁)',
                'count': '人数',
                'cardio_category': '心血管疾病状态',
            },
            color_discrete_map=_CARDIO_COLORS,
            barmode='overlay',
        )
        fig_age.update_layout(bargap=0.1)
        st.plotly_chart(fig_age, use_container_width=True)

    with tab_bmi:
        st.subheader("BMI分类对心血管疾病的影响")

        # Row counts per (BMI band, cardio state) pair.
        bmi_cardio = (
            df_filtered
            .groupby(['bmi_category', 'cardio_category'])
            .size()
            .reset_index(name='count')
        )

        fig_bmi = px.bar(
            bmi_cardio,
            x='bmi_category',
            y='count',
            color='cardio_category',
            title="BMI分类与心血管疾病关系",
            labels={
                'bmi_category': 'BMI分类',
                'count': '人数',
                'cardio_category': '心血管疾病状态',
            },
            color_discrete_map=_CARDIO_COLORS,
            # Fix the x-axis order from lightest to heaviest band.
            category_orders={'bmi_category': ['偏瘦', '正常', '超重', '肥胖']},
        )
        fig_bmi.update_layout(bargap=0.1)
        st.plotly_chart(fig_bmi, use_container_width=True)


def main():
    """Entry point: load, clean, filter and visualise the cardio data set."""
    # Page title
    st.title("❤️ CardioAI 心血管疾病数据分析")
    st.markdown("---")

    # Load the data. Any failure is reported in the page instead of crashing
    # the app with a traceback.
    try:
        df = load_data(_default_data_path())
    except Exception as e:
        st.error(f"❌ 数据加载失败: {e}")
        return
    st.success(f"✅ 成功加载数据,共 {len(df)} 条记录")

    # Cleaning and feature engineering
    df_clean = clean_data(df)
    st.info(f"📊 数据清洗后剩余 {len(df_clean)} 条记录 (剔除了异常值)")

    if df_clean.empty:
        # With no rows left the slider bounds below would be NaN.
        st.warning("⚠️ 清洗后没有可用数据,无法继续分析")
        return

    st.markdown("---")

    # Sidebar filters -> filtered frame
    df_filtered = _apply_sidebar_filters(df_clean)

    st.markdown("---")

    # Headline metrics
    _render_overview(df_filtered)

    st.markdown("---")

    # Charts
    _render_charts(df_filtered)

    st.markdown("---")

    # Preview of the cleaned (unfiltered) data
    with st.expander("📋 查看清洗后的数据"):
        st.dataframe(df_clean.head(100))
        st.caption(f"显示前 100 条记录,共 {len(df_clean)} 条")


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user