Files
testhub/aiheart/cardio_dashboard.py
Alcc 498044d3a4 alcc-patch-1
实验分支
2026-03-16 15:26:15 +08:00

403 lines
11 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
心血管疾病数据仪表板
Streamlit应用程序用于心血管疾病数据的清洗、特征工程和交互式可视化
终端启动程序命令
streamlit run D:\AI_Class\PyCharm\Work_Space\CardAI\module1_dashboard\cardio_dashboard.py
"""
import streamlit as st
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from pathlib import Path
import warnings
warnings.filterwarnings('ignore')
# Configure the Streamlit page: title, icon, wide layout, sidebar expanded.
st.set_page_config(
    page_title="心血管疾病数据分析仪表板",
    page_icon="❤️",
    layout="wide",
    initial_sidebar_state="expanded"
)
# Constants - the data file is referenced by an absolute path.
# NOTE(review): this is a machine-specific Windows path; the app only finds
# its data on a machine with this exact directory layout - confirm intended.
DATA_PATH = Path("D:/AI_Class/PyCharm/Work_Space/CardAI/data/心血管疾病.xlsx")
# BMI categories (Chinese standard): label -> (lower bound, upper bound).
# The categorisation code tests `low <= bmi < high`, i.e. the lower bound is
# inclusive and the upper bound is exclusive.
BMI_CATEGORIES = {
    "偏瘦": (0, 18.5),
    "正常": (18.5, 24),
    "超重": (24, 28),
    "肥胖": (28, float('inf'))
}
# Display labels for the integer codes stored in the `cholesterol`, `gluc`
# and `gender` columns of the data set.
CHOLESTEROL_MAP = {1: "正常", 2: "高于正常", 3: "远高于正常"}
GLUC_MAP = {1: "正常", 2: "高于正常", 3: "远高于正常"}
GENDER_MAP = {1: "女性", 2: "男性"}
# Data loading and cleaning (only the successful read/clean step is cached).
@st.cache_data
def _load_and_clean_cached(file_path):
    """Read the Excel file and return the cleaned, feature-enriched frame.

    Errors are deliberately NOT handled here: an exception raised inside a
    cached function produces no cache entry, so a failed load is retried on
    the next run instead of being replayed from the cache.

    Args:
        file_path: Path of the Excel file to read.

    Returns:
        pandas.DataFrame: Cleaned data with a fresh 0..n-1 index.
    """
    df = pd.read_excel(file_path)

    # 1. Feature engineering.
    # Convert age from days to whole years (rounded).
    df['age_years'] = (df['age'] / 365.25).round().astype(int)
    # BMI = weight / height^2, with height converted from cm to m.
    df['BMI'] = df['weight'] / ((df['height'] / 100) ** 2)

    # 2. Outlier handling: drop rows whose diastolic pressure is not below
    # the systolic pressure, and keep only systolic in [90, 250] and
    # diastolic in [60, 150].
    plausible_pressure = (
        (df['ap_lo'] < df['ap_hi'])
        & df['ap_hi'].between(90, 250)
        & df['ap_lo'].between(60, 150)
    )
    # Explicit copy: the columns added below must be written to an
    # independent frame, not to a slice of the raw data (warnings are
    # silenced file-wide, so a chained-assignment problem would be hidden).
    df = df[plausible_pressure].copy()

    # 3. Map integer codes to display labels.
    df['cholesterol_str'] = df['cholesterol'].map(CHOLESTEROL_MAP)
    df['gluc_str'] = df['gluc'].map(GLUC_MAP)
    df['gender_str'] = df['gender'].map(GENDER_MAP)
    df['cardio_str'] = df['cardio'].map({0: "无心血管疾病", 1: "有心血管疾病"})

    # 4. BMI category (Chinese standard); values matching no range, such as
    # NaN, are labelled "未知".
    def categorize_bmi(bmi):
        for category, (low, high) in BMI_CATEGORIES.items():
            if low <= bmi < high:
                return category
        return "未知"

    df['bmi_category'] = df['BMI'].apply(categorize_bmi)

    return df.reset_index(drop=True)


def load_and_clean_data(file_path):
    """Load and clean the cardiovascular disease data.

    Thin, uncached wrapper around the cached loader. Reporting errors here
    (rather than inside the cached function) keeps a failed load from being
    stored in the cache and served again after the underlying problem has
    been fixed.

    Args:
        file_path: Path of the Excel file to read.

    Returns:
        pandas.DataFrame: The cleaned data, or an empty DataFrame when the
        file is missing or loading fails (an error is shown in the app).
    """
    try:
        return _load_and_clean_cached(file_path)
    except FileNotFoundError:
        st.error(f"数据文件未找到: {file_path}")
    except Exception as e:
        # Top-level UI boundary: show the failure instead of crashing the app.
        st.error(f"数据加载失败: {str(e)}")
    return pd.DataFrame()
@st.cache_data
def filter_data(df, age_range, gender_filter, cardio_filter):
    """Return the rows of ``df`` that satisfy the sidebar filters.

    Args:
        df: Cleaned data frame.
        age_range: Two-element sequence ``[min, max]`` of ages in years;
            both ends are inclusive.
        gender_filter: Selected gender labels; the entry "全部" disables
            the gender filter.
        cardio_filter: Selected disease-status labels; the entry "全部"
            disables the disease-status filter.

    Returns:
        pandas.DataFrame: The filtered rows.
    """
    # Start with the age condition and AND the optional conditions onto it.
    keep = (df['age_years'] >= age_range[0]) & (df['age_years'] <= age_range[1])

    if "全部" not in gender_filter:
        # Translate the selected labels back to the codes stored in `gender`.
        wanted_genders = []
        for code, label in GENDER_MAP.items():
            if label in gender_filter:
                wanted_genders.append(code)
        keep &= df['gender'].isin(wanted_genders)

    if "全部" not in cardio_filter:
        status_codes = (("有心血管疾病", 1), ("无心血管疾病", 0))
        wanted_status = [
            code for label, code in status_codes if label in cardio_filter
        ]
        keep &= df['cardio'].isin(wanted_status)

    return df[keep]
def create_age_distribution_chart(df):
    """Build a histogram of age in years, coloured by disease status.

    Args:
        df: Data frame providing the `age_years` and `cardio_str` columns.

    Returns:
        plotly.graph_objects.Figure: The age distribution chart.
    """
    status_colors = {"有心血管疾病": "#EF553B", "无心血管疾病": "#636EFA"}
    display_labels = {
        'age_years': '年龄(岁)',
        'count': '人数',
        'cardio_str': '心血管疾病状态',
    }

    histogram = px.histogram(
        df,
        x='age_years',
        color='cardio_str',
        nbins=30,
        title='年龄分布(按心血管疾病状态)',
        labels=display_labels,
        color_discrete_map=status_colors,
        opacity=0.7,
    )

    layout_options = {
        'bargap': 0.1,
        'xaxis_title': "年龄(岁)",
        'yaxis_title': "人数",
        'legend_title': "心血管疾病状态",
        'hovermode': 'x unified',
    }
    histogram.update_layout(**layout_options)
    return histogram
def create_bmi_cardio_chart(df):
    """Build a stacked bar chart of disease share within each BMI category.

    Args:
        df: Data frame providing the `bmi_category` and `cardio_str` columns.

    Returns:
        plotly.graph_objects.Figure: The BMI category chart.
    """
    # Row-normalised cross table: each BMI category's row sums to 1.
    cross_tab = pd.crosstab(
        df['bmi_category'],
        df['cardio_str'],
        normalize='index'
    ).reset_index()

    # Long format: one row per (BMI category, disease status) pair.
    cross_tab_melted = cross_tab.melt(
        id_vars='bmi_category',
        var_name='cardio_status',
        value_name='percentage'
    )

    # crosstab sorts its index lexicographically, which scrambles the
    # ordinal BMI scale. Force the order in which BMI_CATEGORIES declares
    # the categories, with the "未知" fallback label last.
    bmi_order = [*BMI_CATEGORIES, "未知"]

    fig = px.bar(
        cross_tab_melted,
        x='bmi_category',
        y='percentage',
        color='cardio_status',
        title='BMI分类对心血管疾病的影响',
        labels={
            'bmi_category': 'BMI分类',
            'percentage': '比例',
            'cardio_status': '心血管疾病状态'
        },
        color_discrete_map={"有心血管疾病": "#EF553B", "无心血管疾病": "#636EFA"},
        category_orders={'bmi_category': bmi_order},
        text_auto='.1%'
    )
    fig.update_layout(
        xaxis_title="BMI分类",
        yaxis_title="比例",
        legend_title="心血管疾病状态",
        yaxis_tickformat=',.0%',
        hovermode='x unified'
    )
    return fig
def display_summary_stats(df):
    """Show record count, case count and case rate as three metrics.

    Args:
        df: Data frame providing the `cardio` column.
    """
    total_records = len(df)
    cardio_cases = df['cardio'].sum()

    # Guard against division by zero when the frame is empty.
    if total_records > 0:
        cardio_rate = cardio_cases / total_records * 100
    else:
        cardio_rate = 0

    metrics = (
        ("总记录数", f"{total_records:,}"),
        ("心血管疾病病例数", f"{cardio_cases:,}"),
        ("心血管疾病风险率", f"{cardio_rate:.2f}%"),
    )
    # One metric per column, left to right.
    for column, (label, value) in zip(st.columns(3), metrics):
        with column:
            st.metric(label=label, value=value, delta=None)
def _reset_filters():
    """Button callback: bump the filter generation counter.

    The sidebar widgets embed this counter in their keys, so after a bump
    they are created as new widgets and fall back to their default values.
    A plain rerun would not do this, because widget state is kept across
    reruns.
    """
    st.session_state["filter_generation"] = (
        st.session_state.get("filter_generation", 0) + 1
    )


def _render_sidebar_filters(df):
    """Draw the sidebar filter widgets and return the filtered frame."""
    st.sidebar.header("🔍 数据筛选")
    generation = st.session_state.get("filter_generation", 0)

    # Age range slider: bounds and default selection both come from the
    # data, so the slider never offers ages outside the data set.
    age_min = int(df['age_years'].min())
    age_max = int(df['age_years'].max())
    age_range = st.sidebar.slider(
        "选择年龄范围(岁)",
        min_value=age_min,
        max_value=age_max,
        value=(age_min, age_max),
        step=1,
        key=f"age_range_{generation}"
    )

    # Gender filter.
    gender_options = ["女性", "男性", "全部"]
    gender_filter = st.sidebar.multiselect(
        "选择性别",
        options=gender_options,
        default=["全部"],
        key=f"gender_filter_{generation}"
    )

    # Disease status filter.
    cardio_options = ["有心血管疾病", "无心血管疾病", "全部"]
    cardio_filter = st.sidebar.multiselect(
        "选择心血管疾病状态",
        options=cardio_options,
        default=["全部"],
        key=f"cardio_filter_{generation}"
    )

    filtered_df = filter_data(df, age_range, gender_filter, cardio_filter)

    # Show how many records survived the filters.
    st.sidebar.markdown("---")
    st.sidebar.info(f"**筛选结果**: {len(filtered_df):,} 条记录")
    return filtered_df


def _render_analysis(filtered_df):
    """Draw the summary, age, BMI and preview sections for a non-empty frame."""
    # 1. Summary statistics.
    st.header("📊 数据摘要")
    display_summary_stats(filtered_df)
    st.markdown("---")

    # 2. Age distribution.
    st.header("📈 年龄分布分析")
    col1, col2 = st.columns([3, 1])
    with col1:
        age_chart = create_age_distribution_chart(filtered_df)
        st.plotly_chart(age_chart, use_container_width=True)
    with col2:
        st.markdown("### 年龄统计")
        st.metric("平均年龄", f"{filtered_df['age_years'].mean():.1f}")
        st.metric("年龄中位数", f"{filtered_df['age_years'].median():.1f}")
        st.metric("年龄标准差", f"{filtered_df['age_years'].std():.1f}")
    st.markdown("---")

    # 3. BMI category analysis.
    st.header("⚖️ BMI分类分析")
    col1, col2 = st.columns([3, 1])
    with col1:
        bmi_chart = create_bmi_cardio_chart(filtered_df)
        st.plotly_chart(bmi_chart, use_container_width=True)
    with col2:
        st.markdown("### BMI统计")
        st.metric("平均BMI", f"{filtered_df['BMI'].mean():.1f}")
        st.metric("BMI中位数", f"{filtered_df['BMI'].median():.1f}")
        # Share of each BMI category among the filtered rows.
        bmi_dist = filtered_df['bmi_category'].value_counts()
        st.markdown("### BMI分类分布")
        for category, count in bmi_dist.items():
            percentage = (count / len(filtered_df)) * 100
            st.markdown(f"**{category}**: {count:,} ({percentage:.1f}%)")
    st.markdown("---")

    # 4. Data preview (first 100 rows of the display columns).
    st.header("🔍 数据预览")
    with st.expander("查看筛选后的数据"):
        st.dataframe(
            filtered_df[
                ['id', 'age_years', 'gender_str', 'height', 'weight', 'BMI',
                 'bmi_category', 'ap_hi', 'ap_lo', 'cholesterol_str',
                 'gluc_str', 'smoke', 'alco', 'active', 'cardio_str']
            ].head(100),
            use_container_width=True
        )


def _render_export(filtered_df):
    """Draw the CSV download button and the filter reset button."""
    st.header("📥 数据导出")

    @st.cache_data
    def convert_df_to_csv(df):
        # NOTE(review): plain UTF-8 without a BOM; spreadsheet tools that
        # guess the encoding may need 'utf-8-sig' instead - confirm.
        return df.to_csv(index=False).encode('utf-8')

    csv_data = convert_df_to_csv(filtered_df)
    col1, col2 = st.columns(2)
    with col1:
        st.download_button(
            label="📥 下载筛选数据 (CSV)",
            data=csv_data,
            file_name="filtered_cardio_data.csv",
            mime="text/csv",
            help="下载当前筛选条件下的数据"
        )
    with col2:
        # The callback runs before the next script run, so the widgets are
        # rebuilt with default values; no explicit rerun call is needed.
        st.button("🔄 重置筛选", on_click=_reset_filters)


def main():
    """Entry point of the Streamlit application.

    Loads the data, renders the sidebar filters, then the analysis sections
    (or a warning when the filters match nothing), the export controls and
    the footer.
    """
    # Title.
    st.title("❤️ 心血管疾病数据分析仪表板")
    st.markdown("---")

    # Load the data; an empty frame signals that loading failed.
    with st.spinner("正在加载数据..."):
        df = load_and_clean_data(DATA_PATH)
    if df.empty:
        st.error("数据加载失败,请检查数据文件路径和格式。")
        return

    filtered_df = _render_sidebar_filters(df)

    if filtered_df.empty:
        # Means, medians and charts are meaningless on zero rows; tell the
        # user instead of rendering NaN metrics and empty charts.
        st.warning("当前筛选条件下没有数据,请调整筛选条件。")
    else:
        _render_analysis(filtered_df)

    _render_export(filtered_df)

    # Footer.
    # NOTE(review): the record count in the footer is hard-coded and is not
    # derived from the loaded data - confirm it matches the source file.
    st.markdown("---")
    st.markdown(
        """
<div style='text-align: center; color: gray;'>
<p>心血管疾病数据分析仪表板 | 数据来源: 心血管疾病.xlsx | 总记录数: 70,000</p>
</div>
""",
        unsafe_allow_html=True
    )


if __name__ == "__main__":
    main()