添加Module1数据可视化仪表板和心血管疾病数据

1. 创建module1_dashboard/cardio_dashboard.py
   - Streamlit应用程序,提供交互式数据可视化
   - 包含数据清洗、特征工程、异常值处理
   - 集成年龄转换、BMI计算、类别转换
   - 侧边栏筛选器、Plotly图表展示
   - 智能路径解析,支持多种运行环境

2. 添加data/心血管疾病.xlsx数据文件
   - 心血管疾病原始数据集
   - 用于模型训练和可视化分析

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-26 17:59:10 +08:00
parent 08b3b5c050
commit 3a3394da49
2 changed files with 394 additions and 0 deletions

BIN
data/心血管疾病.xlsx Normal file

Binary file not shown.

View File

@@ -0,0 +1,394 @@
#!/opt/anaconda3/envs/cardioenv/bin/python
"""
CardioAI - 心血管疾病数据可视化仪表板
Streamlit应用程序用于对心血管疾病数据进行清洗、特征工程和交互式可视化
"""
import streamlit as st
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import warnings
import os
# Ignore every warning raised through the ``warnings`` module for the whole process.
warnings.filterwarnings('ignore')
# Page configuration: browser tab title/icon, wide layout, sidebar open on load.
st.set_page_config(
    page_title="CardioAI - 心血管疾病智能分析",
    page_icon="❤️",
    layout="wide",
    initial_sidebar_state="expanded"
)
# Dataset location, written relative to the working directory. The loader in
# this file also probes several fallback locations derived from this value.
DATA_PATH = "./data/心血管疾病.xlsx"
def _candidate_data_paths(current_dir, script_dir):
    """Return the locations probed for the dataset, in priority order."""
    return [
        os.path.abspath(DATA_PATH),  # absolute form of the configured path
        DATA_PATH,  # configured relative path as-is
        os.path.join(current_dir, DATA_PATH),
        os.path.join(current_dir, "data", "心血管疾病.xlsx"),
        os.path.join(script_dir, "..", DATA_PATH),
        os.path.join(script_dir, "..", "data", "心血管疾病.xlsx"),
        os.path.join(script_dir, "data", "心血管疾病.xlsx"),
    ]


def _engineer_features(raw_df):
    """Clean the raw records and derive the columns the dashboard relies on.

    Adds ``age_years``, ``bmi``, ``cholesterol_str``, ``gluc_str``,
    ``bmi_category`` and ``bp_category`` and drops rows with implausible
    blood-pressure readings. The input frame is not modified.
    """
    df = raw_df.copy()

    # 1. Feature engineering.
    # ``age`` is converted from days to whole years (rounded).
    df['age_years'] = (df['age'] / 365.25).round().astype(int)
    # BMI = weight / (height / 100) ** 2
    # (presumably weight in kg and height in cm -- confirm against the dataset).
    df['bmi'] = df['weight'] / ((df['height'] / 100) ** 2)

    # 2. Outlier handling.
    # Drop rows whose diastolic pressure is not below the systolic pressure.
    df = df[df['ap_lo'] < df['ap_hi']].copy()
    # Keep systolic pressure in [90, 250] and diastolic pressure in [60, 150].
    df = df[
        df['ap_hi'].between(90, 250) & df['ap_lo'].between(60, 150)
    ].copy()

    # 3. Category conversion: cholesterol and glucose share the same 1/2/3 coding.
    level_labels = {1: '正常', 2: '高于正常', 3: '很高'}
    df['cholesterol_str'] = df['cholesterol'].map(level_labels)
    df['gluc_str'] = df['gluc'].map(level_labels)

    # BMI classes use left-closed intervals: <18.5, [18.5, 25), [25, 30), >=30.
    # Missing or infinite BMI values (e.g. height == 0) stay uncategorised
    # instead of silently being labelled as obese.
    df['bmi_category'] = pd.cut(
        df['bmi'],
        bins=[-np.inf, 18.5, 25, 30, np.inf],
        right=False,
        labels=['偏瘦', '正常', '超重', '肥胖'],
    ).astype(object)

    # Blood-pressure class derived from the systolic reading.
    df['bp_category'] = pd.cut(
        df['ap_hi'],
        bins=[0, 120, 140, 160, 180, 250],
        labels=['正常', '偏高', '高血压1级', '高血压2级', '高血压3级'],
    )
    return df


@st.cache_data(ttl=3600)
def _read_and_process(data_path):
    """Read the workbook at ``data_path`` and return the processed DataFrame.

    Only this step is cached. Failures propagate as exceptions, so a missing
    or unreadable file is never stored in the cache.
    """
    return _engineer_features(pd.read_excel(data_path))


def load_and_process_data():
    """Load and process the cardiovascular disease dataset.

    Probes several candidate locations for the workbook, reports the search
    in the sidebar, and delegates reading and processing to a cached helper.

    Returns:
        The processed DataFrame, or an empty DataFrame when the file cannot
        be found or processing fails (the problem is reported in the UI).
    """
    # Broad catch is deliberate: this is the UI boundary, and any failure is
    # surfaced to the user while the caller receives an empty DataFrame.
    try:
        # Debug information: show where the app is looking for the file.
        current_dir = os.getcwd()
        script_dir = os.path.dirname(os.path.abspath(__file__))
        st.sidebar.info(f"当前工作目录: {current_dir}")
        st.sidebar.info(f"脚本目录: {script_dir}")
        st.sidebar.info(f"数据路径: {DATA_PATH}")

        possible_paths = _candidate_data_paths(current_dir, script_dir)
        data_path = next(
            (path for path in possible_paths if os.path.exists(path)),
            None,
        )
        if data_path is None:
            st.sidebar.error("未找到数据文件,请检查路径")
            st.sidebar.info(f"尝试过的路径: {possible_paths}")
            return pd.DataFrame()

        st.sidebar.success(f"找到数据文件: {data_path}")
        return _read_and_process(data_path)
    except Exception as e:
        st.error(f"数据加载失败: {e}")
        return pd.DataFrame()
def create_filters(df):
    """Render the sidebar filter widgets and apply the user's selection.

    Args:
        df: Processed DataFrame providing the ``age_years``, ``gender`` and
            ``cardio`` columns.

    Returns:
        A tuple ``(filtered_df, age_range, selected_genders, selected_cardio)``
        where ``filtered_df`` is a copy of the rows matching all three filters.
    """
    sidebar = st.sidebar
    sidebar.header("🔍 数据筛选")

    # Age range slider, spanning the ages present in the data.
    ages = df['age_years']
    youngest = int(ages.min())
    oldest = int(ages.max())
    age_range = sidebar.slider(
        "选择年龄范围",
        min_value=youngest,
        max_value=oldest,
        value=(youngest, oldest),
        step=1,
    )

    # Gender filter (multi-select), everything selected by default.
    gender_labels = {1: '女性', 2: '男性'}
    gender_options = sorted(df['gender'].unique())

    def _gender_name(code):
        return gender_labels.get(code, f"性别{code}")

    selected_genders = sidebar.multiselect(
        "选择性别",
        options=gender_options,
        default=gender_options,
        format_func=_gender_name,
    )

    # Disease-status filter (multi-select), everything selected by default.
    cardio_labels = {0: '无疾病', 1: '有疾病'}
    cardio_options = sorted(df['cardio'].unique())

    def _cardio_name(code):
        return cardio_labels.get(code, f"状态{code}")

    selected_cardio = sidebar.multiselect(
        "选择心血管疾病状态",
        options=cardio_options,
        default=cardio_options,
        format_func=_cardio_name,
    )

    # Combine the three conditions into one row mask.
    in_age_range = (ages >= age_range[0]) & (ages <= age_range[1])
    gender_match = df['gender'].isin(selected_genders)
    cardio_match = df['cardio'].isin(selected_cardio)
    filtered_df = df[in_age_range & gender_match & cardio_match].copy()

    return filtered_df, age_range, selected_genders, selected_cardio
def _format_mean(values):
    """Format the mean of ``values`` to one decimal place, or "N/A" if undefined."""
    mean_value = values.mean()
    # The mean of an empty (or all-missing) column is NaN; avoid rendering "nan".
    return f"{mean_value:.1f}" if pd.notna(mean_value) else "N/A"


def display_metrics(filtered_df, original_df):
    """Show the four headline metrics for the current selection.

    Args:
        filtered_df: Rows remaining after the sidebar filters are applied.
        original_df: The full processed dataset, used as the baseline for the
            record-count delta.

    The metrics are: record count (with the difference from the full dataset
    when the two differ), disease rate (with the number of positive records),
    mean age and mean BMI. An empty selection shows a 0.0% rate and "N/A" for
    the two means.
    """
    col1, col2, col3, col4 = st.columns(4)
    row_count = len(filtered_df)
    baseline_count = len(original_df)

    with col1:
        st.metric(
            label="总记录数",
            value=f"{row_count:,}",
            # Only show a delta when filtering actually removed rows.
            delta=(
                f"{row_count - baseline_count:+,}"
                if row_count != baseline_count
                else None
            ),
        )

    with col2:
        disease_count = filtered_df['cardio'].sum()
        # Guard against division by zero on an empty selection.
        disease_rate = (disease_count / row_count * 100) if row_count > 0 else 0
        st.metric(
            label="心血管疾病风险率",
            value=f"{disease_rate:.1f}%",
            delta=f"{disease_count:,}",
        )

    with col3:
        st.metric(label="平均年龄", value=_format_mean(filtered_df['age_years']))

    with col4:
        st.metric(label="平均BMI", value=_format_mean(filtered_df['bmi']))
def _bmi_cardio_proportions(filtered_df):
    """Return, in long format, the share of each disease status per BMI class.

    The result has the columns ``bmi_category``, ``cardio``, ``proportion``
    and ``cardio_label``.
    """
    shares = pd.crosstab(
        filtered_df['bmi_category'],
        filtered_df['cardio'],
        normalize='index',
    )
    # When the selection contains only one disease status the crosstab has a
    # single column; add the missing one as 0 so the melt below cannot fail.
    shares = shares.reindex(columns=[0, 1], fill_value=0.0).reset_index()

    long_form = pd.melt(
        shares,
        id_vars=['bmi_category'],
        value_vars=[0, 1],
        var_name='cardio',
        value_name='proportion',
    )
    long_form['cardio_label'] = long_form['cardio'].map({0: '无疾病', 1: '有疾病'})
    return long_form


def _distribution_pie(filtered_df, column, palette):
    """Build a pie chart of the value distribution of ``column``."""
    fig = px.pie(
        filtered_df,
        names=column,
        color=column,
        color_discrete_sequence=palette,
    )
    fig.update_traces(textposition='inside', textinfo='percent+label')
    return fig


def create_visualizations(filtered_df):
    """Render the dashboard charts for the current selection.

    Args:
        filtered_df: Rows remaining after the sidebar filters are applied.

    Draws four charts in two rows: an age histogram split by disease status,
    a stacked bar of disease share per BMI class, and pie charts of the
    cholesterol and glucose levels. Every chart is replaced by a notice when
    the selection is empty.
    """
    has_data = not filtered_df.empty

    st.subheader("📊 数据分析")
    col1, col2 = st.columns(2)

    with col1:
        st.markdown("##### 年龄分布(按心血管疾病状态)")
        if has_data:
            fig1 = px.histogram(
                filtered_df,
                x='age_years',
                color='cardio',
                nbins=30,
                barmode='overlay',
                color_discrete_map={0: '#636EFA', 1: '#EF553B'},
                labels={
                    'age_years': '年龄(岁)',
                    'cardio': '心血管疾病',
                    'count': '人数',
                },
                category_orders={'cardio': [0, 1]},
                opacity=0.7,
            )
            fig1.update_layout(
                legend_title_text='疾病状态',
                # Horizontal legend placed above the plot, right-aligned.
                legend=dict(
                    orientation="h",
                    yanchor="bottom",
                    y=1.02,
                    xanchor="right",
                    x=1,
                ),
            )
            st.plotly_chart(fig1, use_container_width=True)
        else:
            st.info("没有数据可显示")

    with col2:
        st.markdown("##### BMI分类与心血管疾病关系")
        if has_data:
            fig2 = px.bar(
                _bmi_cardio_proportions(filtered_df),
                x='bmi_category',
                y='proportion',
                color='cardio_label',
                barmode='stack',
                color_discrete_map={'无疾病': '#00CC96', '有疾病': '#AB63FA'},
                labels={
                    'bmi_category': 'BMI分类',
                    'proportion': '比例',
                    'cardio_label': '疾病状态',
                },
                category_orders={
                    'bmi_category': ['偏瘦', '正常', '超重', '肥胖'],
                },
            )
            fig2.update_layout(
                yaxis_tickformat=',.0%',
                legend_title_text='疾病状态',
            )
            st.plotly_chart(fig2, use_container_width=True)
        else:
            st.info("没有数据可显示")

    # Additional breakdowns.
    st.subheader("🔍 详细分析")
    col3, col4 = st.columns(2)

    with col3:
        st.markdown("##### 胆固醇水平分布")
        if has_data:
            fig3 = _distribution_pie(
                filtered_df, 'cholesterol_str', px.colors.sequential.RdBu
            )
            st.plotly_chart(fig3, use_container_width=True)
        else:
            st.info("没有数据可显示")

    with col4:
        st.markdown("##### 血糖水平分布")
        if has_data:
            fig4 = _distribution_pie(
                filtered_df, 'gluc_str', px.colors.sequential.Bluyl
            )
            st.plotly_chart(fig4, use_container_width=True)
        else:
            st.info("没有数据可显示")
def display_data_summary(filtered_df):
    """Show descriptive statistics and a sample of the selection.

    Args:
        filtered_df: Rows remaining after the sidebar filters are applied.

    Both tables are placed inside a collapsible expander: first the output of
    ``describe()`` rounded to two decimals, then the first ten rows.
    """
    summary_panel = st.expander("📋 数据摘要(点击展开)")
    with summary_panel:
        statistics = filtered_df.describe().round(2)
        st.dataframe(statistics, use_container_width=True)

        st.markdown("##### 数据样本")
        preview_rows = filtered_df.head(10)
        st.dataframe(preview_rows, use_container_width=True)
def main():
    """Entry point: build the page from title to footer.

    Loads the data, stops with an error message when nothing could be loaded,
    and otherwise renders the filters, metrics, charts and data summary.
    """
    # Title and introduction.
    st.title("❤️ CardioAI - 心血管疾病智能分析系统")
    intro_text = "\n本仪表板提供心血管疾病数据的交互式可视化分析。使用侧边栏筛选器探索数据模式。\n"
    st.markdown(intro_text)

    # Load the data behind a spinner.
    with st.spinner('正在加载和处理数据...'):
        df = load_and_process_data()

    if df.empty:
        st.error("无法加载数据。请检查数据文件路径和格式。")
        return

    # Build the sidebar filters; only the filtered frame is used below.
    filtered_df, _age_range, _genders, _cardio_states = create_filters(df)

    # Report how many records match the filters.
    st.sidebar.markdown("---")
    st.sidebar.markdown(f"**筛选结果:** {len(filtered_df):,} 条记录")

    # Page sections, in display order.
    display_metrics(filtered_df, df)
    create_visualizations(filtered_df)
    display_data_summary(filtered_df)

    # Footer.
    st.markdown("---")
    st.caption("CardioAI - 心血管疾病智能辅助系统 | Module 1: 数据可视化仪表板")


if __name__ == "__main__":
    main()