添加Module1数据可视化仪表板和心血管疾病数据
1. 创建 module1_dashboard/cardio_dashboard.py
   - Streamlit 应用程序,提供交互式数据可视化
   - 包含数据清洗、特征工程、异常值处理
   - 集成年龄转换、BMI 计算、类别转换
   - 侧边栏筛选器、Plotly 图表展示
   - 智能路径解析,支持多种运行环境
2. 添加 data/心血管疾病.xlsx 数据文件
   - 心血管疾病原始数据集
   - 用于模型训练和可视化分析

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
BIN
data/心血管疾病.xlsx
Normal file
BIN
data/心血管疾病.xlsx
Normal file
Binary file not shown.
394
module1_dashboard/cardio_dashboard.py
Normal file
394
module1_dashboard/cardio_dashboard.py
Normal file
@@ -0,0 +1,394 @@
|
||||
#!/opt/anaconda3/envs/cardioenv/bin/python
|
||||
"""
CardioAI - cardiovascular disease data visualization dashboard.

Streamlit application that cleans the cardiovascular disease dataset,
engineers features and renders interactive visualizations.
"""
|
||||
|
||||
import streamlit as st
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import plotly.express as px
|
||||
import plotly.graph_objects as go
|
||||
from plotly.subplots import make_subplots
|
||||
import warnings
|
||||
import os
|
||||
|
||||
# Suppress every Python warning for the whole process
# (presumably to keep the app output clean -- TODO confirm this is intended).
warnings.filterwarnings('ignore')

# Page configuration: browser-tab title and icon, wide layout, and the
# sidebar expanded on first load.
st.set_page_config(
    page_title="CardioAI - 心血管疾病智能分析",
    page_icon="❤️",
    layout="wide",
    initial_sidebar_state="expanded"
)

# Relative location of the Excel dataset; load_and_process_data() probes
# several base directories (working directory, script directory) for it.
DATA_PATH = "./data/心血管疾病.xlsx"
|
||||
|
||||
|
||||
@st.cache_data(ttl=3600)
def _read_and_transform(data_path):
    """
    Read the Excel dataset at *data_path* and return the cleaned, enriched frame.

    Kept separate from ``load_and_process_data`` so that only successful loads
    end up in the Streamlit cache: an exception raised here propagates to the
    caller and no value is stored, so a repaired file is picked up on the next
    rerun instead of after the one-hour TTL.

    Added columns: ``age_years``, ``bmi``, ``cholesterol_str``, ``gluc_str``,
    ``bmi_category`` and ``bp_category``.

    Raises:
        Exception: whatever pandas raises for an unreadable file or a missing
            source column (``age``, ``weight``, ``height``, ``ap_hi``,
            ``ap_lo``, ``cholesterol``, ``gluc``).
    """
    df = pd.read_excel(data_path)

    # 1. Feature engineering
    # ``age`` is divided by 365.25 and rounded, i.e. treated as a day count.
    df['age_years'] = (df['age'] / 365.25).round().astype(int)

    # BMI = weight / (height / 100) ** 2
    df['bmi'] = df['weight'] / ((df['height'] / 100) ** 2)

    # 2. Outlier handling
    # Keep rows where diastolic < systolic, systolic is within [90, 250]
    # and diastolic is within [60, 150].
    plausible_bp = (
        (df['ap_lo'] < df['ap_hi'])
        & df['ap_hi'].between(90, 250)
        & df['ap_lo'].between(60, 150)
    )
    df = df[plausible_bp].copy()

    # 3. Categorical labels
    # Cholesterol and glucose share the same three-level coding.
    level_labels = {1: '正常', 2: '高于正常', 3: '很高'}
    df['cholesterol_str'] = df['cholesterol'].map(level_labels)
    df['gluc_str'] = df['gluc'].map(level_labels)

    # BMI classes: <18.5, [18.5, 25), [25, 30), >=30.
    # pd.cut leaves NaN (and infinite, e.g. height == 0) BMI unclassified;
    # an if/elif chain would silently put them in the last class because
    # every comparison with NaN is False.
    df['bmi_category'] = pd.cut(
        df['bmi'],
        bins=[-np.inf, 18.5, 25, 30, np.inf],
        labels=['偏瘦', '正常', '超重', '肥胖'],
        right=False,
    ).astype(object)  # plain labels rather than a Categorical column

    # Blood-pressure classes derived from systolic pressure only.
    df['bp_category'] = pd.cut(
        df['ap_hi'],
        bins=[0, 120, 140, 160, 180, 250],
        labels=['正常', '偏高', '高血压1级', '高血压2级', '高血压3级']
    )

    return df


def load_and_process_data():
    """
    Locate, load and process the cardiovascular disease dataset.

    Probes several candidate locations for ``DATA_PATH`` (relative to the
    working directory and to this script), reports the outcome in the sidebar
    and delegates the actual read and transformation to the cached
    ``_read_and_transform``.

    Returns:
        The processed DataFrame, or an empty DataFrame when the file cannot
        be found or loading fails (the error is shown in the UI). Failures
        are not cached.
    """
    try:
        # Debug information: show where the app is looking for the file.
        current_dir = os.getcwd()
        script_dir = os.path.dirname(os.path.abspath(__file__))
        st.sidebar.info(f"当前工作目录: {current_dir}")
        st.sidebar.info(f"脚本目录: {script_dir}")
        st.sidebar.info(f"数据路径: {DATA_PATH}")

        # Candidate locations, tried in order.
        possible_paths = [
            os.path.abspath(DATA_PATH),  # absolute form of the relative path
            DATA_PATH,  # relative path as given
            os.path.join(current_dir, DATA_PATH),
            os.path.join(current_dir, "data", "心血管疾病.xlsx"),
            os.path.join(script_dir, "..", DATA_PATH),
            os.path.join(script_dir, "..", "data", "心血管疾病.xlsx"),
            os.path.join(script_dir, "data", "心血管疾病.xlsx")
        ]

        data_path = next(
            (path for path in possible_paths if os.path.exists(path)),
            None,
        )
        if data_path is None:
            st.sidebar.error("未找到数据文件,请检查路径")
            st.sidebar.info(f"尝试过的路径: {possible_paths}")
            return pd.DataFrame()

        st.sidebar.success(f"找到数据文件: {data_path}")
        return _read_and_transform(data_path)

    except Exception as e:
        # UI boundary: surface the problem to the user instead of crashing.
        st.error(f"数据加载失败: {e}")
        return pd.DataFrame()
|
||||
|
||||
|
||||
def create_filters(df):
    """
    Render the sidebar filter widgets and apply the current selections.

    Returns a 4-tuple: the filtered copy of *df*, the selected (min, max) age
    range, the selected gender codes and the selected cardio codes.
    """
    sidebar = st.sidebar
    sidebar.header("🔍 数据筛选")

    # Age slider spans the observed ages and starts fully open.
    age_col = df['age_years']
    lowest_age, highest_age = int(age_col.min()), int(age_col.max())
    age_range = sidebar.slider(
        "选择年龄范围",
        min_value=lowest_age,
        max_value=highest_age,
        value=(lowest_age, highest_age),
        step=1
    )

    def gender_name(code):
        # Display label for a gender code, with a generic fallback.
        return {1: '女性', 2: '男性'}.get(code, f"性别{code}")

    def cardio_name(code):
        # Display label for a cardio status code, with a generic fallback.
        return {0: '无疾病', 1: '有疾病'}.get(code, f"状态{code}")

    # Gender multiselect: every observed code, all selected by default.
    gender_options = sorted(df['gender'].unique())
    selected_genders = sidebar.multiselect(
        "选择性别",
        options=gender_options,
        default=gender_options,
        format_func=gender_name
    )

    # Cardio status multiselect: every observed code, all selected by default.
    cardio_options = sorted(df['cardio'].unique())
    selected_cardio = sidebar.multiselect(
        "选择心血管疾病状态",
        options=cardio_options,
        default=cardio_options,
        format_func=cardio_name
    )

    # Combine the three selections into one row mask.
    in_age_range = age_col.between(age_range[0], age_range[1])
    gender_match = df['gender'].isin(selected_genders)
    cardio_match = df['cardio'].isin(selected_cardio)
    filtered_df = df[in_age_range & gender_match & cardio_match].copy()

    return filtered_df, age_range, selected_genders, selected_cardio
|
||||
|
||||
|
||||
def display_metrics(filtered_df, original_df):
    """
    Show the four headline metrics for the current selection.

    Args:
        filtered_df: rows remaining after the sidebar filters.
        original_df: the unfiltered frame, used only for the record-count delta.

    Metrics: record count (delta = difference to the unfiltered count),
    share of rows with ``cardio`` set, mean ``age_years`` and mean ``bmi``.
    With an empty selection the two means are shown as "N/A" rather than
    "nan".
    """
    n_filtered = len(filtered_df)
    n_original = len(original_df)

    col1, col2, col3, col4 = st.columns(4)

    with col1:
        st.metric(
            label="总记录数",
            value=f"{n_filtered:,}",
            # Only show a delta when the filters actually removed rows.
            delta=f"{n_filtered - n_original:+,}" if n_filtered != n_original else None
        )

    with col2:
        disease_count = filtered_df['cardio'].sum()
        disease_rate = (disease_count / n_filtered * 100) if n_filtered > 0 else 0
        st.metric(
            label="心血管疾病风险率",
            value=f"{disease_rate:.1f}%",
            delta=f"{disease_count:,} 例",
            # The case count is supplementary information, not a change:
            # render it neutrally instead of as a green "increase".
            delta_color="off"
        )

    with col3:
        # mean() of an empty column is NaN.
        avg_age = filtered_df['age_years'].mean()
        st.metric(
            label="平均年龄",
            value=f"{avg_age:.1f} 岁" if pd.notna(avg_age) else "N/A"
        )

    with col4:
        avg_bmi = filtered_df['bmi'].mean()
        st.metric(
            label="平均BMI",
            value=f"{avg_bmi:.1f}" if pd.notna(avg_bmi) else "N/A"
        )
|
||||
|
||||
|
||||
def _render_age_histogram(filtered_df):
    """Draw the overlaid age histogram, colored by cardio status."""
    fig = px.histogram(
        filtered_df,
        x='age_years',
        color='cardio',
        nbins=30,
        barmode='overlay',
        color_discrete_map={0: '#636EFA', 1: '#EF553B'},
        labels={
            'age_years': '年龄(岁)',
            'cardio': '心血管疾病',
            'count': '人数'
        },
        category_orders={'cardio': [0, 1]},
        opacity=0.7
    )
    fig.update_layout(
        legend_title_text='疾病状态',
        # Horizontal legend placed just above the plot, right-aligned.
        legend=dict(
            orientation="h",
            yanchor="bottom",
            y=1.02,
            xanchor="right",
            x=1
        )
    )
    st.plotly_chart(fig, use_container_width=True)


def _bmi_cardio_proportions(filtered_df):
    """Return long-format shares of each cardio status per BMI category."""
    shares = pd.crosstab(
        filtered_df['bmi_category'],
        filtered_df['cardio'],
        normalize='index'
    )
    # When the selection contains only one cardio status the crosstab lacks
    # the other column and melt(value_vars=[0, 1]) would raise KeyError;
    # add the missing status with a share of 0.
    shares = shares.reindex(columns=[0, 1], fill_value=0.0).reset_index()

    long_df = pd.melt(
        shares,
        id_vars=['bmi_category'],
        value_vars=[0, 1],
        var_name='cardio',
        value_name='proportion'
    )
    long_df['cardio_label'] = long_df['cardio'].map({0: '无疾病', 1: '有疾病'})
    return long_df


def _render_bmi_cardio_bars(filtered_df):
    """Draw the stacked bar chart of cardio status share per BMI category."""
    fig = px.bar(
        _bmi_cardio_proportions(filtered_df),
        x='bmi_category',
        y='proportion',
        color='cardio_label',
        barmode='stack',
        color_discrete_map={'无疾病': '#00CC96', '有疾病': '#AB63FA'},
        labels={
            'bmi_category': 'BMI分类',
            'proportion': '比例',
            'cardio_label': '疾病状态'
        },
        category_orders={
            'bmi_category': ['偏瘦', '正常', '超重', '肥胖']
        }
    )
    fig.update_layout(
        yaxis_tickformat=',.0%',
        legend_title_text='疾病状态'
    )
    st.plotly_chart(fig, use_container_width=True)


def _render_level_pie(filtered_df, column, palette):
    """Draw a pie chart of the value distribution of *column*."""
    fig = px.pie(
        filtered_df,
        names=column,
        color=column,
        color_discrete_sequence=palette
    )
    fig.update_traces(textposition='inside', textinfo='percent+label')
    st.plotly_chart(fig, use_container_width=True)


def create_visualizations(filtered_df):
    """
    Render the four dashboard charts for the current selection.

    Top row: age histogram by cardio status and cardio share per BMI
    category. Bottom row: cholesterol and glucose level pie charts. Every
    panel shows an informational message instead of a chart when
    *filtered_df* is empty.
    """
    has_data = not filtered_df.empty

    st.subheader("📊 数据分析")

    # Two-column layout for the main charts.
    col1, col2 = st.columns(2)

    with col1:
        st.markdown("##### 年龄分布(按心血管疾病状态)")
        if has_data:
            _render_age_histogram(filtered_df)
        else:
            st.info("没有数据可显示")

    with col2:
        st.markdown("##### BMI分类与心血管疾病关系")
        if has_data:
            _render_bmi_cardio_bars(filtered_df)
        else:
            st.info("没有数据可显示")

    # Additional analysis.
    st.subheader("🔍 详细分析")

    col3, col4 = st.columns(2)

    with col3:
        st.markdown("##### 胆固醇水平分布")
        if has_data:
            _render_level_pie(
                filtered_df, 'cholesterol_str', px.colors.sequential.RdBu
            )
        else:
            st.info("没有数据可显示")

    with col4:
        st.markdown("##### 血糖水平分布")
        if has_data:
            _render_level_pie(
                filtered_df, 'gluc_str', px.colors.sequential.Bluyl
            )
        else:
            st.info("没有数据可显示")
|
||||
|
||||
|
||||
def display_data_summary(filtered_df):
    """
    Show descriptive statistics and a ten-row sample in a collapsible panel.
    """
    summary_panel = st.expander("📋 数据摘要(点击展开)")
    with summary_panel:
        # Descriptive statistics rounded to two decimals.
        stats_table = filtered_df.describe().round(2)
        st.dataframe(stats_table, use_container_width=True)

        # First ten rows of the current selection.
        st.markdown("##### 数据样本")
        sample_rows = filtered_df.head(10)
        st.dataframe(sample_rows, use_container_width=True)
|
||||
|
||||
|
||||
def main():
    """
    Entry point: build the page from top to bottom.

    Loads the dataset, renders the sidebar filters, then shows the headline
    metrics, the charts and the data summary for the filtered rows. Stops
    after an error message when no data could be loaded.
    """
    # Title and introduction.
    st.title("❤️ CardioAI - 心血管疾病智能分析系统")
    st.markdown("""
    本仪表板提供心血管疾病数据的交互式可视化分析。使用侧边栏筛选器探索数据模式。
    """)

    # Load the data behind a spinner.
    with st.spinner('正在加载和处理数据...'):
        dataset = load_and_process_data()

    # Nothing to render without data.
    if dataset.empty:
        st.error("无法加载数据。请检查数据文件路径和格式。")
        return

    # Only the filtered frame is needed here; the raw selections are unused.
    filtered_df, _age_range, _genders, _cardio = create_filters(dataset)

    # Summarise the filter result in the sidebar.
    sidebar = st.sidebar
    sidebar.markdown("---")
    sidebar.markdown(f"**筛选结果:** {len(filtered_df):,} 条记录")

    # Main page sections, in display order.
    display_metrics(filtered_df, dataset)
    create_visualizations(filtered_df)
    display_data_summary(filtered_df)

    # Footer.
    st.markdown("---")
    st.caption("CardioAI - 心血管疾病智能辅助系统 | Module 1: 数据可视化仪表板")
|
||||
|
||||
|
||||
# Build the dashboard only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user