Files
ai_coding/module1_dashboard/cardio_dashboard.py

148 lines
3.8 KiB
Python
Raw Permalink Normal View History

import streamlit as st
import pandas as pd
import numpy as np
import plotly.express as px
# Location of the source Excel workbook read by the data loader.
# NOTE(review): this is a hard-coded absolute Windows path, so the app only
# runs on a machine that has the file at exactly this location.
DATA_PATH = "D:\\AI_Coding\\data\\心血管疾病.xlsx"
# Cached data loading and cleaning: st.cache_data memoises the result of the
# Excel read and the preprocessing below.
@st.cache_data
def load_and_preprocess_data():
    """Load the cardiovascular dataset and derive the dashboard columns.

    Reads the workbook at ``DATA_PATH``, drops rows with implausible blood
    pressure readings and adds these columns:

    * ``age_years``        -- ``age / 365.25`` rounded to a whole number.
    * ``bmi``              -- ``weight / (height / 100) ** 2``.
    * ``cholesterol_desc`` -- text label for the ``cholesterol`` codes 1/2/3.
    * ``gluc_desc``        -- text label for the ``gluc`` codes 1/2/3.
    * ``bmi_category``     -- BMI bucket label (see thresholds below).

    Returns:
        pandas.DataFrame: the filtered frame with the extra columns. Rows
        whose BMI is missing or not finite get a missing ``bmi_category``
        rather than being counted in the highest bucket.

    Raises:
        Nothing is caught here, so any error from ``pd.read_excel`` (for
        example a missing file) or a missing expected column propagates.
    """
    df = pd.read_excel(DATA_PATH)

    # --- Feature engineering ---
    # Dividing by 365.25 treats `age` as a day count and converts it to years.
    df['age_years'] = (df['age'] / 365.25).round(0)

    # BMI = weight / height_in_metres ** 2.
    # NOTE(review): assumes weight is in kg and height in cm -- confirm
    # against the dataset description.
    df['bmi'] = df['weight'] / ((df['height'] / 100) ** 2)

    # --- Outlier handling ---
    # Keep only rows where diastolic < systolic and both readings fall inside
    # the accepted ranges (systolic 90-250, diastolic 60-150, inclusive).
    # The three conditions are combined into one mask and the result is
    # copied so the column assignments below write to an independent frame
    # instead of a filtered slice (which can emit SettingWithCopyWarning).
    plausible_bp = (
        (df['ap_lo'] < df['ap_hi'])
        & df['ap_hi'].between(90, 250)
        & df['ap_lo'].between(60, 150)
    )
    df = df.loc[plausible_bp].copy()

    # --- Category conversion ---
    # Codes 1/2/3 become descriptive labels: normal / elevated / very high.
    # Any other code maps to a missing value.
    cholesterol_map = {1: '正常', 2: '偏高', 3: '很高'}
    df['cholesterol_desc'] = df['cholesterol'].map(cholesterol_map)
    gluc_map = {1: '正常', 2: '偏高', 3: '很高'}
    df['gluc_desc'] = df['gluc'].map(gluc_map)

    # BMI buckets, left-closed: <18.5 underweight, [18.5, 24) normal,
    # [24, 28) overweight, >=28 obese. pd.cut leaves NaN (and +inf, e.g. from
    # a zero height) unclassified; the previous if/elif chain sent such
    # values to the final `else` branch and mislabelled them as obese.
    bmi_edges = [-np.inf, 18.5, 24, 28, np.inf]
    bmi_labels = ['偏瘦', '正常', '超重', '肥胖']
    df['bmi_category'] = pd.cut(
        df['bmi'],
        bins=bmi_edges,
        labels=bmi_labels,
        right=False,
    ).astype(object)  # plain strings, as before, rather than a Categorical

    return df
# Load the cleaned dataset (the loader is memoised with st.cache_data).
df = load_and_preprocess_data()

# Streamlit application title.
st.title('CardioAI - 心血管疾病数据可视化')

# With no usable age values, min()/max() below return NaN and int(NaN)
# raises ValueError. Show a message and halt this script run instead.
if df['age_years'].dropna().empty:
    st.warning('没有可用的数据记录,请检查数据文件。')
    st.stop()

# --- Sidebar filters ---
st.sidebar.header('数据筛选')

# Age range slider, spanning the ages present in the data and initially
# selecting the whole span.
age_min = int(df['age_years'].min())
age_max = int(df['age_years'].max())
age_range = st.sidebar.slider(
    '年龄范围',
    min_value=age_min,
    max_value=age_max,
    value=(age_min, age_max),
)

# Gender multiselect; every value found in the data is selected by default.
gender_options = df['gender'].unique()
gender_filter = st.sidebar.multiselect(
    '性别 (1=女, 2=男)',
    options=gender_options,
    default=gender_options,
)

# Cardiovascular-disease multiselect; all values selected by default.
cardio_options = df['cardio'].unique()
cardio_filter = st.sidebar.multiselect(
    '心血管疾病 (0=无, 1=有)',
    options=cardio_options,
    default=cardio_options,
)

# Apply the filters: a row is kept only if it satisfies all three selections.
# Series.between is inclusive at both ends, matching the >= / <= pair.
age_lo, age_hi = age_range
filtered_df = df[
    df['age_years'].between(age_lo, age_hi)
    & df['gender'].isin(gender_filter)
    & df['cardio'].isin(cardio_filter)
]
# --- Overview section ---
st.header('数据概览')

# Number of rows that passed the sidebar filters.
record_count = len(filtered_df)
st.metric('筛选后的总记录数', record_count)

# Overall rate: sum of the `cardio` column over the row count, shown as a
# percentage. Skipped when nothing is selected to avoid dividing by zero.
if record_count > 0:
    cardio_rate = filtered_df['cardio'].sum() / record_count * 100
    st.metric('心血管疾病总风险率', f"{cardio_rate:.2f}%")
# --- Charts section ---
st.header('数据可视化')

# Histogram of age_years, coloured by the `cardio` column with the traces
# overlaid on the same bars.
st.subheader('年龄分布与心血管疾病关系')
age_hist_labels = {'age_years': '年龄', 'cardio': '心血管疾病', 'count': '人数'}
age_hist_layout = {
    'xaxis_title': '年龄',
    'yaxis_title': '人数',
    'legend_title': '心血管疾病',
}
fig_age = px.histogram(
    filtered_df,
    x='age_years',
    color='cardio',
    nbins=20,
    barmode='overlay',
    title='年龄分布 histogram',
    labels=age_hist_labels,
)
# Explicit axis and legend titles for the rendered figure.
fig_age.update_layout(**age_hist_layout)
st.plotly_chart(fig_age)
# Stacked bar chart of row counts per BMI category, split by `cardio`.
st.subheader('BMI类别与心血管疾病关系')
fig_bmi = px.histogram(
    filtered_df,
    x='bmi_category',
    color='cardio',
    barmode='stack',
    # Without an explicit order the categories appear in the order they are
    # first met in the data; pin them from underweight to obese so the axis
    # reads as an ordered scale and stays stable when filters change.
    category_orders={'bmi_category': ['偏瘦', '正常', '超重', '肥胖']},
    labels={'bmi_category': 'BMI类别', 'cardio': '心血管疾病', 'count': '人数'},
    title='BMI类别对心血管疾病影响的堆叠柱状图',
)
# Explicit axis and legend titles for the rendered figure.
fig_bmi.update_layout(
    xaxis_title='BMI类别',
    yaxis_title='人数',
    legend_title='心血管疾病',
)
st.plotly_chart(fig_bmi)
# Data sample section: show the first rows of the filtered frame
# (DataFrame.head() with its default row count) as an interactive table.
st.header('数据样本')
st.dataframe(filtered_df.head())