Files
ai_coding/module1_dashboard/cardio_dashboard.py

148 lines
3.8 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import os

import numpy as np
import pandas as pd
import plotly.express as px
import streamlit as st
# Location of the source Excel workbook.
# Defaults to the original author's local path; set the CARDIO_DATA_PATH
# environment variable to point at the file on another machine without
# editing this script.
DATA_PATH = os.environ.get(
    "CARDIO_DATA_PATH",
    "D:\\AI_Coding\\data\\心血管疾病.xlsx",
)
# Cached loader: wrapped in st.cache_data so the workbook is not re-read and
# re-cleaned on every Streamlit rerun.
@st.cache_data
def load_and_preprocess_data(path=None):
    """Load the cardiovascular workbook and return a cleaned, enriched frame.

    Args:
        path: Optional location of the Excel file. When omitted (the default,
            which is what existing callers use) the module-level
            ``DATA_PATH`` is read.

    Returns:
        A new DataFrame containing only the rows that pass the blood-pressure
        checks, with these columns added:

        * ``age_years`` -- ``age / 365.25`` rounded to a whole number
          (assumes ``age`` is recorded in days -- confirm against the data).
        * ``bmi`` -- ``weight / (height / 100) ** 2`` (presumably weight in
          kg and height in cm -- confirm against the data).
        * ``cholesterol_desc`` / ``gluc_desc`` -- text label for the codes
          1, 2 and 3; any other code maps to NaN.
        * ``bmi_category`` -- label for the BMI band (<18.5, <24, <28,
          otherwise); left missing when ``bmi`` is NaN or +inf.
    """
    df = pd.read_excel(DATA_PATH if path is None else path)

    # Feature engineering.
    df['age_years'] = (df['age'] / 365.25).round(0)
    df['bmi'] = df['weight'] / ((df['height'] / 100) ** 2)

    # Outlier handling: keep rows whose diastolic pressure (ap_lo) is below
    # the systolic pressure (ap_hi) and whose readings fall inside the
    # accepted ranges. ``between`` is inclusive on both ends, matching the
    # original >= / <= comparisons.
    plausible = (
        (df['ap_lo'] < df['ap_hi'])
        & df['ap_hi'].between(90, 250)
        & df['ap_lo'].between(60, 150)
    )
    # Copy so the column assignments below write to an independent frame
    # rather than to a slice of the unfiltered data (SettingWithCopyWarning).
    df = df.loc[plausible].copy()

    # Category conversion: cholesterol and glucose share the same
    # code -> label table.
    level_labels = {1: '正常', 2: '偏高', 3: '很高'}
    df['cholesterol_desc'] = df['cholesterol'].map(level_labels)
    df['gluc_desc'] = df['gluc'].map(level_labels)

    # BMI banding, vectorized. Left-closed bins reproduce the original
    # "< 18.5 / < 24 / < 28 / else" chain, but a missing BMI now stays
    # missing instead of falling through to the last label.
    df['bmi_category'] = pd.cut(
        df['bmi'],
        bins=[-np.inf, 18.5, 24, 28, np.inf],
        labels=['偏瘦', '正常', '超重', '肥胖'],
        right=False,
    ).astype(object)  # keep plain string labels rather than a categorical

    return df
# Load the cleaned dataset.
df = load_and_preprocess_data()

# Streamlit app title.
st.title('CardioAI - 心血管疾病数据可视化')

# Sidebar filters.
sidebar = st.sidebar
sidebar.header('数据筛选')

# Age slider: spans the observed age_years range and initially selects all
# of it.
age_years = df['age_years']
lowest_age = int(age_years.min())
highest_age = int(age_years.max())
age_range = sidebar.slider(
    '年龄范围',
    min_value=lowest_age,
    max_value=highest_age,
    value=(lowest_age, highest_age),
)

# Gender multiselect: every value present in the data, all preselected.
gender_values = df['gender'].unique()
gender_filter = sidebar.multiselect(
    '性别 (1=女, 2=男)',
    options=gender_values,
    default=gender_values,
)

# Cardio multiselect: every value present in the data, all preselected.
cardio_values = df['cardio'].unique()
cardio_filter = sidebar.multiselect(
    '心血管疾病 (0=无, 1=有)',
    options=cardio_values,
    default=cardio_values,
)

# Apply the filters: a row is kept only when it satisfies all three.
selected_low, selected_high = age_range
row_mask = (
    age_years.between(selected_low, selected_high)  # inclusive on both ends
    & df['gender'].isin(gender_filter)
    & df['cardio'].isin(cardio_filter)
)
filtered_df = df[row_mask]
# Main page: overview section.
st.header('数据概览')

# Number of records left after filtering.
record_count = len(filtered_df)
st.metric('筛选后的总记录数', record_count)

# Share of filtered records with cardio set, as a percentage. Skipped when
# nothing is selected, which also avoids dividing by zero.
if record_count:
    positive_count = filtered_df['cardio'].sum()
    cardio_rate = positive_count / record_count * 100
    st.metric('心血管疾病总风险率', f"{cardio_rate:.2f}%")
# Charts section.
st.header('数据可视化')

# Overlaid histogram of age_years, one colour per cardio value.
st.subheader('年龄分布与心血管疾病关系')
age_labels = {'age_years': '年龄', 'cardio': '心血管疾病', 'count': '人数'}
age_hist = px.histogram(
    filtered_df,
    x='age_years',
    color='cardio',
    nbins=20,
    barmode='overlay',
    title='年龄分布 histogram',
    labels=age_labels,
)
# Set the axis and legend titles explicitly.
age_hist.update_layout(xaxis_title='年龄', yaxis_title='人数', legend_title='心血管疾病')
st.plotly_chart(age_hist)
# Stacked bar chart of record counts per bmi_category, split by cardio value.
st.subheader('BMI类别与心血管疾病关系')
bmi_labels = {'bmi_category': 'BMI类别', 'cardio': '心血管疾病', 'count': '人数'}
bmi_hist = px.histogram(
    filtered_df,
    x='bmi_category',
    color='cardio',
    barmode='stack',
    title='BMI类别对心血管疾病影响的堆叠柱状图',
    labels=bmi_labels,
)
# Set the axis and legend titles explicitly.
bmi_hist.update_layout(xaxis_title='BMI类别', yaxis_title='人数', legend_title='心血管疾病')
st.plotly_chart(bmi_hist)
# Data sample: show the first rows of the filtered data.
st.header('数据样本')
sample_rows = filtered_df.head()
st.dataframe(sample_rows)