148 lines
3.8 KiB
Python
148 lines
3.8 KiB
Python
|
|
import streamlit as st
|
|||
|
|
import pandas as pd
|
|||
|
|
import numpy as np
|
|||
|
|
import plotly.express as px
|
|||
|
|
|
|||
|
|
# Location of the Excel workbook that the app reads its records from.
DATA_PATH = "D:\\AI_Coding\\data\\心血管疾病.xlsx"
|
|||
|
|
|
|||
|
|
# Cached loader: read the workbook, drop implausible rows, derive display columns.
@st.cache_data
def load_and_preprocess_data():
    """Load the workbook at ``DATA_PATH`` and return a cleaned, enriched frame.

    Rows are kept only when their blood-pressure readings are plausible:
    ``ap_lo`` (diastolic) strictly below ``ap_hi`` (systolic), ``ap_hi``
    within 90-250 and ``ap_lo`` within 60-150, bounds inclusive.  Surviving
    rows keep their original index labels.

    Columns added to the result:
        age_years: ``age`` (stored in days) divided by 365.25, rounded to a
            whole number.
        bmi: ``weight / (height / 100) ** 2``; missing when ``height`` is
            missing or not positive.  Assumes height is in centimetres and
            weight in kilograms -- TODO confirm against the dataset.
        cholesterol_desc, gluc_desc: text label for the codes 1, 2 and 3 of
            ``cholesterol`` / ``gluc``; any other code becomes missing.
        bmi_category: '偏瘦' (< 18.5), '正常' (< 24), '超重' (< 28) or
            '肥胖' (>= 28); missing when ``bmi`` is missing.

    Returns:
        pandas.DataFrame: the filtered rows with the columns above appended.
    """
    raw = pd.read_excel(DATA_PATH)

    # Combine the three plausibility rules into one mask and take an explicit
    # copy.  Assigning new columns to a frame produced by boolean indexing can
    # otherwise raise SettingWithCopyWarning on pandas versions without
    # copy-on-write.
    plausible_pressure = (
        (raw['ap_lo'] < raw['ap_hi'])
        & raw['ap_hi'].between(90, 250)
        & raw['ap_lo'].between(60, 150)
    )
    df = raw.loc[plausible_pressure].copy()

    # Age is stored in days; express it in whole years.
    df['age_years'] = (df['age'] / 365.25).round(0)

    # A zero height would yield an infinite BMI (and then an "obese" label),
    # so non-positive heights are treated as missing instead.
    height_m = (df['height'] / 100).where(df['height'] > 0)
    df['bmi'] = df['weight'] / (height_m ** 2)

    # cholesterol and gluc share the same three-level coding.
    level_labels = {1: '正常', 2: '偏高', 3: '很高'}
    df['cholesterol_desc'] = df['cholesterol'].map(level_labels)
    df['gluc_desc'] = df['gluc'].map(level_labels)

    # Vectorised banding: np.select takes the first matching condition, which
    # mirrors an if/elif chain.  NaN fails every comparison and would fall
    # through to the default label, so missing BMI values are blanked out
    # afterwards rather than being reported as '肥胖'.
    bmi = df['bmi']
    band_labels = np.select(
        [bmi < 18.5, bmi < 24, bmi < 28],
        ['偏瘦', '正常', '超重'],
        default='肥胖',
    )
    df['bmi_category'] = pd.Series(band_labels, index=df.index).where(bmi.notna())

    return df
|
|||
|
|
|
|||
|
|
# Load the cleaned dataset produced by the loader function.
df = load_and_preprocess_data()

# Page title.
st.title('CardioAI - 心血管疾病数据可视化')

# Sidebar: filter controls.
st.sidebar.header('数据筛选')

# Age range slider, bounded by the youngest and oldest age present in the
# data and initially spanning the whole range.
age_min, age_max = int(df['age_years'].min()), int(df['age_years'].max())
age_range = st.sidebar.slider(
    '年龄范围',
    min_value=age_min,
    max_value=age_max,
    value=(age_min, age_max),
)

# Gender multiselect: every value found in the data, all selected by default.
gender_options = df['gender'].unique()
gender_filter = st.sidebar.multiselect(
    '性别 (1=女, 2=男)',
    options=gender_options,
    default=gender_options,
)

# Cardio multiselect: every value found in the data, all selected by default.
cardio_options = df['cardio'].unique()
cardio_filter = st.sidebar.multiselect(
    '心血管疾病 (0=无, 1=有)',
    options=cardio_options,
    default=cardio_options,
)

# Apply the three filters; a row must satisfy all of them to be kept.
youngest, oldest = age_range
age_selected = df['age_years'].between(youngest, oldest)  # inclusive bounds
gender_selected = df['gender'].isin(gender_filter)
cardio_selected = df['cardio'].isin(cardio_filter)
filtered_df = df[age_selected & gender_selected & cardio_selected]
|
|||
|
|
|
|||
|
|
# Overview section: headline numbers for the current selection.
st.header('数据概览')

# Number of records that passed the sidebar filters.
record_count = len(filtered_df)
st.metric('筛选后的总记录数', record_count)

# Percentage of the selected records whose cardio flag is 1.
if record_count > 0:
    cardio_rate = (filtered_df['cardio'].sum() / record_count) * 100
    st.metric('心血管疾病总风险率', f"{cardio_rate:.2f}%")
else:
    # With no rows the rate is undefined (0 / 0).  Tell the user why the
    # metric is absent instead of silently dropping it.
    st.info('当前筛选条件下没有匹配的记录。')
|
|||
|
|
|
|||
|
|
# Charts section.
st.header('数据可视化')

# Age histogram with one overlaid series per cardio value.
st.subheader('年龄分布与心血管疾病关系')

age_histogram_options = {
    'x': 'age_years',
    'color': 'cardio',
    'barmode': 'overlay',
    'nbins': 20,
    'labels': {'age_years': '年龄', 'cardio': '心血管疾病', 'count': '人数'},
    'title': '年龄分布 histogram',
}
fig_age = px.histogram(filtered_df, **age_histogram_options)

# Axis and legend captions are set explicitly on the layout.
age_layout = {
    'xaxis_title': '年龄',
    'yaxis_title': '人数',
    'legend_title': '心血管疾病',
}
fig_age.update_layout(**age_layout)

st.plotly_chart(fig_age)
|
|||
|
|
|
|||
|
|
# Stacked bar chart: record counts per BMI category, split by cardio value.
st.subheader('BMI类别与心血管疾病关系')
fig_bmi = px.histogram(
    filtered_df,
    x='bmi_category',
    color='cardio',
    barmode='stack',
    # Without an explicit order, Plotly Express lays categories out in the
    # order they are first encountered in the data, so the bars could appear
    # in any sequence.  Pin them from underweight to obese.
    category_orders={'bmi_category': ['偏瘦', '正常', '超重', '肥胖']},
    labels={'bmi_category': 'BMI类别', 'cardio': '心血管疾病', 'count': '人数'},
    title='BMI类别对心血管疾病影响的堆叠柱状图',
)
# Axis and legend captions are set explicitly on the layout.
fig_bmi.update_layout(
    xaxis_title='BMI类别',
    yaxis_title='人数',
    legend_title='心血管疾病',
)
st.plotly_chart(fig_bmi)
|
|||
|
|
|
|||
|
|
# Data sample section: preview the first rows of the filtered data
# (DataFrame.head() with its default row count).
st.header('数据样本')
preview_rows = filtered_df.head()
st.dataframe(preview_rows)
|