"""CardioAI: Streamlit dashboard for a cardiovascular-disease dataset.

The script loads an Excel workbook, derives a few helper columns, drops
records with implausible blood-pressure readings, and then renders sidebar
filters, two summary metrics, two histograms and a small data sample.
"""
import streamlit as st
import pandas as pd
import numpy as np
import plotly.express as px

# Data path
DATA_PATH = "D:\\AI_Coding\\data\\心血管疾病.xlsx"

# Display labels shared by the `cholesterol` and `gluc` level codes.
LEVEL_LABELS = {1: '正常', 2: '偏高', 3: '很高'}

# BMI bands are left-closed: [-inf, 18.5), [18.5, 24), [24, 28), [28, inf).
BMI_BIN_EDGES = [-np.inf, 18.5, 24, 28, np.inf]
BMI_LABELS = ['偏瘦', '正常', '超重', '肥胖']


@st.cache_data
def load_and_preprocess_data():
    """Load the workbook at ``DATA_PATH`` and return a cleaned DataFrame.

    Added columns:
        age_years: ``age`` divided by 365.25 and rounded to a whole number
            (the source treats ``age`` as a number of days).
        bmi: ``weight / (height / 100) ** 2`` (presumably kg and cm;
            verify against the dataset description).
        cholesterol_desc, gluc_desc: text labels for the 1/2/3 level codes;
            any other code maps to NaN.
        bmi_category: one of ``BMI_LABELS``, or NaN when ``bmi`` is missing
            or infinite, so invalid rows are not reported as obese.

    Rows are kept only when ``ap_lo < ap_hi``, ``90 <= ap_hi <= 250`` and
    ``60 <= ap_lo <= 150``.

    Returns:
        pandas.DataFrame: the filtered data with the columns above.
    """
    df = pd.read_excel(DATA_PATH)

    # Feature engineering
    df['age_years'] = (df['age'] / 365.25).round(0)
    df['bmi'] = df['weight'] / ((df['height'] / 100) ** 2)

    # Outlier handling: diastolic must be below systolic, and both readings
    # must fall inside a plausible range.
    plausible_pressure = (
        (df['ap_lo'] < df['ap_hi'])
        & df['ap_hi'].between(90, 250)
        & df['ap_lo'].between(60, 150)
    )
    # Copy so the column assignments below write to an independent frame
    # rather than to a slice of the unfiltered data.
    df = df[plausible_pressure].copy()

    # Category conversion
    df['cholesterol_desc'] = df['cholesterol'].map(LEVEL_LABELS)
    df['gluc_desc'] = df['gluc'].map(LEVEL_LABELS)

    # Vectorised banding; right=False makes each band include its lower
    # edge, matching the original `<` comparisons. Cast back to plain
    # objects so downstream code sees ordinary strings.
    df['bmi_category'] = pd.cut(
        df['bmi'],
        bins=BMI_BIN_EDGES,
        labels=BMI_LABELS,
        right=False,
    ).astype(object)

    return df


# Load data
df = load_and_preprocess_data()

# Streamlit app
st.title('CardioAI - 心血管疾病数据可视化')

# Sidebar filters
st.sidebar.header('数据筛选')

# Range slider over age_years
age_min = int(df['age_years'].min())
age_max = int(df['age_years'].max())
age_range = st.sidebar.slider(
    '年龄范围',
    min_value=age_min,
    max_value=age_max,
    value=(age_min, age_max),
)

# Multiselect over gender
gender_options = df['gender'].unique()
gender_filter = st.sidebar.multiselect(
    '性别 (1=女, 2=男)',
    options=gender_options,
    default=gender_options,
)

# Multiselect over cardio
cardio_options = df['cardio'].unique()
cardio_filter = st.sidebar.multiselect(
    '心血管疾病 (0=无, 1=有)',
    options=cardio_options,
    default=cardio_options,
)

# Apply the filters
filtered_df = df[
    df['age_years'].between(age_range[0], age_range[1])
    & df['gender'].isin(gender_filter)
    & df['cardio'].isin(cardio_filter)
]

# Overview
st.header('数据概览')

# Total number of records after filtering
st.metric('筛选后的总记录数', len(filtered_df))

# Overall rate of cardio == 1; skipped when nothing matches so the
# division never sees a zero denominator.
if not filtered_df.empty:
    cardio_rate = (filtered_df['cardio'].sum() / len(filtered_df)) * 100
    st.metric('心血管疾病总风险率', f"{cardio_rate:.2f}%")

# Charts
st.header('数据可视化')

# Histogram of age_years, split by cardio
st.subheader('年龄分布与心血管疾病关系')
fig_age = px.histogram(
    filtered_df,
    x='age_years',
    color='cardio',
    barmode='overlay',
    nbins=20,
    labels={'age_years': '年龄', 'cardio': '心血管疾病', 'count': '人数'},
    title='年龄分布 histogram',
)
fig_age.update_layout(
    xaxis_title='年龄',
    yaxis_title='人数',
    legend_title='心血管疾病',
)
st.plotly_chart(fig_age)

# Stacked bar chart of bmi_category against cardio
st.subheader('BMI类别与心血管疾病关系')
fig_bmi = px.histogram(
    filtered_df,
    x='bmi_category',
    color='cardio',
    barmode='stack',
    # Pin the x-axis to the natural band order instead of the order in
    # which categories happen to appear in the filtered data.
    category_orders={'bmi_category': BMI_LABELS},
    labels={'bmi_category': 'BMI类别', 'cardio': '心血管疾病', 'count': '人数'},
    title='BMI类别对心血管疾病影响的堆叠柱状图',
)
fig_bmi.update_layout(
    xaxis_title='BMI类别',
    yaxis_title='人数',
    legend_title='心血管疾病',
)
st.plotly_chart(fig_bmi)

# Data sample
st.header('数据样本')
st.dataframe(filtered_df.head())