"""
CardioAI cardiovascular disease data visualization dashboard.

An interactive data analysis and visualization app built with Streamlit.
"""

# Provenance: recovered from git patch b83677d19f03991e2f56dcc6d2a85408785c9eda
# ("feat: add Streamlit cardiovascular disease data visualization dashboard"),
# which created ai_code/aicodes/module1_dashboard/cardio_dashboard.py.

from pathlib import Path

import numpy as np
import pandas as pd
import plotly.express as px
import streamlit as st


# Absolute path hard-coded by the original version of this script; kept as a
# fallback so the app keeps working where the file only exists at that path.
_LEGACY_DATA_PATH = "C:/Users/SAM/Desktop/sam_test/ai_code/aicodes/data/心血管疾病.xlsx"

# The same workbook located relative to this script:
# .../aicodes/module1_dashboard/cardio_dashboard.py -> .../aicodes/data/
_REPO_DATA_PATH = Path(__file__).resolve().parent.parent / "data" / "心血管疾病.xlsx"

# Numeric code -> display label (cholesterol and gluc share the same scale).
_LEVEL_LABELS = {1: '正常', 2: '高于正常', 3: '远高于正常'}
_GENDER_LABELS = {1: '女性', 2: '男性'}
_CARDIO_LABELS = {0: '无心血管疾病', 1: '有心血管疾病'}

# Left-closed BMI bins: [-inf, 18.5), [18.5, 24), [24, 28), [28, inf).
_BMI_BINS = [-np.inf, 18.5, 24, 28, np.inf]
_BMI_LABELS = ['偏瘦', '正常', '超重', '肥胖']

# One colour per disease status, shared by every chart.
_CARDIO_COLORS = {
    '无心血管疾病': '#2ecc71',
    '有心血管疾病': '#e74c3c',
}


# ============================================
# Data loading (cached)
# ============================================
@st.cache_data
def load_data(file_path: str) -> pd.DataFrame:
    """
    Load the Excel data file.

    Args:
        file_path: Path to the Excel workbook.

    Returns:
        The loaded DataFrame.
    """
    return pd.read_excel(file_path)


# ============================================
# Data cleaning (cached)
# ============================================
@st.cache_data
def clean_data(df: pd.DataFrame) -> pd.DataFrame:
    """
    Clean the raw data and derive the features used by the dashboard.

    Steps:
        1. Drop rows that cannot be processed: missing age/height/weight,
           or non-positive height/weight (BMI would be NaN or infinite).
        2. Drop blood-pressure outliers: keep rows with ap_hi > ap_lo,
           ap_hi in [90, 250] and ap_lo in [60, 150].
        3. Convert age from days to whole years (``age_years``).
        4. Compute ``bmi`` = weight / (height / 100) ** 2.
        5. Add descriptive ``*_category`` columns for cholesterol, gluc,
           BMI, gender and cardio.

    Args:
        df: Raw DataFrame. It is not modified.

    Returns:
        A new, cleaned DataFrame that keeps the original row index.
    """
    # Without this guard a missing age makes ``astype(int)`` raise, and a
    # missing/zero height yields a NaN/inf BMI that would be mislabelled.
    measurements_ok = (
        df[['age', 'height', 'weight']].notna().all(axis=1)
        & (df['height'] > 0)
        & (df['weight'] > 0)
    )
    # Comparisons against NaN are False, so rows with missing pressure
    # readings are dropped here as well.
    pressure_ok = (
        (df['ap_hi'] > df['ap_lo'])
        & df['ap_hi'].between(90, 250)
        & df['ap_lo'].between(60, 150)
    )

    # Filter once and copy once, so the column assignments below write to an
    # independent frame rather than to a slice of the caller's data.
    df_clean = df.loc[measurements_ok & pressure_ok].copy()

    df_clean['age_years'] = (df_clean['age'] / 365).round().astype(int)
    df_clean['bmi'] = df_clean['weight'] / ((df_clean['height'] / 100) ** 2)

    df_clean['cholesterol_category'] = df_clean['cholesterol'].map(_LEVEL_LABELS)
    df_clean['gluc_category'] = df_clean['gluc'].map(_LEVEL_LABELS)
    # Cast back to plain objects so a later groupby only yields the
    # categories that are actually present.
    df_clean['bmi_category'] = pd.cut(
        df_clean['bmi'],
        bins=_BMI_BINS,
        labels=_BMI_LABELS,
        right=False,
    ).astype(object)
    df_clean['gender_category'] = df_clean['gender'].map(_GENDER_LABELS)
    df_clean['cardio_category'] = df_clean['cardio'].map(_CARDIO_LABELS)

    return df_clean


# ============================================
# Streamlit page configuration
# ============================================
st.set_page_config(
    page_title="CardioAI 心血管疾病分析",
    page_icon="❤️",
    layout="wide"
)


def _resolve_data_path() -> str:
    """Return the workbook path: next to the repo if present, else the legacy path."""
    if _REPO_DATA_PATH.is_file():
        return str(_REPO_DATA_PATH)
    return _LEGACY_DATA_PATH


def _apply_sidebar_filters(df_clean: pd.DataFrame) -> pd.DataFrame:
    """Render the sidebar filter widgets and return the rows they select."""
    st.sidebar.header("🔍 筛选条件")

    # Age range slider, bounded by the ages present in the data.
    age_min = int(df_clean['age_years'].min())
    age_max = int(df_clean['age_years'].max())
    age_range = st.sidebar.slider(
        "年龄范围 (岁)",
        min_value=age_min,
        max_value=age_max,
        value=(age_min, age_max)
    )

    # Gender multiselect, everything selected by default.
    gender_options = df_clean['gender_category'].unique().tolist()
    selected_genders = st.sidebar.multiselect(
        "性别",
        options=gender_options,
        default=gender_options
    )

    # Disease-status multiselect, everything selected by default.
    cardio_options = df_clean['cardio_category'].unique().tolist()
    selected_cardios = st.sidebar.multiselect(
        "心血管疾病状态",
        options=cardio_options,
        default=cardio_options
    )

    selected = (
        df_clean['age_years'].between(age_range[0], age_range[1])
        & df_clean['gender_category'].isin(selected_genders)
        & df_clean['cardio_category'].isin(selected_cardios)
    )
    return df_clean[selected]


def _render_overview(df_filtered: pd.DataFrame) -> None:
    """Render the three headline metrics for the filtered rows."""
    st.header("📈 数据概览")

    total_count = len(df_filtered)
    disease_count = int((df_filtered['cardio'] == 1).sum())
    risk_rate = (disease_count / total_count * 100) if total_count > 0 else 0

    col1, col2, col3 = st.columns(3)

    with col1:
        st.metric(
            label="筛选后记录数",
            value=f"{total_count:,}"
        )

    with col2:
        st.metric(
            label="心血管疾病风险率",
            value=f"{risk_rate:.1f}%",
            delta=f"{disease_count} / {total_count}",
            # The delta slot carries a plain "cases / total" caption, not a
            # change over time, so it must not be coloured as an improvement.
            delta_color="off"
        )

    with col3:
        # The mean of an empty selection is NaN; show a placeholder instead.
        if total_count > 0:
            avg_age_text = f"{df_filtered['age_years'].mean():.1f} 岁"
        else:
            avg_age_text = "N/A"
        st.metric(
            label="平均年龄",
            value=avg_age_text
        )


def _render_charts(df_filtered: pd.DataFrame) -> None:
    """Render the age histogram and the BMI-vs-disease bar chart in two tabs."""
    st.header("📊 可视化分析")

    tab1, tab2 = st.tabs(["年龄分布", "BMI与心血管疾病关系"])

    with tab1:
        st.subheader("年龄分布直方图 (按心血管疾病状态区分)")

        fig_age = px.histogram(
            df_filtered,
            x='age_years',
            color='cardio_category',
            nbins=20,
            title="年龄分布 (按心血管疾病状态)",
            labels={
                'age_years': '年龄 (岁)',
                'count': '人数',
                'cardio_category': '心血管疾病状态'
            },
            color_discrete_map=_CARDIO_COLORS,
            barmode='overlay'
        )
        fig_age.update_layout(bargap=0.1)
        st.plotly_chart(fig_age, use_container_width=True)

    with tab2:
        st.subheader("BMI分类对心血管疾病的影响")

        # Row count per (BMI category, disease status) pair.
        bmi_cardio = (
            df_filtered
            .groupby(['bmi_category', 'cardio_category'])
            .size()
            .reset_index(name='count')
        )

        fig_bmi = px.bar(
            bmi_cardio,
            x='bmi_category',
            y='count',
            color='cardio_category',
            title="BMI分类与心血管疾病关系",
            labels={
                'bmi_category': 'BMI分类',
                'count': '人数',
                'cardio_category': '心血管疾病状态'
            },
            color_discrete_map=_CARDIO_COLORS,
            # Keep the x axis in ascending BMI order, not alphabetical.
            category_orders={'bmi_category': _BMI_LABELS}
        )
        fig_bmi.update_layout(bargap=0.1)
        st.plotly_chart(fig_bmi, use_container_width=True)


# ============================================
# Main program
# ============================================
def main() -> None:
    """Build the dashboard page: load, clean, filter, then render the data."""
    st.title("❤️ CardioAI 心血管疾病数据分析")
    st.markdown("---")

    # Top-level UI boundary: report any load failure on the page and stop.
    try:
        df = load_data(_resolve_data_path())
    except Exception as e:
        st.error(f"❌ 数据加载失败: {e}")
        return
    st.success(f"✅ 成功加载数据,共 {len(df)} 条记录")

    df_clean = clean_data(df)
    st.info(f"📊 数据清洗后剩余 {len(df_clean)} 条记录 (剔除了异常值)")

    if df_clean.empty:
        # The slider bounds below cannot be computed from an empty frame.
        st.warning("⚠️ 清洗后没有可用数据")
        return

    st.markdown("---")

    df_filtered = _apply_sidebar_filters(df_clean)

    st.markdown("---")

    if df_filtered.empty:
        # Nothing to summarise or plot for this selection.
        st.warning("⚠️ 当前筛选条件下没有匹配的记录,请调整筛选条件")
    else:
        _render_overview(df_filtered)
        st.markdown("---")
        _render_charts(df_filtered)

    st.markdown("---")

    # Preview of the cleaned (unfiltered) data.
    with st.expander("📋 查看清洗后的数据"):
        st.dataframe(df_clean.head(100))
        st.caption(f"显示前 100 条记录,共 {len(df_clean)} 条")


if __name__ == "__main__":
    main()