Files
sad_test01/aicodes/module1_dashboard/cardio_dashboard.py
anthony-s526 ffc4192ff0 Add CardioAI project with three modules
- Module 1: Dashboard for cardiovascular disease data visualization
- Module 2: Machine learning predictor with Flask API
- Module 3: Voice assistant with DeepSeek and CosyVoice integration
- Add .gitignore for proper file exclusion
- Update requirements and documentation

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-26 21:26:10 +08:00

354 lines
12 KiB
Python

#!/usr/bin/env python3
"""
CardioAI - Cardiovascular Disease Dashboard
Streamlit application for data cleaning, feature engineering, and interactive visualization.
"""
import os
import warnings

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import streamlit as st
from plotly.subplots import make_subplots
# NOTE(review): this silences every warning in the process, including pandas
# deprecation and chained-assignment notices. Consider narrowing the filter
# to specific warning categories once the data pipeline is stable.
warnings.filterwarnings('ignore')

# Set page configuration
st.set_page_config(
    page_title="CardioAI - Cardiovascular Disease Dashboard",
    page_icon="❤️",
    layout="wide",
    initial_sidebar_state="expanded",
)

# Constants
# Location of the source workbook. Set the CARDIO_DATA_PATH environment
# variable to point the dashboard at a different file; when it is unset the
# original development path is used, so existing setups keep working.
DATA_PATH = os.environ.get(
    "CARDIO_DATA_PATH",
    "/Users/anthony/ai_lesson/ai_zhangzhongshan/心血管疾病.xlsx",
)
# Maps a fragment of a raw spreadsheet header to its canonical column name.
# A header matches when it contains the fragment (case-insensitive), which
# lets headers carrying a unit suffix still be recognised.
_COLUMN_ALIASES = {
    'age(天)': 'age',
    # Spelled with the digit one; presumably how the header appears in the
    # source workbook -- verify against the data file.
    'cholestero1': 'cholesterol',
    'cholesterol': 'cholesterol',
    'gluc': 'gluc',
    'ap_hi': 'ap_hi',
    'ap_lo': 'ap_lo',
    'cardio': 'cardio',
    'gender': 'gender',
    'height': 'height',
    'weight': 'weight',
    'smoke': 'smoke',
    'alco': 'alco',
    'active': 'active',
}

# Label table shared by the 1/2/3 coded cholesterol and glucose columns.
_LEVEL_LABELS = {
    1: "Normal",
    2: "Above Normal",
    3: "Well Above Normal",
}


def _standardize_columns(df):
    """Return a copy of ``df`` with recognised headers renamed canonically."""
    canonical_names = set(_COLUMN_ALIASES.values())
    # Headers that are already canonical keep their name and reserve it, so
    # a look-alike header can never be renamed into a duplicate column.
    taken = {col for col in df.columns if col in canonical_names}
    renames = {}
    for col in df.columns:
        if col in taken:
            continue
        # str() keeps non-string headers (e.g. integers) from crashing here.
        header = str(col).lower()
        target = next(
            (
                canonical
                for alias, canonical in _COLUMN_ALIASES.items()
                if alias.lower() in header
            ),
            None,
        )
        if target is not None and target not in taken:
            renames[col] = target
            taken.add(target)
    return df.rename(columns=renames)


def _categorize_bmi(bmi):
    """Return the weight-class label for ``bmi``, or None when it is missing."""
    # Without this guard a NaN fails every comparison below and would be
    # reported as "Obese".
    if pd.isna(bmi):
        return None
    if bmi < 18.5:
        return "Underweight"
    if bmi < 25:
        return "Normal"
    if bmi < 30:
        return "Overweight"
    return "Obese"


def _clean_and_engineer(df):
    """Return a cleaned copy of ``df`` with the derived dashboard columns."""
    df = df.copy()

    # 1. Convert age (in days) to whole years. Rows without an age are
    #    dropped first: they cannot be cast to int and could never satisfy
    #    the age-range filter anyway.
    if 'age' in df.columns:
        df = df[df['age'].notna()].copy()
        df['age_years'] = np.round(df['age'] / 365.25).astype(int)

    # 2. BMI = weight / height_m ** 2 (height is divided by 100 first).
    #    Non-positive heights are masked so they give a missing BMI instead
    #    of an infinite one.
    if 'height' in df.columns and 'weight' in df.columns:
        height_m = df['height'].where(df['height'] > 0) / 100
        df['bmi'] = (df['weight'] / height_m ** 2).round(2)

    # 3. Drop implausible blood-pressure readings: diastolic must be below
    #    systolic, systolic within [90, 250] and diastolic within [60, 150].
    if 'ap_hi' in df.columns and 'ap_lo' in df.columns:
        plausible = (
            (df['ap_lo'] < df['ap_hi'])
            & df['ap_hi'].between(90, 250)
            & df['ap_lo'].between(60, 150)
        )
        # Copy so the column assignments below act on an independent frame
        # rather than on a slice of the unfiltered data.
        df = df[plausible].copy()

    # 4. Descriptive labels for the coded cholesterol / glucose levels.
    if 'cholesterol' in df.columns:
        df['cholesterol_str'] = df['cholesterol'].map(_LEVEL_LABELS)
    if 'gluc' in df.columns:
        df['gluc_str'] = df['gluc'].map(_LEVEL_LABELS)

    # 5. BMI categories.
    if 'bmi' in df.columns:
        df['bmi_category'] = df['bmi'].apply(_categorize_bmi)

    return df


@st.cache_data
def load_and_clean_data(data_path=None):
    """
    Load the cardiovascular workbook and return a cleaned DataFrame.

    Args:
        data_path: Optional path of the Excel file to read. When omitted the
            module-level ``DATA_PATH`` is used.

    Returns:
        The cleaned DataFrame with the derived columns ``age_years``,
        ``bmi``, ``cholesterol_str``, ``gluc_str`` and ``bmi_category``
        (each only when its source columns are present). An empty DataFrame
        is returned when the file cannot be read; the error is reported in
        the page instead of being raised.
    """
    source = DATA_PATH if data_path is None else data_path
    try:
        df = pd.read_excel(source, engine='openpyxl')
    except Exception as e:
        # UI boundary: show the failure to the user instead of crashing.
        st.error(f"数据加载失败: {e}")
        return pd.DataFrame()
    st.sidebar.success(f"数据加载成功!原始记录数: {len(df):,}")
    return _clean_and_engineer(_standardize_columns(df))
def create_filters(df):
    """Render the sidebar filter widgets and return the current selections.

    Args:
        df: DataFrame whose ``age_years``, ``gender`` and ``cardio`` columns
            (when present) supply the widget ranges and options.

    Returns:
        A tuple ``(age_range, gender_filter, cardio_filter)``. ``age_range``
        is the slider's ``(low, high)`` pair, or ``(0, 100)`` when there is
        no ``age_years`` column. Each of the other two is the list chosen in
        its multiselect, or ``None`` when the column is absent.
    """
    sidebar = st.sidebar
    sidebar.header("📊 数据筛选器")

    # Age range: the slider spans the observed ages and starts fully open.
    age_range = (0, 100)
    if 'age_years' in df.columns:
        ages = df['age_years']
        youngest = int(ages.min())
        oldest = int(ages.max())
        age_range = sidebar.slider(
            "选择年龄范围 (岁)",
            min_value=youngest,
            max_value=oldest,
            value=(youngest, oldest),
        )

    # Gender: every observed value is offered and pre-selected.
    gender_filter = None
    if 'gender' in df.columns:
        genders = sorted(df['gender'].unique())
        gender_filter = sidebar.multiselect(
            "选择性别",
            options=genders,
            default=genders,
        )

    # Disease status: every observed value is offered and pre-selected.
    cardio_filter = None
    if 'cardio' in df.columns:
        statuses = sorted(df['cardio'].unique())
        cardio_filter = sidebar.multiselect(
            "心血管疾病状态",
            options=statuses,
            default=statuses,
            help="0: 无疾病, 1: 有疾病",
        )

    return age_range, gender_filter, cardio_filter
def apply_filters(df, age_range, gender_filter, cardio_filter):
    """Return a filtered copy of ``df``; the input frame is left untouched.

    Args:
        df: DataFrame to filter.
        age_range: ``(low, high)`` inclusive bounds applied to ``age_years``.
        gender_filter: Allowed ``gender`` values, or ``None`` to skip.
        cardio_filter: Allowed ``cardio`` values, or ``None`` to skip.

    A filter whose column is missing from ``df`` is skipped.
    """
    result = df.copy()

    # Age: keep rows whose age lies inside the inclusive range.
    if 'age_years' in result.columns:
        lower, upper = age_range[0], age_range[1]
        result = result[result['age_years'].between(lower, upper)]

    # Membership filters, applied in the order gender -> cardio.
    membership_filters = (
        ('gender', gender_filter),
        ('cardio', cardio_filter),
    )
    for column, allowed in membership_filters:
        if allowed is None or column not in result.columns:
            continue
        result = result[result[column].isin(allowed)]

    return result
def _column_mean(df, column):
    """Return the mean of ``df[column]``, or None when it cannot be computed."""
    if column not in df.columns:
        return None
    value = df[column].mean()
    # An empty or all-missing column averages to NaN; report it as
    # unavailable rather than letting "nan" reach the page.
    return None if pd.isna(value) else value


def display_summary_metrics(df):
    """Display the four headline metric cards followed by a divider.

    The cards show the record count, the share of rows with ``cardio`` set
    (as a percentage), the mean ``age_years`` and the mean ``bmi``. A card
    shows "N/A" when its column is missing or when no value can be averaged,
    for example after the filters have removed every row.

    Args:
        df: The (filtered) DataFrame to summarise.
    """
    col1, col2, col3, col4 = st.columns(4)

    with col1:
        st.metric("📈 总记录数", f"{len(df):,}")

    with col2:
        risk = _column_mean(df, 'cardio')
        risk_text = "N/A" if risk is None else f"{risk * 100:.2f}%"
        st.metric("❤️ 心血管疾病风险率", risk_text)

    with col3:
        avg_age = _column_mean(df, 'age_years')
        age_text = "N/A" if avg_age is None else f"{avg_age:.1f}"
        st.metric("👥 平均年龄", age_text)

    with col4:
        avg_bmi = _column_mean(df, 'bmi')
        bmi_text = "N/A" if avg_bmi is None else f"{avg_bmi:.1f}"
        st.metric("⚖️ 平均BMI", bmi_text)

    st.markdown("---")
# Fixed colour per disease status so both charts agree regardless of the
# order in which the statuses appear in the data.
_CARDIO_COLORS = {
    '无疾病': '#4ECDC4',
    '有疾病': '#FF6B6B',
}


def create_visualizations(df):
    """Render the charts and the data-detail expanders for ``df``.

    Draws an age histogram split by disease status and a stacked bar chart
    of disease share per BMI category, then shows the first 100 rows and
    summary statistics. The caller's DataFrame is not modified.

    Args:
        df: The (filtered) DataFrame to visualise.
    """
    can_plot_age = 'age_years' in df.columns and 'cardio' in df.columns
    if can_plot_age:
        # assign() returns a new frame, so the label column used for the
        # legend is not written into the caller's DataFrame. The detail
        # tables below still include it, as before.
        status_labels = df['cardio'].eq(1).map({True: '有疾病', False: '无疾病'})
        df = df.assign(cardio_str=status_labels)

    # Create two columns for charts
    col1, col2 = st.columns(2)

    with col1:
        st.subheader("📊 年龄分布 (按心血管疾病状态)")
        if can_plot_age:
            fig1 = px.histogram(
                df,
                x='age_years',
                color='cardio_str',
                nbins=30,
                barmode='overlay',
                opacity=0.7,
                # A mapping, not a positional sequence: with a sequence the
                # colours follow first-appearance order and can swap.
                color_discrete_map=_CARDIO_COLORS,
                labels={'age_years': '年龄 (岁)', 'cardio_str': '心血管疾病状态'},
            )
            fig1.update_layout(
                legend_title="疾病状态",
                xaxis_title="年龄 (岁)",
                yaxis_title="人数",
                bargap=0.1,
            )
            st.plotly_chart(fig1, use_container_width=True)
        else:
            st.info("年龄或心血管疾病状态数据不可用")

    with col2:
        st.subheader("📈 BMI类别与心血管疾病关系")
        if 'bmi_category' in df.columns and 'cardio' in df.columns:
            # Row-normalised cross-tabulation: each BMI category sums to 100%.
            cross_tab = pd.crosstab(df['bmi_category'], df['cardio'], normalize='index') * 100
            categories = cross_tab.index.tolist()
            # A status absent from the data has no column; plot it as zeros.
            no_disease = cross_tab[0].values if 0 in cross_tab.columns else [0] * len(categories)
            has_disease = cross_tab[1].values if 1 in cross_tab.columns else [0] * len(categories)
            fig2 = go.Figure(data=[
                go.Bar(name='无疾病', x=categories, y=no_disease,
                       marker_color=_CARDIO_COLORS['无疾病']),
                go.Bar(name='有疾病', x=categories, y=has_disease,
                       marker_color=_CARDIO_COLORS['有疾病']),
            ])
            fig2.update_layout(
                barmode='stack',
                xaxis_title="BMI 类别",
                yaxis_title="百分比 (%)",
                legend_title="疾病状态",
                yaxis=dict(range=[0, 100]),
            )
            st.plotly_chart(fig2, use_container_width=True)
        else:
            st.info("BMI类别或心血管疾病状态数据不可用")

    # Additional visualizations
    st.markdown("---")
    st.subheader("🔍 数据详情")

    # Show filtered data
    with st.expander("查看筛选后的数据 (前100行)"):
        st.dataframe(df.head(100), use_container_width=True)

    # Data statistics
    with st.expander("查看数据统计摘要"):
        if not df.empty:
            st.write("**数值型变量统计:**")
            numeric_cols = df.select_dtypes(include=[np.number]).columns
            st.dataframe(df[numeric_cols].describe(), use_container_width=True)

            st.write("**分类变量统计:**")
            categorical_cols = df.select_dtypes(include=['object', 'category']).columns
            for col in categorical_cols:
                st.write(f"**{col}**:")
                st.dataframe(df[col].value_counts(), use_container_width=True)
def main():
    """Run the dashboard page from top to bottom.

    Renders the title, loads the data (stopping with an error message when
    nothing could be loaded), shows the dataset overview, applies the
    sidebar filters, and then draws the metrics, the charts and the footer.
    """
    intro_text = (
        "\n"
        "### 模块1: 数据可视化仪表板\n"
        "本模块提供心血管疾病数据的交互式探索、清洗和可视化功能。\n"
    )
    footer_html = (
        "\n"
        "<div style='text-align: center; color: gray;'>\n"
        "<p>CardioAI - 心血管疾病智能辅助系统 | 模块1: 数据可视化仪表板</p>\n"
        "<p>使用 Streamlit 和 Plotly 构建</p>\n"
        "</div>\n"
    )

    # Title and description
    st.title("❤️ CardioAI - 心血管疾病智能辅助系统")
    st.markdown(intro_text)

    # Load and clean data
    with st.spinner("正在加载和清洗数据..."):
        df = load_and_clean_data()

    # Nothing to show without data.
    if df.empty:
        st.error("无法加载数据。请检查数据文件路径和格式。")
        return

    # Dataset overview: size, column names and per-column missing counts.
    with st.expander("📋 数据集信息", expanded=False):
        row_count, feature_count = df.shape
        st.write(f"**总记录数:** {row_count:,}")
        st.write(f"**特征数:** {feature_count}")
        st.write("**列名:**", ", ".join(df.columns))

        null_counts = df.isna().sum()
        columns_with_gaps = null_counts[null_counts > 0]
        if columns_with_gaps.empty:
            st.write("**缺失值:** 无")
        else:
            st.write("**缺失值:**")
            st.dataframe(columns_with_gaps.rename('缺失数量'))

    # Sidebar selections -> filtered view of the data.
    selections = create_filters(df)
    filtered_df = apply_filters(df, *selections)

    # Summary cards, then the charts and detail tables.
    display_summary_metrics(filtered_df)
    create_visualizations(filtered_df)

    # Footer
    st.markdown("---")
    st.markdown(footer_html, unsafe_allow_html=True)
if __name__ == "__main__":
main()