84 lines
2.9 KiB
Python
84 lines
2.9 KiB
Python
#!/usr/bin/env python
|
|
# -*- coding: utf-8 -*-
|
|
"""
|
|
测试数据加载和处理的脚本
|
|
"""
|
|
|
|
import sys
|
|
import os
|
|
|
|
# 添加父目录到路径
|
|
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
|
|
|
# 导入数据处理函数
|
|
from cardio_dashboard import load_and_process_data
|
|
|
|
def test_data_loading():
    """Smoke-test the data loading and cleaning done by ``load_and_process_data``.

    The checks run in this order and stop at the first failure:

    1. the returned frame is not empty;
    2. every required column is present;
    3. no row has a diastolic pressure (``ap_lo``) greater than or equal to
       its systolic pressure (``ap_hi``);
    4. ``ap_hi`` lies within 90-250 and ``ap_lo`` within 60-150 (inclusive).

    Along the way a short summary of value ranges and category counts is
    printed to stdout.

    Returns:
        bool: ``True`` when every check passes; ``False`` when a check fails
        or when any exception is raised (the traceback is printed).
    """
    print("开始测试数据加载和清洗...")

    try:
        frame = load_and_process_data()

        # Guard: an empty frame means nothing was loaded.
        if frame.empty:
            print("❌ 数据加载失败:返回空数据框")
            return False
        print(f"✅ 数据加载成功!共 {len(frame)} 条记录")

        # Guard: all columns used by the checks below must exist.
        expected = (
            "age_years", "bmi", "bmi_category", "cholesterol_str",
            "gluc_str", "gender_str", "cardio_str", "ap_hi", "ap_lo",
        )
        absent = [name for name in expected if name not in frame.columns]
        if absent:
            print(f"❌ 缺少必要的列: {absent}")
            return False
        print("✅ 所有必要的列都存在")

        # Numeric summary: min/max of each measurement.
        print("\n数据摘要:")
        ages = frame["age_years"]
        print(f"- 年龄范围: {ages.min()} ~ {ages.max()} 岁")
        bmi = frame["bmi"]
        print(f"- BMI范围: {bmi.min():.1f} ~ {bmi.max():.1f}")
        systolic = frame["ap_hi"]
        print(f"- 收缩压范围: {systolic.min()} ~ {systolic.max()} mmHg")
        diastolic = frame["ap_lo"]
        print(f"- 舒张压范围: {diastolic.min()} ~ {diastolic.max()} mmHg")

        # Category summary: value counts of each derived label column,
        # printed in a fixed order.
        print("\n类别分布:")
        labelled_columns = (
            ("性别", "gender_str"),
            ("心血管疾病", "cardio_str"),
            ("BMI分类", "bmi_category"),
            ("胆固醇水平", "cholesterol_str"),
            ("血糖水平", "gluc_str"),
        )
        for label, column in labelled_columns:
            print(f"- {label}: {frame[column].value_counts().to_dict()}")

        # Guard: diastolic must be strictly below systolic in every row.
        inverted = diastolic >= systolic
        if inverted.any():
            print(f"❌ 仍然存在舒张压 >= 收缩压的记录: {inverted.sum()} 条")
            return False
        print("✅ 已成功删除舒张压 >= 收缩压的记录")

        # Guard: both pressures must fall inside the accepted bounds.
        systolic_ok = (systolic >= 90) & (systolic <= 250)
        diastolic_ok = (diastolic >= 60) & (diastolic <= 150)
        within_bounds = systolic_ok & diastolic_ok
        if not within_bounds.all():
            print(f"❌ 仍然存在血压异常值: {(~within_bounds).sum()} 条")
            return False
        print("✅ 所有血压值都在正常范围内")

        print("\n🎉 所有测试通过!")
        return True

    except Exception as exc:
        # Top-level boundary of the script: report the error with its
        # traceback and signal failure instead of propagating.
        print(f"❌ 测试过程中发生错误: {exc!s}")
        import traceback
        traceback.print_exc()
        return False
|
|
|
|
if __name__ == "__main__":
    # Run the checks and mirror the outcome in the process exit status:
    # 0 when everything passed, 1 otherwise.
    exit_status = 0 if test_data_loading() else 1
    sys.exit(exit_status)