Add CardioAI project with usage instructions

- Add comprehensive README.md with setup and usage instructions
- Add .env.example template (sanitized, no real API keys)
- Add root-level .gitignore to exclude .env and generated files
- Add all project modules (dashboard, predictor)
- Add data file and requirements.txt
This commit is contained in:
zhenchuan199
2026-04-02 20:11:24 +08:00
parent 9133925f0a
commit b5b299a682
9 changed files with 2277 additions and 0 deletions

View File

@@ -0,0 +1,187 @@
"""
CardioAI 模块2: Flask API服务
心血管疾病风险预测 - 后端接口
"""
import joblib
import numpy as np
import pandas as pd
from flask import Flask, request, jsonify, render_template
from pathlib import Path
import os  # stdlib; used only for the optional CARDIOAI_ROOT override below

# ==================== Constants ====================
# Project root directory. The original author's checkout location remains the
# default so existing setups keep working; set the CARDIOAI_ROOT environment
# variable to run the service from any other directory or operating system.
CODE_ROOT = Path(os.environ.get("CARDIOAI_ROOT") or r"F:\My_Git_Project\CardioAI")
# Location of the serialized predictor that load_model() reads.
MODEL_PATH = CODE_ROOT / "module2_predictor" / "cardio_predictor_model.pkl"
# ==================== Flask application ====================
# The template and static folder names are passed explicitly.
app = Flask(
    __name__,
    static_folder="static",
    template_folder="templates",
)

# Module-level cache for the trained model; stays None until load_model()
# populates it.
model = None
def load_model():
    """Return the predictor, reading it from ``MODEL_PATH`` on first use.

    The loaded object is stored in the module-level ``model`` variable, so
    later calls return the cached instance without touching the disk.

    Returns:
        The object deserialized by ``joblib.load(MODEL_PATH)``.
    """
    global model

    # Fast path: the model was already loaded by an earlier call.
    if model is not None:
        return model

    print("📂 正在加载模型...")
    # NOTE(review): joblib files are pickle-based, so MODEL_PATH should only
    # ever point at a trusted, locally produced artifact.
    model = joblib.load(MODEL_PATH)
    print("✅ 模型加载成功!")
    return model
# ==================== 路由定义 ====================
@app.route('/')
def index():
    """Serve the front-end page by rendering the ``index.html`` template."""
    page = render_template('index.html')
    return page
def _error_response(message: str, status: int):
    """Build the ``{'success': False, 'error': ...}`` JSON reply with *status*."""
    return jsonify({'success': False, 'error': message}), status


@app.route('/predict_cardio', methods=['POST'])
def predict_cardio():
    """Cardiovascular-disease risk prediction endpoint.

    Expects a JSON object in the POST body carrying the 11 raw features
    ``age_years, gender, height, weight, ap_hi, ap_lo, cholesterol, gluc,
    smoke, alco, active``. BMI is derived here as
    ``weight / (height / 100) ** 2`` and appended as the 12th model input.

    Returns:
        * 200 with ``success=True``, the predicted class and label, the class
          probabilities in percent, a risk level and a recommendation.
        * 400 with ``success=False`` when the body is missing, is not a JSON
          object, lacks a required field, or holds a value that is not a
          finite number (or a non-positive height).
        * 500 with ``success=False`` when loading the model or predicting
          fails for any other reason.
    """
    import math  # function-scope import keeps this block self-contained

    # Raw fields the client must send, in the column order used for training.
    feature_names = [
        'age_years', 'gender', 'height', 'weight', 'ap_hi', 'ap_lo',
        'cholesterol', 'gluc', 'smoke', 'alco', 'active'
    ]

    # silent=True yields None for a wrong Content-Type or malformed JSON
    # instead of raising an HTTP error that would be reported as a 500.
    data = request.get_json(silent=True)
    if not data:
        return _error_response('未收到数据', 400)
    if not isinstance(data, dict):
        # A JSON array/scalar has no named fields to read.
        return _error_response('数据格式错误: 请求体必须是JSON对象', 400)

    # Report every missing field at once, before any conversion is attempted.
    missing_fields = [col for col in feature_names if col not in data]
    if missing_fields:
        return _error_response(
            f'缺少必要字段: {", ".join(missing_fields)}', 400
        )

    try:
        features = [float(data[col]) for col in feature_names]
    except (TypeError, ValueError, OverflowError) as e:
        # TypeError covers null / nested values, OverflowError huge integers;
        # all of them are client mistakes, hence 400 rather than 500.
        return _error_response(f'数据格式错误: {str(e)}', 400)

    if not all(math.isfinite(value) for value in features):
        return _error_response('数据格式错误: 特征值必须是有限数值', 400)

    values = dict(zip(feature_names, features))
    if values['height'] <= 0:
        # BMI is undefined for a non-positive height; refuse rather than feed
        # the model a fabricated BMI of 0.
        return _error_response('数据格式错误: height 必须大于0', 400)

    try:
        bmi = values['weight'] / ((values['height'] / 100) ** 2)

        # Single-row frame whose columns match the training feature order.
        X_input = pd.DataFrame(
            [features + [bmi]], columns=feature_names + ['bmi']
        )

        # Load the model on first use (cached afterwards).
        predictor = load_model()

        prediction = int(predictor.predict(X_input)[0])
        # One predict_proba call provides both class probabilities.
        probabilities = predictor.predict_proba(X_input)[0]
        prob_healthy = float(probabilities[0])
        prob_risk = float(probabilities[1])

        result = {
            'success': True,
            'prediction': prediction,
            'prediction_label': '有风险' if prediction == 1 else '健康',
            'probability': {
                '健康': round(prob_healthy * 100, 2),
                '有风险': round(prob_risk * 100, 2)
            },
            'risk_level': get_risk_level(prob_risk),
            'recommendation': get_recommendation(prob_risk, data)
        }
        return jsonify(result)
    except ValueError as e:
        return _error_response(f'数据格式错误: {str(e)}', 400)
    except Exception as e:
        # Top-level boundary: never let an exception escape the endpoint.
        return _error_response(f'预测失败: {str(e)}', 500)
def get_risk_level(probability: float) -> str:
    """Map a risk probability to one of four labelled risk tiers.

    Args:
        probability: Predicted probability of the "at risk" class.

    Returns:
        The label of the first tier whose exclusive upper bound is greater
        than ``probability`` (bounds 0.3, 0.5, 0.7); the high-risk label
        otherwise.
    """
    # (exclusive upper bound, label), checked in ascending order.
    tiers = (
        (0.3, '🟢 低风险'),
        (0.5, '🟡 中低风险'),
        (0.7, '🟠 中高风险'),
    )
    for upper_bound, label in tiers:
        if probability < upper_bound:
            return label
    return '🔴 高风险'
def get_recommendation(probability: float, data: dict) -> str:
    """Pick the advice text that matches a risk probability.

    Args:
        probability: Predicted probability of the "at risk" class.
        data: The request payload; accepted but not consulted.

    Returns:
        The advice for the first tier whose exclusive upper bound
        (0.3, 0.5, 0.7) exceeds ``probability``, or the urgent advice when
        no tier matches.
    """
    advice_tiers = [
        (0.3, '继续保持健康的生活方式,定期体检。'),
        (0.5, '建议适当增加运动,注意饮食均衡。'),
        (0.7, '建议咨询医生,制定健康管理计划。'),
    ]
    urgent_advice = '⚠️ 风险较高,请尽快就医检查。'
    return next(
        (text for ceiling, text in advice_tiers if probability < ceiling),
        urgent_advice,
    )
@app.route('/health', methods=['GET'])
def health_check():
    """Health-check endpoint: reply with a fixed JSON status document."""
    payload = dict(
        status='healthy',
        service='CardioAI Cardiovascular Prediction API',
        version='1.0.0',
    )
    return jsonify(payload)
# ==================== 启动应用 ====================
if __name__ == '__main__':
    import os  # script-only import; needed for the FLASK_DEBUG switch

    port = 5001
    # Debug mode enables the interactive Werkzeug debugger, which lets anyone
    # who can reach the server execute arbitrary Python. Because the app
    # listens on all interfaces, debug is off unless explicitly requested
    # through the FLASK_DEBUG environment variable.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').strip().lower() in (
        '1', 'true', 'yes', 'on'
    )

    banner = "=" * 60
    print("\n" + banner)
    print("❤️ CardioAI 心血管疾病风险预测 API")
    print(banner)
    print(f"📂 模型路径: {MODEL_PATH}")
    print(f"🌐 启动地址: http://localhost:{port}")
    print(banner + "\n")

    # Load the model up front so a missing/corrupt file fails at start-up
    # instead of on the first request.
    load_model()

    # Start the Flask development server.
    app.run(
        host='0.0.0.0',
        port=port,
        debug=debug_enabled
    )

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,199 @@
"""
CardioAI 模块2: 模型训练脚本
心血管疾病风险预测模型 - 训练与保存
"""
import pandas as pd
import numpy as np
import joblib
from pathlib import Path
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, roc_auc_score, accuracy_score
from xgboost import XGBClassifier
import os  # stdlib; used only for the optional CARDIOAI_ROOT override below

# ==================== Constants ====================
# Project root directory. The original author's checkout location remains the
# default so existing setups keep working; set the CARDIOAI_ROOT environment
# variable to train from any other directory or operating system.
CODE_ROOT = Path(os.environ.get("CARDIOAI_ROOT") or r"F:\My_Git_Project\CardioAI")
DATA_PATH = CODE_ROOT / "data" / "心血管疾病.xlsx"
MODEL_PATH = CODE_ROOT / "module2_predictor" / "cardio_predictor_model.pkl"
# Feature groups. The model is trained on the derived ``age_years`` column
# (the raw ``age`` column is not part of the feature matrix), so that is the
# name listed here.
CONTINUOUS_FEATURES = ['age_years', 'height', 'weight', 'ap_hi', 'ap_lo', 'bmi']
CATEGORICAL_FEATURES = ['gender', 'cholesterol', 'gluc', 'smoke', 'alco', 'active']
def load_and_clean_data(file_path: Path) -> pd.DataFrame:
    """Load the cardiovascular dataset from Excel and clean it.

    Steps, in order:
      1. Read the workbook with the ``openpyxl`` engine.
      2. Drop rows whose ``height`` is missing or not positive, because BMI
         cannot be computed for them.
      3. Add ``age_years`` (``age`` divided by 365, rounded, as int) and
         ``bmi`` (``weight / (height / 100) ** 2``).
      4. Drop rows where ``ap_lo >= ap_hi``.
      5. Keep only rows with ``ap_hi`` in [90, 250] and ``ap_lo`` in [60, 150].

    Args:
        file_path: Path of the Excel file to read.

    Returns:
        The cleaned DataFrame, including the two derived columns.
    """
    print(f"📂 正在加载数据: {file_path}")
    df = pd.read_excel(file_path, engine='openpyxl')
    print(f"✅ 数据加载成功,共 {len(df)} 条记录")

    # A zero height would make BMI infinite, which the scaler in the training
    # pipeline rejects; remove such rows before deriving BMI. The copy lets
    # the derived columns be assigned without chained-assignment warnings.
    initial_count = len(df)
    df = df[df['height'] > 0].copy()
    print(f"🗑️ 删除身高<=0的记录: {initial_count - len(df)}")

    # Feature engineering: age (days) -> age_years
    df['age_years'] = (df['age'] / 365).round().astype(int)
    # BMI = weight / height(m)^2, with height supplied in centimetres
    df['bmi'] = df['weight'] / ((df['height'] / 100) ** 2)

    # Outlier handling: diastolic pressure must be below systolic pressure.
    initial_count = len(df)
    df = df[df['ap_lo'] < df['ap_hi']]
    print(f"🗑️ 删除舒张压>=收缩压的记录: {initial_count - len(df)}")

    # Extreme blood-pressure values: systolic must lie in [90, 250] ...
    initial_count = len(df)
    df = df[(df['ap_hi'] >= 90) & (df['ap_hi'] <= 250)]
    removed_hi = initial_count - len(df)

    # ... and diastolic in [60, 150].
    initial_count = len(df)
    df = df[(df['ap_lo'] >= 60) & (df['ap_lo'] <= 150)]
    removed_lo = initial_count - len(df)

    print(f"🗑️ 删除血压异常值: 收缩压 {removed_hi} 条, 舒张压 {removed_lo}")
    print(f"✅ 数据清洗完成,剩余 {len(df)} 条记录")
    return df
def prepare_features(df: pd.DataFrame) -> tuple:
    """Split the cleaned frame into a feature matrix and a label vector.

    The ``id`` and raw ``age`` columns are left out; ``age_years`` and the
    other processed columns are kept.

    Args:
        df: Cleaned data containing the 12 feature columns and ``cardio``.

    Returns:
        ``(X, y, feature_columns)`` where ``X`` is a copy of the feature
        columns, ``y`` a copy of the ``cardio`` column and
        ``feature_columns`` the list of feature names in model order.
    """
    demographic = ['age_years', 'gender', 'height', 'weight']
    blood_pressure = ['ap_hi', 'ap_lo']
    lab_and_lifestyle = ['cholesterol', 'gluc', 'smoke', 'alco', 'active']
    feature_columns = demographic + blood_pressure + lab_and_lifestyle + ['bmi']

    # Copies keep later modifications away from the caller's frame.
    features = df[feature_columns].copy()
    labels = df['cardio'].copy()

    print(f"📊 特征数量: {len(feature_columns)}")
    print(f"📊 特征列: {feature_columns}")
    return features, labels, feature_columns
def build_pipeline() -> Pipeline:
    """Build the unfitted preprocessing + classifier pipeline.

    Continuous columns are standardized and categorical columns are one-hot
    encoded (categories unseen during fitting are ignored at prediction
    time); any other column is dropped. The transformed matrix is passed to
    an XGBoost classifier.

    Returns:
        An unfitted ``Pipeline`` with steps ``preprocessor`` and
        ``classifier``.
    """
    print("🔧 正在构建Pipeline...")

    # Continuous feature columns
    continuous_cols = ['age_years', 'height', 'weight', 'ap_hi', 'ap_lo', 'bmi']
    # Categorical feature columns
    categorical_cols = ['gender', 'cholesterol', 'gluc', 'smoke', 'alco', 'active']

    # Preprocessor
    preprocessor = ColumnTransformer(
        transformers=[
            ('num', StandardScaler(), continuous_cols),
            ('cat', OneHotEncoder(handle_unknown='ignore', sparse_output=False), categorical_cols)
        ],
        remainder='drop'
    )

    # Full pipeline: preprocessing + XGBoost classifier.
    # ``use_label_encoder`` is intentionally not passed: the option is
    # deprecated and no longer recognised by current XGBoost releases, where
    # supplying it only produces an "unused parameter" warning.
    pipeline = Pipeline([
        ('preprocessor', preprocessor),
        ('classifier', XGBClassifier(
            n_estimators=100,
            max_depth=6,
            learning_rate=0.1,
            random_state=42,
            eval_metric='logloss',
            n_jobs=-1
        ))
    ])

    print("✅ Pipeline构建完成")
    return pipeline
def train_and_evaluate(X: pd.DataFrame, y: pd.Series, pipeline: Pipeline):
    """Fit the pipeline on a training split and report hold-out metrics.

    The data is split 80/20 with stratification on ``y`` and a fixed random
    seed. Accuracy, ROC-AUC and a classification report for the test split
    are printed.

    Args:
        X: Feature matrix.
        y: Binary labels.
        pipeline: Unfitted pipeline to train.

    Returns:
        The same ``pipeline`` object, fitted on the training split.
    """
    rule = "=" * 50

    print("\n" + rule)
    print("📈 开始模型训练...")
    print(rule)

    # Train/test split
    split = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
    X_train, X_test, y_train, y_test = split
    print(f"📊 训练集大小: {len(X_train)}")
    print(f"📊 测试集大小: {len(X_test)}")

    # Fit
    print("🏋️ 正在训练XGBoost模型...")
    pipeline.fit(X_train, y_train)

    # Predict hard labels and positive-class probabilities on the test split
    predicted_labels = pipeline.predict(X_test)
    positive_scores = pipeline.predict_proba(X_test)[:, 1]

    # Metrics
    accuracy = accuracy_score(y_test, predicted_labels)
    roc_auc = roc_auc_score(y_test, positive_scores)
    report = classification_report(
        y_test, predicted_labels, target_names=['健康', '有风险']
    )

    print("\n" + rule)
    print("📊 模型评估结果:")
    print(rule)
    print(f"✅ 准确率 (Accuracy): {accuracy:.4f}")
    print(f"✅ ROC-AUC 分数: {roc_auc:.4f}")
    print("\n📋 分类报告:")
    print(report)
    return pipeline
def save_model(pipeline: Pipeline, model_path: Path):
    """Serialize the fitted pipeline to disk and print the file size.

    Args:
        pipeline: The fitted pipeline to persist.
        model_path: Destination file; a plain string is accepted as well.
            Missing parent directories are created.
    """
    model_path = Path(model_path)
    print(f"\n💾 正在保存模型到: {model_path}")

    # joblib.dump does not create directories, so a fresh checkout without
    # the output folder would otherwise fail only after training finished.
    model_path.parent.mkdir(parents=True, exist_ok=True)
    joblib.dump(pipeline, model_path)
    print("✅ 模型保存成功!")

    # Report the size of the written file in MiB as a quick sanity check.
    file_size = model_path.stat().st_size / (1024 * 1024)
    print(f"📦 模型文件大小: {file_size:.2f} MB")
def main():
    """Run the whole training workflow: load, prepare, build, train, save."""
    banner = "=" * 60

    print("\n" + banner)
    print("❤️ CardioAI 模块2: 心血管疾病风险预测模型训练")
    print(banner + "\n")

    # 1. Load and clean the data
    cleaned = load_and_clean_data(DATA_PATH)

    # 2. Prepare features (the returned column list is not needed here)
    X, y, _ = prepare_features(cleaned)

    # 3. Build the pipeline, then 4. train and evaluate it
    trained_pipeline = train_and_evaluate(X, y, build_pipeline())

    # 5. Persist the fitted model
    save_model(trained_pipeline, MODEL_PATH)

    print("\n" + banner)
    print("🎉 模型训练完成!")
    print(banner)

    usage_lines = (
        "\n📌 模型使用说明:",
        " 1. 启动Flask API: python app.py",
        " 2. 访问 http://localhost:5001 查看预测界面",
        " 3. 输入11个特征值进行预测",
    )
    for line in usage_lines:
        print(line)


if __name__ == "__main__":
    main()