Add CardioAI project with usage instructions
- Add comprehensive README.md with setup and usage instructions
- Add .env.example template (sanitized, no real API keys)
- Add root-level .gitignore to exclude .env and generated files
- Add all project modules (dashboard, predictor)
- Add data file and requirements.txt
This commit is contained in:
187
CardioAI/module2_predictor/app.py
Normal file
187
CardioAI/module2_predictor/app.py
Normal file
@@ -0,0 +1,187 @@
|
||||
"""
|
||||
CardioAI 模块2: Flask API服务
|
||||
心血管疾病风险预测 - 后端接口
|
||||
"""
|
||||
|
||||
import joblib
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from flask import Flask, request, jsonify, render_template
|
||||
from pathlib import Path
|
||||
|
||||
# ==================== Constants ====================
# Resolve the project root from this file's own location
# (<root>/module2_predictor/app.py) instead of a machine-specific absolute
# path, so the service works from any checkout on any operating system.
CODE_ROOT = Path(__file__).resolve().parent.parent
MODEL_PATH = CODE_ROOT / "module2_predictor" / "cardio_predictor_model.pkl"
|
||||
|
||||
# ==================== Flask application ====================
# Templates are looked up in the "templates" folder, static assets in "static".
app = Flask(__name__, template_folder='templates', static_folder='static')

# Module-level slot for the loaded model; it stays None until load_model()
# populates it.
model = None
|
||||
|
||||
|
||||
def load_model():
    """Return the prediction model, loading it from MODEL_PATH on first use.

    The loaded object is stored in the module-level ``model`` variable, so
    later calls return it without reading the file again.
    """
    global model

    # Already loaded: nothing to do.
    if model is not None:
        return model

    print("📂 正在加载模型...")
    model = joblib.load(MODEL_PATH)
    print("✅ 模型加载成功!")
    return model
|
||||
|
||||
|
||||
# ==================== 路由定义 ====================
|
||||
@app.route('/')
def index():
    """Render the front-end page from the ``index.html`` template."""
    page = render_template('index.html')
    return page
|
||||
|
||||
|
||||
@app.route('/predict_cardio', methods=['POST'])
def predict_cardio():
    """
    Cardiovascular disease risk prediction endpoint.

    Expects a JSON object in the POST body holding the 11 raw feature values
    ``age_years``, ``gender``, ``height``, ``weight``, ``ap_hi``, ``ap_lo``,
    ``cholesterol``, ``gluc``, ``smoke``, ``alco`` and ``active``. BMI is
    derived here as ``weight / (height / 100) ** 2`` and appended as the
    12th model input.

    Returns:
        A JSON response. On success it carries ``prediction``,
        ``prediction_label``, ``probability`` (percentages), ``risk_level``
        and ``recommendation``. Invalid input yields HTTP 400 and a failure
        while loading or running the model yields HTTP 500; both carry
        ``success: False`` and an ``error`` message.
    """
    def _failure(message, status):
        # Uniform error payload used by every failure path below.
        return jsonify({'success': False, 'error': message}), status

    # silent=True: a malformed or non-JSON body yields None instead of raising
    # an HTTP exception, so the client gets a 400 rather than a generic 500.
    data = request.get_json(silent=True)

    # The body must be a non-empty JSON object; arrays and scalars are rejected.
    if not isinstance(data, dict) or not data:
        return _failure('未收到数据', 400)

    # Feature order must match the order used at training time.
    feature_names = [
        'age_years', 'gender', 'height', 'weight', 'ap_hi', 'ap_lo',
        'cholesterol', 'gluc', 'smoke', 'alco', 'active'
    ]

    # Reject incomplete requests before doing any numeric work.
    missing_fields = [col for col in feature_names if col not in data]
    if missing_fields:
        return _failure(f'缺少必要字段: {", ".join(missing_fields)}', 400)

    # float() raises ValueError for bad strings and TypeError for null or
    # nested values; both are client errors.
    try:
        features = [float(data[col]) for col in feature_names]
    except (TypeError, ValueError) as e:
        return _failure(f'数据格式错误: {str(e)}', 400)

    # NaN / infinity would flow straight into the model and produce a
    # meaningless prediction.
    if not np.isfinite(features).all():
        return _failure('数据格式错误: 特征值必须是有限数值', 400)

    # BMI = weight / (height / 100)^2; a non-positive height cannot yield a
    # valid BMI, so it is reported instead of silently substituting 0.
    weight = features[feature_names.index('weight')]
    height = features[feature_names.index('height')]
    if height <= 0:
        return _failure('数据格式错误: height 必须大于 0', 400)
    bmi = weight / ((height / 100) ** 2)

    # Column names must match those used at training time.
    feature_columns = feature_names + ['bmi']
    X_input = pd.DataFrame([features + [bmi]], columns=feature_columns)

    try:
        # Load the model if it has not been loaded yet.
        predictor = load_model()

        prediction = int(predictor.predict(X_input)[0])
        # One predict_proba call provides both class probabilities.
        class_probabilities = predictor.predict_proba(X_input)[0]
        prob_healthy = float(class_probabilities[0])
        prob_risk = float(class_probabilities[1])
    except Exception as e:
        # API boundary: record the traceback server-side and report a 500.
        app.logger.exception('Prediction failed')
        return _failure(f'预测失败: {str(e)}', 500)

    result = {
        'success': True,
        'prediction': prediction,
        'prediction_label': '有风险' if prediction == 1 else '健康',
        'probability': {
            '健康': round(prob_healthy * 100, 2),
            '有风险': round(prob_risk * 100, 2)
        },
        'risk_level': get_risk_level(prob_risk),
        'recommendation': get_recommendation(prob_risk, data)
    }

    return jsonify(result)
|
||||
|
||||
|
||||
def get_risk_level(probability: float) -> str:
    """Map a risk probability to its risk-level label.

    Bands, by exclusive upper bound: below 0.3 low, below 0.5 medium-low,
    below 0.7 medium-high; anything else is high.
    """
    bands = (
        (0.3, '🟢 低风险'),
        (0.5, '🟡 中低风险'),
        (0.7, '🟠 中高风险'),
    )
    for upper_bound, label in bands:
        if probability < upper_bound:
            return label
    return '🔴 高风险'
|
||||
|
||||
|
||||
def get_recommendation(probability: float, data: dict) -> str:
    """Return the advice text for a risk probability.

    Uses exclusive upper bounds 0.3, 0.5 and 0.7. ``data`` is accepted but
    not consulted.
    """
    advice_by_bound = (
        (0.3, '继续保持健康的生活方式,定期体检。'),
        (0.5, '建议适当增加运动,注意饮食均衡。'),
        (0.7, '建议咨询医生,制定健康管理计划。'),
    )
    for upper_bound, advice in advice_by_bound:
        if probability < upper_bound:
            return advice
    return '⚠️ 风险较高,请尽快就医检查。'
|
||||
|
||||
|
||||
@app.route('/health', methods=['GET'])
def health_check():
    """Health-check endpoint: report fixed status, service name and version."""
    payload = {
        'status': 'healthy',
        'service': 'CardioAI Cardiovascular Prediction API',
        'version': '1.0.0',
    }
    return jsonify(payload)
|
||||
|
||||
|
||||
# ==================== Application entry point ====================
if __name__ == '__main__':
    import os

    # Flask's debug mode enables the interactive Werkzeug debugger, which lets
    # anyone who can reach the server run arbitrary Python. It is therefore
    # opt-in (CARDIOAI_DEBUG=1) and, when enabled, bound to loopback only.
    debug_enabled = os.environ.get('CARDIOAI_DEBUG', '').strip().lower() in (
        '1', 'true', 'yes', 'on'
    )
    host = '127.0.0.1' if debug_enabled else '0.0.0.0'

    print("\n" + "="*60)
    print("❤️ CardioAI 心血管疾病风险预测 API")
    print("="*60)
    print(f"📂 模型路径: {MODEL_PATH}")
    print("🌐 启动地址: http://localhost:5001")
    print("="*60 + "\n")

    # Preload the model so a missing or broken model file fails at start-up
    # rather than on the first request.
    load_model()

    # Start the Flask application.
    app.run(
        host=host,
        port=5001,
        debug=debug_enabled
    )
|
||||
1060
CardioAI/module2_predictor/templates/index.html
Normal file
1060
CardioAI/module2_predictor/templates/index.html
Normal file
File diff suppressed because it is too large
Load Diff
199
CardioAI/module2_predictor/train_and_save.py
Normal file
199
CardioAI/module2_predictor/train_and_save.py
Normal file
@@ -0,0 +1,199 @@
|
||||
"""
|
||||
CardioAI 模块2: 模型训练脚本
|
||||
心血管疾病风险预测模型 - 训练与保存
|
||||
"""
|
||||
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import joblib
|
||||
from pathlib import Path
|
||||
from sklearn.pipeline import Pipeline
|
||||
from sklearn.compose import ColumnTransformer
|
||||
from sklearn.preprocessing import StandardScaler, OneHotEncoder
|
||||
from sklearn.model_selection import train_test_split
|
||||
from sklearn.metrics import classification_report, roc_auc_score, accuracy_score
|
||||
from xgboost import XGBClassifier
|
||||
|
||||
# ==================== Constants ====================
# Resolve the project root from this file's own location
# (<root>/module2_predictor/train_and_save.py) instead of a machine-specific
# absolute path, so training works from any checkout on any operating system.
CODE_ROOT = Path(__file__).resolve().parent.parent
DATA_PATH = CODE_ROOT / "data" / "心血管疾病.xlsx"
MODEL_PATH = CODE_ROOT / "module2_predictor" / "cardio_predictor_model.pkl"

# Feature column groups. The age feature is 'age_years' (not the raw 'age'
# column), matching the columns the pipeline in this file is built on.
CONTINUOUS_FEATURES = ['age_years', 'height', 'weight', 'ap_hi', 'ap_lo', 'bmi']
CATEGORICAL_FEATURES = ['gender', 'cholesterol', 'gluc', 'smoke', 'alco', 'active']
|
||||
|
||||
|
||||
def load_and_clean_data(file_path: Path) -> pd.DataFrame:
    """Load the Excel dataset and return it with derived columns and outliers removed.

    Steps, in order:
      1. read the workbook with the openpyxl engine;
      2. add ``age_years`` (``age`` / 365, rounded, as int) and ``bmi``
         (``weight`` / (``height`` / 100) ** 2);
      3. keep only rows where ``ap_lo`` < ``ap_hi``;
      4. keep only rows with ``ap_hi`` in [90, 250];
      5. keep only rows with ``ap_lo`` in [60, 150].

    Progress and the number of rows dropped at each stage are printed.
    """
    def _keep(frame, mask):
        # Apply a boolean row filter and report how many rows it removed.
        kept = frame[mask]
        return kept, len(frame) - len(kept)

    print(f"📂 正在加载数据: {file_path}")

    raw = pd.read_excel(file_path, engine='openpyxl')
    print(f"✅ 数据加载成功,共 {len(raw)} 条记录")

    # Feature engineering on a fresh frame: age (days, per the source data)
    # becomes whole years, and BMI is derived from weight and height.
    cleaned = raw.assign(
        age_years=(raw['age'] / 365).round().astype(int),
        bmi=raw['weight'] / ((raw['height'] / 100) ** 2),
    )

    # Diastolic pressure must be strictly below systolic pressure.
    cleaned, dropped_order = _keep(cleaned, cleaned['ap_lo'] < cleaned['ap_hi'])
    print(f"🗑️ 删除舒张压>=收缩压的记录: {dropped_order} 条")

    # Extreme blood-pressure values: systolic in [90, 250], then diastolic in
    # [60, 150] (both bounds inclusive).
    cleaned, dropped_hi = _keep(cleaned, cleaned['ap_hi'].between(90, 250))
    cleaned, dropped_lo = _keep(cleaned, cleaned['ap_lo'].between(60, 150))
    print(f"🗑️ 删除血压异常值: 收缩压 {dropped_hi} 条, 舒张压 {dropped_lo} 条")

    print(f"✅ 数据清洗完成,剩余 {len(cleaned)} 条记录")
    return cleaned
|
||||
|
||||
|
||||
def prepare_features(df: pd.DataFrame) -> tuple:
    """Split a cleaned frame into model inputs and the target.

    Only the 12 model features are kept, so ``id`` and the raw ``age``
    column are left out; ``age_years`` is used instead.

    Returns:
        ``(X, y, feature_columns)``: a copy of the feature columns, a copy
        of the ``cardio`` column, and the list of feature names.
    """
    feature_columns = [
        'age_years', 'gender', 'height', 'weight', 'ap_hi', 'ap_lo',
        'cholesterol', 'gluc', 'smoke', 'alco', 'active', 'bmi',
    ]

    # Copies keep later changes from touching the caller's frame.
    inputs = df.loc[:, feature_columns].copy()
    target = df.loc[:, 'cardio'].copy()

    print(f"📊 特征数量: {len(feature_columns)}")
    print(f"📊 特征列: {feature_columns}")

    return inputs, target, feature_columns
|
||||
|
||||
|
||||
def build_pipeline() -> Pipeline:
    """Build the unfitted Pipeline: column preprocessing followed by XGBoost.

    Continuous columns are standardised, categorical columns are one-hot
    encoded (unknown categories ignored, dense output), and any other
    column is dropped.
    """
    print("🔧 正在构建Pipeline...")

    continuous_cols = ['age_years', 'height', 'weight', 'ap_hi', 'ap_lo', 'bmi']
    categorical_cols = ['gender', 'cholesterol', 'gluc', 'smoke', 'alco', 'active']

    scaler = StandardScaler()
    encoder = OneHotEncoder(handle_unknown='ignore', sparse_output=False)

    preprocessor = ColumnTransformer(
        transformers=[
            ('num', scaler, continuous_cols),
            ('cat', encoder, categorical_cols),
        ],
        remainder='drop',
    )

    # NOTE(review): use_label_encoder appears to be deprecated in recent
    # XGBoost releases — confirm against the pinned version before removing.
    classifier = XGBClassifier(
        n_estimators=100,
        max_depth=6,
        learning_rate=0.1,
        random_state=42,
        use_label_encoder=False,
        eval_metric='logloss',
        n_jobs=-1,
    )

    pipeline = Pipeline(steps=[
        ('preprocessor', preprocessor),
        ('classifier', classifier),
    ])

    print("✅ Pipeline构建完成")
    return pipeline
|
||||
|
||||
|
||||
def train_and_evaluate(X: pd.DataFrame, y: pd.Series, pipeline: Pipeline):
    """Fit the pipeline on a stratified 80/20 split and print test metrics.

    Prints the split sizes, accuracy, ROC-AUC and a classification report
    for the held-out 20%.

    Returns:
        The same ``pipeline`` object, now fitted on the training split.
    """
    divider = "=" * 50

    print("\n" + divider)
    print("📈 开始模型训练...")
    print(divider)

    # Stratified split with a fixed seed so runs are reproducible.
    split = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
    X_train, X_test, y_train, y_test = split

    print(f"📊 训练集大小: {len(X_train)}")
    print(f"📊 测试集大小: {len(X_test)}")

    print("🏋️ 正在训练XGBoost模型...")
    pipeline.fit(X_train, y_train)

    # Hard labels for accuracy and the report; positive-class scores for ROC-AUC.
    predicted_labels = pipeline.predict(X_test)
    positive_scores = pipeline.predict_proba(X_test)[:, 1]

    accuracy = accuracy_score(y_test, predicted_labels)
    roc_auc = roc_auc_score(y_test, positive_scores)

    print("\n" + divider)
    print("📊 模型评估结果:")
    print(divider)
    print(f"✅ 准确率 (Accuracy): {accuracy:.4f}")
    print(f"✅ ROC-AUC 分数: {roc_auc:.4f}")
    print("\n📋 分类报告:")
    print(classification_report(y_test, predicted_labels, target_names=['健康', '有风险']))

    return pipeline
|
||||
|
||||
|
||||
def save_model(pipeline: Pipeline, model_path: Path):
    """Serialise the pipeline to ``model_path`` with joblib and print its size.

    Args:
        pipeline: The pipeline object to persist.
        model_path: Destination file. Missing parent directories are created.
    """
    print(f"\n💾 正在保存模型到: {model_path}")

    # joblib.dump does not create directories; without this, saving to a new
    # location fails with FileNotFoundError.
    model_path.parent.mkdir(parents=True, exist_ok=True)

    joblib.dump(pipeline, model_path)
    print("✅ 模型保存成功!")

    # Report the size of the file just written, in MB.
    file_size = model_path.stat().st_size / (1024 * 1024)
    print(f"📦 模型文件大小: {file_size:.2f} MB")
|
||||
|
||||
|
||||
def main():
    """Run the training workflow: load, prepare, build, train, save."""
    banner = "=" * 60

    print("\n" + banner)
    print("❤️ CardioAI 模块2: 心血管疾病风险预测模型训练")
    print(banner + "\n")

    # 1. Load and clean the data.
    cleaned = load_and_clean_data(DATA_PATH)

    # 2. Prepare the features; the returned column list is not needed here.
    X, y, _ = prepare_features(cleaned)

    # 3. Build the pipeline.
    untrained = build_pipeline()

    # 4. Train and evaluate the model.
    fitted = train_and_evaluate(X, y, untrained)

    # 5. Save the model.
    save_model(fitted, MODEL_PATH)

    print("\n" + banner)
    print("🎉 模型训练完成!")
    print(banner)

    usage_lines = (
        "\n📌 模型使用说明:",
        " 1. 启动Flask API: python app.py",
        " 2. 访问 http://localhost:5001 查看预测界面",
        " 3. 输入11个特征值进行预测",
    )
    for line in usage_lines:
        print(line)


# Run the training workflow only when executed as a script.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user