- Module 1: Dashboard for cardiovascular disease data visualization - Module 2: Machine learning predictor with Flask API - Module 3: Voice assistant with DeepSeek and CosyVoice integration - Add .gitignore for proper file exclusion - Update requirements and documentation Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
421 lines
14 KiB
Python
421 lines
14 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
CardioAI - Cardiovascular Disease Prediction API
|
|
Flask application for real-time cardiovascular disease prediction.
|
|
"""
|
|
|
|
from flask import Flask, request, jsonify, render_template
|
|
import pandas as pd
|
|
import numpy as np
|
|
import joblib
|
|
import traceback
|
|
from datetime import datetime
|
|
import os
|
|
|
|
# WSGI application object; every route and error handler below registers on it
app = Flask(import_name=__name__)
|
|
|
|
# Location of the serialized model pipeline, resolved next to this file
MODEL_PATH = os.path.join(
    os.path.dirname(__file__),
    'cardio_predictor_model.pkl',
)

# Module-level state filled in by load_model()
model = None
model_metadata = None
expected_features = None

# Raw input fields a prediction request must supply, in canonical order.
#   age_years    age in years
#   gender       1: female, 2: male
#   height       height in cm
#   weight       weight in kg
#   ap_hi        systolic blood pressure
#   ap_lo        diastolic blood pressure
#   cholesterol  1: normal, 2: above normal, 3: well above normal
#   gluc         1: normal, 2: above normal, 3: well above normal
#   smoke        0: no, 1: yes
#   alco         0: no, 1: yes (alcohol intake)
#   active       0: no, 1: yes (physical activity)
EXPECTED_FEATURES = [
    'age_years',
    'gender',
    'height',
    'weight',
    'ap_hi',
    'ap_lo',
    'cholesterol',
    'gluc',
    'smoke',
    'alco',
    'active',
]

# Inclusive (min, max) bounds used during validation. 'bmi' is not a raw
# input field; it is derived from height and weight and checked separately.
FEATURE_RANGES = dict(
    age_years=(20, 100),
    gender=(1, 2),
    height=(100, 250),    # cm
    weight=(30, 200),     # kg
    ap_hi=(90, 250),      # systolic blood pressure
    ap_lo=(60, 150),      # diastolic blood pressure
    cholesterol=(1, 3),
    gluc=(1, 3),
    smoke=(0, 1),
    alco=(0, 1),
    active=(0, 1),
    bmi=(10, 50),         # BMI range
)
|
|
|
|
def load_model():
    """
    Load the trained model pipeline from disk.

    Populates the module-level ``model``, ``model_metadata`` and
    ``expected_features`` globals:

    * ``model`` is the object deserialized from MODEL_PATH, or None when the
      file is missing or cannot be loaded.
    * ``expected_features`` is read from the pipeline's 'preprocessor' step
      when possible, otherwise it falls back to EXPECTED_FEATURES.
    * ``model_metadata`` comes from the optional '<model>_metadata.json'
      file next to the model; a default description is used when that file
      is absent, unreadable, or does not contain a JSON object.

    Problems with the optional metadata file never discard a model that
    loaded successfully. The function does not raise; failures are printed.
    """
    global model, model_metadata, expected_features

    if not os.path.exists(MODEL_PATH):
        print(f"❌ Model file not found at: {MODEL_PATH}")
        print("⚠️ Please run train_and_save.py first to train and save the model.")
        model = None
        return

    try:
        print(f"📦 Loading model from: {MODEL_PATH}")
        # SECURITY NOTE: joblib.load unpickles the file, which can execute
        # arbitrary code. MODEL_PATH must only ever point at a trusted file.
        loaded_model = joblib.load(MODEL_PATH)
        print("✅ Model loaded successfully!")
    except Exception as e:
        print(f"❌ Error loading model: {e}")
        traceback.print_exc()
        model = None
        return

    # Try to extract expected features from the model. This assumes the
    # pipeline has a 'preprocessor' step whose first two fitted transformers
    # carry the numeric and categorical column lists; anything else falls
    # back to the static list.
    try:
        preprocessor = loaded_model.named_steps['preprocessor']
        num_features = preprocessor.transformers_[0][2]  # Numeric features
        cat_features = preprocessor.transformers_[1][2]  # Categorical features
        features = list(num_features) + list(cat_features)
        print(f"📋 Model expects features: {features}")
    except Exception as e:
        print(f"⚠️ Could not extract feature names from model: {e}")
        features = EXPECTED_FEATURES

    # Metadata is optional: start from the default and only replace it when
    # a well-formed JSON object is found.
    metadata = {'description': 'CardioAI Prediction Model'}
    # splitext swaps only the final extension; str.replace would also rewrite
    # any earlier '.pkl' occurring in a directory name.
    metadata_path = os.path.splitext(MODEL_PATH)[0] + '_metadata.json'
    if os.path.exists(metadata_path):
        import json
        try:
            with open(metadata_path, 'r', encoding='utf-8') as f:
                loaded_metadata = json.load(f)
        except (OSError, ValueError) as e:
            # ValueError covers both JSON syntax errors and decoding errors
            print(f"⚠️ Could not read model metadata: {e}")
        else:
            if isinstance(loaded_metadata, dict):
                metadata = loaded_metadata
                print(f"📋 Model metadata loaded: {metadata.get('description', 'N/A')}")
            else:
                print("⚠️ Model metadata is not a JSON object; using defaults")

    expected_features = features
    model_metadata = metadata
    model = loaded_model
|
|
|
|
def validate_input_features(input_data):
    """
    Validate input features for prediction.

    Every name in EXPECTED_FEATURES must be present. The coded fields
    (gender, cholesterol, gluc, smoke, alco, active) must hold whole numbers
    and are stored as int; all other fields are stored as float. Each
    converted value is checked against its inclusive bounds in
    FEATURE_RANGES. BMI is then derived from height (cm) and weight (kg),
    rounded to two decimals, range-checked and stored under 'bmi'.

    Keys that are not in EXPECTED_FEATURES only produce a console warning
    and are not copied into the result.

    Returns (is_valid, error_message, validated_data); validated_data is
    None whenever is_valid is False. The function does not raise: unexpected
    errors are reported through the returned message.
    """
    integer_features = ('gender', 'cholesterol', 'gluc', 'smoke', 'alco', 'active')

    try:
        # Accept any mapping-like input (e.g. a list of pairs)
        data = input_data if isinstance(input_data, dict) else dict(input_data)

        # Check for missing features
        missing_features = [feat for feat in EXPECTED_FEATURES if feat not in data]
        if missing_features:
            return False, f"Missing features: {missing_features}", None

        # Check for extra features
        extra_features = [feat for feat in data if feat not in EXPECTED_FEATURES]
        if extra_features:
            print(f"⚠️ Warning: Extra features provided: {extra_features}")

        # Validate each feature
        validated_data = {}
        for feature in EXPECTED_FEATURES:
            value = data[feature]

            try:
                number = float(value)
            except (ValueError, TypeError, OverflowError):
                return False, f"Invalid value for {feature}: {value} (should be numeric)", None

            if feature in integer_features:
                # Reject fractional codes instead of truncating them (0.9
                # must not silently become 0). is_integer() is also False
                # for NaN and infinities, so int() below cannot fail.
                if not number.is_integer():
                    return False, f"Invalid value for {feature}: {value} (should be an integer)", None
                validated_data[feature] = int(number)
            else:
                validated_data[feature] = number

            # Check value ranges (NaN fails both comparisons and is rejected)
            if feature in FEATURE_RANGES:
                min_val, max_val = FEATURE_RANGES[feature]
                if not (min_val <= validated_data[feature] <= max_val):
                    return False, f"{feature} value {validated_data[feature]} out of range ({min_val}-{max_val})", None

        # Calculate BMI if height and weight are available
        if 'height' in validated_data and 'weight' in validated_data:
            height_m = validated_data['height'] / 100  # convert cm to meters
            bmi_value = round(validated_data['weight'] / (height_m ** 2), 2)
            validated_data['bmi'] = bmi_value

            # Validate BMI range
            if 'bmi' in FEATURE_RANGES:
                min_val, max_val = FEATURE_RANGES['bmi']
                if not (min_val <= bmi_value <= max_val):
                    return False, f"Calculated BMI value {bmi_value} out of range ({min_val}-{max_val})", None

        return True, "Validation passed", validated_data

    except Exception as e:
        # Boundary catch: e.g. input_data that cannot be turned into a dict
        return False, f"Validation error: {str(e)}", None
|
|
|
|
def preprocess_input(input_data):
    """
    Build the single-row DataFrame handed to the model.

    The columns are selected and ordered according to the feature list taken
    from the loaded model (``expected_features``). When that list is not
    available, EXPECTED_FEATURES is used, with 'bmi' appended if it is not
    already part of it.

    Any expected column missing from the input is filled in with a warning:
    'bmi' is computed from height and weight when both are present, coded
    fields default to 1 and all remaining fields default to 0.0.

    Errors are printed and re-raised to the caller.
    """
    coded_fields = ('gender', 'cholesterol', 'gluc', 'smoke', 'alco', 'active')

    try:
        frame = pd.DataFrame([input_data])

        # Pick the feature list the model should receive
        if expected_features is not None:
            model_features = expected_features
        elif 'bmi' not in EXPECTED_FEATURES:
            model_features = EXPECTED_FEATURES + ['bmi']
        else:
            model_features = EXPECTED_FEATURES

        # Fill in anything the model expects but the input lacks
        # (should not happen if validation passed)
        for feature in model_features:
            if feature in frame.columns:
                continue

            print(f"⚠️ Warning: Feature '{feature}' not in input data, adding default value")

            bmi_derivable = (
                feature == 'bmi'
                and 'height' in frame.columns
                and 'weight' in frame.columns
            )
            if bmi_derivable:
                height_m = frame['height'].iloc[0] / 100
                frame['bmi'] = frame['weight'].iloc[0] / (height_m ** 2)
            elif feature in coded_fields:
                frame[feature] = 1
            else:
                frame[feature] = 0.0

        # Select and order the columns exactly as the model expects them
        return frame[model_features]

    except Exception as e:
        print(f"❌ Error preprocessing input: {e}")
        raise
|
|
|
|
@app.route('/')
def home():
    """
    Serve the prediction web interface by rendering the 'index.html' template.
    """
    page = render_template('index.html')
    return page
|
|
|
|
@app.route('/api/model_info', methods=['GET'])
def model_info():
    """
    Report description, version, creation date, feature list and feature
    ranges of the model as JSON.

    A load is attempted first when no model is in memory. Responds with 503
    if the model is still unavailable afterwards and with 500 on any
    unexpected error.
    """
    try:
        if model is None:
            load_model()
            if model is None:
                unavailable = {
                    'status': 'error',
                    'message': 'Model not loaded. Please run training script first.'
                }
                return jsonify(unavailable), 503

        details = {
            'description': model_metadata.get('description', 'CardioAI Prediction Model'),
            'version': model_metadata.get('model_version', '1.0'),
            'creation_date': model_metadata.get('creation_date', 'N/A'),
        }
        # Fall back to the static list when no feature list came with the model
        details['features'] = expected_features if expected_features else EXPECTED_FEATURES
        details['feature_ranges'] = FEATURE_RANGES

        return jsonify({'status': 'success', 'model': details})

    except Exception as e:
        failure = {
            'status': 'error',
            'message': f'Error getting model info: {str(e)}'
        }
        return jsonify(failure), 500
|
|
|
|
def _risk_assessment(risk_probability):
    """Map a disease probability to a (risk_level, recommendation) pair."""
    if risk_probability < 0.3:
        return "Low", "Continue healthy lifestyle."
    if risk_probability < 0.7:
        return "Medium", "Consider regular health check-ups."
    return "High", "Consult a healthcare professional."


@app.route('/predict_cardio', methods=['POST'])
def predict_cardio():
    """
    API endpoint for cardiovascular disease prediction.

    Accepts a POST request carrying the 11 feature values either as a JSON
    object or as form data. Returns the predicted class, the probability of
    class 1 (disease), a risk level with a recommendation, the validated
    input (including the derived BMI) and a timestamp.

    Status codes:
        200 - prediction made
        400 - no usable input (including a malformed JSON body) or
              validation failure
        503 - model not available
        500 - prediction failure or any other unexpected error
    """
    try:
        # Load model if not already loaded
        if model is None:
            load_model()

        if model is None:
            return jsonify({
                'status': 'error',
                'message': 'Model not available. Please run training script first.'
            }), 503

        # Get input data
        if request.is_json:
            # silent=True yields None for a malformed body instead of raising
            # BadRequest, which the blanket handler below would otherwise
            # report as a 500 although the client is at fault.
            input_data = request.get_json(silent=True)
        else:
            # Try form data
            input_data = request.form.to_dict()

        if not input_data:
            return jsonify({
                'status': 'error',
                'message': 'No input data provided. Please provide 11 feature values.'
            }), 400

        # Validate input
        is_valid, error_msg, validated_data = validate_input_features(input_data)

        if not is_valid:
            return jsonify({
                'status': 'error',
                'message': error_msg,
                'expected_features': EXPECTED_FEATURES,
                'feature_ranges': FEATURE_RANGES
            }), 400

        # Preprocess input
        input_df = preprocess_input(validated_data)

        # Make prediction
        try:
            # Get probability prediction
            probability = model.predict_proba(input_df)[0]

            # Get class prediction (0: no disease, 1: disease)
            prediction = model.predict(input_df)[0]

            # Probability of class 1 (disease) drives the risk level
            risk_probability = float(probability[1])
            risk_percentage = risk_probability * 100
            risk_level, recommendation = _risk_assessment(risk_probability)

            # Prepare response
            response = {
                'status': 'success',
                'prediction': int(prediction),
                'probability': risk_probability,
                'probability_percentage': risk_percentage,
                'risk_level': risk_level,
                'recommendation': recommendation,
                'input_features': validated_data,
                'timestamp': datetime.now().isoformat()
            }

            print(f"✅ Prediction made: {prediction}, Probability: {risk_probability:.4f}")

            return jsonify(response)

        except Exception as e:
            print(f"❌ Prediction error: {e}")
            traceback.print_exc()
            return jsonify({
                'status': 'error',
                'message': f'Prediction error: {str(e)}'
            }), 500

    except Exception as e:
        print(f"❌ API error: {e}")
        traceback.print_exc()
        return jsonify({
            'status': 'error',
            'message': f'Internal server error: {str(e)}'
        }), 500
|
|
|
|
@app.route('/health', methods=['GET'])
def health_check():
    """
    Liveness probe: reports the service name, whether a model is currently
    held in memory, and the current timestamp. Responds with 500 and an
    'unhealthy' status if building the report fails.
    """
    try:
        if model is None:
            model_status = 'not_loaded'
        else:
            model_status = 'loaded'

        return jsonify({
            'status': 'healthy',
            'service': 'CardioAI Prediction API',
            'model_status': model_status,
            'timestamp': datetime.now().isoformat()
        })

    except Exception as e:
        failure = {
            'status': 'unhealthy',
            'error': str(e)
        }
        return jsonify(failure), 500
|
|
|
|
@app.errorhandler(404)
def not_found(error):
    """Answer unknown routes with a JSON error body and status 404."""
    payload = {
        'status': 'error',
        'message': 'Endpoint not found'
    }
    return jsonify(payload), 404
|
|
|
|
@app.errorhandler(405)
def method_not_allowed(error):
    """Answer unsupported HTTP methods with a JSON error body and status 405."""
    payload = {
        'status': 'error',
        'message': 'Method not allowed'
    }
    return jsonify(payload), 405
|
|
|
|
@app.errorhandler(500)
def internal_error(error):
    """Answer unhandled server errors with a JSON error body and status 500."""
    payload = {
        'status': 'error',
        'message': 'Internal server error'
    }
    return jsonify(payload), 500
|
|
|
|
def init_app():
    """
    Start-up routine: print the banner, load the model, then list the
    available endpoints and whether the model ended up loaded.
    """
    rule = "=" * 60
    print(rule)
    print("🚀 CardioAI - Cardiovascular Disease Prediction API")
    print(rule)

    # Load model at startup
    load_model()

    endpoint_summary = (
        "\n📡 API Endpoints:",
        " GET / - Prediction web interface",
        " GET /api/model_info - Get model information",
        " POST /predict_cardio - Make prediction",
        " GET /health - Health check",
    )
    for line in endpoint_summary:
        print(line)

    status_label = 'Loaded' if model is not None else 'Not loaded'
    print(f"\n📊 Model status: {status_label}")
|
|
|
|
# Initialize on import so both entry points share one start-up path:
# direct execution (below) and import by a WSGI server.
init_app()

if __name__ == '__main__':
    # The server binds to all interfaces, so the Werkzeug interactive
    # debugger must not be on by default: anyone who can reach it can run
    # arbitrary code. Debug mode is opt-in via FLASK_DEBUG=1 (or true/yes/on).
    debug_enabled = os.environ.get('FLASK_DEBUG', '').strip().lower() in (
        '1', 'true', 'yes', 'on'
    )

    # Run the Flask app
    print("\n🌍 Starting server on http://127.0.0.1:8080")
    print(" Press Ctrl+C to stop\n")

    app.run(
        host='0.0.0.0',
        port=8080,
        debug=debug_enabled,
        threaded=True
    )