sad_test01/aicodes/module2_predictor/app.py

#!/usr/bin/env python3
"""
CardioAI - Cardiovascular Disease Prediction API
Flask application for real-time cardiovascular disease prediction.
"""

from flask import Flask, request, jsonify, render_template
import pandas as pd
import numpy as np
import joblib
import traceback
from datetime import datetime
import os

# Flask application object; the route and error-handler decorators in this
# module register against it.
app = Flask(__name__)

# Location of the serialized model pipeline: a file named
# 'cardio_predictor_model.pkl' in the same directory as this source file.
MODEL_PATH = os.path.join(os.path.dirname(__file__), 'cardio_predictor_model.pkl')

# Module-level state populated by load_model(). All three start as None;
# request handlers treat `model is None` as "model not available".
model = None               # object returned by joblib.load(MODEL_PATH)
model_metadata = None      # dict with descriptive metadata (e.g. 'description')
expected_features = None   # feature/column names the loaded model expects

# Raw input fields every prediction request must supply. The order here is
# the order in which the fields are validated.
EXPECTED_FEATURES = [
    # --- demographics and body measurements ---
    'age_years',    # age, in years
    'gender',       # 1 = female, 2 = male
    'height',       # centimetres
    'weight',       # kilograms
    # --- blood pressure ---
    'ap_hi',        # systolic
    'ap_lo',        # diastolic
    # --- lab levels (1 = normal, 2 = above normal, 3 = well above normal) ---
    'cholesterol',
    'gluc',
    # --- lifestyle flags (0 = no, 1 = yes) ---
    'smoke',
    'alco',
    'active',
]

# Inclusive (min, max) bounds accepted for each feature. 'bmi' is not a
# request field; it is derived from height and weight and checked separately.
FEATURE_RANGES = dict(
    age_years=(20, 100),
    gender=(1, 2),
    height=(100, 250),     # cm
    weight=(30, 200),      # kg
    ap_hi=(90, 250),       # systolic blood pressure
    ap_lo=(60, 150),       # diastolic blood pressure
    cholesterol=(1, 3),
    gluc=(1, 3),
    smoke=(0, 1),
    alco=(0, 1),
    active=(0, 1),
    bmi=(10, 50),          # derived BMI
)

def load_model():
    """
    Load the trained model pipeline from disk into the module-level globals.

    On success, sets ``model`` (the object returned by ``joblib.load``),
    ``expected_features`` (feature names read from the pipeline's
    'preprocessor' step, or ``EXPECTED_FEATURES`` when they cannot be
    extracted) and ``model_metadata`` (contents of the optional
    ``*_metadata.json`` file next to the model, or a default description).

    The metadata file is optional: if it is missing, unreadable, or does not
    contain a JSON object, the default metadata is used and the model is
    still loaded.

    If the model file is missing or loading fails, ``model`` is set to None
    and the problem is printed; no exception propagates to the caller.
    """
    global model, model_metadata, expected_features

    try:
        if not os.path.exists(MODEL_PATH):
            print(f"❌ Model file not found at: {MODEL_PATH}")
            print("⚠️  Please run train_and_save.py first to train and save the model.")
            model = None
            return

        print(f"📦 Loading model from: {MODEL_PATH}")
        # SECURITY NOTE: joblib.load unpickles the file and can therefore
        # execute arbitrary code. MODEL_PATH must only ever point to a
        # trusted, locally produced artifact.
        loaded_model = joblib.load(MODEL_PATH)
        print("✅ Model loaded successfully!")

        # Try to extract expected features from the model's preprocessor.
        try:
            preprocessor = loaded_model.named_steps['preprocessor']
            num_features = preprocessor.transformers_[0][2]  # Numeric features
            cat_features = preprocessor.transformers_[1][2]  # Categorical features
            features = list(num_features) + list(cat_features)
            print(f"📋 Model expects features: {features}")
        except Exception as e:
            print(f"⚠️  Could not extract feature names from model: {e}")
            features = EXPECTED_FEATURES

        # Swap only the file extension; str.replace would also rewrite any
        # '.pkl' that happens to appear in a directory name.
        metadata_path = os.path.splitext(MODEL_PATH)[0] + '_metadata.json'
        metadata = {'description': 'CardioAI Prediction Model'}
        if os.path.exists(metadata_path):
            import json
            try:
                with open(metadata_path, 'r', encoding='utf-8') as f:
                    loaded_metadata = json.load(f)
            except (OSError, ValueError) as e:
                # Metadata is informational only; a bad file must not
                # prevent the model itself from being served.
                print(f"⚠️  Could not read model metadata: {e}")
            else:
                if isinstance(loaded_metadata, dict):
                    metadata = loaded_metadata
                    print(f"📋 Model metadata loaded: {metadata.get('description', 'N/A')}")
                else:
                    print("⚠️  Model metadata is not a JSON object; using defaults.")

        # Assign `model` last so the other two globals are already populated
        # by the time `model` becomes non-None.
        expected_features = features
        model_metadata = metadata
        model = loaded_model

    except Exception as e:
        print(f"❌ Error loading model: {e}")
        traceback.print_exc()
        model = None

def validate_input_features(input_data):
    """
    Validate and normalise the raw feature values of a prediction request.

    Args:
        input_data: Mapping of feature name to value (a dict, or anything
            ``dict()`` accepts). Values may be numbers or numeric strings.

    Returns:
        A tuple ``(is_valid, message, validated_data)``. On success
        ``validated_data`` is a dict holding every entry of
        ``EXPECTED_FEATURES`` (categorical codes as ``int``, the rest as
        ``float``) plus a derived ``'bmi'`` rounded to two decimals. On
        failure ``validated_data`` is None and ``message`` describes the
        first problem found.

    Categorical codes must be whole numbers: a value such as 1.7 is rejected
    rather than silently truncated to 1. This function does not raise; any
    unexpected error is reported through the returned message.
    """
    # Features that are category codes / flags and must be integral.
    integer_features = {'gender', 'cholesterol', 'gluc', 'smoke', 'alco', 'active'}

    try:
        # Convert input to dictionary if it's not already
        data = input_data if isinstance(input_data, dict) else dict(input_data)

        # Check for missing features
        missing_features = [feat for feat in EXPECTED_FEATURES if feat not in data]
        if missing_features:
            return False, f"Missing features: {missing_features}", None

        # Extra features are tolerated (and ignored), but worth a log line.
        extra_features = [feat for feat in data if feat not in EXPECTED_FEATURES]
        if extra_features:
            print(f"⚠️  Warning: Extra features provided: {extra_features}")

        # Validate each feature
        validated_data = {}
        for feature in EXPECTED_FEATURES:
            value = data[feature]

            try:
                number = float(value)
            except (ValueError, TypeError, OverflowError):
                return False, f"Invalid value for {feature}: {value} (should be numeric)", None

            if feature in integer_features:
                # is_integer() is False for NaN and infinities as well, so
                # the int() conversion below cannot fail.
                if not number.is_integer():
                    return False, f"Invalid value for {feature}: {value} (should be an integer)", None
                number = int(number)

            validated_data[feature] = number

            # Check value ranges (NaN fails both comparisons and is rejected)
            if feature in FEATURE_RANGES:
                min_val, max_val = FEATURE_RANGES[feature]
                if not (min_val <= number <= max_val):
                    return False, f"{feature} value {number} out of range ({min_val}-{max_val})", None

        # Calculate BMI if height and weight are available
        if 'height' in validated_data and 'weight' in validated_data:
            height_m = validated_data['height'] / 100  # convert cm to meters
            bmi_value = round(validated_data['weight'] / (height_m ** 2), 2)
            validated_data['bmi'] = bmi_value

            # Validate BMI range
            if 'bmi' in FEATURE_RANGES:
                min_val, max_val = FEATURE_RANGES['bmi']
                if not (min_val <= bmi_value <= max_val):
                    return False, f"Calculated BMI value {bmi_value} out of range ({min_val}-{max_val})", None

        return True, "Validation passed", validated_data

    except Exception as e:
        return False, f"Validation error: {str(e)}", None

def preprocess_input(input_data):
    """
    Turn a validated feature dict into the one-row DataFrame given to the model.

    The column set is ``expected_features`` when a model has provided one,
    otherwise ``EXPECTED_FEATURES`` with 'bmi' appended if it is not already
    listed. Any column missing from ``input_data`` is filled in (BMI is
    computed from height/weight when possible; otherwise 1 for the
    categorical codes and 0.0 for everything else) and a warning is printed.
    The returned frame has exactly those columns, in that order.

    Any exception is printed and re-raised to the caller.
    """
    categorical = ('gender', 'cholesterol', 'gluc', 'smoke', 'alco', 'active')

    try:
        frame = pd.DataFrame([input_data])

        # Pick the column list: model-provided first, static fallback second.
        if expected_features is not None:
            columns = expected_features
        elif 'bmi' not in EXPECTED_FEATURES:
            columns = EXPECTED_FEATURES + ['bmi']
        else:
            columns = EXPECTED_FEATURES

        # Back-fill anything the model wants that the input did not carry
        # (not expected after successful validation).
        for name in columns:
            if name in frame.columns:
                continue

            print(f"⚠️  Warning: Feature '{name}' not in input data, adding default value")
            if name == 'bmi' and 'height' in frame.columns and 'weight' in frame.columns:
                metres = frame['height'].iloc[0] / 100
                frame['bmi'] = frame['weight'].iloc[0] / (metres ** 2)
            elif name in categorical:
                frame[name] = 1
            else:
                frame[name] = 0.0

        # Select and order the columns the way the model expects them.
        return frame[columns]

    except Exception as e:
        print(f"❌ Error preprocessing input: {e}")
        raise

@app.route('/')
def home():
    """
    Serve the prediction web interface by rendering the 'index.html' template.
    """
    page = render_template('index.html')
    return page

@app.route('/api/model_info', methods=['GET'])
def model_info():
    """
    Report descriptive information about the loaded model as JSON.

    Attempts to load the model first if none is loaded. Responds with 503
    when the model is still unavailable, 500 on any unexpected error, and
    otherwise with the model description, version, creation date, feature
    names and the accepted feature ranges.
    """
    try:
        # Lazy load: give the model one more chance before reporting failure.
        if model is None:
            load_model()

        if model is None:
            unavailable = {
                'status': 'error',
                'message': 'Model not loaded. Please run training script first.'
            }
            return jsonify(unavailable), 503

        description = model_metadata.get('description', 'CardioAI Prediction Model')
        version = model_metadata.get('model_version', '1.0')
        created = model_metadata.get('creation_date', 'N/A')
        # Fall back to the static list when no (or an empty) list is known.
        feature_names = expected_features or EXPECTED_FEATURES

        model_section = {
            'description': description,
            'version': version,
            'creation_date': created,
            'features': feature_names,
            'feature_ranges': FEATURE_RANGES
        }
        return jsonify({'status': 'success', 'model': model_section})

    except Exception as e:
        failure = {
            'status': 'error',
            'message': f'Error getting model info: {str(e)}'
        }
        return jsonify(failure), 500

@app.route('/predict_cardio', methods=['POST'])
def predict_cardio():
    """
    API endpoint for cardiovascular disease prediction.

    Accepts a POST request carrying the 11 feature values either as a JSON
    body or as form data.

    Responses:
        200: prediction (0/1), probability of class 1, risk level,
             recommendation, the validated input features and a timestamp.
        400: no usable input (including a malformed JSON body) or a
             validation failure; validation failures also list the expected
             features and ranges.
        503: the model could not be loaded.
        500: the prediction itself, or anything unexpected, failed.
    """
    try:
        # Load model if not already loaded
        if model is None:
            load_model()

        if model is None:
            return jsonify({
                'status': 'error',
                'message': 'Model not available. Please run training script first.'
            }), 503

        # Get input data
        if request.is_json:
            # silent=True makes a malformed or empty JSON body come back as
            # None instead of raising BadRequest. Without it the blanket
            # handler at the bottom would report a client mistake as a 500.
            input_data = request.get_json(silent=True)
        else:
            # Try form data
            input_data = request.form.to_dict()

        if not input_data:
            return jsonify({
                'status': 'error',
                'message': 'No input data provided. Please provide 11 feature values.'
            }), 400

        # Validate input
        is_valid, error_msg, validated_data = validate_input_features(input_data)

        if not is_valid:
            return jsonify({
                'status': 'error',
                'message': error_msg,
                'expected_features': EXPECTED_FEATURES,
                'feature_ranges': FEATURE_RANGES
            }), 400

        # Preprocess input
        input_df = preprocess_input(validated_data)

        # Make prediction
        try:
            # Get probability prediction
            probability = model.predict_proba(input_df)[0]

            # Get class prediction (0: no disease, 1: disease)
            prediction = model.predict(input_df)[0]

            # Calculate risk level based on probability
            risk_probability = float(probability[1])  # Probability of class 1 (disease)
            risk_percentage = risk_probability * 100

            if risk_probability < 0.3:
                risk_level = "Low"
                recommendation = "Continue healthy lifestyle."
            elif risk_probability < 0.7:
                risk_level = "Medium"
                recommendation = "Consider regular health check-ups."
            else:
                risk_level = "High"
                recommendation = "Consult a healthcare professional."

            # Prepare response
            response = {
                'status': 'success',
                'prediction': int(prediction),
                'probability': risk_probability,
                'probability_percentage': risk_percentage,
                'risk_level': risk_level,
                'recommendation': recommendation,
                'input_features': validated_data,
                'timestamp': datetime.now().isoformat()
            }

            print(f"✅ Prediction made: {prediction}, Probability: {risk_probability:.4f}")

            return jsonify(response)

        except Exception as e:
            print(f"❌ Prediction error: {e}")
            traceback.print_exc()
            return jsonify({
                'status': 'error',
                'message': f'Prediction error: {str(e)}'
            }), 500

    except Exception as e:
        print(f"❌ API error: {e}")
        traceback.print_exc()
        return jsonify({
            'status': 'error',
            'message': f'Internal server error: {str(e)}'
        }), 500

@app.route('/health', methods=['GET'])
def health_check():
    """
    Report service liveness and whether a model is currently loaded.

    Responds with 200 and a 'healthy' payload, or with 500 and an
    'unhealthy' payload if building the response fails.
    """
    try:
        if model is not None:
            model_status = 'loaded'
        else:
            model_status = 'not_loaded'

        return jsonify({
            'status': 'healthy',
            'service': 'CardioAI Prediction API',
            'model_status': model_status,
            'timestamp': datetime.now().isoformat()
        })

    except Exception as e:
        unhealthy = {
            'status': 'unhealthy',
            'error': str(e)
        }
        return jsonify(unhealthy), 500

@app.errorhandler(404)
def not_found(error):
    """
    Return a JSON error body with status 404 for unknown endpoints.
    """
    body = {'status': 'error', 'message': 'Endpoint not found'}
    return jsonify(body), 404

@app.errorhandler(405)
def method_not_allowed(error):
    """
    Return a JSON error body with status 405 for disallowed HTTP methods.
    """
    body = {'status': 'error', 'message': 'Method not allowed'}
    return jsonify(body), 405

@app.errorhandler(500)
def internal_error(error):
    """
    Return a generic JSON error body with status 500.
    """
    body = {'status': 'error', 'message': 'Internal server error'}
    return jsonify(body), 500

def init_app():
    """
    Print the startup banner, load the model, and list the API endpoints.

    Finishes by printing whether the model ended up loaded.
    """
    rule = "=" * 60
    print(rule)
    print("🚀 CardioAI - Cardiovascular Disease Prediction API")
    print(rule)

    # Load the model once at startup so the first request does not pay for it.
    load_model()

    print("\n📡 API Endpoints:")
    endpoint_lines = (
        "   GET  /                 - Prediction web interface",
        "   GET  /api/model_info   - Get model information",
        "   POST /predict_cardio   - Make prediction",
        "   GET  /health           - Health check",
    )
    for line in endpoint_lines:
        print(line)

    status_label = 'Loaded' if model is not None else 'Not loaded'
    print(f"\n📊 Model status: {status_label}")

if __name__ == '__main__':
    # Initialize the app
    init_app()

    # The Werkzeug interactive debugger lets anyone who can reach the server
    # execute arbitrary Python, so it must never be combined with a public
    # bind address. Debug mode is therefore opt-in (FLASK_DEBUG=1) and, when
    # enabled, the server listens on loopback only.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').strip().lower() in ('1', 'true', 'yes', 'on')
    host = '127.0.0.1' if debug_enabled else '0.0.0.0'
    port = 8080

    # Run the Flask app; print the address that is actually bound.
    print(f"\n🌍 Starting server on http://{host}:{port}")
    print("   Press Ctrl+C to stop\n")

    app.run(
        host=host,
        port=port,
        debug=debug_enabled,
        threaded=True
    )
else:
    # For WSGI deployment: the module is imported rather than executed, so
    # initialize (and load the model) at import time.
    init_app()