# -*- coding: utf-8 -*-
"""
StockTeller Predictor 📊

This script provides an end-to-end system for stock price prediction and analysis
using real-time stock market data via the Yahoo Finance API (yfinance). It includes
data preprocessing, model training (Linear Regression, Random Forest, and SVR),
future price prediction, performance evaluation, alerts on predicted movements,
and optional export of metrics.

Modules Used:
- yfinance: for fetching stock data
- sklearn: for ML model training and evaluation
- pandas, numpy: for data manipulation
- matplotlib, seaborn: for data visualization
- datetime: for handling future prediction dates

Workflow:
1. Fetch stock data from Yahoo Finance for the last 60 days.
2. Preprocess the data and set up training/testing sets.
3. Train multiple ML models.
4. Predict stock prices for the next 5 days.
5. Evaluate model performance using R², MAE, and RMSE.
6. Optionally export results to CSV/Excel.
7. Alert if rise/drop/stable is expected.
8. Visualize predictions using matplotlib.

Author: Sneha Jha
"""

import yfinance as yf  # Yahoo Finance API for real-time financial data
import pandas as pd  # For data manipulation and handling
import numpy as np  # For numerical operations
from sklearn.linear_model import LinearRegression  # Linear Regression model
from sklearn.ensemble import RandomForestRegressor  # Random Forest model
from sklearn.svm import SVR  # Support Vector Regressor
from sklearn.metrics import (
    r2_score,
    mean_absolute_error,
    mean_squared_error,
)  # Model evaluation metrics
import matplotlib.pyplot as plt  # For plotting results
import seaborn as sns  # Optional: for advanced visualizations
import os  # For file path operations
from datetime import datetime, timedelta  # For handling future dates
from sklearn.model_selection import train_test_split  # For splitting dataset


# 1. Fetch Real-Time Data
def fetch_realtime_data(stock_symbol, period="60d"):
    print(f"\U0001f5d5️ Fetching real-time data for {stock_symbol}...")
    df = yf.download(
        stock_symbol, period=period, interval="1d", progress=False
    )  # Download daily data
    df = df.reset_index()  # Reset index to bring 'Date' as a column
    return df


# 2. Preprocess Data
def preprocess_data(df):
    df = df.copy()
    df["Date"] = pd.to_datetime(df["Date"])  # Ensure 'Date' column is datetime
    df.sort_values("Date", inplace=True)  # Sort by date
    df.dropna(inplace=True)  # Remove missing values

    df["Target"] = df["Close"].shift(
        -1
    )  # Set the next day's closing price as the prediction target
    df.dropna(inplace=True)

    features = [
        "Open",
        "High",
        "Low",
        "Close",
        "Volume",
    ]  # Selected features for prediction
    X = df[features].values  # Features as input
    y = df["Target"].values  # Target as output

    # Split data into training and testing sets (80-20 split)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    return df, X_train, X_test, y_train, y_test


# 3. Train Multiple Models
def train_models(X_train, y_train):
    # Dictionary of models to train
    models = {
        "Linear Regression": LinearRegression(),
        "Random Forest": RandomForestRegressor(n_estimators=100, random_state=42),
        "SVR": SVR(kernel="rbf"),
    }

    # Train each model
    for name, model in models.items():
        model.fit(X_train, y_train.ravel())
    return models


# 4. Predict Future Prices
def predict_future_prices(df, models, days_ahead=5):
    # Use the last row of features as the starting input
    last_known = (
        df[["Open", "High", "Low", "Close", "Volume"]].values[-1].reshape(1, -1)
    )

    predictions = {name: [] for name in models}  # Store predictions for each model
    future_dates = []  # Store future dates for plotting

    current_input = last_known.copy()
    for i in range(days_ahead):
        for name, model in models.items():
            predicted = model.predict(current_input)[0]  # Predict next day
            predictions[name].append(predicted)

        # Use predicted close as the next day's OHLC input
        next_day = current_input.flatten()
        next_day[0] = next_day[1] = next_day[2] = next_day[3] = predicted
        current_input = next_day.reshape(1, -1)

        # Generate the next date
        next_date = df["Date"].iloc[-1] + timedelta(days=i + 1)
        future_dates.append(next_date)

    return future_dates, predictions


# 5. Evaluate Models
def evaluate_models(models, X_test, y_test):
    scores = []
    for name, model in models.items():
        y_pred = model.predict(X_test)  # Predict using test set
        r2 = r2_score(y_test, y_pred)  # R² Score
        mae = mean_absolute_error(y_test, y_pred)  # Mean Absolute Error
        rmse = np.sqrt(mean_squared_error(y_test, y_pred))  # Root Mean Square Error

        # Add scores to the list
        scores.append(
            {
                "Model": name,
                "R2 Score": round(r2, 3),
                "MAE": round(mae, 3),
                "RMSE": round(rmse, 3),
            }
        )

    return pd.DataFrame(scores)


# 6. Export Metrics Table
def export_metrics_table(df_metrics):
    choice = (
        input(
            "\n\U0001f4c2 Do you want to export the model comparison table?\nType 'csv' for CSV, 'excel' for Excel, or 'no' to skip: "
        )
        .strip()
        .lower()
    )

    if choice == "csv":
        filename = (
            input("Enter filename (default: model_metrics.csv): ").strip()
            or "model_metrics.csv"
        )
        if not filename.endswith(".csv"):
            filename += ".csv"
        df_metrics.to_csv(filename, index=False)
        print(f"✅ CSV exported to: {os.path.abspath(filename)}")

    elif choice == "excel":
        filename = (
            input("Enter filename (default: model_metrics.xlsx): ").strip()
            or "model_metrics.xlsx"
        )
        if not filename.endswith(".xlsx"):
            filename += ".xlsx"
        df_metrics.to_excel(filename, index=False)
        print(f"✅ Excel file exported to: {os.path.abspath(filename)}")

    else:
        print("⏭️ Skipped exporting.")


# 7. Alert on Stock Movement
def alert_stock_movement(df, predictions):
    print("\n\U0001f514 Stock Alerts:")
    for model, values in predictions.items():
        change = (
            values[-1] - df["Close"].iloc[-1]
        )  # Compare last prediction to last actual close
        if isinstance(change, np.ndarray):
            change = change.item()

        # Define movement thresholds
        direction = (
            "\U0001f4c8 Rise Expected"
            if change > 1
            else "\U0001f4c9 Drop Expected" if change < -1 else "\U0001f500 Stable"
        )
        print(f"{model}: {direction} ({change:.2f} change)")


# 8. Visualize Predictions
def visualize_predictions(df, future_dates, predictions):
    plt.figure(figsize=(10, 5))
    for model, values in predictions.items():
        plt.plot(future_dates, values, label=model)  # Plot each model’s prediction line

    last_close = df["Close"].iloc[-1]
    if isinstance(last_close, pd.Series):
        last_close = last_close.iloc[0]

    plt.axhline(
        y=last_close, color="gray", linestyle="--", label="Last Close Price"
    )  # Reference line
    plt.title("\U0001f4ca Future Stock Price Predictions")
    plt.xlabel("Date")
    plt.ylabel("Predicted Close Price")
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.show()