# Enhanced PatchTST Stock Classifier with 3-class Labels (SELL, HOLD, BUY) + Class Weight & K-Fold Cross-Validation

import yfinance as yf
import pandas as pd
import numpy as np
import talib
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.utils.class_weight import compute_class_weight
from sklearn.model_selection import StratifiedKFold
from datasets import Dataset
from transformers import (
    PatchTSTConfig, PatchTSTForClassification,
    Trainer, TrainingArguments, EarlyStoppingCallback
)
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings("ignore")

# ===== CONFIG =====
CONTEXT_LENGTH = 48
HORIZON = 6
LABEL_COLUMN = "action"
LABEL_NAMES = ['SELL', 'HOLD', 'BUY']
N_SPLITS = 5  # K-Fold Cross Validation


from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
from gymnasium import Env, spaces


class StockTradingEnv(Env):
    def __init__(self, features, labels, prices, buy_th=0.02, sell_th=0.02):
        super().__init__()
        self.features = features
        self.labels = labels
        self.prices = prices
        self.buy_th = buy_th
        self.sell_th = sell_th
        self.current_step = 0
        self.action_space = spaces.Discrete(3)  # 0=SELL, 1=HOLD, 2=BUY
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(features.shape[1],), dtype=np.float32)

    def reset(self, *, seed=None, options=None):
        super().reset(seed=seed)
        self.current_step = 0
        return self.features[self.current_step], {}


    def step(self, action):
        price_now = self.prices[self.current_step]
        future_prices = self.prices[self.current_step+1:self.current_step+HORIZON+1]
        done = self.current_step >= len(self.features) - HORIZON - 1

        reward = 0
        if len(future_prices) > 0:
            future_max = np.max(future_prices)
            future_min = np.min(future_prices)
            max_gain = (future_max - price_now) / price_now
            max_loss = (price_now - future_min) / price_now

            if action == 2 and max_gain >= self.buy_th:
                reward = +1
            elif action == 0 and max_loss >= self.sell_th:
                reward = +1
            elif action == 1 and (max_gain < self.buy_th and max_loss < self.sell_th):
                reward = +0.5  # HOLD benar
            else:
                reward = -1

        self.current_step += 1
        next_obs = self.features[self.current_step] if not done else np.zeros_like(self.features[0])
        return next_obs, reward, done, False, {}

def train_ppo_on_patch_features(X_patch, y, price_series):
    env = DummyVecEnv([lambda: StockTradingEnv(X_patch, y, price_series)])
    model = PPO("MlpPolicy", env, verbose=1)
    model.learn(total_timesteps=20000)
    return model


class FocalLoss(nn.Module):
    def __init__(self, alpha=None, gamma=2.0, reduction='mean'):
        super().__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.reduction = reduction

    def forward(self, logits, targets):
        ce_loss = F.cross_entropy(logits, targets, reduction='none', weight=self.alpha)
        pt = torch.exp(-ce_loss)
        loss = (1 - pt) ** self.gamma * ce_loss
        return loss.mean() if self.reduction == 'mean' else loss.sum()

class CustomTrainer(Trainer):
    def __init__(self, *args, focal_alpha=None, focal_gamma=2.0, **kwargs):
        super().__init__(*args, **kwargs)
        self.focal_loss_fn = FocalLoss(alpha=focal_alpha, gamma=focal_gamma)

    def compute_loss(self, model, inputs, return_outputs=False):
        labels = inputs.pop("target_values").long()
        outputs = model(**inputs)
        logits = outputs.prediction_logits
        if self.focal_loss_fn.alpha is not None:
            self.focal_loss_fn.alpha = self.focal_loss_fn.alpha.to(logits.device)
        loss = self.focal_loss_fn(logits, labels)
        return (loss, outputs) if return_outputs else loss

def plot_confusion_and_report(y_true, y_pred, labels):
    cm = confusion_matrix(y_true, y_pred)
    report = classification_report(y_true, y_pred, target_names=labels, output_dict=True)

    plt.figure(figsize=(6, 5))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=labels, yticklabels=labels)
    plt.title("Confusion Matrix")
    plt.xlabel("Predicted")
    plt.ylabel("True")
    plt.tight_layout()
    plt.show()

    f1_scores = [report[label]['f1-score'] for label in labels]
    plt.figure(figsize=(6, 4))
    sns.barplot(x=labels, y=f1_scores)
    plt.title("F1-Score per Class")
    plt.ylabel("F1 Score")
    plt.ylim(0, 1)
    plt.tight_layout()
    plt.show()

def download_stock_data(path="bbca_1h.csv", use_yfinance=False):
    if use_yfinance:
        df = yf.download("BBCA.JK", interval="1d", start="2024-01-01", auto_adjust=True)
        # Cek jika multi-index, drop level atas
        if isinstance(df.columns, pd.MultiIndex):
            df.columns = df.columns.get_level_values(0)
        df = df.reset_index().rename(columns={"Date": "Date"})
    # else:
        # df = pd.read_csv(path, parse_dates=["datetime"])
        # df = df.rename(columns={
        #     "datetime": "Date", "open": "Open", "high": "High",
        #     "low": "Low", "close": "Close", "volume": "Volume"
        # })
        # df = df[df["symbol"] == "IDX:BBCA"].drop(columns=["symbol"])

    return df.sort_values("Date").reset_index(drop=True)

def extract_features(df):
    df['return'] = df['Close'].pct_change()
    df['daily_momentum'] = df['Close'] / df['Open']
    df['range_efficiency'] = abs(df['Close'] - df['Open']) / (df['High'] - df['Low']).replace(0, np.nan)
    df['volume_momentum'] = df['Volume'] / df['Volume'].rolling(5).mean().replace(0, np.nan)
    df['adx'] = talib.ADX(df['High'], df['Low'], df['Close'], timeperiod=14) / 100.0
    df['rsi_scaled'] = talib.RSI(df['Close'], timeperiod=14) / 100.0
    return df.dropna().reset_index(drop=True)

def create_labels(df, buy_th=0.02, sell_th=0.02, horizon=HORIZON, label_column='action'):
    labels = []
    for i in range(len(df)):
        if i > len(df) - horizon - 1:
            labels.append(np.nan)
            continue
        base = df['Close'].iloc[i]
        future = df['Close'].iloc[i+1:i+1+horizon]
        max_gain = (np.max(future) - base) / base
        max_loss = (base - np.min(future)) / base
        if max_loss >= sell_th:
            labels.append(0)  # SELL
        elif max_gain >= buy_th:
            labels.append(2)  # BUY
        else:
            labels.append(1)  # HOLD
    df[label_column] = labels
    return df

def create_sliding_windows(df, features):
    X, y = [], []
    for i in range(CONTEXT_LENGTH, len(df)):
        label = df[LABEL_COLUMN].iloc[i]
        if np.isnan(label): continue
        X.append(df[features].iloc[i - CONTEXT_LENGTH:i].values)
        y.append(label)
    return np.array(X), np.array(y)

def compute_metrics(eval_pred):
    preds, labels = eval_pred
    preds = np.argmax(preds, axis=1)
    report = classification_report(labels, preds, target_names=LABEL_NAMES, output_dict=True)
    return {
        'accuracy': accuracy_score(labels, preds),
        'f1_macro': report['macro avg']['f1-score'],
        'precision_macro': report['macro avg']['precision'],
        'recall_macro': report['macro avg']['recall']
    }

def train_model_with_cv(df, feature_columns):
    scaler = StandardScaler()
    df[feature_columns] = scaler.fit_transform(df[feature_columns])
    X, y = create_sliding_windows(df, feature_columns)
    skf = StratifiedKFold(n_splits=N_SPLITS, shuffle=True, random_state=42)

    for fold, (train_idx, val_idx) in enumerate(skf.split(X, y)):
        print(f"\n==== Fold {fold+1}/{N_SPLITS} ====")

        train_dataset = Dataset.from_dict({"past_values": X[train_idx], "target_values": torch.tensor(y[train_idx], dtype=torch.long)})
        val_dataset = Dataset.from_dict({"past_values": X[val_idx], "target_values": torch.tensor(y[val_idx], dtype=torch.long)})

        model = PatchTSTForClassification(PatchTSTConfig(
            num_input_channels=len(feature_columns),
            num_targets=3,
            context_length=CONTEXT_LENGTH,
            patch_length=HORIZON,
            stride=HORIZON,
            embedding_dim=512,
            num_layers=4,
            num_heads=16,
            use_cls_token=True
        ))

        args = TrainingArguments(
            output_dir=f"./checkpoints/fold{fold}",
            evaluation_strategy="epoch",
            save_strategy="epoch",
            learning_rate=5e-5,
            per_device_train_batch_size=32,
            per_device_eval_batch_size=32,
            num_train_epochs=50,
            load_best_model_at_end=True,
            metric_for_best_model="accuracy",
            report_to="none",
            label_names=["target_values"],
            weight_decay=0.01,
            warmup_ratio=0.1,
            lr_scheduler_type="cosine"
        )

        class_weights = compute_class_weight('balanced', classes=np.unique(y[train_idx]), y=y[train_idx])

        trainer = CustomTrainer(
            model=model,
            args=args,
            train_dataset=train_dataset,
            eval_dataset=val_dataset,
            compute_metrics=compute_metrics,
            focal_alpha=torch.tensor(class_weights, dtype=torch.float32),
            focal_gamma=1.0,
            callbacks=[EarlyStoppingCallback(early_stopping_patience=5)]
        )

        trainer.train()
        eval_result = trainer.evaluate()
        predictions = trainer.predict(val_dataset)
        y_pred = predictions.predictions.argmax(axis=1)
        y_true = predictions.label_ids
        # plot_confusion_and_report(y_true, y_pred, LABEL_NAMES)
        trainer.save_model(f"./checkpoints/fold{fold}/best_model")
        print("Eval result:", eval_result)

def extract_patch_features(model, X):
    model.eval()
    features = []
    with torch.no_grad():
        for i in range(0, len(X), 64):  # batch processing
            batch = torch.tensor(X[i:i+64], dtype=torch.float32)
            outputs = model.base_model(
                past_values=batch,
                output_hidden_states=True,
                return_dict=True
            )
            # Ambil CLS token dari hidden state terakhir
            cls_tokens = outputs.hidden_states[-1][:, 0, :]  # [batch_size, hidden_dim]
            features.append(cls_tokens)
    return torch.cat(features, dim=0).numpy()

def evaluate_ppo_model(ppo_model, features, labels, prices, starting_balance=10000, verbose=True):
    env = StockTradingEnv(features, labels, prices)
    obs, _ = env.reset()
    done = False
    balance = starting_balance
    position = None  # {'type': 'BUY'/'SELL', 'price': float}
    rewards = []
    actions = []
    price_history = []

    while not done:
        action, _ = ppo_model.predict(obs, deterministic=True)
        actions.append(action)
        current_price = prices[env.current_step]

        # Execute trading logic (simplified)
        if action == 2:  # BUY
            if position is None:
                position = {'type': 'BUY', 'price': current_price}
        elif action == 0:  # SELL
            if position and position['type'] == 'BUY':
                # Sell and take profit/loss
                profit = (current_price - position['price']) / position['price']
                balance *= (1 + profit)
                position = None
        # HOLD does nothing

        obs, reward, done, _, _ = env.step(action)
        rewards.append(reward)
        price_history.append(balance)

    final_reward = sum(rewards)
    roi = (balance - starting_balance) / starting_balance

    if verbose:
        print(f"Initial Capital: ${starting_balance:.2f}")
        print(f"Final Capital:   ${balance:.2f}")
        print(f"Total ROI:       {roi*100:.2f}%")
        print(f"Total Reward:    {final_reward}")
        print(f"Total Actions:   {len(actions)}")

        plt.figure(figsize=(10, 4))
        plt.plot(price_history, label="Capital over Time")
        plt.title("PPO Trading Simulation")
        plt.ylabel("Capital ($)")
        plt.xlabel("Steps")
        plt.grid(True)
        plt.legend()
        plt.tight_layout()
        plt.show()

    return {
        "final_balance": balance,
        "roi": roi,
        "total_reward": final_reward,
        "actions": actions,
        "price_over_time": price_history,
    }


def main():
    df = download_stock_data(use_yfinance=True)  # Change to False if using local CSV
    if df.empty: return print("No data!")
    df = extract_features(df)
    df = create_labels(df)
    print(df[LABEL_COLUMN].value_counts(normalize=True))

    feature_cols = [col for col in df.columns if col not in ["Date", LABEL_COLUMN, "Open", "High", "Low", "Close", "Volume"]]
    train_model_with_cv(df, feature_cols)

    scaler = StandardScaler()
    df[feature_cols] = scaler.fit_transform(df[feature_cols])
    X, y = create_sliding_windows(df, feature_cols)

    patch_model = PatchTSTForClassification.from_pretrained("./checkpoints/fold3/best_model")
    patch_model.eval()

    patch_features = extract_patch_features(patch_model, X)
    X_patch_final = patch_features[:, 0, :]  # Ambil CLS token -> shape: (265, 128)
    y_aligned = y[:len(X_patch_final)]
    close_aligned = df["Close"].values[-len(X_patch_final):]
    print("patch_features:", patch_features.shape)
    print("X_patch_final:", X_patch_final.shape)
    print("y:", y_aligned.shape)
    print("close:", close_aligned.shape)
    ppo_model = train_ppo_on_patch_features(X_patch_final, y_aligned, close_aligned)

    ppo_model.save("ppo_stock_trading_model")
    ppo_model = PPO.load("ppo_stock_trading_model")
    results = evaluate_ppo_model(
    ppo_model,
    X_patch_final,
    y_aligned,
    close_aligned,
    starting_balance=2000
)


def predict(endDate: str, symbol="BBCA.JK", use_yfinance=True):
    from datetime import datetime, timedelta

    # 1. Load model
    patch_model = PatchTSTForClassification.from_pretrained("./checkpoints/fold3/best_model")
    ppo_model = PPO.load("ppo_stock_trading_model")

    # 2. Set end date dan ambil data cukup panjang ke belakang
    end_date = pd.to_datetime(endDate)
    start_date = end_date - timedelta(days=CONTEXT_LENGTH * 5)

    df = yf.download(symbol, start=start_date.strftime('%Y-%m-%d'),
                     end=(end_date + timedelta(days=1)).strftime('%Y-%m-%d'),
                     interval="1d", auto_adjust=True)

    if isinstance(df.columns, pd.MultiIndex):
        df.columns = df.columns.get_level_values(0)

    df = df.reset_index().rename(columns={"Date": "Date"})
    df = df.sort_values("Date").reset_index(drop=True)

    if df.empty or len(df) < CONTEXT_LENGTH:
        return {"error": "Not enough data for prediction."}

    df = extract_features(df)

    # Cek apakah endDate tersedia dalam df
    if end_date not in df["Date"].values:
        return {"error": f"endDate {endDate} not found in data."}

    # Cari index endDate dalam df
    target_idx = df[df["Date"] == end_date].index[0]

    if target_idx < CONTEXT_LENGTH:
        return {"error": "Not enough candles before endDate."}

    # 3. Ambil window data sebelum endDate
    window_df = df.iloc[target_idx - CONTEXT_LENGTH:target_idx]
    feature_cols = [col for col in df.columns if col not in ["Date", "Open", "High", "Low", "Close", "Volume", LABEL_COLUMN]]

    # 4. Normalize dan ubah ke format tensor
    scaler = StandardScaler()
    X_window = scaler.fit_transform(window_df[feature_cols])
    X_input = np.expand_dims(X_window, axis=0)

    # 5. Ekstrak fitur dari PatchTST (CLS token)
    patch_feature = extract_patch_features(patch_model, X_input)[:, 0, :]  # shape: (1, hidden_dim)

    # 6. PPO prediction
    action, _ = ppo_model.predict(patch_feature[0], deterministic=True)
    action_label = LABEL_NAMES[action]

    close_price = float(df.loc[target_idx, "Close"])

    return {
        "symbol": symbol,
        "clicked_date": end_date.strftime("%Y-%m-%d"),
        "action_code": int(action),
        "action_label": action_label,
        "close_price": close_price
    }


# if __name__ == "__main__":
#     result = predict("2024-07-01", symbol="BBCA.JK")
#     print(result)