# Enhanced PatchTST Stock Classifier with 3-class Labels (SELL, HOLD, BUY)
# + Class Weight & K-Fold Cross-Validation
#
# Pipeline implemented in this file:
#   1. download OHLCV data and derive technical features,
#   2. label every bar SELL / HOLD / BUY from the next HORIZON closes,
#   3. train a PatchTST classifier with stratified K-fold cross-validation,
#   4. use PatchTST hidden states as observations for a PPO trading agent,
#   5. expose `predict` for single-date inference.
import inspect
import os
import pickle
import warnings
from datetime import timedelta

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import talib
import torch
import torch.nn as nn
import torch.nn.functional as F
import yfinance as yf
from datasets import Dataset
from gymnasium import Env, spaces
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.utils.class_weight import compute_class_weight
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
from transformers import (
    EarlyStoppingCallback,
    PatchTSTConfig,
    PatchTSTForClassification,
    Trainer,
    TrainingArguments,
)

warnings.filterwarnings("ignore")

# ===== CONFIG =====
CONTEXT_LENGTH = 48
HORIZON = 6
LABEL_COLUMN = "action"
LABEL_NAMES = ['SELL', 'HOLD', 'BUY']
N_SPLITS = 5  # K-Fold Cross Validation

# Artifacts shared between training (`main`) and inference (`predict`).
BEST_MODEL_DIR = "./checkpoints/fold3/best_model"
PPO_MODEL_PATH = "ppo_stock_trading_model"
SCALER_PATH = "feature_scaler.pkl"

# Raw columns that are never fed to the model as features.
NON_FEATURE_COLUMNS = ["Date", LABEL_COLUMN, "Open", "High", "Low", "Close", "Volume"]


class StockTradingEnv(Env):
    """Gymnasium environment that rewards actions matching future price moves.

    Each step shows one feature vector. The reward compares the chosen action
    (0=SELL, 1=HOLD, 2=BUY) with the largest gain / loss observed over the
    next ``horizon`` prices.

    Args:
        features: 2-D array, one observation per row.
        labels: Stored on the instance but not used by the reward logic.
        prices: Price per step, positionally aligned with ``features``.
        buy_th: Minimum relative gain for BUY to be rewarded.
        sell_th: Minimum relative loss for SELL to be rewarded.
        horizon: Number of future prices inspected per step.
    """

    def __init__(self, features, labels, prices, buy_th=0.02, sell_th=0.02,
                 horizon=HORIZON):
        super().__init__()
        # The observation space is declared float32, so store features that way.
        self.features = np.asarray(features, dtype=np.float32)
        self.labels = labels
        # Plain ndarray so that indexing is always positional.
        self.prices = np.asarray(prices, dtype=np.float64)
        self.buy_th = buy_th
        self.sell_th = sell_th
        self.horizon = horizon
        self.current_step = 0
        self.action_space = spaces.Discrete(3)  # 0=SELL, 1=HOLD, 2=BUY
        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf,
            shape=(self.features.shape[1],), dtype=np.float32,
        )

    def reset(self, *, seed=None, options=None):
        """Rewind to the first step and return ``(observation, info)``."""
        super().reset(seed=seed)
        self.current_step = 0
        return self.features[self.current_step], {}

    def step(self, action):
        """Score ``action`` against the next ``horizon`` prices and advance.

        Returns:
            ``(next_obs, reward, terminated, truncated, info)``; ``truncated``
            is always False and ``info`` is always empty.
        """
        action = int(action)
        price_now = self.prices[self.current_step]
        future_prices = self.prices[
            self.current_step + 1:self.current_step + self.horizon + 1
        ]
        # Stop while a full look-ahead window is still available.
        done = self.current_step >= len(self.features) - self.horizon - 1

        reward = 0.0
        if len(future_prices) > 0:
            max_gain = (np.max(future_prices) - price_now) / price_now
            max_loss = (price_now - np.min(future_prices)) / price_now
            if action == 2 and max_gain >= self.buy_th:
                reward = 1.0
            elif action == 0 and max_loss >= self.sell_th:
                reward = 1.0
            elif action == 1 and max_gain < self.buy_th and max_loss < self.sell_th:
                reward = 0.5  # correct HOLD
            else:
                reward = -1.0

        self.current_step += 1
        if done:
            next_obs = np.zeros_like(self.features[0])
        else:
            next_obs = self.features[self.current_step]
        return next_obs, reward, done, False, {}


def train_ppo_on_patch_features(X_patch, y, price_series, total_timesteps=20000):
    """Train a PPO ``MlpPolicy`` agent on ``StockTradingEnv`` and return it."""
    env = DummyVecEnv([lambda: StockTradingEnv(X_patch, y, price_series)])
    model = PPO("MlpPolicy", env, verbose=1)
    model.learn(total_timesteps=total_timesteps)
    return model


class FocalLoss(nn.Module):
    """Multi-class focal loss with optional per-class weights.

    Args:
        alpha: Optional 1-D tensor of per-class weights.
        gamma: Focusing exponent applied to ``(1 - p_t)``.
        reduction: ``'mean'``, ``'sum'`` or ``'none'``.
    """

    def __init__(self, alpha=None, gamma=2.0, reduction='mean'):
        super().__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.reduction = reduction

    def forward(self, logits, targets):
        """Return the focal loss for ``logits`` (N, C) and ``targets`` (N,)."""
        log_probs = F.log_softmax(logits, dim=-1)
        log_pt = log_probs.gather(1, targets.unsqueeze(1)).squeeze(1)
        # p_t must come from the UNWEIGHTED log-probability; deriving it from
        # a class-weighted cross-entropy would yield p_t ** weight instead.
        pt = log_pt.exp()
        loss = -((1 - pt) ** self.gamma) * log_pt
        if self.alpha is not None:
            alpha = self.alpha.to(device=logits.device, dtype=logits.dtype)
            loss = alpha[targets] * loss

        if self.reduction == 'mean':
            return loss.mean()
        if self.reduction == 'sum':
            return loss.sum()
        return loss


class CustomTrainer(Trainer):
    """``Trainer`` that optimises ``FocalLoss`` instead of the model's own loss."""

    def __init__(self, *args, focal_alpha=None, focal_gamma=2.0, **kwargs):
        super().__init__(*args, **kwargs)
        self.focal_loss_fn = FocalLoss(alpha=focal_alpha, gamma=focal_gamma)

    def compute_loss(self, model, inputs, return_outputs=False,
                     num_items_in_batch=None):
        """Compute the focal loss on ``prediction_logits``.

        ``num_items_in_batch`` is accepted because newer ``Trainer`` versions
        pass it; it is not used here.
        """
        # Work on a filtered copy so the caller's batch dict is left intact.
        model_inputs = {k: v for k, v in inputs.items() if k != "target_values"}
        labels = inputs["target_values"].long().view(-1)
        outputs = model(**model_inputs)
        loss = self.focal_loss_fn(outputs.prediction_logits, labels)
        return (loss, outputs) if return_outputs else loss


def plot_confusion_and_report(y_true, y_pred, labels):
    """Show a confusion-matrix heatmap and a per-class F1 bar chart.

    ``labels`` are display names; class ids are taken to be
    ``0 .. len(labels) - 1`` in that order.
    """
    class_ids = list(range(len(labels)))
    # Explicit `labels=` keeps the matrix / report aligned with the names
    # even when a class never occurs in y_true or y_pred.
    cm = confusion_matrix(y_true, y_pred, labels=class_ids)
    report = classification_report(
        y_true, y_pred, labels=class_ids, target_names=labels,
        output_dict=True, zero_division=0,
    )

    plt.figure(figsize=(6, 5))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=labels, yticklabels=labels)
    plt.title("Confusion Matrix")
    plt.xlabel("Predicted")
    plt.ylabel("True")
    plt.tight_layout()
    plt.show()

    f1_scores = [report[label]['f1-score'] for label in labels]
    plt.figure(figsize=(6, 4))
    sns.barplot(x=labels, y=f1_scores)
    plt.title("F1-Score per Class")
    plt.ylabel("F1 Score")
    plt.ylim(0, 1)
    plt.tight_layout()
    plt.show()


def download_stock_data(path="bbca_1h.csv", use_yfinance=False):
    """Load OHLCV data sorted by ``Date``.

    Args:
        path: CSV file used when ``use_yfinance`` is False.
        use_yfinance: Download daily BBCA.JK bars from Yahoo Finance instead.

    Returns:
        DataFrame sorted by ``Date`` with a fresh index, or the empty frame
        as-is when nothing was loaded.
    """
    if use_yfinance:
        df = yf.download("BBCA.JK", interval="1d", start="2024-01-01",
                         auto_adjust=True)
        # If the columns are a MultiIndex, keep only the top level.
        if isinstance(df.columns, pd.MultiIndex):
            df.columns = df.columns.get_level_values(0)
        df = df.reset_index()
    else:
        # NOTE(review): CSV schema (datetime/open/high/low/close/volume/symbol)
        # is taken from the previously commented-out loader -- confirm.
        df = pd.read_csv(path, parse_dates=["datetime"])
        df = df.rename(columns={
            "datetime": "Date", "open": "Open", "high": "High",
            "low": "Low", "close": "Close", "volume": "Volume",
        })
        df = df[df["symbol"] == "IDX:BBCA"].drop(columns=["symbol"])

    if df.empty:
        return df
    return df.sort_values("Date").reset_index(drop=True)


def extract_features(df):
    """Add technical-indicator columns to ``df`` and drop incomplete rows.

    Columns are added to ``df`` in place; the returned frame has the rows
    containing NaN removed and a fresh index.
    """
    df['return'] = df['Close'].pct_change()
    df['daily_momentum'] = df['Close'] / df['Open']
    # A zero denominator becomes NaN so the row is dropped rather than inf.
    df['range_efficiency'] = (
        abs(df['Close'] - df['Open']) / (df['High'] - df['Low']).replace(0, np.nan)
    )
    df['volume_momentum'] = (
        df['Volume'] / df['Volume'].rolling(5).mean().replace(0, np.nan)
    )
    df['adx'] = talib.ADX(df['High'], df['Low'], df['Close'], timeperiod=14) / 100.0
    df['rsi_scaled'] = talib.RSI(df['Close'], timeperiod=14) / 100.0
    return df.dropna().reset_index(drop=True)


def create_labels(df, buy_th=0.02, sell_th=0.02, horizon=HORIZON,
                  label_column='action'):
    """Label each row 0=SELL / 1=HOLD / 2=BUY from the next ``horizon`` closes.

    SELL is checked before BUY, so a row that meets both thresholds is SELL.
    The last ``horizon`` rows have no complete look-ahead and are labelled
    NaN. The column is written to ``df`` in place and ``df`` is returned.
    """
    close = df['Close'].to_numpy(dtype=float)
    n_rows = len(close)
    labels = np.full(n_rows, np.nan)
    for i in range(max(n_rows - horizon, 0)):
        base = close[i]
        future = close[i + 1:i + 1 + horizon]
        max_gain = (future.max() - base) / base
        max_loss = (base - future.min()) / base
        if max_loss >= sell_th:
            labels[i] = 0  # SELL
        elif max_gain >= buy_th:
            labels[i] = 2  # BUY
        else:
            labels[i] = 1  # HOLD
    df[label_column] = labels
    return df


def create_sliding_windows(df, features):
    """Build ``(X, y)`` where ``X[k]`` holds the CONTEXT_LENGTH rows before a label.

    The window for row ``i`` covers rows ``[i - CONTEXT_LENGTH, i)``, i.e. it
    excludes row ``i`` itself. Rows whose label is NaN are skipped.

    Returns:
        ``X`` of shape (n, CONTEXT_LENGTH, len(features)) and integer ``y``.
    """
    values = df[features].to_numpy()
    labels = df[LABEL_COLUMN].to_numpy(dtype=float)
    X, y = [], []
    for i in range(CONTEXT_LENGTH, len(df)):
        if np.isnan(labels[i]):
            continue
        X.append(values[i - CONTEXT_LENGTH:i])
        y.append(int(labels[i]))
    return np.array(X), np.array(y, dtype=np.int64)


def compute_metrics(eval_pred):
    """Return accuracy and macro precision / recall / F1 for the Trainer."""
    preds, labels = eval_pred
    if isinstance(preds, tuple):
        preds = preds[0]
    preds = np.argmax(preds, axis=1)
    # Explicit `labels=` avoids a ValueError when a class is absent from
    # both the references and the predictions of this evaluation set.
    report = classification_report(
        labels, preds, labels=list(range(len(LABEL_NAMES))),
        target_names=LABEL_NAMES, output_dict=True, zero_division=0,
    )
    macro = report['macro avg']
    return {
        'accuracy': accuracy_score(labels, preds),
        'f1_macro': macro['f1-score'],
        'precision_macro': macro['precision'],
        'recall_macro': macro['recall'],
    }


def train_model_with_cv(df, feature_columns):
    """Train one PatchTST classifier per stratified fold and save each one.

    Every fold's model is written to ``./checkpoints/fold{fold}/best_model``.

    NOTE(review): the scaler is fitted on all rows before splitting and the
    shuffled folds contain overlapping windows, so validation scores are
    probably optimistic -- consider a time-ordered split.

    Returns:
        List with the ``trainer.evaluate()`` dict of each fold.
    """
    df = df.copy()  # do not rescale the caller's frame
    scaler = StandardScaler()
    df[feature_columns] = scaler.fit_transform(df[feature_columns])
    X, y = create_sliding_windows(df, feature_columns)
    num_classes = len(LABEL_NAMES)

    # The argument was renamed from `evaluation_strategy` to `eval_strategy`
    # in newer transformers releases; use whichever this version accepts.
    arg_names = inspect.signature(TrainingArguments.__init__).parameters
    eval_key = "eval_strategy" if "eval_strategy" in arg_names else "evaluation_strategy"

    fold_results = []
    skf = StratifiedKFold(n_splits=N_SPLITS, shuffle=True, random_state=42)
    for fold, (train_idx, val_idx) in enumerate(skf.split(X, y)):
        print(f"\n==== Fold {fold+1}/{N_SPLITS} ====")
        train_dataset = Dataset.from_dict({
            "past_values": X[train_idx],
            "target_values": torch.tensor(y[train_idx], dtype=torch.long),
        })
        val_dataset = Dataset.from_dict({
            "past_values": X[val_idx],
            "target_values": torch.tensor(y[val_idx], dtype=torch.long),
        })

        # PatchTSTConfig names these `patch_stride`, `d_model`,
        # `num_hidden_layers` and `num_attention_heads`; other spellings are
        # not read by the model, which then falls back to its defaults.
        model = PatchTSTForClassification(PatchTSTConfig(
            num_input_channels=len(feature_columns),
            num_targets=num_classes,
            context_length=CONTEXT_LENGTH,
            patch_length=HORIZON,
            patch_stride=HORIZON,
            d_model=512,
            num_hidden_layers=4,
            num_attention_heads=16,
            use_cls_token=True,
        ))

        args = TrainingArguments(
            output_dir=f"./checkpoints/fold{fold}",
            save_strategy="epoch",
            learning_rate=5e-5,
            per_device_train_batch_size=32,
            per_device_eval_batch_size=32,
            num_train_epochs=50,
            load_best_model_at_end=True,
            metric_for_best_model="accuracy",
            report_to="none",
            label_names=["target_values"],
            weight_decay=0.01,
            warmup_ratio=0.1,
            lr_scheduler_type="cosine",
            **{eval_key: "epoch"},
        )

        # Always build a full-length weight vector: a class missing from this
        # fold keeps weight 1.0 so indexing by class id stays valid.
        present = np.unique(y[train_idx])
        class_weights = np.ones(num_classes, dtype=np.float32)
        class_weights[present] = compute_class_weight(
            'balanced', classes=present, y=y[train_idx]
        )

        trainer = CustomTrainer(
            model=model,
            args=args,
            train_dataset=train_dataset,
            eval_dataset=val_dataset,
            compute_metrics=compute_metrics,
            focal_alpha=torch.tensor(class_weights, dtype=torch.float32),
            focal_gamma=1.0,
            callbacks=[EarlyStoppingCallback(early_stopping_patience=5)],
        )
        trainer.train()
        eval_result = trainer.evaluate()
        trainer.save_model(f"./checkpoints/fold{fold}/best_model")
        print("Eval result:", eval_result)
        fold_results.append(eval_result)
    return fold_results


def extract_patch_features(model, X, batch_size=64):
    """Run ``model.base_model`` over ``X`` and return last-layer hidden states.

    For every sample this keeps index 0 along the second axis of the last
    hidden state. Callers in this file index the result again with
    ``[:, 0, :]``, so the returned array is 3-D.
    NOTE(review): presumably the hidden state is (batch, channels, tokens,
    d_model), making this "channel 0, all tokens" -- confirm against the
    PatchTST model output documentation.

    Raises:
        ValueError: If ``X`` is empty.
    """
    if len(X) == 0:
        raise ValueError("X must contain at least one window.")
    model.eval()
    device = next(model.parameters()).device
    chunks = []
    with torch.no_grad():
        for start in range(0, len(X), batch_size):  # batch processing
            batch = torch.as_tensor(
                X[start:start + batch_size], dtype=torch.float32, device=device
            )
            outputs = model.base_model(
                past_values=batch,
                output_hidden_states=True,
                return_dict=True,
            )
            chunks.append(outputs.hidden_states[-1][:, 0, :].cpu())
    return torch.cat(chunks, dim=0).numpy()


def evaluate_ppo_model(ppo_model, features, labels, prices,
                       starting_balance=10000, verbose=True):
    """Replay ``ppo_model`` over the data with a simple long-only simulation.

    BUY opens a position when none is open; SELL closes an open position and
    compounds its return into the balance; HOLD does nothing.
    NOTE(review): a position still open at the end is never closed, so its
    unrealised profit or loss is not reflected in the final balance.

    Returns:
        Dict with ``final_balance``, ``roi``, ``total_reward``, ``actions``
        and ``price_over_time`` (the balance after each step).
    """
    env = StockTradingEnv(features, labels, prices)
    obs, _ = env.reset()
    done = False
    balance = starting_balance
    position = None  # {'type': 'BUY', 'price': float} while a trade is open
    rewards = []
    actions = []
    balance_history = []

    while not done:
        action, _ = ppo_model.predict(obs, deterministic=True)
        action = int(action)
        actions.append(action)
        current_price = prices[env.current_step]

        # Execute trading logic (simplified)
        if action == 2:  # BUY
            if position is None:
                position = {'type': 'BUY', 'price': current_price}
        elif action == 0:  # SELL
            if position and position['type'] == 'BUY':
                # Sell and take profit/loss
                profit = (current_price - position['price']) / position['price']
                balance *= (1 + profit)
                position = None
        # HOLD does nothing

        obs, reward, done, _, _ = env.step(action)
        rewards.append(reward)
        balance_history.append(balance)

    final_reward = sum(rewards)
    roi = (balance - starting_balance) / starting_balance

    if verbose:
        print(f"Initial Capital: ${starting_balance:.2f}")
        print(f"Final Capital: ${balance:.2f}")
        print(f"Total ROI: {roi*100:.2f}%")
        print(f"Total Reward: {final_reward}")
        print(f"Total Actions: {len(actions)}")

        plt.figure(figsize=(10, 4))
        plt.plot(balance_history, label="Capital over Time")
        plt.title("PPO Trading Simulation")
        plt.ylabel("Capital ($)")
        plt.xlabel("Steps")
        plt.grid(True)
        plt.legend()
        plt.tight_layout()
        plt.show()

    return {
        "final_balance": balance,
        "roi": roi,
        "total_reward": final_reward,
        "actions": actions,
        "price_over_time": balance_history,
    }


def _load_feature_scaler(path=SCALER_PATH):
    """Return the scaler saved by ``main``, or None when the file is missing."""
    if not os.path.exists(path):
        return None
    # The file is written by this script; do not point this at untrusted data.
    with open(path, "rb") as fh:
        return pickle.load(fh)


def main():
    """Run the full pipeline: data, PatchTST CV training, PPO training, evaluation.

    NOTE(review): the PPO agent is evaluated on the same data it was trained
    on, so the reported ROI is an in-sample figure.
    """
    df = download_stock_data(use_yfinance=True)  # Change to False if using local CSV
    if df.empty:
        print("No data!")
        return None
    df = extract_features(df)
    df = create_labels(df)
    print(df[LABEL_COLUMN].value_counts(normalize=True))

    feature_cols = [col for col in df.columns if col not in NON_FEATURE_COLUMNS]
    train_model_with_cv(df, feature_cols)

    scaler = StandardScaler()
    df[feature_cols] = scaler.fit_transform(df[feature_cols])
    # Persist the scaler so `predict` can normalise with training statistics.
    with open(SCALER_PATH, "wb") as fh:
        pickle.dump(scaler, fh)
    X, y = create_sliding_windows(df, feature_cols)

    patch_model = PatchTSTForClassification.from_pretrained(BEST_MODEL_DIR)
    patch_model.eval()

    patch_features = extract_patch_features(patch_model, X)
    X_patch_final = patch_features[:, 0, :]  # take the first (CLS) token

    n_windows = len(X_patch_final)
    y_aligned = y[:n_windows]
    # Window k is labelled by row CONTEXT_LENGTH + k, and NaN labels only
    # occur in the final HORIZON rows, so the matching closes start at
    # CONTEXT_LENGTH (taking the LAST n_windows closes would shift prices
    # HORIZON rows into the future relative to the features).
    close_aligned = df["Close"].values[CONTEXT_LENGTH:CONTEXT_LENGTH + n_windows]

    print("patch_features:", patch_features.shape)
    print("X_patch_final:", X_patch_final.shape)
    print("y:", y_aligned.shape)
    print("close:", close_aligned.shape)

    ppo_model = train_ppo_on_patch_features(X_patch_final, y_aligned, close_aligned)
    ppo_model.save(PPO_MODEL_PATH)
    ppo_model = PPO.load(PPO_MODEL_PATH)

    return evaluate_ppo_model(
        ppo_model,
        X_patch_final,
        y_aligned,
        close_aligned,
        starting_balance=2000,
    )


def predict(endDate: str, symbol="BBCA.JK", use_yfinance=True):
    """Predict SELL / HOLD / BUY for ``symbol`` on ``endDate``.

    Args:
        endDate: Date string parseable by ``pd.to_datetime``.
        symbol: Yahoo Finance ticker.
        use_yfinance: Accepted for interface compatibility; data is always
            downloaded from Yahoo Finance.

    Returns:
        Dict with ``symbol``, ``clicked_date``, ``action_code``,
        ``action_label`` and ``close_price``; or ``{"error": ...}`` when the
        date is missing or there is not enough history.
    """
    # 1. Load models
    patch_model = PatchTSTForClassification.from_pretrained(BEST_MODEL_DIR)
    ppo_model = PPO.load(PPO_MODEL_PATH)

    # 2. Set the end date and fetch enough history before it
    end_date = pd.to_datetime(endDate)
    start_date = end_date - timedelta(days=CONTEXT_LENGTH * 5)
    df = yf.download(
        symbol,
        start=start_date.strftime('%Y-%m-%d'),
        end=(end_date + timedelta(days=1)).strftime('%Y-%m-%d'),
        interval="1d",
        auto_adjust=True,
    )
    # Check emptiness first: an empty frame may not have a "Date" index.
    if df.empty or len(df) < CONTEXT_LENGTH:
        return {"error": "Not enough data for prediction."}

    if isinstance(df.columns, pd.MultiIndex):
        df.columns = df.columns.get_level_values(0)
    df = df.reset_index()
    df = df.sort_values("Date").reset_index(drop=True)
    df = extract_features(df)

    # Check that endDate is present; compare as Series on calendar days so
    # that timezone-aware or intraday timestamps cannot break the match.
    dates = pd.to_datetime(df["Date"])
    if dates.dt.tz is not None:
        dates = dates.dt.tz_localize(None)
    matches = np.flatnonzero((dates.dt.normalize() == end_date.normalize()).to_numpy())
    if len(matches) == 0:
        return {"error": f"endDate {endDate} not found in data."}

    # Position of endDate in df (index was reset, so label == position)
    target_idx = int(matches[0])
    if target_idx < CONTEXT_LENGTH:
        return {"error": "Not enough candles before endDate."}

    # 3. Take the window of rows before endDate
    window_df = df.iloc[target_idx - CONTEXT_LENGTH:target_idx]
    feature_cols = [col for col in df.columns if col not in NON_FEATURE_COLUMNS]

    # 4. Normalise with the training scaler when available; fitting a new
    #    scaler on the 48-row window (the fallback) does not match training.
    scaler = _load_feature_scaler()
    if scaler is None:
        X_window = StandardScaler().fit_transform(window_df[feature_cols])
    else:
        X_window = scaler.transform(window_df[feature_cols])
    X_input = np.expand_dims(X_window, axis=0)

    # 5. Extract the PatchTST feature (first / CLS token)
    patch_feature = extract_patch_features(patch_model, X_input)[:, 0, :]

    # 6. PPO prediction
    action, _ = ppo_model.predict(patch_feature[0], deterministic=True)
    action = int(action)

    return {
        "symbol": symbol,
        "clicked_date": end_date.strftime("%Y-%m-%d"),
        "action_code": action,
        "action_label": LABEL_NAMES[action],
        "close_price": float(df.loc[target_idx, "Close"]),
    }


# Example usage:
# if __name__ == "__main__":
#     result = predict("2024-07-01", symbol="BBCA.JK")
#     print(result)