"""Streamlit page that predicts employee retention and explains the result.

The page reads one employee row from MySQL, derives model features, runs a
classification model and a regression model, stores both predictions and the
per-feature SHAP values back into MySQL, and renders a SHAP waterfall plot
next to a short list of the most influential features.

NOTE(review): several ``st.markdown`` calls below carry blank strings in this
revision of the file. They presumably held HTML/CSS markup that is no longer
present - confirm against the deployed page and restore it where needed.
"""
import base64
import io
import json
import os
import pickle
import time

import matplotlib.pyplot as plt
import mysql.connector
import numpy as np
import pandas as pd
import shap
import streamlit as st
import streamlit.components.v1 as components
from catboost import Pool

REGRESSION_MODEL_PATH = 'regression_model_final_1year.sav'
CLASSIFICATION_MODEL_PATH = 'clasification_final_model_smote.sav'
FEATURE_EXPLANATION_PATH = "feature_explanation.json"

# Cut-off date used to derive age and tenure, and to stand in for a missing
# resignation date.
REFERENCE_DATE = "2024-10-31"

# Columns that are cast to ``str`` and declared as categorical to CatBoost.
CAT_FEATURES = [
    'departemen', 'position', 'domisili', 'marriage_stat',
    'job_satisfaction', 'performance_rating', 'education', 'jenis_kelamin',
]


def _load_pickle(path):
    """Unpickle and return the object stored at *path*, closing the file."""
    # NOTE: unpickling executes arbitrary code, so these files must only ever
    # come from a trusted location.
    with open(path, 'rb') as model_file:
        return pickle.load(model_file)


reg_model = _load_pickle(REGRESSION_MODEL_PATH)
class_model = _load_pickle(CLASSIFICATION_MODEL_PATH)

# Not read anywhere in this module; kept because other modules may import it.
train_file_path = 'X_train.csv'

# Maps a feature (column) name to the label shown in the factor summary.
with open(FEATURE_EXPLANATION_PATH, "r", encoding="utf-8") as f:
    feature_dict = json.load(f)


def _to_native(value):
    """Return *value* as a plain Python scalar when it is a numpy scalar.

    MySQL Connector/Python cannot bind numpy scalar types such as
    ``numpy.float64`` as query parameters, so they are unwrapped first.
    Any other value is returned unchanged.
    """
    return value.item() if isinstance(value, np.generic) else value


def connect_to_db():
    """Open a MySQL connection from the ``mysql`` section of Streamlit secrets.

    Returns:
        The open connection, or ``None`` when connecting fails (the error is
        reported in the UI with ``st.error``).
    """
    try:
        return mysql.connector.connect(
            host=st.secrets["mysql"]["host"],
            user=st.secrets["mysql"]["user"],
            password=st.secrets["mysql"]["password"],
            database=st.secrets["mysql"]["dbname"],
            port=st.secrets["mysql"]["port"],
        )
    except mysql.connector.Error as e:
        st.error(f"Koneksi ke database gagal: {e}")
        return None


def get_employee_data_from_db(employee_id):
    """Fetch one row of ``data_employee_db`` for *employee_id*.

    Args:
        employee_id: Value matched against the ``employee_id`` column.

    Returns:
        The first matching row as a dict, or ``None`` when no row matches,
        when the connection could not be opened, or when the query fails.
    """
    conn = connect_to_db()
    if conn is None:
        return None
    try:
        cursor = conn.cursor(dictionary=True)
        query = "SELECT * FROM data_employee_db WHERE employee_id = %s"
        cursor.execute(query, (employee_id,))
        return cursor.fetchone()
    except mysql.connector.Error as e:
        st.error(f"Terjadi kesalahan saat mengakses database: {e}")
        return None
    finally:
        conn.close()


def process_employee_data(df, end_date=REFERENCE_DATE):
    """Add the derived feature columns to *df* and return it.

    The frame is modified in place; the same object is returned.

    Args:
        df: Employee rows. Must contain ``date_of_birth``, ``join_date``,
            ``resign_date``, ``total_komp``, ``absent_90D``, ``income``,
            ``dependant``, ``avg_time_work``, ``job_satisfaction`` and
            ``performance_rating``.
        end_date: Cut-off date used for age and tenure, and substituted for a
            missing ``resign_date``. Anything ``pd.to_datetime`` accepts.

    Returns:
        *df* with the numeric columns coerced, the date columns parsed, the
        derived columns added, and the two rating columns mapped to labels.
    """
    numeric_columns = [
        "job_satisfaction", "performance_rating", "absent_90D", "income",
        "dependant", "avg_time_work", "total_komp",
    ]
    for col in numeric_columns:
        if col in df.columns:
            df[col] = pd.to_numeric(df[col], errors="coerce")

    end_date = pd.to_datetime(end_date)

    df["date_of_birth"] = pd.to_datetime(df["date_of_birth"], errors='coerce')
    df["age_years"] = (end_date - df["date_of_birth"]).dt.days // 365

    df["join_date"] = pd.to_datetime(df["join_date"])
    df["resign_date"] = pd.to_datetime(df["resign_date"])

    # Assign the filled column back instead of ``fillna(..., inplace=True)``
    # on ``df[col]``: the in-place form acts on a temporary under pandas
    # Copy-on-Write and leaves the frame untouched.
    df["resign_date"] = df["resign_date"].fillna(end_date)
    df["total_komp"] = df["total_komp"].fillna(0)
    df["absent_90D"] = df["absent_90D"].fillna(0)

    df["active_work"] = (df["resign_date"] - df["join_date"]).dt.days
    df["active_work_months"] = df["active_work"] // 30
    df["income_3_months"] = df["income"] * 3
    df["income_6_months"] = df["income"] * 6
    df["total_income_work"] = df["income"] * df["active_work_months"]
    # The +1 keeps the ratio finite for employees with zero dependants.
    df["income_dependant_ratio"] = df["income"] / (df["dependant"] + 1)
    df["work_efficiency"] = df["avg_time_work"] / 8

    job_satisfaction_mapping = {1.0: 'Low', 2.0: 'Medium', 3.0: 'High', 4.0: 'Very High'}
    df['job_satisfaction'] = df['job_satisfaction'].map(job_satisfaction_mapping)

    performance_rating_mapping = {1.0: 'Low', 2.0: 'Good', 3.0: 'Excellent', 4.0: 'Outstanding'}
    df['performance_rating'] = df['performance_rating'].map(performance_rating_mapping)

    return df


def get_image_as_base64(image_path):
    """Return the contents of the file at *image_path* as a base64 string."""
    with open(image_path, "rb") as img_file:
        return base64.b64encode(img_file.read()).decode("utf-8")


def navbar():
    """Render the top bar of the page."""
    # NOTE(review): ``logo_path`` is not referenced by the blank strings
    # below; presumably the missing markup embedded the logo - confirm.
    logo_path = os.path.join(os.path.dirname(__file__), "../asset/logo.png")

    st.markdown(f""" """, unsafe_allow_html=True)

    col1, col2 = st.columns([10, 1])
    with col1:
        st.markdown(f""" """, unsafe_allow_html=True)


def menu():
    """Render the three navigation buttons and switch page when one is clicked."""
    if "page" not in st.session_state:
        st.session_state["page"] = "Home"

    # Three equal columns keep the navigation buttons on one row.
    col2, col3, col4 = st.columns([1.5, 1.5, 1.5])

    # NOTE(review): presumably held the custom CSS for the buttons - confirm.
    st.markdown(""" """, unsafe_allow_html=True)

    with col2:
        if st.button("Prediksi", key="nav_prediksi"):
            st.switch_page("pages/Prediksi.py")
    with col3:
        if st.button("Dashboard", key="nav_dashboard"):
            st.switch_page("pages/Dashboard.py")
    with col4:
        if st.button("Laporan", key="nav_laporan"):
            st.switch_page("pages/Laporan.py")


def save_prediction_to_db(employee_id, hasil_prediksi_klasifikasi,
                          probabilitas_pred_klasifikasi, hasil_prediksi_regresi):
    """Insert one prediction row into ``history_prediction``.

    Args:
        employee_id: Employee the prediction belongs to.
        hasil_prediksi_klasifikasi: Classification label.
        probabilitas_pred_klasifikasi: Classification probability.
        hasil_prediksi_regresi: Regression output.

    Database errors are reported with ``st.error``; nothing is raised for
    them. If the connection cannot be opened the function does nothing more.
    """
    conn = connect_to_db()
    if conn is None:
        return
    try:
        cursor = conn.cursor()
        query = """ INSERT INTO history_prediction (employee_id, hasil_prediksi_klasifikasi, probabilitas_pred_klasifikasi, hasil_prediksi_regresi) VALUES (%s, %s, %s, %s) """
        # Model outputs arrive as numpy scalars, which the connector cannot
        # bind; unwrap them to native Python values first.
        params = tuple(
            _to_native(value)
            for value in (
                employee_id,
                hasil_prediksi_klasifikasi,
                probabilitas_pred_klasifikasi,
                hasil_prediksi_regresi,
            )
        )
        cursor.execute(query, params)
        conn.commit()  # make sure the insert is persisted
    except mysql.connector.Error as e:
        st.error(f"Terjadi kesalahan saat menyimpan ke database: {e}")
    finally:
        conn.close()


def save_shap_to_db_with_features(employee_id, shap_dict):
    """Insert the SHAP values of one prediction into ``shap_pred_result``.

    Args:
        employee_id: Employee the explanation belongs to.
        shap_dict: Mapping of feature name to SHAP value; stored as one JSON
            document.

    Database errors are reported with ``st.error``; nothing is raised for
    them. If the connection cannot be opened the function does nothing more.
    """
    conn = connect_to_db()
    if conn is None:
        return
    try:
        # Coerce to plain floats so numpy scalars of any width serialise.
        shap_values_json = json.dumps(
            {feature: float(value) for feature, value in shap_dict.items()}
        )
        query = """ INSERT INTO shap_pred_result (employee_id, shap_values) VALUES (%s, %s) """
        cursor = conn.cursor()
        cursor.execute(query, (employee_id, shap_values_json))
        conn.commit()
    except mysql.connector.Error as e:
        st.error(f"Terjadi kesalahan saat menyimpan SHAP values: {e}")
    finally:
        conn.close()


def _prepare_model_input(df, feature_names):
    """Return the model input frame and the categorical columns it contains.

    The requested columns are copied out of *df* so the string cast below
    never writes into a slice of the caller's frame.
    """
    features = df[list(feature_names)].copy()
    present_cat_features = [col for col in CAT_FEATURES if col in features.columns]
    for col in present_cat_features:
        features[col] = features[col].astype(str)
    return features, present_cat_features


def generate_shap_plot(X_test_class, explainer_class, shap_dict, predicted_class):
    """Render the SHAP waterfall plot and the top-5 factor summary.

    Args:
        X_test_class: Model input frame; only its first row is explained.
        explainer_class: SHAP explainer built for the classification model.
        shap_dict: Mapping of feature name to SHAP value, used to rank factors.
        predicted_class: Predicted class index (0 or 1); picks the explanation
            to plot when the explainer returns one per class.

    Any exception raised while plotting is reported with ``st.error``.
    """
    plt.close('all')
    try:
        shap_explanation = explainer_class(X_test_class.iloc[0:1])
        plt.figure(figsize=(4, 2), dpi=100)

        if isinstance(shap_explanation, list):
            use_positive = predicted_class == 1 and len(shap_explanation) > 1
            selected = shap_explanation[1][0] if use_positive else shap_explanation[0][0]
        else:
            selected = shap_explanation[0]
        # show=False keeps the figure current so it can be saved below.
        shap.plots.waterfall(selected, show=False)

        buf = io.BytesIO()
        plt.savefig(buf, format='png', bbox_inches="tight", dpi=100)
        buf.seek(0)

        col1, col2 = st.columns([1.5, 2])
        with col1:
            st.image(buf, caption="SHAP Waterfall Plot", use_container_width=True)
        with col2:
            top_factors = sorted(shap_dict.items(), key=lambda x: abs(x[1]), reverse=True)[:5]
            summary_list = [
                f"• {feature_dict.get(factor, factor)}: {X_test_class.iloc[0][factor]}"
                if factor in X_test_class.columns
                else f"• {feature_dict.get(factor, factor)}"
                for factor, _ in top_factors
            ]
            # Markdown collapses single newlines, so use an explicit HTML
            # line break to keep one factor per line.
            summary_text = "<br>".join(summary_list)
            st.markdown(
                f"""

Faktor Utama yang Mempengaruhi Prediksi:

{summary_text}

""",
                unsafe_allow_html=True,
            )
    except Exception as e:  # UI boundary: report the failure instead of crashing the page
        st.error(f"Error generating SHAP plot: {str(e)}")
    finally:
        plt.close()


def show_prediction():
    """Render the prediction page.

    Flow after the button is pressed: validate the ID, load the employee row,
    derive features, run both models, show and store the predictions, then
    compute, store and plot the SHAP values of the classification model.
    """
    navbar()
    menu()

    st.markdown(""" """, unsafe_allow_html=True)
    st.markdown(
        """

Masukkan ID Karyawan dan Lihat Hasilnya

""",
        unsafe_allow_html=True,
    )

    employee_id = st.text_input(
        "Masukkan ID Karyawan yang ingin dicek", placeholder="Contoh: EM12345"
    )

    if st.button("Lihat Hasil Prediksi"):
        # Surrounding whitespace from a copy-paste would make the lookup miss.
        employee_id = employee_id.strip()
        if not employee_id:
            st.error("Harap masukkan ID Karyawan terlebih dahulu.")
            return

        employee_data = get_employee_data_from_db(employee_id)
        if employee_data is None:
            st.error("ID Karyawan tidak ditemukan. Harap masukkan ID yang valid.")
            return

        df = process_employee_data(pd.DataFrame([employee_data]))

        X_test_class, cat_class = _prepare_model_input(df, class_model.feature_names_)
        X_test_reg, cat_reg = _prepare_model_input(df, reg_model.feature_names_)

        test_pool_class = Pool(data=X_test_class, cat_features=cat_class)
        test_pool_reg = Pool(data=X_test_reg, cat_features=cat_reg)

        classification_prob = class_model.predict_proba(test_pool_class)
        regression_result = reg_model.predict(test_pool_reg)

        # Class 1 is labelled 'Tidak Retensi', class 0 'Retensi'.
        predicted_class = 1 if classification_prob[0][1] > 0.5 else 0
        hasil_prediksi_retensi = 'Tidak Retensi' if predicted_class == 1 else 'Retensi'
        # Stored value: class-1 probability in percent, as a native float.
        probabilitas_pred_retensi = float(classification_prob[0][1]) * 100
        hasil_prediksi_regresi = round(float(regression_result[0]), 2)

        # NOTE(review): not referenced by the string below; presumably the
        # missing markup used it to colour the result box - confirm.
        warna_retensi = "green" if hasil_prediksi_retensi == "Retensi" else "red"

        # The figure shown here is the class-0 probability as a 0-1 fraction,
        # while the value stored above is the class-1 probability in percent.
        st.markdown(
            f"""

Prediksi Kemungkinan Retensi: {hasil_prediksi_retensi}

Probabilitas Kemungkinan Retensi: {classification_prob[0][0]:.2f}

Prediksi Durasi Kerja (bulan): {hasil_prediksi_regresi} bulan

""",
            unsafe_allow_html=True,
        )

        save_prediction_to_db(
            employee_id,
            hasil_prediksi_retensi,
            probabilitas_pred_retensi,
            hasil_prediksi_regresi,
        )

        # "tree_path_dependent" needs no background dataset, so none is loaded.
        explainer_class = shap.TreeExplainer(
            class_model, feature_perturbation="tree_path_dependent"
        )
        shap_values_class = explainer_class.shap_values(X_test_class)

        # The explainer may return one array per class (a list) or one array.
        if isinstance(shap_values_class, list) and len(shap_values_class) > 1:
            try:
                shap_values = shap_values_class[1 if predicted_class == 1 else 0][0]
            except IndexError:
                st.error("SHAP values list index out of range.")
                return
        elif not isinstance(shap_values_class, list):
            shap_values = shap_values_class[0]
        else:
            st.error("SHAP values tidak valid.")
            return

        shap_values_list = shap_values.flatten()
        feature_names = list(X_test_class.columns)

        if len(feature_names) != len(shap_values_list):
            st.error(
                f"Jumlah fitur ({len(feature_names)}) tidak sesuai dengan jumlah SHAP values ({len(shap_values_list)})."
            )
            return

        shap_dict = {
            feature: float(value)
            for feature, value in zip(feature_names, shap_values_list)
        }
        save_shap_to_db_with_features(employee_id, shap_dict)

        generate_shap_plot(X_test_class, explainer_class, shap_dict, predicted_class)

        # NOTE(review): placed inside the button branch because it follows the
        # plot call in the original statement order - confirm it was not
        # meant to render unconditionally.
        st.markdown(""" """, unsafe_allow_html=True)


if __name__ == "__main__":
    show_prediction()