first commit

2025-07-11 09:05:19 +07:00
parent 1506c2d72c
commit 572cd4c572
22 changed files with 4533 additions and 2226 deletions
--- a/clasification_final_model_smote.sav
+++ b/clasification_final_model_smote.sav
--- a/model/clasification_final_model_smote.sav
+++ b/model/clasification_final_model_smote.sav
--- a/model/clasification_model_smote.sav
+++ b/model/clasification_model_smote.sav
--- a/model/regression_model_1year.sav
+++ b/model/regression_model_1year.sav
--- a/model/regression_model_final_1year.sav
+++ b/model/regression_model_final_1year.sav
--- a/notebook/01_exploratory_data.ipynb
+++ b/notebook/01_exploratory_data.ipynb
@ -2,7 +2,33 @@
 "cells": [
  {
   "cell_type": "code",
-   "execution_count": 20,
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "-0.2470000000000001"
+      ]
+     },
+     "execution_count": 1,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "values = [-0.005, 0.003, -0.014, 0.003, -0.002, -0.014, -0.014, -0.005, -0.014, -0.014,\n",
+    "          -0.005, -0.005, -0.014, -0.014, -0.014, -0.014, -0.014, -0.014, -0.014, -0.014,\n",
+    "          -0.014, -0.014, -0.014, -0.014, 0.009, -0.002]\n",
+    "\n",
+    "# Calculate the total\n",
+    "total_sum = sum(values)\n",
+    "total_sum"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
@ -199,7 +225,7 @@
       "4             1  "
      ]
     },
-     "execution_count": 20,
+     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
@ -3264,7 +3290,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.10.3"
+   "version": "3.10.1"
  }
 },
 "nbformat": 4,
--- a/notebook/03_model_klasifikasi.ipynb
+++ b/notebook/03_model_klasifikasi.ipynb
@ -2,7 +2,7 @@
 "cells": [
  {
   "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
@ -221,7 +221,7 @@
       "[5 rows x 33 columns]"
      ]
     },
-     "execution_count": 1,
+     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
@ -234,7 +234,19 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df = df.drop(columns=['active_work_category', 'work_stability_score', \n",
+    "                      'position_score', 'job_income_position_score',\n",
+    "                      'education_score', 'education_income_ratio',\n",
+    "                      'weighted_satisfaction_performance'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
@ -243,7 +255,7 @@
       "12288"
      ]
     },
-     "execution_count": 2,
+     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
@ -254,7 +266,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
@ -266,7 +278,7 @@
       "Name: count, dtype: int64"
      ]
     },
-     "execution_count": 3,
+     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
@ -277,7 +289,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
@ -292,6 +304,261 @@
    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)"
   ]
  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Index(['domisili', 'jenis_kelamin', 'marriage_stat', 'dependant', 'education',\n",
+       "       'absent_90D', 'avg_time_work', 'departemen', 'position', 'income',\n",
+       "       'total_komp', 'job_satisfaction', 'performance_rating', 'age_years',\n",
+       "       'active_work', 'income_3_months', 'income_6_months',\n",
+       "       'total_income_work', 'income_dependant_ratio', 'work_efficiency'],\n",
+       "      dtype='object')"
+      ]
+     },
+     "execution_count": 12,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "X.columns"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>domisili</th>\n",
+       "      <th>jenis_kelamin</th>\n",
+       "      <th>marriage_stat</th>\n",
+       "      <th>dependant</th>\n",
+       "      <th>education</th>\n",
+       "      <th>absent_90D</th>\n",
+       "      <th>avg_time_work</th>\n",
+       "      <th>departemen</th>\n",
+       "      <th>position</th>\n",
+       "      <th>income</th>\n",
+       "      <th>...</th>\n",
+       "      <th>total_income_work</th>\n",
+       "      <th>income_dependant_ratio</th>\n",
+       "      <th>work_efficiency</th>\n",
+       "      <th>active_work_category</th>\n",
+       "      <th>work_stability_score</th>\n",
+       "      <th>position_score</th>\n",
+       "      <th>job_income_position_score</th>\n",
+       "      <th>education_score</th>\n",
+       "      <th>education_income_ratio</th>\n",
+       "      <th>weighted_satisfaction_performance</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>Kota Jakarta Timur</td>\n",
+       "      <td>Perempuan</td>\n",
+       "      <td>Single</td>\n",
+       "      <td>0</td>\n",
+       "      <td>D2</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>9.02</td>\n",
+       "      <td>Corporate Strategy &amp; Communications</td>\n",
+       "      <td>Staff</td>\n",
+       "      <td>3.100943e+06</td>\n",
+       "      <td>...</td>\n",
+       "      <td>4.341320e+07</td>\n",
+       "      <td>3.100943e+06</td>\n",
+       "      <td>1.12750</td>\n",
+       "      <td>Mid-term</td>\n",
+       "      <td>2.800000</td>\n",
+       "      <td>1</td>\n",
+       "      <td>3.100943e+06</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1.033648e+06</td>\n",
+       "      <td>1.8</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>Tangerang</td>\n",
+       "      <td>Laki-laki</td>\n",
+       "      <td>Single</td>\n",
+       "      <td>0</td>\n",
+       "      <td>SLTA</td>\n",
+       "      <td>2.0</td>\n",
+       "      <td>9.80</td>\n",
+       "      <td>Engineering &amp; IT</td>\n",
+       "      <td>Staff</td>\n",
+       "      <td>1.146038e+06</td>\n",
+       "      <td>...</td>\n",
+       "      <td>1.489849e+07</td>\n",
+       "      <td>1.146038e+06</td>\n",
+       "      <td>1.22500</td>\n",
+       "      <td>Mid-term</td>\n",
+       "      <td>4.333333</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1.146038e+06</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1.146038e+06</td>\n",
+       "      <td>2.6</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>Kabupaten Bekasi</td>\n",
+       "      <td>Laki-laki</td>\n",
+       "      <td>Married</td>\n",
+       "      <td>2</td>\n",
+       "      <td>SLTA</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>9.45</td>\n",
+       "      <td>Creative &amp; Design</td>\n",
+       "      <td>Manager</td>\n",
+       "      <td>8.013796e+06</td>\n",
+       "      <td>...</td>\n",
+       "      <td>2.003449e+08</td>\n",
+       "      <td>2.671265e+06</td>\n",
+       "      <td>1.18125</td>\n",
+       "      <td>Mid-term</td>\n",
+       "      <td>25.000000</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2.003449e+06</td>\n",
+       "      <td>1</td>\n",
+       "      <td>8.013796e+06</td>\n",
+       "      <td>3.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>Kabupaten Bekasi</td>\n",
+       "      <td>Laki-laki</td>\n",
+       "      <td>Married</td>\n",
+       "      <td>3</td>\n",
+       "      <td>SLTA</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>9.76</td>\n",
+       "      <td>Marketing</td>\n",
+       "      <td>Manager</td>\n",
+       "      <td>1.015002e+07</td>\n",
+       "      <td>...</td>\n",
+       "      <td>2.537505e+08</td>\n",
+       "      <td>2.537505e+06</td>\n",
+       "      <td>1.22000</td>\n",
+       "      <td>Mid-term</td>\n",
+       "      <td>25.000000</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2.537505e+06</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1.015002e+07</td>\n",
+       "      <td>4.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>Kota Jakarta Pusat</td>\n",
+       "      <td>Laki-laki</td>\n",
+       "      <td>Married</td>\n",
+       "      <td>1</td>\n",
+       "      <td>SLTA</td>\n",
+       "      <td>8.0</td>\n",
+       "      <td>9.46</td>\n",
+       "      <td>Operations</td>\n",
+       "      <td>Staff</td>\n",
+       "      <td>2.548043e+06</td>\n",
+       "      <td>...</td>\n",
+       "      <td>3.312456e+07</td>\n",
+       "      <td>1.274022e+06</td>\n",
+       "      <td>1.18250</td>\n",
+       "      <td>Mid-term</td>\n",
+       "      <td>1.444444</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2.548043e+06</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2.548043e+06</td>\n",
+       "      <td>1.8</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>5 rows × 27 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "             domisili jenis_kelamin marriage_stat  dependant education  \\\n",
+       "0  Kota Jakarta Timur     Perempuan        Single          0        D2   \n",
+       "1           Tangerang     Laki-laki        Single          0      SLTA   \n",
+       "2    Kabupaten Bekasi     Laki-laki       Married          2      SLTA   \n",
+       "3    Kabupaten Bekasi     Laki-laki       Married          3      SLTA   \n",
+       "4  Kota Jakarta Pusat     Laki-laki       Married          1      SLTA   \n",
+       "\n",
+       "   absent_90D  avg_time_work                           departemen position  \\\n",
+       "0         4.0           9.02  Corporate Strategy & Communications    Staff   \n",
+       "1         2.0           9.80                     Engineering & IT    Staff   \n",
+       "2         0.0           9.45                    Creative & Design  Manager   \n",
+       "3         0.0           9.76                            Marketing  Manager   \n",
+       "4         8.0           9.46                           Operations    Staff   \n",
+       "\n",
+       "         income  ...  total_income_work income_dependant_ratio  \\\n",
+       "0  3.100943e+06  ...       4.341320e+07           3.100943e+06   \n",
+       "1  1.146038e+06  ...       1.489849e+07           1.146038e+06   \n",
+       "2  8.013796e+06  ...       2.003449e+08           2.671265e+06   \n",
+       "3  1.015002e+07  ...       2.537505e+08           2.537505e+06   \n",
+       "4  2.548043e+06  ...       3.312456e+07           1.274022e+06   \n",
+       "\n",
+       "  work_efficiency  active_work_category  work_stability_score  position_score  \\\n",
+       "0         1.12750              Mid-term              2.800000               1   \n",
+       "1         1.22500              Mid-term              4.333333               1   \n",
+       "2         1.18125              Mid-term             25.000000               4   \n",
+       "3         1.22000              Mid-term             25.000000               4   \n",
+       "4         1.18250              Mid-term              1.444444               1   \n",
+       "\n",
+       "   job_income_position_score  education_score  education_income_ratio  \\\n",
+       "0               3.100943e+06                3            1.033648e+06   \n",
+       "1               1.146038e+06                1            1.146038e+06   \n",
+       "2               2.003449e+06                1            8.013796e+06   \n",
+       "3               2.537505e+06                1            1.015002e+07   \n",
+       "4               2.548043e+06                1            2.548043e+06   \n",
+       "\n",
+       "   weighted_satisfaction_performance  \n",
+       "0                                1.8  \n",
+       "1                                2.6  \n",
+       "2                                3.0  \n",
+       "3                                4.0  \n",
+       "4                                1.8  \n",
+       "\n",
+       "[5 rows x 27 columns]"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "X.head()"
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": 4,
--- a/notebook/04_model_smote.ipynb
+++ b/notebook/04_model_smote.ipynb
@ -232,6 +232,18 @@
    "df.head()"
   ]
  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df = df.drop(columns=['active_work_category', 'work_stability_score', \n",
+    "                      'position_score', 'job_income_position_score',\n",
+    "                      'education_score', 'education_income_ratio',\n",
+    "                      'weighted_satisfaction_performance'])"
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": 2,
@ -319,18 +331,16 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "cat_feature = ['departemen', 'position', 'domisili', 'marriage_stat', 'job_satisfaction', 'performance_rating',\n",
-    "               'education', 'active_work_category', 'jenis_kelamin']\n",
+    "               'education', 'jenis_kelamin']\n",
    "\n",
-    "X = df.drop(columns=['churn_status', 'employee_id', 'date_of_birth', 'join_date', 'resign_date', \n",
-    "                     'active_work_months', 'position_score', 'job_income_position_score',\n",
-    "                     'education_score', 'education_income_ratio'])\n",
+    "X = df.drop(columns=['churn_status', 'employee_id', 'date_of_birth', 'join_date', 'resign_date', 'active_work_months'])\n",
    "y = df['churn_status']\n",
    "\n",
    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)"
@ -434,7 +444,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
@ -456,7 +466,7 @@
    "\n",
    "cat_indices = [X.columns.get_loc(col) for col in cat_feature]\n",
    "\n",
-    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)\n",
+    "# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)\n",
    "\n",
    "sm = SMOTENC(categorical_features=cat_indices, random_state=42)\n",
    "X_train_res, y_train_res = sm.fit_resample(X_train, y_train)\n",
@ -467,7 +477,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
@ -534,7 +544,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
@ -568,7 +578,9 @@
       "      <th>departemen</th>\n",
       "      <th>position</th>\n",
       "      <th>income</th>\n",
-       "      <th>...</th>\n",
+       "      <th>total_komp</th>\n",
+       "      <th>job_satisfaction</th>\n",
+       "      <th>performance_rating</th>\n",
       "      <th>age_years</th>\n",
       "      <th>active_work</th>\n",
       "      <th>income_3_months</th>\n",
@ -576,9 +588,6 @@
       "      <th>total_income_work</th>\n",
       "      <th>income_dependant_ratio</th>\n",
       "      <th>work_efficiency</th>\n",
-       "      <th>active_work_category</th>\n",
-       "      <th>work_stability_score</th>\n",
-       "      <th>weighted_satisfaction_performance</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
@ -594,7 +603,9 @@
       "      <td>Corporate Strategy &amp; Communications</td>\n",
       "      <td>Staff</td>\n",
       "      <td>3.100943e+06</td>\n",
-       "      <td>...</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>Low</td>\n",
+       "      <td>Excellent</td>\n",
       "      <td>25</td>\n",
       "      <td>429</td>\n",
       "      <td>9.302829e+06</td>\n",
@ -602,9 +613,6 @@
       "      <td>4.341320e+07</td>\n",
       "      <td>3.100943e+06</td>\n",
       "      <td>1.12750</td>\n",
-       "      <td>Mid-term</td>\n",
-       "      <td>2.800000</td>\n",
-       "      <td>1.8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
@ -618,7 +626,9 @@
       "      <td>Engineering &amp; IT</td>\n",
       "      <td>Staff</td>\n",
       "      <td>1.146038e+06</td>\n",
-       "      <td>...</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>High</td>\n",
+       "      <td>Good</td>\n",
       "      <td>26</td>\n",
       "      <td>410</td>\n",
       "      <td>3.438114e+06</td>\n",
@ -626,9 +636,6 @@
       "      <td>1.489849e+07</td>\n",
       "      <td>1.146038e+06</td>\n",
       "      <td>1.22500</td>\n",
-       "      <td>Mid-term</td>\n",
-       "      <td>4.333333</td>\n",
-       "      <td>2.6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
@ -642,7 +649,9 @@
       "      <td>Creative &amp; Design</td>\n",
       "      <td>Manager</td>\n",
       "      <td>8.013796e+06</td>\n",
-       "      <td>...</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>High</td>\n",
+       "      <td>Excellent</td>\n",
       "      <td>43</td>\n",
       "      <td>776</td>\n",
       "      <td>2.404139e+07</td>\n",
@ -650,9 +659,6 @@
       "      <td>2.003449e+08</td>\n",
       "      <td>2.671265e+06</td>\n",
       "      <td>1.18125</td>\n",
-       "      <td>Mid-term</td>\n",
-       "      <td>25.000000</td>\n",
-       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
@ -666,7 +672,9 @@
       "      <td>Marketing</td>\n",
       "      <td>Manager</td>\n",
       "      <td>1.015002e+07</td>\n",
-       "      <td>...</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>Very High</td>\n",
+       "      <td>Outstanding</td>\n",
       "      <td>43</td>\n",
       "      <td>778</td>\n",
       "      <td>3.045007e+07</td>\n",
@ -674,9 +682,6 @@
       "      <td>2.537505e+08</td>\n",
       "      <td>2.537505e+06</td>\n",
       "      <td>1.22000</td>\n",
-       "      <td>Mid-term</td>\n",
-       "      <td>25.000000</td>\n",
-       "      <td>4.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
@ -690,7 +695,9 @@
       "      <td>Operations</td>\n",
       "      <td>Staff</td>\n",
       "      <td>2.548043e+06</td>\n",
-       "      <td>...</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>Low</td>\n",
+       "      <td>Excellent</td>\n",
       "      <td>36</td>\n",
       "      <td>405</td>\n",
       "      <td>7.644129e+06</td>\n",
@ -698,98 +705,257 @@
       "      <td>3.312456e+07</td>\n",
       "      <td>1.274022e+06</td>\n",
       "      <td>1.18250</td>\n",
-       "      <td>Mid-term</td>\n",
-       "      <td>1.444444</td>\n",
-       "      <td>1.8</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>12283</th>\n",
+       "      <td>Kabupaten Bogor</td>\n",
+       "      <td>Perempuan</td>\n",
+       "      <td>Married</td>\n",
+       "      <td>0</td>\n",
+       "      <td>SLTA</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>9.40</td>\n",
+       "      <td>HR</td>\n",
+       "      <td>Staff</td>\n",
+       "      <td>1.092339e+06</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>High</td>\n",
+       "      <td>Excellent</td>\n",
+       "      <td>40</td>\n",
+       "      <td>832</td>\n",
+       "      <td>3.277017e+06</td>\n",
+       "      <td>6.554034e+06</td>\n",
+       "      <td>2.949315e+07</td>\n",
+       "      <td>1.092339e+06</td>\n",
+       "      <td>1.17500</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>12284</th>\n",
+       "      <td>Kota Jakarta Barat</td>\n",
+       "      <td>Laki-laki</td>\n",
+       "      <td>Married</td>\n",
+       "      <td>2</td>\n",
+       "      <td>SLTA</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>9.11</td>\n",
+       "      <td>Corporate Strategy &amp; Communications</td>\n",
+       "      <td>Staff</td>\n",
+       "      <td>1.175199e+06</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>Medium</td>\n",
+       "      <td>Good</td>\n",
+       "      <td>32</td>\n",
+       "      <td>408</td>\n",
+       "      <td>3.525597e+06</td>\n",
+       "      <td>7.051194e+06</td>\n",
+       "      <td>1.527759e+07</td>\n",
+       "      <td>3.917330e+05</td>\n",
+       "      <td>1.13875</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>12285</th>\n",
+       "      <td>Tangerang</td>\n",
+       "      <td>Laki-laki</td>\n",
+       "      <td>Single</td>\n",
+       "      <td>0</td>\n",
+       "      <td>SLTA</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>9.82</td>\n",
+       "      <td>HR</td>\n",
+       "      <td>Staff</td>\n",
+       "      <td>1.479552e+06</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>Medium</td>\n",
+       "      <td>Good</td>\n",
+       "      <td>24</td>\n",
+       "      <td>539</td>\n",
+       "      <td>4.438656e+06</td>\n",
+       "      <td>8.877312e+06</td>\n",
+       "      <td>2.515238e+07</td>\n",
+       "      <td>1.479552e+06</td>\n",
+       "      <td>1.22750</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>12286</th>\n",
+       "      <td>Tangerang</td>\n",
+       "      <td>Perempuan</td>\n",
+       "      <td>Married</td>\n",
+       "      <td>1</td>\n",
+       "      <td>D1</td>\n",
+       "      <td>5.0</td>\n",
+       "      <td>9.17</td>\n",
+       "      <td>Finance &amp; Accounting</td>\n",
+       "      <td>Staff</td>\n",
+       "      <td>4.655009e+06</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>Low</td>\n",
+       "      <td>Low</td>\n",
+       "      <td>26</td>\n",
+       "      <td>918</td>\n",
+       "      <td>1.396503e+07</td>\n",
+       "      <td>2.793005e+07</td>\n",
+       "      <td>1.396503e+08</td>\n",
+       "      <td>2.327504e+06</td>\n",
+       "      <td>1.14625</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>12287</th>\n",
+       "      <td>Tangerang</td>\n",
+       "      <td>Perempuan</td>\n",
+       "      <td>Married</td>\n",
+       "      <td>2</td>\n",
+       "      <td>SLTA</td>\n",
+       "      <td>7.0</td>\n",
+       "      <td>9.37</td>\n",
+       "      <td>Creative &amp; Design</td>\n",
+       "      <td>Senior</td>\n",
+       "      <td>6.400201e+06</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>Very High</td>\n",
+       "      <td>Excellent</td>\n",
+       "      <td>47</td>\n",
+       "      <td>559</td>\n",
+       "      <td>1.920060e+07</td>\n",
+       "      <td>3.840121e+07</td>\n",
+       "      <td>1.152036e+08</td>\n",
+       "      <td>2.133400e+06</td>\n",
+       "      <td>1.17125</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
-       "<p>5 rows × 23 columns</p>\n",
+       "<p>12288 rows × 20 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
-       "             domisili jenis_kelamin marriage_stat  dependant education  \\\n",
-       "0  Kota Jakarta Timur     Perempuan        Single          0        D2   \n",
-       "1           Tangerang     Laki-laki        Single          0      SLTA   \n",
-       "2    Kabupaten Bekasi     Laki-laki       Married          2      SLTA   \n",
-       "3    Kabupaten Bekasi     Laki-laki       Married          3      SLTA   \n",
-       "4  Kota Jakarta Pusat     Laki-laki       Married          1      SLTA   \n",
+       "                 domisili jenis_kelamin marriage_stat  dependant education  \\\n",
+       "0      Kota Jakarta Timur     Perempuan        Single          0        D2   \n",
+       "1               Tangerang     Laki-laki        Single          0      SLTA   \n",
+       "2        Kabupaten Bekasi     Laki-laki       Married          2      SLTA   \n",
+       "3        Kabupaten Bekasi     Laki-laki       Married          3      SLTA   \n",
+       "4      Kota Jakarta Pusat     Laki-laki       Married          1      SLTA   \n",
+       "...                   ...           ...           ...        ...       ...   \n",
+       "12283     Kabupaten Bogor     Perempuan       Married          0      SLTA   \n",
+       "12284  Kota Jakarta Barat     Laki-laki       Married          2      SLTA   \n",
+       "12285           Tangerang     Laki-laki        Single          0      SLTA   \n",
+       "12286           Tangerang     Perempuan       Married          1        D1   \n",
+       "12287           Tangerang     Perempuan       Married          2      SLTA   \n",
       "\n",
-       "   absent_90D  avg_time_work                           departemen position  \\\n",
-       "0         4.0           9.02  Corporate Strategy & Communications    Staff   \n",
-       "1         2.0           9.80                     Engineering & IT    Staff   \n",
-       "2         0.0           9.45                    Creative & Design  Manager   \n",
-       "3         0.0           9.76                            Marketing  Manager   \n",
-       "4         8.0           9.46                           Operations    Staff   \n",
+       "       absent_90D  avg_time_work                           departemen  \\\n",
+       "0             4.0           9.02  Corporate Strategy & Communications   \n",
+       "1             2.0           9.80                     Engineering & IT   \n",
+       "2             0.0           9.45                    Creative & Design   \n",
+       "3             0.0           9.76                            Marketing   \n",
+       "4             8.0           9.46                           Operations   \n",
+       "...           ...            ...                                  ...   \n",
+       "12283         4.0           9.40                                   HR   \n",
+       "12284         3.0           9.11  Corporate Strategy & Communications   \n",
+       "12285         0.0           9.82                                   HR   \n",
+       "12286         5.0           9.17                 Finance & Accounting   \n",
+       "12287         7.0           9.37                    Creative & Design   \n",
       "\n",
-       "         income  ...  age_years active_work income_3_months  income_6_months  \\\n",
-       "0  3.100943e+06  ...         25         429    9.302829e+06     1.860566e+07   \n",
-       "1  1.146038e+06  ...         26         410    3.438114e+06     6.876228e+06   \n",
-       "2  8.013796e+06  ...         43         776    2.404139e+07     4.808278e+07   \n",
-       "3  1.015002e+07  ...         43         778    3.045007e+07     6.090013e+07   \n",
-       "4  2.548043e+06  ...         36         405    7.644129e+06     1.528826e+07   \n",
+       "      position        income  total_komp job_satisfaction performance_rating  \\\n",
+       "0        Staff  3.100943e+06         0.0              Low          Excellent   \n",
+       "1        Staff  1.146038e+06         0.0             High               Good   \n",
+       "2      Manager  8.013796e+06         0.0             High          Excellent   \n",
+       "3      Manager  1.015002e+07         0.0        Very High        Outstanding   \n",
+       "4        Staff  2.548043e+06         0.0              Low          Excellent   \n",
+       "...        ...           ...         ...              ...                ...   \n",
+       "12283    Staff  1.092339e+06         0.0             High          Excellent   \n",
+       "12284    Staff  1.175199e+06         0.0           Medium               Good   \n",
+       "12285    Staff  1.479552e+06         0.0           Medium               Good   \n",
+       "12286    Staff  4.655009e+06         0.0              Low                Low   \n",
+       "12287   Senior  6.400201e+06         0.0        Very High          Excellent   \n",
       "\n",
-       "   total_income_work  income_dependant_ratio  work_efficiency  \\\n",
-       "0       4.341320e+07            3.100943e+06          1.12750   \n",
-       "1       1.489849e+07            1.146038e+06          1.22500   \n",
-       "2       2.003449e+08            2.671265e+06          1.18125   \n",
-       "3       2.537505e+08            2.537505e+06          1.22000   \n",
-       "4       3.312456e+07            1.274022e+06          1.18250   \n",
+       "       age_years  active_work  income_3_months  income_6_months  \\\n",
+       "0             25          429     9.302829e+06     1.860566e+07   \n",
+       "1             26          410     3.438114e+06     6.876228e+06   \n",
+       "2             43          776     2.404139e+07     4.808278e+07   \n",
+       "3             43          778     3.045007e+07     6.090013e+07   \n",
+       "4             36          405     7.644129e+06     1.528826e+07   \n",
+       "...          ...          ...              ...              ...   \n",
+       "12283         40          832     3.277017e+06     6.554034e+06   \n",
+       "12284         32          408     3.525597e+06     7.051194e+06   \n",
+       "12285         24          539     4.438656e+06     8.877312e+06   \n",
+       "12286         26          918     1.396503e+07     2.793005e+07   \n",
+       "12287         47          559     1.920060e+07     3.840121e+07   \n",
       "\n",
-       "   active_work_category  work_stability_score  \\\n",
-       "0              Mid-term              2.800000   \n",
-       "1              Mid-term              4.333333   \n",
-       "2              Mid-term             25.000000   \n",
-       "3              Mid-term             25.000000   \n",
-       "4              Mid-term              1.444444   \n",
+       "       total_income_work  income_dependant_ratio  work_efficiency  \n",
+       "0           4.341320e+07            3.100943e+06          1.12750  \n",
+       "1           1.489849e+07            1.146038e+06          1.22500  \n",
+       "2           2.003449e+08            2.671265e+06          1.18125  \n",
+       "3           2.537505e+08            2.537505e+06          1.22000  \n",
+       "4           3.312456e+07            1.274022e+06          1.18250  \n",
+       "...                  ...                     ...              ...  \n",
+       "12283       2.949315e+07            1.092339e+06          1.17500  \n",
+       "12284       1.527759e+07            3.917330e+05          1.13875  \n",
+       "12285       2.515238e+07            1.479552e+06          1.22750  \n",
+       "12286       1.396503e+08            2.327504e+06          1.14625  \n",
+       "12287       1.152036e+08            2.133400e+06          1.17125  \n",
       "\n",
-       "   weighted_satisfaction_performance  \n",
-       "0                                1.8  \n",
-       "1                                2.6  \n",
-       "2                                3.0  \n",
-       "3                                4.0  \n",
-       "4                                1.8  \n",
-       "\n",
-       "[5 rows x 23 columns]"
+       "[12288 rows x 20 columns]"
      ]
     },
-     "execution_count": 13,
+     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
-    "X.head()"
+    "X"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "0:\ttest: 0.9289412\tbest: 0.9289412 (0)\ttotal: 209ms\tremaining: 3m 28s\n",
-      "200:\ttest: 0.9741746\tbest: 0.9742290 (183)\ttotal: 18.6s\tremaining: 1m 13s\n",
-      "400:\ttest: 0.9759996\tbest: 0.9759996 (400)\ttotal: 46.4s\tremaining: 1m 9s\n",
-      "600:\ttest: 0.9764617\tbest: 0.9765179 (576)\ttotal: 1m 8s\tremaining: 45.2s\n",
-      "800:\ttest: 0.9769282\tbest: 0.9769282 (800)\ttotal: 1m 27s\tremaining: 21.8s\n",
-      "999:\ttest: 0.9772636\tbest: 0.9772636 (999)\ttotal: 1m 55s\tremaining: 0us\n",
+      "0:\ttest: 0.9522383\tbest: 0.9522383 (0)\ttotal: 371ms\tremaining: 6m 10s\n",
+      "200:\ttest: 0.9749033\tbest: 0.9749426 (188)\ttotal: 15.7s\tremaining: 1m 2s\n",
+      "400:\ttest: 0.9751700\tbest: 0.9753029 (270)\ttotal: 32.3s\tremaining: 48.2s\n",
+      "600:\ttest: 0.9757035\tbest: 0.9757052 (599)\ttotal: 47.2s\tremaining: 31.3s\n",
+      "800:\ttest: 0.9760228\tbest: 0.9760585 (762)\ttotal: 1m 2s\tremaining: 15.5s\n",
+      "999:\ttest: 0.9761958\tbest: 0.9762119 (990)\ttotal: 1m 17s\tremaining: 0us\n",
      "\n",
-      "bestTest = 0.9772635842\n",
-      "bestIteration = 999\n",
-      "\n"
+      "bestTest = 0.9762119056\n",
+      "bestIteration = 990\n",
+      "\n",
+      "Shrink model to first 991 iterations.\n"
     ]
    },
    {
     "data": {
      "text/plain": [
-       "<catboost.core.CatBoostClassifier at 0x23afa652260>"
+       "<catboost.core.CatBoostClassifier at 0x1cb3665ba30>"
      ]
     },
-     "execution_count": 14,
+     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
@ -815,7 +981,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
@ -864,34 +1030,34 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 16,
+   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
-      "[I 2025-03-20 15:00:41,602] A new study created in memory with name: no-name-25d292cb-21f5-4296-b4f8-60550f9419f3\n",
-      "[I 2025-03-20 15:02:01,024] Trial 0 finished with value: 0.9732495439604305 and parameters: {'iterations': 885, 'learning_rate': 0.014037075669208683, 'depth': 6, 'subsample': 0.7347642926574603, 'colsample_bylevel': 0.628056031091488, 'l2_leaf_reg': 18.77314488035107, 'random_strength': 7.563973089971057}. Best is trial 0 with value: 0.9732495439604305.\n",
-      "[I 2025-03-20 15:03:17,085] Trial 1 finished with value: 0.9711497549205443 and parameters: {'iterations': 708, 'learning_rate': 0.012363987021937311, 'depth': 6, 'subsample': 0.5278155628504972, 'colsample_bylevel': 0.5923839431004807, 'l2_leaf_reg': 13.672040817303856, 'random_strength': 5.047595540352549}. Best is trial 0 with value: 0.9732495439604305.\n",
-      "[I 2025-03-20 15:04:48,051] Trial 2 finished with value: 0.9648459277561962 and parameters: {'iterations': 993, 'learning_rate': 0.001016519352919797, 'depth': 6, 'subsample': 0.7618891207779287, 'colsample_bylevel': 0.7088194762415717, 'l2_leaf_reg': 15.900968017238345, 'random_strength': 8.689884293115323}. Best is trial 0 with value: 0.9732495439604305.\n",
-      "[I 2025-03-20 15:05:54,013] Trial 3 finished with value: 0.9774063056111822 and parameters: {'iterations': 667, 'learning_rate': 0.053616906724399574, 'depth': 6, 'subsample': 0.5295054888915093, 'colsample_bylevel': 0.5919019897109834, 'l2_leaf_reg': 7.4789520770307085, 'random_strength': 9.983783900508625}. Best is trial 3 with value: 0.9774063056111822.\n",
-      "[I 2025-03-20 15:06:51,248] Trial 4 finished with value: 0.968719922573624 and parameters: {'iterations': 588, 'learning_rate': 0.012727034718753798, 'depth': 6, 'subsample': 0.6426790020391226, 'colsample_bylevel': 0.5191821820085022, 'l2_leaf_reg': 15.153416708004082, 'random_strength': 5.999182374601503}. Best is trial 3 with value: 0.9774063056111822.\n",
-      "[I 2025-03-20 15:08:16,470] Trial 5 finished with value: 0.9776203877562853 and parameters: {'iterations': 764, 'learning_rate': 0.07266083579909825, 'depth': 5, 'subsample': 0.5191615563488426, 'colsample_bylevel': 0.7302518018135663, 'l2_leaf_reg': 6.949740475447557, 'random_strength': 8.548754890547242}. Best is trial 5 with value: 0.9776203877562853.\n",
-      "[I 2025-03-20 15:09:10,733] Trial 6 finished with value: 0.9675852872045778 and parameters: {'iterations': 731, 'learning_rate': 0.013438903268682896, 'depth': 4, 'subsample': 0.5131276397700636, 'colsample_bylevel': 0.6859814430568429, 'l2_leaf_reg': 14.057136374487015, 'random_strength': 8.372349610104749}. Best is trial 5 with value: 0.9776203877562853.\n",
-      "[I 2025-03-20 15:10:01,815] Trial 7 finished with value: 0.977360813155348 and parameters: {'iterations': 642, 'learning_rate': 0.062155964429978755, 'depth': 5, 'subsample': 0.7661053749507193, 'colsample_bylevel': 0.6922388149758989, 'l2_leaf_reg': 5.284164428504138, 'random_strength': 6.232456641121701}. Best is trial 5 with value: 0.9776203877562853.\n",
-      "[I 2025-03-20 15:10:59,528] Trial 8 finished with value: 0.9653231525379885 and parameters: {'iterations': 754, 'learning_rate': 0.00208109923189855, 'depth': 5, 'subsample': 0.596137479445154, 'colsample_bylevel': 0.7574060768484172, 'l2_leaf_reg': 11.210689475672972, 'random_strength': 7.219680822486467}. Best is trial 5 with value: 0.9776203877562853.\n",
-      "[I 2025-03-20 15:12:14,806] Trial 9 finished with value: 0.9645587008781827 and parameters: {'iterations': 797, 'learning_rate': 0.002876994974206422, 'depth': 6, 'subsample': 0.7623605250175415, 'colsample_bylevel': 0.5792373869617546, 'l2_leaf_reg': 11.933900864140389, 'random_strength': 9.537789817015767}. Best is trial 5 with value: 0.9776203877562853.\n",
-      "[I 2025-03-20 15:13:20,035] Trial 10 finished with value: 0.9777185087394575 and parameters: {'iterations': 843, 'learning_rate': 0.09347955751894015, 'depth': 4, 'subsample': 0.5919290139946178, 'colsample_bylevel': 0.7823226761425409, 'l2_leaf_reg': 8.906630269811362, 'random_strength': 8.822275155143554}. Best is trial 10 with value: 0.9777185087394575.\n",
-      "[I 2025-03-20 15:14:26,950] Trial 11 finished with value: 0.9772716122615548 and parameters: {'iterations': 866, 'learning_rate': 0.09609340816462177, 'depth': 4, 'subsample': 0.588006292400181, 'colsample_bylevel': 0.7921811422710393, 'l2_leaf_reg': 8.754759631995423, 'random_strength': 8.823729815067631}. Best is trial 10 with value: 0.9777185087394575.\n",
-      "[I 2025-03-20 15:15:31,960] Trial 12 finished with value: 0.9770432579734449 and parameters: {'iterations': 842, 'learning_rate': 0.03150622304414378, 'depth': 4, 'subsample': 0.5815107101504045, 'colsample_bylevel': 0.7526738793511021, 'l2_leaf_reg': 8.96571033386253, 'random_strength': 7.901521206622082}. Best is trial 10 with value: 0.9777185087394575.\n",
-      "[I 2025-03-20 15:16:19,764] Trial 13 finished with value: 0.9762975385013358 and parameters: {'iterations': 500, 'learning_rate': 0.030813140864715382, 'depth': 5, 'subsample': 0.6807936806371823, 'colsample_bylevel': 0.7992043286169884, 'l2_leaf_reg': 6.54189325170135, 'random_strength': 9.2194946118034}. Best is trial 10 with value: 0.9777185087394575.\n",
-      "[I 2025-03-20 15:17:30,598] Trial 14 finished with value: 0.9771627871711274 and parameters: {'iterations': 960, 'learning_rate': 0.08725168283234642, 'depth': 4, 'subsample': 0.5611916358409927, 'colsample_bylevel': 0.7407206600062527, 'l2_leaf_reg': 10.877310276695814, 'random_strength': 8.279759834834062}. Best is trial 10 with value: 0.9777185087394575.\n",
-      "[I 2025-03-20 15:18:55,623] Trial 15 finished with value: 0.9773510010570307 and parameters: {'iterations': 921, 'learning_rate': 0.030478650701436915, 'depth': 5, 'subsample': 0.6426668840891531, 'colsample_bylevel': 0.6580544384365046, 'l2_leaf_reg': 9.275448656004336, 'random_strength': 7.054540552553462}. Best is trial 10 with value: 0.9777185087394575.\n",
-      "[I 2025-03-20 15:20:03,449] Trial 16 finished with value: 0.9770013335533622 and parameters: {'iterations': 802, 'learning_rate': 0.04717986995915269, 'depth': 4, 'subsample': 0.5502437476216447, 'colsample_bylevel': 0.7293227273801556, 'l2_leaf_reg': 5.910885625677492, 'random_strength': 9.224856294634591}. Best is trial 10 with value: 0.9777185087394575.\n",
-      "[I 2025-03-20 15:21:08,738] Trial 17 finished with value: 0.9664417317461521 and parameters: {'iterations': 794, 'learning_rate': 0.005786331156808205, 'depth': 5, 'subsample': 0.5027107752205993, 'colsample_bylevel': 0.7639160019272435, 'l2_leaf_reg': 7.568889551395079, 'random_strength': 7.889004164110706}. Best is trial 10 with value: 0.9777185087394575.\n",
-      "[I 2025-03-20 15:22:13,898] Trial 18 finished with value: 0.9765954694866041 and parameters: {'iterations': 919, 'learning_rate': 0.02240626498044735, 'depth': 4, 'subsample': 0.6106354847964582, 'colsample_bylevel': 0.6625660136271165, 'l2_leaf_reg': 10.544957798217377, 'random_strength': 6.657969347763323}. Best is trial 10 with value: 0.9777185087394575.\n",
-      "[I 2025-03-20 15:23:05,997] Trial 19 finished with value: 0.9776159277115958 and parameters: {'iterations': 597, 'learning_rate': 0.07175908110255513, 'depth': 5, 'subsample': 0.681492227991095, 'colsample_bylevel': 0.7774154754002688, 'l2_leaf_reg': 7.472076834725921, 'random_strength': 9.99934503292809}. Best is trial 10 with value: 0.9777185087394575.\n"
+      "[I 2025-04-15 14:46:04,525] A new study created in memory with name: no-name-a82f1532-93fd-444e-a2a9-e64f1ab1581d\n",
+      "[I 2025-04-15 14:46:47,591] Trial 0 finished with value: 0.9760700762221638 and parameters: {'iterations': 879, 'learning_rate': 0.03194622545083888, 'depth': 4, 'subsample': 0.6984770397557132, 'colsample_bylevel': 0.6107784129933028, 'l2_leaf_reg': 19.31004315136751, 'random_strength': 8.538025613654153}. Best is trial 0 with value: 0.9760700762221638.\n",
+      "[I 2025-04-15 14:47:49,613] Trial 1 finished with value: 0.968974145120934 and parameters: {'iterations': 987, 'learning_rate': 0.00497466285240473, 'depth': 6, 'subsample': 0.7324064062513642, 'colsample_bylevel': 0.5860390386187008, 'l2_leaf_reg': 13.6700598295073, 'random_strength': 8.68535276828941}. Best is trial 0 with value: 0.9760700762221638.\n",
+      "[I 2025-04-15 14:48:52,501] Trial 2 finished with value: 0.9771271068136104 and parameters: {'iterations': 958, 'learning_rate': 0.029644496717173407, 'depth': 5, 'subsample': 0.7324841402342017, 'colsample_bylevel': 0.7540959396418829, 'l2_leaf_reg': 7.126960456494959, 'random_strength': 7.4284219069859745}. Best is trial 2 with value: 0.9771271068136104.\n",
+      "[I 2025-04-15 14:49:44,391] Trial 3 finished with value: 0.9687270586451275 and parameters: {'iterations': 778, 'learning_rate': 0.002458862957591489, 'depth': 6, 'subsample': 0.5532293644121917, 'colsample_bylevel': 0.5854134727534657, 'l2_leaf_reg': 16.679333851474986, 'random_strength': 7.393624819420116}. Best is trial 2 with value: 0.9771271068136104.\n",
+      "[I 2025-04-15 14:50:18,713] Trial 4 finished with value: 0.9763555190823012 and parameters: {'iterations': 562, 'learning_rate': 0.036725493598596365, 'depth': 5, 'subsample': 0.7722133171879484, 'colsample_bylevel': 0.6532823306754292, 'l2_leaf_reg': 12.030754990066507, 'random_strength': 8.877550003001012}. Best is trial 2 with value: 0.9771271068136104.\n",
+      "[I 2025-04-15 14:51:17,350] Trial 5 finished with value: 0.968570065072052 and parameters: {'iterations': 962, 'learning_rate': 0.0011370325299877532, 'depth': 5, 'subsample': 0.6728515618578689, 'colsample_bylevel': 0.6826782056061581, 'l2_leaf_reg': 13.623633572161166, 'random_strength': 6.601819735133423}. Best is trial 2 with value: 0.9771271068136104.\n",
+      "[I 2025-04-15 14:51:57,207] Trial 6 finished with value: 0.9687047584216792 and parameters: {'iterations': 617, 'learning_rate': 0.0015432741560526652, 'depth': 5, 'subsample': 0.7499204041791451, 'colsample_bylevel': 0.7446512678837157, 'l2_leaf_reg': 10.072108741396685, 'random_strength': 7.00923150345308}. Best is trial 2 with value: 0.9771271068136104.\n",
+      "[I 2025-04-15 14:52:37,654] Trial 7 finished with value: 0.976307350599653 and parameters: {'iterations': 619, 'learning_rate': 0.09249177862479464, 'depth': 5, 'subsample': 0.5961272939306226, 'colsample_bylevel': 0.6454652012357972, 'l2_leaf_reg': 13.952857635588412, 'random_strength': 5.3135323404658195}. Best is trial 2 with value: 0.9771271068136104.\n",
+      "[I 2025-04-15 14:53:16,002] Trial 8 finished with value: 0.9680910562723839 and parameters: {'iterations': 668, 'learning_rate': 0.0017846267770558202, 'depth': 5, 'subsample': 0.6091299857459994, 'colsample_bylevel': 0.5146530801783137, 'l2_leaf_reg': 6.463177992505614, 'random_strength': 9.83403822639651}. Best is trial 2 with value: 0.9771271068136104.\n",
+      "[I 2025-04-15 14:54:07,117] Trial 9 finished with value: 0.9701837092407667 and parameters: {'iterations': 899, 'learning_rate': 0.009675766544107381, 'depth': 5, 'subsample': 0.7887519851916923, 'colsample_bylevel': 0.7583643579850647, 'l2_leaf_reg': 12.884965304659517, 'random_strength': 7.548006066067643}. Best is trial 2 with value: 0.9771271068136104.\n",
+      "[I 2025-04-15 14:54:49,239] Trial 10 finished with value: 0.9754644021533097 and parameters: {'iterations': 777, 'learning_rate': 0.024122661029117063, 'depth': 4, 'subsample': 0.5013447329900989, 'colsample_bylevel': 0.7800307305883676, 'l2_leaf_reg': 5.638932266363305, 'random_strength': 5.942927125428197}. Best is trial 2 with value: 0.9771271068136104.\n",
+      "[I 2025-04-15 14:55:16,702] Trial 11 finished with value: 0.9764081476096389 and parameters: {'iterations': 508, 'learning_rate': 0.04726537771631546, 'depth': 4, 'subsample': 0.7937595144792454, 'colsample_bylevel': 0.6888257437132431, 'l2_leaf_reg': 9.252398775691933, 'random_strength': 8.42482116769386}. Best is trial 2 with value: 0.9771271068136104.\n",
+      "[I 2025-04-15 14:55:43,712] Trial 12 finished with value: 0.9769504890439002 and parameters: {'iterations': 520, 'learning_rate': 0.07517500503326581, 'depth': 4, 'subsample': 0.7116563324529114, 'colsample_bylevel': 0.7144947743480992, 'l2_leaf_reg': 8.350160243659916, 'random_strength': 8.168242005926878}. Best is trial 2 with value: 0.9771271068136104.\n",
+      "[I 2025-04-15 14:56:21,069] Trial 13 finished with value: 0.9774294978435685 and parameters: {'iterations': 703, 'learning_rate': 0.06983698579391724, 'depth': 4, 'subsample': 0.7065367059268508, 'colsample_bylevel': 0.724103178768751, 'l2_leaf_reg': 8.023903029922751, 'random_strength': 7.928311734643061}. Best is trial 13 with value: 0.9774294978435685.\n",
+      "[I 2025-04-15 14:57:09,353] Trial 14 finished with value: 0.9731246627091203 and parameters: {'iterations': 724, 'learning_rate': 0.015490359934301434, 'depth': 6, 'subsample': 0.6446878985673278, 'colsample_bylevel': 0.7920667146800474, 'l2_leaf_reg': 7.371821145261041, 'random_strength': 6.451885650217026}. Best is trial 13 with value: 0.9774294978435685.\n",
+      "[I 2025-04-15 14:57:52,357] Trial 15 finished with value: 0.9738400538773399 and parameters: {'iterations': 859, 'learning_rate': 0.014795165553926048, 'depth': 4, 'subsample': 0.665044880376952, 'colsample_bylevel': 0.7290363615996102, 'l2_leaf_reg': 5.2279489954943, 'random_strength': 9.524780688568056}. Best is trial 13 with value: 0.9774294978435685.\n",
+      "[I 2025-04-15 14:58:30,199] Trial 16 finished with value: 0.9766686142195145 and parameters: {'iterations': 722, 'learning_rate': 0.058843073710673745, 'depth': 4, 'subsample': 0.7070259824991089, 'colsample_bylevel': 0.7046723312898295, 'l2_leaf_reg': 9.886755559850055, 'random_strength': 7.515414918520503}. Best is trial 13 with value: 0.9774294978435685.\n",
+      "[I 2025-04-15 14:59:29,101] Trial 17 finished with value: 0.9759648191674881 and parameters: {'iterations': 836, 'learning_rate': 0.0201164375942546, 'depth': 6, 'subsample': 0.7464329768357236, 'colsample_bylevel': 0.751756591889216, 'l2_leaf_reg': 11.194529638284472, 'random_strength': 7.890747626459661}. Best is trial 13 with value: 0.9774294978435685.\n",
+      "[I 2025-04-15 15:00:13,792] Trial 18 finished with value: 0.9691284626671959 and parameters: {'iterations': 941, 'learning_rate': 0.008139386305286263, 'depth': 4, 'subsample': 0.6324467522547773, 'colsample_bylevel': 0.6640606522826965, 'l2_leaf_reg': 7.637155074182216, 'random_strength': 9.118659113863696}. Best is trial 13 with value: 0.9774294978435685.\n",
+      "[I 2025-04-15 15:01:04,807] Trial 19 finished with value: 0.9772921284671273 and parameters: {'iterations': 812, 'learning_rate': 0.047376913098252045, 'depth': 5, 'subsample': 0.6838122071503733, 'colsample_bylevel': 0.7976712282377681, 'l2_leaf_reg': 15.51559416611185, 'random_strength': 6.860978796602465}. Best is trial 13 with value: 0.9774294978435685.\n"
     ]
    },
    {
@ -899,15 +1065,15 @@
     "output_type": "stream",
     "text": [
      "Best Trial:\n",
-      "AUC: 0.9777185087394575\n",
+      "AUC: 0.9774294978435685\n",
      "Params:\n",
-      "  iterations: 843\n",
-      "  learning_rate: 0.09347955751894015\n",
+      "  iterations: 703\n",
+      "  learning_rate: 0.06983698579391724\n",
      "  depth: 4\n",
-      "  subsample: 0.5919290139946178\n",
-      "  colsample_bylevel: 0.7823226761425409\n",
-      "  l2_leaf_reg: 8.906630269811362\n",
-      "  random_strength: 8.822275155143554\n"
+      "  subsample: 0.7065367059268508\n",
+      "  colsample_bylevel: 0.724103178768751\n",
+      "  l2_leaf_reg: 8.023903029922751\n",
+      "  random_strength: 7.928311734643061\n"
     ]
    }
   ],
@ -926,24 +1092,22 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 17,
+   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "0:\ttest: 0.9345234\tbest: 0.9345234 (0)\ttotal: 67.6ms\tremaining: 56.9s\n",
-      "100:\ttest: 0.9676504\tbest: 0.9677699 (99)\ttotal: 7.48s\tremaining: 55s\n",
-      "200:\ttest: 0.9763805\tbest: 0.9763805 (200)\ttotal: 14.6s\tremaining: 46.6s\n",
-      "300:\ttest: 0.9768756\tbest: 0.9770299 (281)\ttotal: 21.1s\tremaining: 38s\n",
+      "0:\ttest: 0.8633242\tbest: 0.8633242 (0)\ttotal: 105ms\tremaining: 1m 13s\n",
+      "100:\ttest: 0.9693666\tbest: 0.9696128 (84)\ttotal: 4.78s\tremaining: 28.5s\n",
      "Stopped by overfitting detector  (50 iterations wait)\n",
      "\n",
-      "bestTest = 0.9770298778\n",
-      "bestIteration = 281\n",
+      "bestTest = 0.9696128235\n",
+      "bestIteration = 84\n",
      "\n",
-      "Shrink model to first 282 iterations.\n",
-      "Learn AUC: 0.9921 | Test AUC: 0.9770\n"
+      "Shrink model to first 85 iterations.\n",
+      "Learn AUC: 0.9742 | Test AUC: 0.9696\n"
     ]
    }
   ],
@ -989,7 +1153,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
--- a/notebook/07_model_aug23.ipynb
+++ b/notebook/07_model_aug23.ipynb
@ -237,6 +237,18 @@
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
+   "source": [
+    "df = df.drop(columns=['active_work_category', 'work_stability_score', \n",
+    "                      'position_score', 'job_income_position_score',\n",
+    "                      'education_score', 'education_income_ratio',\n",
+    "                      'weighted_satisfaction_performance'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
   "source": [
    "X = df.drop(columns=['active_work_months','churn_status', 'employee_id', 'date_of_birth', 'join_date', 'resign_date'])\n",
    "y = df['active_work_months']\n",
@ -253,7 +265,7 @@
    "y_valid = valid_data['active_work_months']\n",
    "\n",
    "cat_feature = ['departemen', 'position', 'domisili', 'marriage_stat', 'job_satisfaction', 'performance_rating',\n",
-    "               'education', 'active_work_category', 'jenis_kelamin']"
+    "               'education', 'jenis_kelamin']"
   ]
  },
  {
@ -334,21 +346,21 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "0:\tlearn: 14.5009742\ttest: 20.7509764\tbest: 20.7509764 (0)\ttotal: 299ms\tremaining: 4m 58s\n",
-      "200:\tlearn: 2.3680725\ttest: 3.7582712\tbest: 3.7582712 (200)\ttotal: 23.4s\tremaining: 1m 33s\n",
-      "400:\tlearn: 0.6074381\ttest: 1.2438678\tbest: 1.2438678 (400)\ttotal: 39s\tremaining: 58.2s\n",
-      "600:\tlearn: 0.3585168\ttest: 0.6968303\tbest: 0.6968303 (600)\ttotal: 50.6s\tremaining: 33.6s\n",
-      "800:\tlearn: 0.3105016\ttest: 0.5057286\tbest: 0.5057286 (800)\ttotal: 1m 7s\tremaining: 16.7s\n",
-      "999:\tlearn: 0.2918692\ttest: 0.4375722\tbest: 0.4375722 (999)\ttotal: 1m 26s\tremaining: 0us\n",
+      "0:\tlearn: 14.5012046\ttest: 20.7328222\tbest: 20.7328222 (0)\ttotal: 274ms\tremaining: 4m 33s\n",
+      "200:\tlearn: 2.3174890\ttest: 3.6207810\tbest: 3.6207810 (200)\ttotal: 13.5s\tremaining: 53.6s\n",
+      "400:\tlearn: 0.5080249\ttest: 0.7918809\tbest: 0.7918809 (400)\ttotal: 27.5s\tremaining: 41.1s\n",
+      "600:\tlearn: 0.3180844\ttest: 0.3664973\tbest: 0.3664973 (600)\ttotal: 42.6s\tremaining: 28.3s\n",
+      "800:\tlearn: 0.2992983\ttest: 0.3231799\tbest: 0.3231799 (800)\ttotal: 55.5s\tremaining: 13.8s\n",
+      "999:\tlearn: 0.2885793\ttest: 0.3135297\tbest: 0.3135297 (999)\ttotal: 1m 7s\tremaining: 0us\n",
      "\n",
-      "bestTest = 0.4375721622\n",
+      "bestTest = 0.3135296768\n",
      "bestIteration = 999\n",
      "\n"
     ]
@ -356,10 +368,10 @@
    {
     "data": {
      "text/plain": [
-       "<catboost.core.CatBoostRegressor at 0x2204d48bd60>"
+       "<catboost.core.CatBoostRegressor at 0x26bbda6b3d0>"
      ]
     },
-     "execution_count": 5,
+     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
@ -549,29 +561,27 @@
     "name": "stderr",
     "output_type": "stream",
     "text": [
-      "d:\\Tugas Akhir\\Codingan\\Development\\App\\.venv\\lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
-      "  from .autonotebook import tqdm as notebook_tqdm\n",
-      "[I 2025-03-20 21:00:35,783] A new study created in memory with name: no-name-90c20206-17ac-4d18-b328-3e83b0b68b02\n",
-      "[I 2025-03-20 21:01:21,239] Trial 0 finished with value: 8.419168492565904 and parameters: {'iterations': 829, 'learning_rate': 0.0015555213398614615, 'depth': 5, 'subsample': 0.7218038924225667, 'colsample_bylevel': 0.6317251084903674, 'l2_leaf_reg': 9.824596206043214, 'random_strength': 5.86501034445993}. Best is trial 0 with value: 8.419168492565904.\n",
-      "[I 2025-03-20 21:02:07,180] Trial 1 finished with value: 2.733330716926934 and parameters: {'iterations': 884, 'learning_rate': 0.0053516318860709, 'depth': 5, 'subsample': 0.7097793314114498, 'colsample_bylevel': 0.6435980070119711, 'l2_leaf_reg': 13.121533573607655, 'random_strength': 9.324119789288343}. Best is trial 1 with value: 2.733330716926934.\n",
-      "[I 2025-03-20 21:02:32,264] Trial 2 finished with value: 1.1897665017142585 and parameters: {'iterations': 635, 'learning_rate': 0.023704951537819915, 'depth': 4, 'subsample': 0.7686389413016427, 'colsample_bylevel': 0.5266671315496564, 'l2_leaf_reg': 15.175290458163712, 'random_strength': 7.30745741132724}. Best is trial 2 with value: 1.1897665017142585.\n",
-      "[I 2025-03-20 21:03:16,738] Trial 3 finished with value: 1.7154507371626437 and parameters: {'iterations': 782, 'learning_rate': 0.012722641891947339, 'depth': 5, 'subsample': 0.6239927785252992, 'colsample_bylevel': 0.551697952957819, 'l2_leaf_reg': 8.011605642650462, 'random_strength': 7.351190134542403}. Best is trial 2 with value: 1.1897665017142585.\n",
-      "[I 2025-03-20 21:03:56,278] Trial 4 finished with value: 1.0342169780611064 and parameters: {'iterations': 590, 'learning_rate': 0.026972969679793426, 'depth': 5, 'subsample': 0.7586099689360802, 'colsample_bylevel': 0.6775428452906951, 'l2_leaf_reg': 8.716335425228655, 'random_strength': 8.009471482166916}. Best is trial 4 with value: 1.0342169780611064.\n",
-      "[I 2025-03-20 21:04:38,974] Trial 5 finished with value: 0.9367588642619448 and parameters: {'iterations': 806, 'learning_rate': 0.025597264277354852, 'depth': 5, 'subsample': 0.6503807167080523, 'colsample_bylevel': 0.6858283170142669, 'l2_leaf_reg': 15.122433690868604, 'random_strength': 9.26015271022034}. Best is trial 5 with value: 0.9367588642619448.\n",
-      "[I 2025-03-20 21:05:17,061] Trial 6 finished with value: 3.3099767746788875 and parameters: {'iterations': 789, 'learning_rate': 0.005334295156738072, 'depth': 4, 'subsample': 0.6150191027518872, 'colsample_bylevel': 0.5100653940971351, 'l2_leaf_reg': 16.26273857150823, 'random_strength': 9.363327197652792}. Best is trial 5 with value: 0.9367588642619448.\n",
-      "[I 2025-03-20 21:06:11,133] Trial 7 finished with value: 2.430297164863871 and parameters: {'iterations': 837, 'learning_rate': 0.006452050997341309, 'depth': 5, 'subsample': 0.5569957978664557, 'colsample_bylevel': 0.7974026616845746, 'l2_leaf_reg': 17.640163162948227, 'random_strength': 8.93735519303648}. Best is trial 5 with value: 0.9367588642619448.\n",
-      "[I 2025-03-20 21:06:40,233] Trial 8 finished with value: 3.059791812580938 and parameters: {'iterations': 590, 'learning_rate': 0.006568464895546362, 'depth': 5, 'subsample': 0.5764687596920144, 'colsample_bylevel': 0.5755754855362132, 'l2_leaf_reg': 8.522648009171846, 'random_strength': 9.538382092752938}. Best is trial 5 with value: 0.9367588642619448.\n",
-      "[I 2025-03-20 21:07:13,639] Trial 9 finished with value: 7.76817781695066 and parameters: {'iterations': 617, 'learning_rate': 0.002539968080520203, 'depth': 4, 'subsample': 0.6372121192487497, 'colsample_bylevel': 0.6082864452448382, 'l2_leaf_reg': 17.654618698706546, 'random_strength': 9.459556159452063}. Best is trial 5 with value: 0.9367588642619448.\n",
-      "[I 2025-03-20 21:08:20,357] Trial 10 finished with value: 0.4103763995321103 and parameters: {'iterations': 979, 'learning_rate': 0.09529892832775133, 'depth': 6, 'subsample': 0.5209516193113186, 'colsample_bylevel': 0.7296584784356337, 'l2_leaf_reg': 5.36555480808817, 'random_strength': 5.055795530221549}. Best is trial 10 with value: 0.4103763995321103.\n",
-      "[I 2025-03-20 21:09:34,792] Trial 11 finished with value: 0.33873805409112234 and parameters: {'iterations': 989, 'learning_rate': 0.07851963900488154, 'depth': 6, 'subsample': 0.5200611359234908, 'colsample_bylevel': 0.7185209948848563, 'l2_leaf_reg': 5.259953652103869, 'random_strength': 5.611812114220319}. Best is trial 11 with value: 0.33873805409112234.\n",
-      "[I 2025-03-20 21:10:53,294] Trial 12 finished with value: 0.33747220251551263 and parameters: {'iterations': 1000, 'learning_rate': 0.09306028108980487, 'depth': 6, 'subsample': 0.5034416033403175, 'colsample_bylevel': 0.7452683981181829, 'l2_leaf_reg': 5.279315402542746, 'random_strength': 5.294692039250562}. Best is trial 12 with value: 0.33747220251551263.\n",
-      "[I 2025-03-20 21:11:51,728] Trial 13 finished with value: 1.6267506473342959 and parameters: {'iterations': 996, 'learning_rate': 0.09325178395683269, 'depth': 6, 'subsample': 0.5125699226468535, 'colsample_bylevel': 0.756377904971589, 'l2_leaf_reg': 5.519312813157406, 'random_strength': 6.120809817278766}. Best is trial 12 with value: 0.33747220251551263.\n",
-      "[I 2025-03-20 21:12:42,167] Trial 14 finished with value: 0.4564504106167195 and parameters: {'iterations': 919, 'learning_rate': 0.052020741738658456, 'depth': 6, 'subsample': 0.5007317413228829, 'colsample_bylevel': 0.727489010605425, 'l2_leaf_reg': 11.083178624062024, 'random_strength': 5.261787256931901}. Best is trial 12 with value: 0.33747220251551263.\n",
-      "[I 2025-03-20 21:13:32,464] Trial 15 finished with value: 0.6231066699606179 and parameters: {'iterations': 696, 'learning_rate': 0.04926421412297825, 'depth': 6, 'subsample': 0.5528336472517523, 'colsample_bylevel': 0.7710394282985423, 'l2_leaf_reg': 6.189751630816335, 'random_strength': 6.502158303659374}. Best is trial 12 with value: 0.33747220251551263.\n",
-      "[I 2025-03-20 21:14:53,119] Trial 16 finished with value: 0.4792492732081974 and parameters: {'iterations': 930, 'learning_rate': 0.04640033657256652, 'depth': 6, 'subsample': 0.5859719980036684, 'colsample_bylevel': 0.700113722637731, 'l2_leaf_reg': 7.099786946755341, 'random_strength': 6.688129574746723}. Best is trial 12 with value: 0.33747220251551263.\n",
-      "[I 2025-03-20 21:15:49,049] Trial 17 finished with value: 1.61841931396652 and parameters: {'iterations': 518, 'learning_rate': 0.015200955659638398, 'depth': 6, 'subsample': 0.534977261704631, 'colsample_bylevel': 0.7246250429139481, 'l2_leaf_reg': 19.80600077744929, 'random_strength': 5.673411846650424}. Best is trial 12 with value: 0.33747220251551263.\n",
-      "[I 2025-03-20 21:17:36,811] Trial 18 finished with value: 0.5548288731588591 and parameters: {'iterations': 938, 'learning_rate': 0.07194118095780964, 'depth': 6, 'subsample': 0.6665507151821397, 'colsample_bylevel': 0.790138958827564, 'l2_leaf_reg': 12.301321434830928, 'random_strength': 6.796168129707886}. Best is trial 12 with value: 0.33747220251551263.\n",
-      "[I 2025-03-20 21:18:43,121] Trial 19 finished with value: 0.6231757616191587 and parameters: {'iterations': 719, 'learning_rate': 0.03682071614516647, 'depth': 6, 'subsample': 0.5866796565002883, 'colsample_bylevel': 0.7504213556109071, 'l2_leaf_reg': 10.442003723130012, 'random_strength': 8.36360703538023}. Best is trial 12 with value: 0.33747220251551263.\n"
+      "[I 2025-04-15 15:08:11,750] A new study created in memory with name: no-name-7d5ebf34-fc55-4b59-b90c-b22b64311951\n",
+      "[I 2025-04-15 15:08:48,891] Trial 0 finished with value: 0.646062322615293 and parameters: {'iterations': 898, 'learning_rate': 0.03145083099289301, 'depth': 4, 'subsample': 0.6444105892248361, 'colsample_bylevel': 0.6186096575774811, 'l2_leaf_reg': 17.64514783498297, 'random_strength': 9.88786820068755}. Best is trial 0 with value: 0.646062322615293.\n",
+      "[I 2025-04-15 15:09:26,486] Trial 1 finished with value: 0.9184440015716631 and parameters: {'iterations': 897, 'learning_rate': 0.011291887138100785, 'depth': 4, 'subsample': 0.7769524548266592, 'colsample_bylevel': 0.5647743432086197, 'l2_leaf_reg': 7.76546189160343, 'random_strength': 7.668666969032969}. Best is trial 0 with value: 0.646062322615293.\n",
+      "[I 2025-04-15 15:09:52,174] Trial 2 finished with value: 5.984384729372321 and parameters: {'iterations': 598, 'learning_rate': 0.004160874913367698, 'depth': 4, 'subsample': 0.5118790113991489, 'colsample_bylevel': 0.5110086574656876, 'l2_leaf_reg': 18.661041022397058, 'random_strength': 5.943307589760212}. Best is trial 0 with value: 0.646062322615293.\n",
+      "[I 2025-04-15 15:10:26,575] Trial 3 finished with value: 9.228750640503423 and parameters: {'iterations': 682, 'learning_rate': 0.0021975421219896157, 'depth': 5, 'subsample': 0.6755358151480091, 'colsample_bylevel': 0.6596729110914161, 'l2_leaf_reg': 14.736656536586944, 'random_strength': 7.136912007339907}. Best is trial 0 with value: 0.646062322615293.\n",
+      "[I 2025-04-15 15:11:10,104] Trial 4 finished with value: 0.6227136479365861 and parameters: {'iterations': 895, 'learning_rate': 0.019358429409891725, 'depth': 5, 'subsample': 0.6733355260936932, 'colsample_bylevel': 0.6148187038915378, 'l2_leaf_reg': 12.647440499487868, 'random_strength': 9.155074074386231}. Best is trial 4 with value: 0.6227136479365861.\n",
+      "[I 2025-04-15 15:11:42,996] Trial 5 finished with value: 2.612658773597153 and parameters: {'iterations': 607, 'learning_rate': 0.0064697585951560485, 'depth': 6, 'subsample': 0.5164557295530797, 'colsample_bylevel': 0.6841029537714532, 'l2_leaf_reg': 11.459664006812375, 'random_strength': 5.575562702191829}. Best is trial 4 with value: 0.6227136479365861.\n",
+      "[I 2025-04-15 15:12:09,055] Trial 6 finished with value: 5.0563199381852675 and parameters: {'iterations': 634, 'learning_rate': 0.0041378039430250224, 'depth': 4, 'subsample': 0.6296012316767893, 'colsample_bylevel': 0.6928068433595623, 'l2_leaf_reg': 18.60249289606864, 'random_strength': 6.29108320180428}. Best is trial 4 with value: 0.6227136479365861.\n",
+      "[I 2025-04-15 15:12:48,070] Trial 7 finished with value: 6.064226235109285 and parameters: {'iterations': 659, 'learning_rate': 0.003194121199679894, 'depth': 6, 'subsample': 0.6850485984896206, 'colsample_bylevel': 0.7100729641171083, 'l2_leaf_reg': 11.897665097253151, 'random_strength': 5.260194361969255}. Best is trial 4 with value: 0.6227136479365861.\n",
+      "[I 2025-04-15 15:13:18,641] Trial 8 finished with value: 9.453945919462235 and parameters: {'iterations': 700, 'learning_rate': 0.0020817037350174448, 'depth': 4, 'subsample': 0.6203198807137237, 'colsample_bylevel': 0.5266772331906592, 'l2_leaf_reg': 9.86926120392112, 'random_strength': 5.474003236157858}. Best is trial 4 with value: 0.6227136479365861.\n",
+      "[I 2025-04-15 15:14:05,709] Trial 9 finished with value: 0.49267024172790747 and parameters: {'iterations': 687, 'learning_rate': 0.052164593502382166, 'depth': 6, 'subsample': 0.6660663261019623, 'colsample_bylevel': 0.7314001039861265, 'l2_leaf_reg': 17.594770060423073, 'random_strength': 9.339230639028976}. Best is trial 9 with value: 0.49267024172790747.\n",
+      "[I 2025-04-15 15:14:39,848] Trial 10 finished with value: 0.5500268252840363 and parameters: {'iterations': 509, 'learning_rate': 0.07967431030832754, 'depth': 6, 'subsample': 0.7602272787841288, 'colsample_bylevel': 0.770229071536223, 'l2_leaf_reg': 15.503851488383855, 'random_strength': 8.492987824560748}. Best is trial 9 with value: 0.49267024172790747.\n",
+      "[I 2025-04-15 15:15:11,853] Trial 11 finished with value: 0.49023595998871994 and parameters: {'iterations': 500, 'learning_rate': 0.08779241979856825, 'depth': 6, 'subsample': 0.796075504979686, 'colsample_bylevel': 0.7976372716940351, 'l2_leaf_reg': 14.93516913511941, 'random_strength': 8.423535620874652}. Best is trial 11 with value: 0.49023595998871994.\n",
+      "[I 2025-04-15 15:16:03,033] Trial 12 finished with value: 0.49819783655018374 and parameters: {'iterations': 793, 'learning_rate': 0.09384534188388771, 'depth': 6, 'subsample': 0.7184900016067328, 'colsample_bylevel': 0.7836196128120625, 'l2_leaf_reg': 15.663609062290409, 'random_strength': 8.589899472344362}. Best is trial 11 with value: 0.49023595998871994.\n",
+      "[I 2025-04-15 15:16:29,620] Trial 13 finished with value: 0.4846695605629031 and parameters: {'iterations': 537, 'learning_rate': 0.042877612832129756, 'depth': 5, 'subsample': 0.5706634178949412, 'colsample_bylevel': 0.7465651513848619, 'l2_leaf_reg': 5.218951437778482, 'random_strength': 9.92877520468897}. Best is trial 13 with value: 0.4846695605629031.\n",
+      "[I 2025-04-15 15:16:57,316] Trial 14 finished with value: 0.7077219444540906 and parameters: {'iterations': 504, 'learning_rate': 0.034174948663442034, 'depth': 5, 'subsample': 0.569558755228507, 'colsample_bylevel': 0.7511709728692211, 'l2_leaf_reg': 7.512089049291371, 'random_strength': 7.737784610626449}. Best is trial 13 with value: 0.4846695605629031.\n",
+      "[I 2025-04-15 15:17:22,465] Trial 15 finished with value: 0.8736557180076023 and parameters: {'iterations': 560, 'learning_rate': 0.017043234915075773, 'depth': 5, 'subsample': 0.5741018674613553, 'colsample_bylevel': 0.7991676085324451, 'l2_leaf_reg': 5.982967530770232, 'random_strength': 9.756085101353683}. Best is trial 13 with value: 0.4846695605629031.\n",
+      "[I 2025-04-15 15:18:03,448] Trial 16 finished with value: 0.45211315741628616 and parameters: {'iterations': 779, 'learning_rate': 0.049661228877482096, 'depth': 5, 'subsample': 0.7989351952288142, 'colsample_bylevel': 0.7407581801522412, 'l2_leaf_reg': 5.003342383992051, 'random_strength': 8.391196012503668}. Best is trial 16 with value: 0.45211315741628616.\n",
+      "[I 2025-04-15 15:18:44,012] Trial 17 finished with value: 12.445283924362869 and parameters: {'iterations': 791, 'learning_rate': 0.0011590379392432504, 'depth': 5, 'subsample': 0.5835792160594921, 'colsample_bylevel': 0.7376020551248276, 'l2_leaf_reg': 5.088499987791439, 'random_strength': 7.132621480885446}. Best is trial 16 with value: 0.45211315741628616.\n",
+      "[I 2025-04-15 15:19:38,776] Trial 18 finished with value: 0.4335016501614797 and parameters: {'iterations': 970, 'learning_rate': 0.04163009683311559, 'depth': 5, 'subsample': 0.7311095765931528, 'colsample_bylevel': 0.6587798610446518, 'l2_leaf_reg': 7.70099272097691, 'random_strength': 9.134434266536488}. Best is trial 18 with value: 0.4335016501614797.\n",
+      "[I 2025-04-15 15:20:27,679] Trial 19 finished with value: 0.639097289338391 and parameters: {'iterations': 979, 'learning_rate': 0.012183824359018583, 'depth': 5, 'subsample': 0.7371061668104383, 'colsample_bylevel': 0.6302275501729142, 'l2_leaf_reg': 8.50453575427487, 'random_strength': 9.030709992017943}. Best is trial 18 with value: 0.4335016501614797.\n"
     ]
    },
    {
@ -579,8 +589,8 @@
     "output_type": "stream",
     "text": [
      "Best trial:\n",
-      "  RMSE: 0.33747220251551263\n",
-      "  Params: {'iterations': 1000, 'learning_rate': 0.09306028108980487, 'depth': 6, 'subsample': 0.5034416033403175, 'colsample_bylevel': 0.7452683981181829, 'l2_leaf_reg': 5.279315402542746, 'random_strength': 5.294692039250562}\n"
+      "  RMSE: 0.4335016501614797\n",
+      "  Params: {'iterations': 970, 'learning_rate': 0.04163009683311559, 'depth': 5, 'subsample': 0.7311095765931528, 'colsample_bylevel': 0.6587798610446518, 'l2_leaf_reg': 7.70099272097691, 'random_strength': 9.134434266536488}\n"
     ]
    }
   ],
@ -588,6 +598,7 @@
    "import optuna\n",
    "from catboost import CatBoostRegressor\n",
    "from sklearn.metrics import mean_squared_error\n",
+    "import numpy as np\n",
    "\n",
    "# Fungsi objective untuk Optuna\n",
    "def objective(trial):\n",
@ -639,18 +650,17 @@
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "0:\tlearn: 13.4115622\ttest: 19.0660791\tbest: 19.0660791 (0)\ttotal: 193ms\tremaining: 3m 13s\n",
-      "200:\tlearn: 0.3985082\ttest: 0.5382811\tbest: 0.5382373 (199)\ttotal: 21.7s\tremaining: 1m 26s\n",
-      "400:\tlearn: 0.2915505\ttest: 0.4082137\tbest: 0.4080268 (398)\ttotal: 38.8s\tremaining: 57.9s\n",
-      "600:\tlearn: 0.2452564\ttest: 0.3555501\tbest: 0.3555501 (600)\ttotal: 55.4s\tremaining: 36.8s\n",
-      "800:\tlearn: 0.2201339\ttest: 0.3435762\tbest: 0.3435629 (786)\ttotal: 1m 16s\tremaining: 18.9s\n",
-      "999:\tlearn: 0.2028099\ttest: 0.3375486\tbest: 0.3374722 (998)\ttotal: 1m 40s\tremaining: 0us\n",
+      "0:\tlearn: 14.2026549\ttest: 20.4309858\tbest: 20.4309858 (0)\ttotal: 111ms\tremaining: 1m 47s\n",
+      "200:\tlearn: 0.7177922\ttest: 1.1667817\tbest: 1.1667817 (200)\ttotal: 10.7s\tremaining: 40.8s\n",
+      "400:\tlearn: 0.4239619\ttest: 0.5853924\tbest: 0.5853924 (400)\ttotal: 22.1s\tremaining: 31.4s\n",
+      "600:\tlearn: 0.3515389\ttest: 0.5013024\tbest: 0.5012549 (597)\ttotal: 32.9s\tremaining: 20.2s\n",
+      "800:\tlearn: 0.3162819\ttest: 0.4551766\tbest: 0.4551445 (798)\ttotal: 44.8s\tremaining: 9.45s\n",
+      "969:\tlearn: 0.2948077\ttest: 0.4335017\tbest: 0.4335017 (969)\ttotal: 53.6s\tremaining: 0us\n",
      "\n",
-      "bestTest = 0.3374722025\n",
-      "bestIteration = 998\n",
+      "bestTest = 0.4335016502\n",
+      "bestIteration = 969\n",
      "\n",
-      "Shrink model to first 999 iterations.\n",
-      "Final RMSE: 0.33747220251551263\n"
+      "Final RMSE: 0.4335016501614797\n"
     ]
    }
   ],
@ -950,7 +960,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
--- a/notebook/catboost_info/catboost_training.json
+++ b/notebook/catboost_info/catboost_training.json
--- a/notebook/catboost_info/learn/events.out.tfevents
+++ b/notebook/catboost_info/learn/events.out.tfevents
--- a/notebook/catboost_info/learn_error.tsv
+++ b/notebook/catboost_info/learn_error.tsv
--- a/notebook/catboost_info/test/events.out.tfevents
+++ b/notebook/catboost_info/test/events.out.tfevents
--- a/notebook/catboost_info/test_error.tsv
+++ b/notebook/catboost_info/test_error.tsv
--- a/notebook/catboost_info/time_left.tsv
+++ b/notebook/catboost_info/time_left.tsv
--- a/pages/Prediksi.py
+++ b/pages/Prediksi.py
@ -13,7 +13,7 @@ import streamlit.components.v1 as components
 import time
 import json

-reg_model = pickle.load(open('regression_model_final.sav', 'rb'))
+reg_model = pickle.load(open('regression_model_final_1year.sav', 'rb'))
 class_model = pickle.load(open('clasification_final_model_smote.sav', 'rb'))
 train_file_path = 'X_train.csv'

@ -81,34 +81,6 @@ def process_employee_data(df):
    df["income_dependant_ratio"] = df["income"] / (df["dependant"] + 1)
    df["work_efficiency"] = df["avg_time_work"] / 8

-    def categorize_work_duration_months(months):
-        if months < 12:
-            return "Short-term"
-        elif 12 <= months <= 36:
-            return "Mid-term"
-        else:
-            return "Long-term"
-
-    df['active_work_category'] = df['active_work_months'].apply(categorize_work_duration_months)
-
-    # Work Stability Score
-    df['work_stability_score'] = df['active_work_months'] / (df['absent_90D'] + 1)
-
-    # Job Income to Position Score
-    position_score_mapping = {'Junior': 2, 'Staff': 1, 'Senior': 3, 'Manager': 4}
-    df['position_score'] = df['position'].map(position_score_mapping)
-    df['job_income_position_score'] = df['income'] / df['position_score']
-
-    # Education-Adjusted Income
-    education_score_mapping = {'SLTA': 1, 'D1': 2, 'D2': 3, 'D3': 4, 'S1': 5, 'S2': 6, 'S3': 7}
-    df['education_score'] = df['education'].map(education_score_mapping)
-    df['education_income_ratio'] = df['income'] / df['education_score']
-
-    # Weighted Satisfaction-Performance Score
-    df['weighted_satisfaction_performance'] = (
-        0.6 * df['job_satisfaction'] + 0.4 * df['performance_rating']
-    )
-
    job_satisfaction_mapping = {1.0: 'Low', 2.0: 'Medium', 3.0: 'High', 4.0: 'Very High'}
    df['job_satisfaction'] = df['job_satisfaction'].map(job_satisfaction_mapping)

@ -383,7 +355,7 @@ def show_prediction():

        # Kolom kategori
        cat_feature = ['departemen', 'position', 'domisili', 'marriage_stat', 'job_satisfaction', 
-                       'performance_rating', 'education', 'active_work_category', 'jenis_kelamin']
+                       'performance_rating', 'education', 'jenis_kelamin']

        X_test_class = df[expected_columns_class]
        X_test_reg = df[expected_columns_reg]
--- a/pages/pycache/Dashboard.cpython-310.pyc
+++ b/pages/pycache/Dashboard.cpython-310.pyc
--- a/pages/pycache/Dashboard_Pimpinan.cpython-310.pyc
+++ b/pages/pycache/Dashboard_Pimpinan.cpython-310.pyc
--- a/pages/pycache/Laporan.cpython-310.pyc
+++ b/pages/pycache/Laporan.cpython-310.pyc
--- a/pages/pycache/Prediksi.cpython-310.pyc
+++ b/pages/pycache/Prediksi.cpython-310.pyc
--- a/pages/pycache/login.cpython-310.pyc
+++ b/pages/pycache/login.cpython-310.pyc
--- a/regression_model_final_1year.sav
+++ b/regression_model_final_1year.sav