first commit

2025-07-11 09:05:19 +07:00
parent 1506c2d72c
commit 572cd4c572
22 changed files with 4533 additions and 2226 deletions
--- a/notebook/04_model_smote.ipynb
+++ b/notebook/04_model_smote.ipynb
@ -232,6 +232,18 @@
    "df.head()"
   ]
  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# errors='ignore' keeps this cell idempotent: re-running it is a no-op instead of a KeyError.\n",
+    "df = df.drop(columns=['active_work_category', 'work_stability_score', 'position_score',\n",
+    "                      'job_income_position_score', 'education_score', 'education_income_ratio',\n",
+    "                      'weighted_satisfaction_performance'], errors='ignore')"
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": 2,
@ -319,18 +331,16 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "cat_feature = ['departemen', 'position', 'domisili', 'marriage_stat', 'job_satisfaction', 'performance_rating',\n",
-    "               'education', 'active_work_category', 'jenis_kelamin']\n",
+    "               'education', 'jenis_kelamin']\n",
    "\n",
-    "X = df.drop(columns=['churn_status', 'employee_id', 'date_of_birth', 'join_date', 'resign_date', \n",
-    "                     'active_work_months', 'position_score', 'job_income_position_score',\n",
-    "                     'education_score', 'education_income_ratio'])\n",
+    "X = df.drop(columns=['churn_status', 'employee_id', 'date_of_birth', 'join_date', 'resign_date', 'active_work_months'])\n",
    "y = df['churn_status']\n",
    "\n",
    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)"
@ -434,7 +444,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
@ -456,7 +466,7 @@
    "\n",
    "cat_indices = [X.columns.get_loc(col) for col in cat_feature]\n",
    "\n",
-    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)\n",
+    "# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)\n",
    "\n",
    "sm = SMOTENC(categorical_features=cat_indices, random_state=42)\n",
    "X_train_res, y_train_res = sm.fit_resample(X_train, y_train)\n",
@ -467,7 +477,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
@ -534,7 +544,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
@ -568,7 +578,9 @@
       "      <th>departemen</th>\n",
       "      <th>position</th>\n",
       "      <th>income</th>\n",
-       "      <th>...</th>\n",
+       "      <th>total_komp</th>\n",
+       "      <th>job_satisfaction</th>\n",
+       "      <th>performance_rating</th>\n",
       "      <th>age_years</th>\n",
       "      <th>active_work</th>\n",
       "      <th>income_3_months</th>\n",
@ -576,9 +588,6 @@
       "      <th>total_income_work</th>\n",
       "      <th>income_dependant_ratio</th>\n",
       "      <th>work_efficiency</th>\n",
-       "      <th>active_work_category</th>\n",
-       "      <th>work_stability_score</th>\n",
-       "      <th>weighted_satisfaction_performance</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
@ -594,7 +603,9 @@
       "      <td>Corporate Strategy &amp; Communications</td>\n",
       "      <td>Staff</td>\n",
       "      <td>3.100943e+06</td>\n",
-       "      <td>...</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>Low</td>\n",
+       "      <td>Excellent</td>\n",
       "      <td>25</td>\n",
       "      <td>429</td>\n",
       "      <td>9.302829e+06</td>\n",
@ -602,9 +613,6 @@
       "      <td>4.341320e+07</td>\n",
       "      <td>3.100943e+06</td>\n",
       "      <td>1.12750</td>\n",
-       "      <td>Mid-term</td>\n",
-       "      <td>2.800000</td>\n",
-       "      <td>1.8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
@ -618,7 +626,9 @@
       "      <td>Engineering &amp; IT</td>\n",
       "      <td>Staff</td>\n",
       "      <td>1.146038e+06</td>\n",
-       "      <td>...</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>High</td>\n",
+       "      <td>Good</td>\n",
       "      <td>26</td>\n",
       "      <td>410</td>\n",
       "      <td>3.438114e+06</td>\n",
@ -626,9 +636,6 @@
       "      <td>1.489849e+07</td>\n",
       "      <td>1.146038e+06</td>\n",
       "      <td>1.22500</td>\n",
-       "      <td>Mid-term</td>\n",
-       "      <td>4.333333</td>\n",
-       "      <td>2.6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
@ -642,7 +649,9 @@
       "      <td>Creative &amp; Design</td>\n",
       "      <td>Manager</td>\n",
       "      <td>8.013796e+06</td>\n",
-       "      <td>...</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>High</td>\n",
+       "      <td>Excellent</td>\n",
       "      <td>43</td>\n",
       "      <td>776</td>\n",
       "      <td>2.404139e+07</td>\n",
@ -650,9 +659,6 @@
       "      <td>2.003449e+08</td>\n",
       "      <td>2.671265e+06</td>\n",
       "      <td>1.18125</td>\n",
-       "      <td>Mid-term</td>\n",
-       "      <td>25.000000</td>\n",
-       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
@ -666,7 +672,9 @@
       "      <td>Marketing</td>\n",
       "      <td>Manager</td>\n",
       "      <td>1.015002e+07</td>\n",
-       "      <td>...</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>Very High</td>\n",
+       "      <td>Outstanding</td>\n",
       "      <td>43</td>\n",
       "      <td>778</td>\n",
       "      <td>3.045007e+07</td>\n",
@ -674,9 +682,6 @@
       "      <td>2.537505e+08</td>\n",
       "      <td>2.537505e+06</td>\n",
       "      <td>1.22000</td>\n",
-       "      <td>Mid-term</td>\n",
-       "      <td>25.000000</td>\n",
-       "      <td>4.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
@ -690,7 +695,9 @@
       "      <td>Operations</td>\n",
       "      <td>Staff</td>\n",
       "      <td>2.548043e+06</td>\n",
-       "      <td>...</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>Low</td>\n",
+       "      <td>Excellent</td>\n",
       "      <td>36</td>\n",
       "      <td>405</td>\n",
       "      <td>7.644129e+06</td>\n",
@ -698,98 +705,257 @@
       "      <td>3.312456e+07</td>\n",
       "      <td>1.274022e+06</td>\n",
       "      <td>1.18250</td>\n",
-       "      <td>Mid-term</td>\n",
-       "      <td>1.444444</td>\n",
-       "      <td>1.8</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>12283</th>\n",
+       "      <td>Kabupaten Bogor</td>\n",
+       "      <td>Perempuan</td>\n",
+       "      <td>Married</td>\n",
+       "      <td>0</td>\n",
+       "      <td>SLTA</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>9.40</td>\n",
+       "      <td>HR</td>\n",
+       "      <td>Staff</td>\n",
+       "      <td>1.092339e+06</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>High</td>\n",
+       "      <td>Excellent</td>\n",
+       "      <td>40</td>\n",
+       "      <td>832</td>\n",
+       "      <td>3.277017e+06</td>\n",
+       "      <td>6.554034e+06</td>\n",
+       "      <td>2.949315e+07</td>\n",
+       "      <td>1.092339e+06</td>\n",
+       "      <td>1.17500</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>12284</th>\n",
+       "      <td>Kota Jakarta Barat</td>\n",
+       "      <td>Laki-laki</td>\n",
+       "      <td>Married</td>\n",
+       "      <td>2</td>\n",
+       "      <td>SLTA</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>9.11</td>\n",
+       "      <td>Corporate Strategy &amp; Communications</td>\n",
+       "      <td>Staff</td>\n",
+       "      <td>1.175199e+06</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>Medium</td>\n",
+       "      <td>Good</td>\n",
+       "      <td>32</td>\n",
+       "      <td>408</td>\n",
+       "      <td>3.525597e+06</td>\n",
+       "      <td>7.051194e+06</td>\n",
+       "      <td>1.527759e+07</td>\n",
+       "      <td>3.917330e+05</td>\n",
+       "      <td>1.13875</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>12285</th>\n",
+       "      <td>Tangerang</td>\n",
+       "      <td>Laki-laki</td>\n",
+       "      <td>Single</td>\n",
+       "      <td>0</td>\n",
+       "      <td>SLTA</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>9.82</td>\n",
+       "      <td>HR</td>\n",
+       "      <td>Staff</td>\n",
+       "      <td>1.479552e+06</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>Medium</td>\n",
+       "      <td>Good</td>\n",
+       "      <td>24</td>\n",
+       "      <td>539</td>\n",
+       "      <td>4.438656e+06</td>\n",
+       "      <td>8.877312e+06</td>\n",
+       "      <td>2.515238e+07</td>\n",
+       "      <td>1.479552e+06</td>\n",
+       "      <td>1.22750</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>12286</th>\n",
+       "      <td>Tangerang</td>\n",
+       "      <td>Perempuan</td>\n",
+       "      <td>Married</td>\n",
+       "      <td>1</td>\n",
+       "      <td>D1</td>\n",
+       "      <td>5.0</td>\n",
+       "      <td>9.17</td>\n",
+       "      <td>Finance &amp; Accounting</td>\n",
+       "      <td>Staff</td>\n",
+       "      <td>4.655009e+06</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>Low</td>\n",
+       "      <td>Low</td>\n",
+       "      <td>26</td>\n",
+       "      <td>918</td>\n",
+       "      <td>1.396503e+07</td>\n",
+       "      <td>2.793005e+07</td>\n",
+       "      <td>1.396503e+08</td>\n",
+       "      <td>2.327504e+06</td>\n",
+       "      <td>1.14625</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>12287</th>\n",
+       "      <td>Tangerang</td>\n",
+       "      <td>Perempuan</td>\n",
+       "      <td>Married</td>\n",
+       "      <td>2</td>\n",
+       "      <td>SLTA</td>\n",
+       "      <td>7.0</td>\n",
+       "      <td>9.37</td>\n",
+       "      <td>Creative &amp; Design</td>\n",
+       "      <td>Senior</td>\n",
+       "      <td>6.400201e+06</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>Very High</td>\n",
+       "      <td>Excellent</td>\n",
+       "      <td>47</td>\n",
+       "      <td>559</td>\n",
+       "      <td>1.920060e+07</td>\n",
+       "      <td>3.840121e+07</td>\n",
+       "      <td>1.152036e+08</td>\n",
+       "      <td>2.133400e+06</td>\n",
+       "      <td>1.17125</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
-       "<p>5 rows × 23 columns</p>\n",
+       "<p>12288 rows × 20 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
-       "             domisili jenis_kelamin marriage_stat  dependant education  \\\n",
-       "0  Kota Jakarta Timur     Perempuan        Single          0        D2   \n",
-       "1           Tangerang     Laki-laki        Single          0      SLTA   \n",
-       "2    Kabupaten Bekasi     Laki-laki       Married          2      SLTA   \n",
-       "3    Kabupaten Bekasi     Laki-laki       Married          3      SLTA   \n",
-       "4  Kota Jakarta Pusat     Laki-laki       Married          1      SLTA   \n",
+       "                 domisili jenis_kelamin marriage_stat  dependant education  \\\n",
+       "0      Kota Jakarta Timur     Perempuan        Single          0        D2   \n",
+       "1               Tangerang     Laki-laki        Single          0      SLTA   \n",
+       "2        Kabupaten Bekasi     Laki-laki       Married          2      SLTA   \n",
+       "3        Kabupaten Bekasi     Laki-laki       Married          3      SLTA   \n",
+       "4      Kota Jakarta Pusat     Laki-laki       Married          1      SLTA   \n",
+       "...                   ...           ...           ...        ...       ...   \n",
+       "12283     Kabupaten Bogor     Perempuan       Married          0      SLTA   \n",
+       "12284  Kota Jakarta Barat     Laki-laki       Married          2      SLTA   \n",
+       "12285           Tangerang     Laki-laki        Single          0      SLTA   \n",
+       "12286           Tangerang     Perempuan       Married          1        D1   \n",
+       "12287           Tangerang     Perempuan       Married          2      SLTA   \n",
       "\n",
-       "   absent_90D  avg_time_work                           departemen position  \\\n",
-       "0         4.0           9.02  Corporate Strategy & Communications    Staff   \n",
-       "1         2.0           9.80                     Engineering & IT    Staff   \n",
-       "2         0.0           9.45                    Creative & Design  Manager   \n",
-       "3         0.0           9.76                            Marketing  Manager   \n",
-       "4         8.0           9.46                           Operations    Staff   \n",
+       "       absent_90D  avg_time_work                           departemen  \\\n",
+       "0             4.0           9.02  Corporate Strategy & Communications   \n",
+       "1             2.0           9.80                     Engineering & IT   \n",
+       "2             0.0           9.45                    Creative & Design   \n",
+       "3             0.0           9.76                            Marketing   \n",
+       "4             8.0           9.46                           Operations   \n",
+       "...           ...            ...                                  ...   \n",
+       "12283         4.0           9.40                                   HR   \n",
+       "12284         3.0           9.11  Corporate Strategy & Communications   \n",
+       "12285         0.0           9.82                                   HR   \n",
+       "12286         5.0           9.17                 Finance & Accounting   \n",
+       "12287         7.0           9.37                    Creative & Design   \n",
       "\n",
-       "         income  ...  age_years active_work income_3_months  income_6_months  \\\n",
-       "0  3.100943e+06  ...         25         429    9.302829e+06     1.860566e+07   \n",
-       "1  1.146038e+06  ...         26         410    3.438114e+06     6.876228e+06   \n",
-       "2  8.013796e+06  ...         43         776    2.404139e+07     4.808278e+07   \n",
-       "3  1.015002e+07  ...         43         778    3.045007e+07     6.090013e+07   \n",
-       "4  2.548043e+06  ...         36         405    7.644129e+06     1.528826e+07   \n",
+       "      position        income  total_komp job_satisfaction performance_rating  \\\n",
+       "0        Staff  3.100943e+06         0.0              Low          Excellent   \n",
+       "1        Staff  1.146038e+06         0.0             High               Good   \n",
+       "2      Manager  8.013796e+06         0.0             High          Excellent   \n",
+       "3      Manager  1.015002e+07         0.0        Very High        Outstanding   \n",
+       "4        Staff  2.548043e+06         0.0              Low          Excellent   \n",
+       "...        ...           ...         ...              ...                ...   \n",
+       "12283    Staff  1.092339e+06         0.0             High          Excellent   \n",
+       "12284    Staff  1.175199e+06         0.0           Medium               Good   \n",
+       "12285    Staff  1.479552e+06         0.0           Medium               Good   \n",
+       "12286    Staff  4.655009e+06         0.0              Low                Low   \n",
+       "12287   Senior  6.400201e+06         0.0        Very High          Excellent   \n",
       "\n",
-       "   total_income_work  income_dependant_ratio  work_efficiency  \\\n",
-       "0       4.341320e+07            3.100943e+06          1.12750   \n",
-       "1       1.489849e+07            1.146038e+06          1.22500   \n",
-       "2       2.003449e+08            2.671265e+06          1.18125   \n",
-       "3       2.537505e+08            2.537505e+06          1.22000   \n",
-       "4       3.312456e+07            1.274022e+06          1.18250   \n",
+       "       age_years  active_work  income_3_months  income_6_months  \\\n",
+       "0             25          429     9.302829e+06     1.860566e+07   \n",
+       "1             26          410     3.438114e+06     6.876228e+06   \n",
+       "2             43          776     2.404139e+07     4.808278e+07   \n",
+       "3             43          778     3.045007e+07     6.090013e+07   \n",
+       "4             36          405     7.644129e+06     1.528826e+07   \n",
+       "...          ...          ...              ...              ...   \n",
+       "12283         40          832     3.277017e+06     6.554034e+06   \n",
+       "12284         32          408     3.525597e+06     7.051194e+06   \n",
+       "12285         24          539     4.438656e+06     8.877312e+06   \n",
+       "12286         26          918     1.396503e+07     2.793005e+07   \n",
+       "12287         47          559     1.920060e+07     3.840121e+07   \n",
       "\n",
-       "   active_work_category  work_stability_score  \\\n",
-       "0              Mid-term              2.800000   \n",
-       "1              Mid-term              4.333333   \n",
-       "2              Mid-term             25.000000   \n",
-       "3              Mid-term             25.000000   \n",
-       "4              Mid-term              1.444444   \n",
+       "       total_income_work  income_dependant_ratio  work_efficiency  \n",
+       "0           4.341320e+07            3.100943e+06          1.12750  \n",
+       "1           1.489849e+07            1.146038e+06          1.22500  \n",
+       "2           2.003449e+08            2.671265e+06          1.18125  \n",
+       "3           2.537505e+08            2.537505e+06          1.22000  \n",
+       "4           3.312456e+07            1.274022e+06          1.18250  \n",
+       "...                  ...                     ...              ...  \n",
+       "12283       2.949315e+07            1.092339e+06          1.17500  \n",
+       "12284       1.527759e+07            3.917330e+05          1.13875  \n",
+       "12285       2.515238e+07            1.479552e+06          1.22750  \n",
+       "12286       1.396503e+08            2.327504e+06          1.14625  \n",
+       "12287       1.152036e+08            2.133400e+06          1.17125  \n",
       "\n",
-       "   weighted_satisfaction_performance  \n",
-       "0                                1.8  \n",
-       "1                                2.6  \n",
-       "2                                3.0  \n",
-       "3                                4.0  \n",
-       "4                                1.8  \n",
-       "\n",
-       "[5 rows x 23 columns]"
+       "[12288 rows x 20 columns]"
      ]
     },
-     "execution_count": 13,
+     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
-    "X.head()"
+    "X"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "0:\ttest: 0.9289412\tbest: 0.9289412 (0)\ttotal: 209ms\tremaining: 3m 28s\n",
-      "200:\ttest: 0.9741746\tbest: 0.9742290 (183)\ttotal: 18.6s\tremaining: 1m 13s\n",
-      "400:\ttest: 0.9759996\tbest: 0.9759996 (400)\ttotal: 46.4s\tremaining: 1m 9s\n",
-      "600:\ttest: 0.9764617\tbest: 0.9765179 (576)\ttotal: 1m 8s\tremaining: 45.2s\n",
-      "800:\ttest: 0.9769282\tbest: 0.9769282 (800)\ttotal: 1m 27s\tremaining: 21.8s\n",
-      "999:\ttest: 0.9772636\tbest: 0.9772636 (999)\ttotal: 1m 55s\tremaining: 0us\n",
+      "0:\ttest: 0.9522383\tbest: 0.9522383 (0)\ttotal: 371ms\tremaining: 6m 10s\n",
+      "200:\ttest: 0.9749033\tbest: 0.9749426 (188)\ttotal: 15.7s\tremaining: 1m 2s\n",
+      "400:\ttest: 0.9751700\tbest: 0.9753029 (270)\ttotal: 32.3s\tremaining: 48.2s\n",
+      "600:\ttest: 0.9757035\tbest: 0.9757052 (599)\ttotal: 47.2s\tremaining: 31.3s\n",
+      "800:\ttest: 0.9760228\tbest: 0.9760585 (762)\ttotal: 1m 2s\tremaining: 15.5s\n",
+      "999:\ttest: 0.9761958\tbest: 0.9762119 (990)\ttotal: 1m 17s\tremaining: 0us\n",
      "\n",
-      "bestTest = 0.9772635842\n",
-      "bestIteration = 999\n",
-      "\n"
+      "bestTest = 0.9762119056\n",
+      "bestIteration = 990\n",
+      "\n",
+      "Shrink model to first 991 iterations.\n"
     ]
    },
    {
     "data": {
      "text/plain": [
-       "<catboost.core.CatBoostClassifier at 0x23afa652260>"
+       "<catboost.core.CatBoostClassifier at 0x1cb3665ba30>"
      ]
     },
-     "execution_count": 14,
+     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
@ -815,7 +981,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
@ -864,34 +1030,34 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 16,
+   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
-      "[I 2025-03-20 15:00:41,602] A new study created in memory with name: no-name-25d292cb-21f5-4296-b4f8-60550f9419f3\n",
-      "[I 2025-03-20 15:02:01,024] Trial 0 finished with value: 0.9732495439604305 and parameters: {'iterations': 885, 'learning_rate': 0.014037075669208683, 'depth': 6, 'subsample': 0.7347642926574603, 'colsample_bylevel': 0.628056031091488, 'l2_leaf_reg': 18.77314488035107, 'random_strength': 7.563973089971057}. Best is trial 0 with value: 0.9732495439604305.\n",
-      "[I 2025-03-20 15:03:17,085] Trial 1 finished with value: 0.9711497549205443 and parameters: {'iterations': 708, 'learning_rate': 0.012363987021937311, 'depth': 6, 'subsample': 0.5278155628504972, 'colsample_bylevel': 0.5923839431004807, 'l2_leaf_reg': 13.672040817303856, 'random_strength': 5.047595540352549}. Best is trial 0 with value: 0.9732495439604305.\n",
-      "[I 2025-03-20 15:04:48,051] Trial 2 finished with value: 0.9648459277561962 and parameters: {'iterations': 993, 'learning_rate': 0.001016519352919797, 'depth': 6, 'subsample': 0.7618891207779287, 'colsample_bylevel': 0.7088194762415717, 'l2_leaf_reg': 15.900968017238345, 'random_strength': 8.689884293115323}. Best is trial 0 with value: 0.9732495439604305.\n",
-      "[I 2025-03-20 15:05:54,013] Trial 3 finished with value: 0.9774063056111822 and parameters: {'iterations': 667, 'learning_rate': 0.053616906724399574, 'depth': 6, 'subsample': 0.5295054888915093, 'colsample_bylevel': 0.5919019897109834, 'l2_leaf_reg': 7.4789520770307085, 'random_strength': 9.983783900508625}. Best is trial 3 with value: 0.9774063056111822.\n",
-      "[I 2025-03-20 15:06:51,248] Trial 4 finished with value: 0.968719922573624 and parameters: {'iterations': 588, 'learning_rate': 0.012727034718753798, 'depth': 6, 'subsample': 0.6426790020391226, 'colsample_bylevel': 0.5191821820085022, 'l2_leaf_reg': 15.153416708004082, 'random_strength': 5.999182374601503}. Best is trial 3 with value: 0.9774063056111822.\n",
-      "[I 2025-03-20 15:08:16,470] Trial 5 finished with value: 0.9776203877562853 and parameters: {'iterations': 764, 'learning_rate': 0.07266083579909825, 'depth': 5, 'subsample': 0.5191615563488426, 'colsample_bylevel': 0.7302518018135663, 'l2_leaf_reg': 6.949740475447557, 'random_strength': 8.548754890547242}. Best is trial 5 with value: 0.9776203877562853.\n",
-      "[I 2025-03-20 15:09:10,733] Trial 6 finished with value: 0.9675852872045778 and parameters: {'iterations': 731, 'learning_rate': 0.013438903268682896, 'depth': 4, 'subsample': 0.5131276397700636, 'colsample_bylevel': 0.6859814430568429, 'l2_leaf_reg': 14.057136374487015, 'random_strength': 8.372349610104749}. Best is trial 5 with value: 0.9776203877562853.\n",
-      "[I 2025-03-20 15:10:01,815] Trial 7 finished with value: 0.977360813155348 and parameters: {'iterations': 642, 'learning_rate': 0.062155964429978755, 'depth': 5, 'subsample': 0.7661053749507193, 'colsample_bylevel': 0.6922388149758989, 'l2_leaf_reg': 5.284164428504138, 'random_strength': 6.232456641121701}. Best is trial 5 with value: 0.9776203877562853.\n",
-      "[I 2025-03-20 15:10:59,528] Trial 8 finished with value: 0.9653231525379885 and parameters: {'iterations': 754, 'learning_rate': 0.00208109923189855, 'depth': 5, 'subsample': 0.596137479445154, 'colsample_bylevel': 0.7574060768484172, 'l2_leaf_reg': 11.210689475672972, 'random_strength': 7.219680822486467}. Best is trial 5 with value: 0.9776203877562853.\n",
-      "[I 2025-03-20 15:12:14,806] Trial 9 finished with value: 0.9645587008781827 and parameters: {'iterations': 797, 'learning_rate': 0.002876994974206422, 'depth': 6, 'subsample': 0.7623605250175415, 'colsample_bylevel': 0.5792373869617546, 'l2_leaf_reg': 11.933900864140389, 'random_strength': 9.537789817015767}. Best is trial 5 with value: 0.9776203877562853.\n",
-      "[I 2025-03-20 15:13:20,035] Trial 10 finished with value: 0.9777185087394575 and parameters: {'iterations': 843, 'learning_rate': 0.09347955751894015, 'depth': 4, 'subsample': 0.5919290139946178, 'colsample_bylevel': 0.7823226761425409, 'l2_leaf_reg': 8.906630269811362, 'random_strength': 8.822275155143554}. Best is trial 10 with value: 0.9777185087394575.\n",
-      "[I 2025-03-20 15:14:26,950] Trial 11 finished with value: 0.9772716122615548 and parameters: {'iterations': 866, 'learning_rate': 0.09609340816462177, 'depth': 4, 'subsample': 0.588006292400181, 'colsample_bylevel': 0.7921811422710393, 'l2_leaf_reg': 8.754759631995423, 'random_strength': 8.823729815067631}. Best is trial 10 with value: 0.9777185087394575.\n",
-      "[I 2025-03-20 15:15:31,960] Trial 12 finished with value: 0.9770432579734449 and parameters: {'iterations': 842, 'learning_rate': 0.03150622304414378, 'depth': 4, 'subsample': 0.5815107101504045, 'colsample_bylevel': 0.7526738793511021, 'l2_leaf_reg': 8.96571033386253, 'random_strength': 7.901521206622082}. Best is trial 10 with value: 0.9777185087394575.\n",
-      "[I 2025-03-20 15:16:19,764] Trial 13 finished with value: 0.9762975385013358 and parameters: {'iterations': 500, 'learning_rate': 0.030813140864715382, 'depth': 5, 'subsample': 0.6807936806371823, 'colsample_bylevel': 0.7992043286169884, 'l2_leaf_reg': 6.54189325170135, 'random_strength': 9.2194946118034}. Best is trial 10 with value: 0.9777185087394575.\n",
-      "[I 2025-03-20 15:17:30,598] Trial 14 finished with value: 0.9771627871711274 and parameters: {'iterations': 960, 'learning_rate': 0.08725168283234642, 'depth': 4, 'subsample': 0.5611916358409927, 'colsample_bylevel': 0.7407206600062527, 'l2_leaf_reg': 10.877310276695814, 'random_strength': 8.279759834834062}. Best is trial 10 with value: 0.9777185087394575.\n",
-      "[I 2025-03-20 15:18:55,623] Trial 15 finished with value: 0.9773510010570307 and parameters: {'iterations': 921, 'learning_rate': 0.030478650701436915, 'depth': 5, 'subsample': 0.6426668840891531, 'colsample_bylevel': 0.6580544384365046, 'l2_leaf_reg': 9.275448656004336, 'random_strength': 7.054540552553462}. Best is trial 10 with value: 0.9777185087394575.\n",
-      "[I 2025-03-20 15:20:03,449] Trial 16 finished with value: 0.9770013335533622 and parameters: {'iterations': 802, 'learning_rate': 0.04717986995915269, 'depth': 4, 'subsample': 0.5502437476216447, 'colsample_bylevel': 0.7293227273801556, 'l2_leaf_reg': 5.910885625677492, 'random_strength': 9.224856294634591}. Best is trial 10 with value: 0.9777185087394575.\n",
-      "[I 2025-03-20 15:21:08,738] Trial 17 finished with value: 0.9664417317461521 and parameters: {'iterations': 794, 'learning_rate': 0.005786331156808205, 'depth': 5, 'subsample': 0.5027107752205993, 'colsample_bylevel': 0.7639160019272435, 'l2_leaf_reg': 7.568889551395079, 'random_strength': 7.889004164110706}. Best is trial 10 with value: 0.9777185087394575.\n",
-      "[I 2025-03-20 15:22:13,898] Trial 18 finished with value: 0.9765954694866041 and parameters: {'iterations': 919, 'learning_rate': 0.02240626498044735, 'depth': 4, 'subsample': 0.6106354847964582, 'colsample_bylevel': 0.6625660136271165, 'l2_leaf_reg': 10.544957798217377, 'random_strength': 6.657969347763323}. Best is trial 10 with value: 0.9777185087394575.\n",
-      "[I 2025-03-20 15:23:05,997] Trial 19 finished with value: 0.9776159277115958 and parameters: {'iterations': 597, 'learning_rate': 0.07175908110255513, 'depth': 5, 'subsample': 0.681492227991095, 'colsample_bylevel': 0.7774154754002688, 'l2_leaf_reg': 7.472076834725921, 'random_strength': 9.99934503292809}. Best is trial 10 with value: 0.9777185087394575.\n"
+      "[I 2025-04-15 14:46:04,525] A new study created in memory with name: no-name-a82f1532-93fd-444e-a2a9-e64f1ab1581d\n",
+      "[I 2025-04-15 14:46:47,591] Trial 0 finished with value: 0.9760700762221638 and parameters: {'iterations': 879, 'learning_rate': 0.03194622545083888, 'depth': 4, 'subsample': 0.6984770397557132, 'colsample_bylevel': 0.6107784129933028, 'l2_leaf_reg': 19.31004315136751, 'random_strength': 8.538025613654153}. Best is trial 0 with value: 0.9760700762221638.\n",
+      "[I 2025-04-15 14:47:49,613] Trial 1 finished with value: 0.968974145120934 and parameters: {'iterations': 987, 'learning_rate': 0.00497466285240473, 'depth': 6, 'subsample': 0.7324064062513642, 'colsample_bylevel': 0.5860390386187008, 'l2_leaf_reg': 13.6700598295073, 'random_strength': 8.68535276828941}. Best is trial 0 with value: 0.9760700762221638.\n",
+      "[I 2025-04-15 14:48:52,501] Trial 2 finished with value: 0.9771271068136104 and parameters: {'iterations': 958, 'learning_rate': 0.029644496717173407, 'depth': 5, 'subsample': 0.7324841402342017, 'colsample_bylevel': 0.7540959396418829, 'l2_leaf_reg': 7.126960456494959, 'random_strength': 7.4284219069859745}. Best is trial 2 with value: 0.9771271068136104.\n",
+      "[I 2025-04-15 14:49:44,391] Trial 3 finished with value: 0.9687270586451275 and parameters: {'iterations': 778, 'learning_rate': 0.002458862957591489, 'depth': 6, 'subsample': 0.5532293644121917, 'colsample_bylevel': 0.5854134727534657, 'l2_leaf_reg': 16.679333851474986, 'random_strength': 7.393624819420116}. Best is trial 2 with value: 0.9771271068136104.\n",
+      "[I 2025-04-15 14:50:18,713] Trial 4 finished with value: 0.9763555190823012 and parameters: {'iterations': 562, 'learning_rate': 0.036725493598596365, 'depth': 5, 'subsample': 0.7722133171879484, 'colsample_bylevel': 0.6532823306754292, 'l2_leaf_reg': 12.030754990066507, 'random_strength': 8.877550003001012}. Best is trial 2 with value: 0.9771271068136104.\n",
+      "[I 2025-04-15 14:51:17,350] Trial 5 finished with value: 0.968570065072052 and parameters: {'iterations': 962, 'learning_rate': 0.0011370325299877532, 'depth': 5, 'subsample': 0.6728515618578689, 'colsample_bylevel': 0.6826782056061581, 'l2_leaf_reg': 13.623633572161166, 'random_strength': 6.601819735133423}. Best is trial 2 with value: 0.9771271068136104.\n",
+      "[I 2025-04-15 14:51:57,207] Trial 6 finished with value: 0.9687047584216792 and parameters: {'iterations': 617, 'learning_rate': 0.0015432741560526652, 'depth': 5, 'subsample': 0.7499204041791451, 'colsample_bylevel': 0.7446512678837157, 'l2_leaf_reg': 10.072108741396685, 'random_strength': 7.00923150345308}. Best is trial 2 with value: 0.9771271068136104.\n",
+      "[I 2025-04-15 14:52:37,654] Trial 7 finished with value: 0.976307350599653 and parameters: {'iterations': 619, 'learning_rate': 0.09249177862479464, 'depth': 5, 'subsample': 0.5961272939306226, 'colsample_bylevel': 0.6454652012357972, 'l2_leaf_reg': 13.952857635588412, 'random_strength': 5.3135323404658195}. Best is trial 2 with value: 0.9771271068136104.\n",
+      "[I 2025-04-15 14:53:16,002] Trial 8 finished with value: 0.9680910562723839 and parameters: {'iterations': 668, 'learning_rate': 0.0017846267770558202, 'depth': 5, 'subsample': 0.6091299857459994, 'colsample_bylevel': 0.5146530801783137, 'l2_leaf_reg': 6.463177992505614, 'random_strength': 9.83403822639651}. Best is trial 2 with value: 0.9771271068136104.\n",
+      "[I 2025-04-15 14:54:07,117] Trial 9 finished with value: 0.9701837092407667 and parameters: {'iterations': 899, 'learning_rate': 0.009675766544107381, 'depth': 5, 'subsample': 0.7887519851916923, 'colsample_bylevel': 0.7583643579850647, 'l2_leaf_reg': 12.884965304659517, 'random_strength': 7.548006066067643}. Best is trial 2 with value: 0.9771271068136104.\n",
+      "[I 2025-04-15 14:54:49,239] Trial 10 finished with value: 0.9754644021533097 and parameters: {'iterations': 777, 'learning_rate': 0.024122661029117063, 'depth': 4, 'subsample': 0.5013447329900989, 'colsample_bylevel': 0.7800307305883676, 'l2_leaf_reg': 5.638932266363305, 'random_strength': 5.942927125428197}. Best is trial 2 with value: 0.9771271068136104.\n",
+      "[I 2025-04-15 14:55:16,702] Trial 11 finished with value: 0.9764081476096389 and parameters: {'iterations': 508, 'learning_rate': 0.04726537771631546, 'depth': 4, 'subsample': 0.7937595144792454, 'colsample_bylevel': 0.6888257437132431, 'l2_leaf_reg': 9.252398775691933, 'random_strength': 8.42482116769386}. Best is trial 2 with value: 0.9771271068136104.\n",
+      "[I 2025-04-15 14:55:43,712] Trial 12 finished with value: 0.9769504890439002 and parameters: {'iterations': 520, 'learning_rate': 0.07517500503326581, 'depth': 4, 'subsample': 0.7116563324529114, 'colsample_bylevel': 0.7144947743480992, 'l2_leaf_reg': 8.350160243659916, 'random_strength': 8.168242005926878}. Best is trial 2 with value: 0.9771271068136104.\n",
+      "[I 2025-04-15 14:56:21,069] Trial 13 finished with value: 0.9774294978435685 and parameters: {'iterations': 703, 'learning_rate': 0.06983698579391724, 'depth': 4, 'subsample': 0.7065367059268508, 'colsample_bylevel': 0.724103178768751, 'l2_leaf_reg': 8.023903029922751, 'random_strength': 7.928311734643061}. Best is trial 13 with value: 0.9774294978435685.\n",
+      "[I 2025-04-15 14:57:09,353] Trial 14 finished with value: 0.9731246627091203 and parameters: {'iterations': 724, 'learning_rate': 0.015490359934301434, 'depth': 6, 'subsample': 0.6446878985673278, 'colsample_bylevel': 0.7920667146800474, 'l2_leaf_reg': 7.371821145261041, 'random_strength': 6.451885650217026}. Best is trial 13 with value: 0.9774294978435685.\n",
+      "[I 2025-04-15 14:57:52,357] Trial 15 finished with value: 0.9738400538773399 and parameters: {'iterations': 859, 'learning_rate': 0.014795165553926048, 'depth': 4, 'subsample': 0.665044880376952, 'colsample_bylevel': 0.7290363615996102, 'l2_leaf_reg': 5.2279489954943, 'random_strength': 9.524780688568056}. Best is trial 13 with value: 0.9774294978435685.\n",
+      "[I 2025-04-15 14:58:30,199] Trial 16 finished with value: 0.9766686142195145 and parameters: {'iterations': 722, 'learning_rate': 0.058843073710673745, 'depth': 4, 'subsample': 0.7070259824991089, 'colsample_bylevel': 0.7046723312898295, 'l2_leaf_reg': 9.886755559850055, 'random_strength': 7.515414918520503}. Best is trial 13 with value: 0.9774294978435685.\n",
+      "[I 2025-04-15 14:59:29,101] Trial 17 finished with value: 0.9759648191674881 and parameters: {'iterations': 836, 'learning_rate': 0.0201164375942546, 'depth': 6, 'subsample': 0.7464329768357236, 'colsample_bylevel': 0.751756591889216, 'l2_leaf_reg': 11.194529638284472, 'random_strength': 7.890747626459661}. Best is trial 13 with value: 0.9774294978435685.\n",
+      "[I 2025-04-15 15:00:13,792] Trial 18 finished with value: 0.9691284626671959 and parameters: {'iterations': 941, 'learning_rate': 0.008139386305286263, 'depth': 4, 'subsample': 0.6324467522547773, 'colsample_bylevel': 0.6640606522826965, 'l2_leaf_reg': 7.637155074182216, 'random_strength': 9.118659113863696}. Best is trial 13 with value: 0.9774294978435685.\n",
+      "[I 2025-04-15 15:01:04,807] Trial 19 finished with value: 0.9772921284671273 and parameters: {'iterations': 812, 'learning_rate': 0.047376913098252045, 'depth': 5, 'subsample': 0.6838122071503733, 'colsample_bylevel': 0.7976712282377681, 'l2_leaf_reg': 15.51559416611185, 'random_strength': 6.860978796602465}. Best is trial 13 with value: 0.9774294978435685.\n"
     ]
    },
    {
@ -899,15 +1065,15 @@
     "output_type": "stream",
     "text": [
      "Best Trial:\n",
-      "AUC: 0.9777185087394575\n",
+      "AUC: 0.9774294978435685\n",
      "Params:\n",
-      "  iterations: 843\n",
-      "  learning_rate: 0.09347955751894015\n",
+      "  iterations: 703\n",
+      "  learning_rate: 0.06983698579391724\n",
      "  depth: 4\n",
-      "  subsample: 0.5919290139946178\n",
-      "  colsample_bylevel: 0.7823226761425409\n",
-      "  l2_leaf_reg: 8.906630269811362\n",
-      "  random_strength: 8.822275155143554\n"
+      "  subsample: 0.7065367059268508\n",
+      "  colsample_bylevel: 0.724103178768751\n",
+      "  l2_leaf_reg: 8.023903029922751\n",
+      "  random_strength: 7.928311734643061\n"
     ]
    }
   ],
@ -926,24 +1092,22 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 17,
+   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "0:\ttest: 0.9345234\tbest: 0.9345234 (0)\ttotal: 67.6ms\tremaining: 56.9s\n",
-      "100:\ttest: 0.9676504\tbest: 0.9677699 (99)\ttotal: 7.48s\tremaining: 55s\n",
-      "200:\ttest: 0.9763805\tbest: 0.9763805 (200)\ttotal: 14.6s\tremaining: 46.6s\n",
-      "300:\ttest: 0.9768756\tbest: 0.9770299 (281)\ttotal: 21.1s\tremaining: 38s\n",
+      "0:\ttest: 0.8633242\tbest: 0.8633242 (0)\ttotal: 105ms\tremaining: 1m 13s\n",
+      "100:\ttest: 0.9693666\tbest: 0.9696128 (84)\ttotal: 4.78s\tremaining: 28.5s\n",
      "Stopped by overfitting detector  (50 iterations wait)\n",
      "\n",
-      "bestTest = 0.9770298778\n",
-      "bestIteration = 281\n",
+      "bestTest = 0.9696128235\n",
+      "bestIteration = 84\n",
      "\n",
-      "Shrink model to first 282 iterations.\n",
-      "Learn AUC: 0.9921 | Test AUC: 0.9770\n"
+      "Shrink model to first 85 iterations.\n",
+      "Learn AUC: 0.9742 | Test AUC: 0.9696\n"
     ]
    }
   ],
@ -989,7 +1153,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [