first commit

This commit is contained in:
Jesselyn Mu
2025-07-11 09:05:19 +07:00
parent 1506c2d72c
commit 572cd4c572
22 changed files with 4533 additions and 2226 deletions

View File

@ -232,6 +232,18 @@
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"df = df.drop(columns=['active_work_category', 'work_stability_score', \n",
" 'position_score', 'job_income_position_score',\n",
" 'education_score', 'education_income_ratio',\n",
" 'weighted_satisfaction_performance'])"
]
},
{
"cell_type": "code",
"execution_count": 2,
@ -319,18 +331,16 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.model_selection import train_test_split\n",
"\n",
"cat_feature = ['departemen', 'position', 'domisili', 'marriage_stat', 'job_satisfaction', 'performance_rating',\n",
" 'education', 'active_work_category', 'jenis_kelamin']\n",
" 'education', 'jenis_kelamin']\n",
"\n",
"X = df.drop(columns=['churn_status', 'employee_id', 'date_of_birth', 'join_date', 'resign_date', \n",
" 'active_work_months', 'position_score', 'job_income_position_score',\n",
" 'education_score', 'education_income_ratio'])\n",
"X = df.drop(columns=['churn_status', 'employee_id', 'date_of_birth', 'join_date', 'resign_date', 'active_work_months'])\n",
"y = df['churn_status']\n",
"\n",
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)"
@ -434,7 +444,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 7,
"metadata": {},
"outputs": [
{
@ -456,7 +466,7 @@
"\n",
"cat_indices = [X.columns.get_loc(col) for col in cat_feature]\n",
"\n",
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)\n",
"# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)\n",
"\n",
"sm = SMOTENC(categorical_features=cat_indices, random_state=42)\n",
"X_train_res, y_train_res = sm.fit_resample(X_train, y_train)\n",
@ -467,7 +477,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 8,
"metadata": {},
"outputs": [
{
@ -534,7 +544,7 @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": 10,
"metadata": {},
"outputs": [
{
@ -568,7 +578,9 @@
" <th>departemen</th>\n",
" <th>position</th>\n",
" <th>income</th>\n",
" <th>...</th>\n",
" <th>total_komp</th>\n",
" <th>job_satisfaction</th>\n",
" <th>performance_rating</th>\n",
" <th>age_years</th>\n",
" <th>active_work</th>\n",
" <th>income_3_months</th>\n",
@ -576,9 +588,6 @@
" <th>total_income_work</th>\n",
" <th>income_dependant_ratio</th>\n",
" <th>work_efficiency</th>\n",
" <th>active_work_category</th>\n",
" <th>work_stability_score</th>\n",
" <th>weighted_satisfaction_performance</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
@ -594,7 +603,9 @@
" <td>Corporate Strategy &amp; Communications</td>\n",
" <td>Staff</td>\n",
" <td>3.100943e+06</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>Low</td>\n",
" <td>Excellent</td>\n",
" <td>25</td>\n",
" <td>429</td>\n",
" <td>9.302829e+06</td>\n",
@ -602,9 +613,6 @@
" <td>4.341320e+07</td>\n",
" <td>3.100943e+06</td>\n",
" <td>1.12750</td>\n",
" <td>Mid-term</td>\n",
" <td>2.800000</td>\n",
" <td>1.8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
@ -618,7 +626,9 @@
" <td>Engineering &amp; IT</td>\n",
" <td>Staff</td>\n",
" <td>1.146038e+06</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>High</td>\n",
" <td>Good</td>\n",
" <td>26</td>\n",
" <td>410</td>\n",
" <td>3.438114e+06</td>\n",
@ -626,9 +636,6 @@
" <td>1.489849e+07</td>\n",
" <td>1.146038e+06</td>\n",
" <td>1.22500</td>\n",
" <td>Mid-term</td>\n",
" <td>4.333333</td>\n",
" <td>2.6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
@ -642,7 +649,9 @@
" <td>Creative &amp; Design</td>\n",
" <td>Manager</td>\n",
" <td>8.013796e+06</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>High</td>\n",
" <td>Excellent</td>\n",
" <td>43</td>\n",
" <td>776</td>\n",
" <td>2.404139e+07</td>\n",
@ -650,9 +659,6 @@
" <td>2.003449e+08</td>\n",
" <td>2.671265e+06</td>\n",
" <td>1.18125</td>\n",
" <td>Mid-term</td>\n",
" <td>25.000000</td>\n",
" <td>3.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
@ -666,7 +672,9 @@
" <td>Marketing</td>\n",
" <td>Manager</td>\n",
" <td>1.015002e+07</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>Very High</td>\n",
" <td>Outstanding</td>\n",
" <td>43</td>\n",
" <td>778</td>\n",
" <td>3.045007e+07</td>\n",
@ -674,9 +682,6 @@
" <td>2.537505e+08</td>\n",
" <td>2.537505e+06</td>\n",
" <td>1.22000</td>\n",
" <td>Mid-term</td>\n",
" <td>25.000000</td>\n",
" <td>4.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
@ -690,7 +695,9 @@
" <td>Operations</td>\n",
" <td>Staff</td>\n",
" <td>2.548043e+06</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>Low</td>\n",
" <td>Excellent</td>\n",
" <td>36</td>\n",
" <td>405</td>\n",
" <td>7.644129e+06</td>\n",
@ -698,98 +705,257 @@
" <td>3.312456e+07</td>\n",
" <td>1.274022e+06</td>\n",
" <td>1.18250</td>\n",
" <td>Mid-term</td>\n",
" <td>1.444444</td>\n",
" <td>1.8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12283</th>\n",
" <td>Kabupaten Bogor</td>\n",
" <td>Perempuan</td>\n",
" <td>Married</td>\n",
" <td>0</td>\n",
" <td>SLTA</td>\n",
" <td>4.0</td>\n",
" <td>9.40</td>\n",
" <td>HR</td>\n",
" <td>Staff</td>\n",
" <td>1.092339e+06</td>\n",
" <td>0.0</td>\n",
" <td>High</td>\n",
" <td>Excellent</td>\n",
" <td>40</td>\n",
" <td>832</td>\n",
" <td>3.277017e+06</td>\n",
" <td>6.554034e+06</td>\n",
" <td>2.949315e+07</td>\n",
" <td>1.092339e+06</td>\n",
" <td>1.17500</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12284</th>\n",
" <td>Kota Jakarta Barat</td>\n",
" <td>Laki-laki</td>\n",
" <td>Married</td>\n",
" <td>2</td>\n",
" <td>SLTA</td>\n",
" <td>3.0</td>\n",
" <td>9.11</td>\n",
" <td>Corporate Strategy &amp; Communications</td>\n",
" <td>Staff</td>\n",
" <td>1.175199e+06</td>\n",
" <td>0.0</td>\n",
" <td>Medium</td>\n",
" <td>Good</td>\n",
" <td>32</td>\n",
" <td>408</td>\n",
" <td>3.525597e+06</td>\n",
" <td>7.051194e+06</td>\n",
" <td>1.527759e+07</td>\n",
" <td>3.917330e+05</td>\n",
" <td>1.13875</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12285</th>\n",
" <td>Tangerang</td>\n",
" <td>Laki-laki</td>\n",
" <td>Single</td>\n",
" <td>0</td>\n",
" <td>SLTA</td>\n",
" <td>0.0</td>\n",
" <td>9.82</td>\n",
" <td>HR</td>\n",
" <td>Staff</td>\n",
" <td>1.479552e+06</td>\n",
" <td>0.0</td>\n",
" <td>Medium</td>\n",
" <td>Good</td>\n",
" <td>24</td>\n",
" <td>539</td>\n",
" <td>4.438656e+06</td>\n",
" <td>8.877312e+06</td>\n",
" <td>2.515238e+07</td>\n",
" <td>1.479552e+06</td>\n",
" <td>1.22750</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12286</th>\n",
" <td>Tangerang</td>\n",
" <td>Perempuan</td>\n",
" <td>Married</td>\n",
" <td>1</td>\n",
" <td>D1</td>\n",
" <td>5.0</td>\n",
" <td>9.17</td>\n",
" <td>Finance &amp; Accounting</td>\n",
" <td>Staff</td>\n",
" <td>4.655009e+06</td>\n",
" <td>0.0</td>\n",
" <td>Low</td>\n",
" <td>Low</td>\n",
" <td>26</td>\n",
" <td>918</td>\n",
" <td>1.396503e+07</td>\n",
" <td>2.793005e+07</td>\n",
" <td>1.396503e+08</td>\n",
" <td>2.327504e+06</td>\n",
" <td>1.14625</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12287</th>\n",
" <td>Tangerang</td>\n",
" <td>Perempuan</td>\n",
" <td>Married</td>\n",
" <td>2</td>\n",
" <td>SLTA</td>\n",
" <td>7.0</td>\n",
" <td>9.37</td>\n",
" <td>Creative &amp; Design</td>\n",
" <td>Senior</td>\n",
" <td>6.400201e+06</td>\n",
" <td>0.0</td>\n",
" <td>Very High</td>\n",
" <td>Excellent</td>\n",
" <td>47</td>\n",
" <td>559</td>\n",
" <td>1.920060e+07</td>\n",
" <td>3.840121e+07</td>\n",
" <td>1.152036e+08</td>\n",
" <td>2.133400e+06</td>\n",
" <td>1.17125</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 23 columns</p>\n",
"<p>12288 rows × 20 columns</p>\n",
"</div>"
],
"text/plain": [
" domisili jenis_kelamin marriage_stat dependant education \\\n",
"0 Kota Jakarta Timur Perempuan Single 0 D2 \n",
"1 Tangerang Laki-laki Single 0 SLTA \n",
"2 Kabupaten Bekasi Laki-laki Married 2 SLTA \n",
"3 Kabupaten Bekasi Laki-laki Married 3 SLTA \n",
"4 Kota Jakarta Pusat Laki-laki Married 1 SLTA \n",
" domisili jenis_kelamin marriage_stat dependant education \\\n",
"0 Kota Jakarta Timur Perempuan Single 0 D2 \n",
"1 Tangerang Laki-laki Single 0 SLTA \n",
"2 Kabupaten Bekasi Laki-laki Married 2 SLTA \n",
"3 Kabupaten Bekasi Laki-laki Married 3 SLTA \n",
"4 Kota Jakarta Pusat Laki-laki Married 1 SLTA \n",
"... ... ... ... ... ... \n",
"12283 Kabupaten Bogor Perempuan Married 0 SLTA \n",
"12284 Kota Jakarta Barat Laki-laki Married 2 SLTA \n",
"12285 Tangerang Laki-laki Single 0 SLTA \n",
"12286 Tangerang Perempuan Married 1 D1 \n",
"12287 Tangerang Perempuan Married 2 SLTA \n",
"\n",
" absent_90D avg_time_work departemen position \\\n",
"0 4.0 9.02 Corporate Strategy & Communications Staff \n",
"1 2.0 9.80 Engineering & IT Staff \n",
"2 0.0 9.45 Creative & Design Manager \n",
"3 0.0 9.76 Marketing Manager \n",
"4 8.0 9.46 Operations Staff \n",
" absent_90D avg_time_work departemen \\\n",
"0 4.0 9.02 Corporate Strategy & Communications \n",
"1 2.0 9.80 Engineering & IT \n",
"2 0.0 9.45 Creative & Design \n",
"3 0.0 9.76 Marketing \n",
"4 8.0 9.46 Operations \n",
"... ... ... ... \n",
"12283 4.0 9.40 HR \n",
"12284 3.0 9.11 Corporate Strategy & Communications \n",
"12285 0.0 9.82 HR \n",
"12286 5.0 9.17 Finance & Accounting \n",
"12287 7.0 9.37 Creative & Design \n",
"\n",
" income ... age_years active_work income_3_months income_6_months \\\n",
"0 3.100943e+06 ... 25 429 9.302829e+06 1.860566e+07 \n",
"1 1.146038e+06 ... 26 410 3.438114e+06 6.876228e+06 \n",
"2 8.013796e+06 ... 43 776 2.404139e+07 4.808278e+07 \n",
"3 1.015002e+07 ... 43 778 3.045007e+07 6.090013e+07 \n",
"4 2.548043e+06 ... 36 405 7.644129e+06 1.528826e+07 \n",
" position income total_komp job_satisfaction performance_rating \\\n",
"0 Staff 3.100943e+06 0.0 Low Excellent \n",
"1 Staff 1.146038e+06 0.0 High Good \n",
"2 Manager 8.013796e+06 0.0 High Excellent \n",
"3 Manager 1.015002e+07 0.0 Very High Outstanding \n",
"4 Staff 2.548043e+06 0.0 Low Excellent \n",
"... ... ... ... ... ... \n",
"12283 Staff 1.092339e+06 0.0 High Excellent \n",
"12284 Staff 1.175199e+06 0.0 Medium Good \n",
"12285 Staff 1.479552e+06 0.0 Medium Good \n",
"12286 Staff 4.655009e+06 0.0 Low Low \n",
"12287 Senior 6.400201e+06 0.0 Very High Excellent \n",
"\n",
" total_income_work income_dependant_ratio work_efficiency \\\n",
"0 4.341320e+07 3.100943e+06 1.12750 \n",
"1 1.489849e+07 1.146038e+06 1.22500 \n",
"2 2.003449e+08 2.671265e+06 1.18125 \n",
"3 2.537505e+08 2.537505e+06 1.22000 \n",
"4 3.312456e+07 1.274022e+06 1.18250 \n",
" age_years active_work income_3_months income_6_months \\\n",
"0 25 429 9.302829e+06 1.860566e+07 \n",
"1 26 410 3.438114e+06 6.876228e+06 \n",
"2 43 776 2.404139e+07 4.808278e+07 \n",
"3 43 778 3.045007e+07 6.090013e+07 \n",
"4 36 405 7.644129e+06 1.528826e+07 \n",
"... ... ... ... ... \n",
"12283 40 832 3.277017e+06 6.554034e+06 \n",
"12284 32 408 3.525597e+06 7.051194e+06 \n",
"12285 24 539 4.438656e+06 8.877312e+06 \n",
"12286 26 918 1.396503e+07 2.793005e+07 \n",
"12287 47 559 1.920060e+07 3.840121e+07 \n",
"\n",
" active_work_category work_stability_score \\\n",
"0 Mid-term 2.800000 \n",
"1 Mid-term 4.333333 \n",
"2 Mid-term 25.000000 \n",
"3 Mid-term 25.000000 \n",
"4 Mid-term 1.444444 \n",
" total_income_work income_dependant_ratio work_efficiency \n",
"0 4.341320e+07 3.100943e+06 1.12750 \n",
"1 1.489849e+07 1.146038e+06 1.22500 \n",
"2 2.003449e+08 2.671265e+06 1.18125 \n",
"3 2.537505e+08 2.537505e+06 1.22000 \n",
"4 3.312456e+07 1.274022e+06 1.18250 \n",
"... ... ... ... \n",
"12283 2.949315e+07 1.092339e+06 1.17500 \n",
"12284 1.527759e+07 3.917330e+05 1.13875 \n",
"12285 2.515238e+07 1.479552e+06 1.22750 \n",
"12286 1.396503e+08 2.327504e+06 1.14625 \n",
"12287 1.152036e+08 2.133400e+06 1.17125 \n",
"\n",
" weighted_satisfaction_performance \n",
"0 1.8 \n",
"1 2.6 \n",
"2 3.0 \n",
"3 4.0 \n",
"4 1.8 \n",
"\n",
"[5 rows x 23 columns]"
"[12288 rows x 20 columns]"
]
},
"execution_count": 13,
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X.head()"
"X"
]
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0:\ttest: 0.9289412\tbest: 0.9289412 (0)\ttotal: 209ms\tremaining: 3m 28s\n",
"200:\ttest: 0.9741746\tbest: 0.9742290 (183)\ttotal: 18.6s\tremaining: 1m 13s\n",
"400:\ttest: 0.9759996\tbest: 0.9759996 (400)\ttotal: 46.4s\tremaining: 1m 9s\n",
"600:\ttest: 0.9764617\tbest: 0.9765179 (576)\ttotal: 1m 8s\tremaining: 45.2s\n",
"800:\ttest: 0.9769282\tbest: 0.9769282 (800)\ttotal: 1m 27s\tremaining: 21.8s\n",
"999:\ttest: 0.9772636\tbest: 0.9772636 (999)\ttotal: 1m 55s\tremaining: 0us\n",
"0:\ttest: 0.9522383\tbest: 0.9522383 (0)\ttotal: 371ms\tremaining: 6m 10s\n",
"200:\ttest: 0.9749033\tbest: 0.9749426 (188)\ttotal: 15.7s\tremaining: 1m 2s\n",
"400:\ttest: 0.9751700\tbest: 0.9753029 (270)\ttotal: 32.3s\tremaining: 48.2s\n",
"600:\ttest: 0.9757035\tbest: 0.9757052 (599)\ttotal: 47.2s\tremaining: 31.3s\n",
"800:\ttest: 0.9760228\tbest: 0.9760585 (762)\ttotal: 1m 2s\tremaining: 15.5s\n",
"999:\ttest: 0.9761958\tbest: 0.9762119 (990)\ttotal: 1m 17s\tremaining: 0us\n",
"\n",
"bestTest = 0.9772635842\n",
"bestIteration = 999\n",
"\n"
"bestTest = 0.9762119056\n",
"bestIteration = 990\n",
"\n",
"Shrink model to first 991 iterations.\n"
]
},
{
"data": {
"text/plain": [
"<catboost.core.CatBoostClassifier at 0x23afa652260>"
"<catboost.core.CatBoostClassifier at 0x1cb3665ba30>"
]
},
"execution_count": 14,
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
@ -815,7 +981,7 @@
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": 12,
"metadata": {},
"outputs": [
{
@ -864,34 +1030,34 @@
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"[I 2025-03-20 15:00:41,602] A new study created in memory with name: no-name-25d292cb-21f5-4296-b4f8-60550f9419f3\n",
"[I 2025-03-20 15:02:01,024] Trial 0 finished with value: 0.9732495439604305 and parameters: {'iterations': 885, 'learning_rate': 0.014037075669208683, 'depth': 6, 'subsample': 0.7347642926574603, 'colsample_bylevel': 0.628056031091488, 'l2_leaf_reg': 18.77314488035107, 'random_strength': 7.563973089971057}. Best is trial 0 with value: 0.9732495439604305.\n",
"[I 2025-03-20 15:03:17,085] Trial 1 finished with value: 0.9711497549205443 and parameters: {'iterations': 708, 'learning_rate': 0.012363987021937311, 'depth': 6, 'subsample': 0.5278155628504972, 'colsample_bylevel': 0.5923839431004807, 'l2_leaf_reg': 13.672040817303856, 'random_strength': 5.047595540352549}. Best is trial 0 with value: 0.9732495439604305.\n",
"[I 2025-03-20 15:04:48,051] Trial 2 finished with value: 0.9648459277561962 and parameters: {'iterations': 993, 'learning_rate': 0.001016519352919797, 'depth': 6, 'subsample': 0.7618891207779287, 'colsample_bylevel': 0.7088194762415717, 'l2_leaf_reg': 15.900968017238345, 'random_strength': 8.689884293115323}. Best is trial 0 with value: 0.9732495439604305.\n",
"[I 2025-03-20 15:05:54,013] Trial 3 finished with value: 0.9774063056111822 and parameters: {'iterations': 667, 'learning_rate': 0.053616906724399574, 'depth': 6, 'subsample': 0.5295054888915093, 'colsample_bylevel': 0.5919019897109834, 'l2_leaf_reg': 7.4789520770307085, 'random_strength': 9.983783900508625}. Best is trial 3 with value: 0.9774063056111822.\n",
"[I 2025-03-20 15:06:51,248] Trial 4 finished with value: 0.968719922573624 and parameters: {'iterations': 588, 'learning_rate': 0.012727034718753798, 'depth': 6, 'subsample': 0.6426790020391226, 'colsample_bylevel': 0.5191821820085022, 'l2_leaf_reg': 15.153416708004082, 'random_strength': 5.999182374601503}. Best is trial 3 with value: 0.9774063056111822.\n",
"[I 2025-03-20 15:08:16,470] Trial 5 finished with value: 0.9776203877562853 and parameters: {'iterations': 764, 'learning_rate': 0.07266083579909825, 'depth': 5, 'subsample': 0.5191615563488426, 'colsample_bylevel': 0.7302518018135663, 'l2_leaf_reg': 6.949740475447557, 'random_strength': 8.548754890547242}. Best is trial 5 with value: 0.9776203877562853.\n",
"[I 2025-03-20 15:09:10,733] Trial 6 finished with value: 0.9675852872045778 and parameters: {'iterations': 731, 'learning_rate': 0.013438903268682896, 'depth': 4, 'subsample': 0.5131276397700636, 'colsample_bylevel': 0.6859814430568429, 'l2_leaf_reg': 14.057136374487015, 'random_strength': 8.372349610104749}. Best is trial 5 with value: 0.9776203877562853.\n",
"[I 2025-03-20 15:10:01,815] Trial 7 finished with value: 0.977360813155348 and parameters: {'iterations': 642, 'learning_rate': 0.062155964429978755, 'depth': 5, 'subsample': 0.7661053749507193, 'colsample_bylevel': 0.6922388149758989, 'l2_leaf_reg': 5.284164428504138, 'random_strength': 6.232456641121701}. Best is trial 5 with value: 0.9776203877562853.\n",
"[I 2025-03-20 15:10:59,528] Trial 8 finished with value: 0.9653231525379885 and parameters: {'iterations': 754, 'learning_rate': 0.00208109923189855, 'depth': 5, 'subsample': 0.596137479445154, 'colsample_bylevel': 0.7574060768484172, 'l2_leaf_reg': 11.210689475672972, 'random_strength': 7.219680822486467}. Best is trial 5 with value: 0.9776203877562853.\n",
"[I 2025-03-20 15:12:14,806] Trial 9 finished with value: 0.9645587008781827 and parameters: {'iterations': 797, 'learning_rate': 0.002876994974206422, 'depth': 6, 'subsample': 0.7623605250175415, 'colsample_bylevel': 0.5792373869617546, 'l2_leaf_reg': 11.933900864140389, 'random_strength': 9.537789817015767}. Best is trial 5 with value: 0.9776203877562853.\n",
"[I 2025-03-20 15:13:20,035] Trial 10 finished with value: 0.9777185087394575 and parameters: {'iterations': 843, 'learning_rate': 0.09347955751894015, 'depth': 4, 'subsample': 0.5919290139946178, 'colsample_bylevel': 0.7823226761425409, 'l2_leaf_reg': 8.906630269811362, 'random_strength': 8.822275155143554}. Best is trial 10 with value: 0.9777185087394575.\n",
"[I 2025-03-20 15:14:26,950] Trial 11 finished with value: 0.9772716122615548 and parameters: {'iterations': 866, 'learning_rate': 0.09609340816462177, 'depth': 4, 'subsample': 0.588006292400181, 'colsample_bylevel': 0.7921811422710393, 'l2_leaf_reg': 8.754759631995423, 'random_strength': 8.823729815067631}. Best is trial 10 with value: 0.9777185087394575.\n",
"[I 2025-03-20 15:15:31,960] Trial 12 finished with value: 0.9770432579734449 and parameters: {'iterations': 842, 'learning_rate': 0.03150622304414378, 'depth': 4, 'subsample': 0.5815107101504045, 'colsample_bylevel': 0.7526738793511021, 'l2_leaf_reg': 8.96571033386253, 'random_strength': 7.901521206622082}. Best is trial 10 with value: 0.9777185087394575.\n",
"[I 2025-03-20 15:16:19,764] Trial 13 finished with value: 0.9762975385013358 and parameters: {'iterations': 500, 'learning_rate': 0.030813140864715382, 'depth': 5, 'subsample': 0.6807936806371823, 'colsample_bylevel': 0.7992043286169884, 'l2_leaf_reg': 6.54189325170135, 'random_strength': 9.2194946118034}. Best is trial 10 with value: 0.9777185087394575.\n",
"[I 2025-03-20 15:17:30,598] Trial 14 finished with value: 0.9771627871711274 and parameters: {'iterations': 960, 'learning_rate': 0.08725168283234642, 'depth': 4, 'subsample': 0.5611916358409927, 'colsample_bylevel': 0.7407206600062527, 'l2_leaf_reg': 10.877310276695814, 'random_strength': 8.279759834834062}. Best is trial 10 with value: 0.9777185087394575.\n",
"[I 2025-03-20 15:18:55,623] Trial 15 finished with value: 0.9773510010570307 and parameters: {'iterations': 921, 'learning_rate': 0.030478650701436915, 'depth': 5, 'subsample': 0.6426668840891531, 'colsample_bylevel': 0.6580544384365046, 'l2_leaf_reg': 9.275448656004336, 'random_strength': 7.054540552553462}. Best is trial 10 with value: 0.9777185087394575.\n",
"[I 2025-03-20 15:20:03,449] Trial 16 finished with value: 0.9770013335533622 and parameters: {'iterations': 802, 'learning_rate': 0.04717986995915269, 'depth': 4, 'subsample': 0.5502437476216447, 'colsample_bylevel': 0.7293227273801556, 'l2_leaf_reg': 5.910885625677492, 'random_strength': 9.224856294634591}. Best is trial 10 with value: 0.9777185087394575.\n",
"[I 2025-03-20 15:21:08,738] Trial 17 finished with value: 0.9664417317461521 and parameters: {'iterations': 794, 'learning_rate': 0.005786331156808205, 'depth': 5, 'subsample': 0.5027107752205993, 'colsample_bylevel': 0.7639160019272435, 'l2_leaf_reg': 7.568889551395079, 'random_strength': 7.889004164110706}. Best is trial 10 with value: 0.9777185087394575.\n",
"[I 2025-03-20 15:22:13,898] Trial 18 finished with value: 0.9765954694866041 and parameters: {'iterations': 919, 'learning_rate': 0.02240626498044735, 'depth': 4, 'subsample': 0.6106354847964582, 'colsample_bylevel': 0.6625660136271165, 'l2_leaf_reg': 10.544957798217377, 'random_strength': 6.657969347763323}. Best is trial 10 with value: 0.9777185087394575.\n",
"[I 2025-03-20 15:23:05,997] Trial 19 finished with value: 0.9776159277115958 and parameters: {'iterations': 597, 'learning_rate': 0.07175908110255513, 'depth': 5, 'subsample': 0.681492227991095, 'colsample_bylevel': 0.7774154754002688, 'l2_leaf_reg': 7.472076834725921, 'random_strength': 9.99934503292809}. Best is trial 10 with value: 0.9777185087394575.\n"
"[I 2025-04-15 14:46:04,525] A new study created in memory with name: no-name-a82f1532-93fd-444e-a2a9-e64f1ab1581d\n",
"[I 2025-04-15 14:46:47,591] Trial 0 finished with value: 0.9760700762221638 and parameters: {'iterations': 879, 'learning_rate': 0.03194622545083888, 'depth': 4, 'subsample': 0.6984770397557132, 'colsample_bylevel': 0.6107784129933028, 'l2_leaf_reg': 19.31004315136751, 'random_strength': 8.538025613654153}. Best is trial 0 with value: 0.9760700762221638.\n",
"[I 2025-04-15 14:47:49,613] Trial 1 finished with value: 0.968974145120934 and parameters: {'iterations': 987, 'learning_rate': 0.00497466285240473, 'depth': 6, 'subsample': 0.7324064062513642, 'colsample_bylevel': 0.5860390386187008, 'l2_leaf_reg': 13.6700598295073, 'random_strength': 8.68535276828941}. Best is trial 0 with value: 0.9760700762221638.\n",
"[I 2025-04-15 14:48:52,501] Trial 2 finished with value: 0.9771271068136104 and parameters: {'iterations': 958, 'learning_rate': 0.029644496717173407, 'depth': 5, 'subsample': 0.7324841402342017, 'colsample_bylevel': 0.7540959396418829, 'l2_leaf_reg': 7.126960456494959, 'random_strength': 7.4284219069859745}. Best is trial 2 with value: 0.9771271068136104.\n",
"[I 2025-04-15 14:49:44,391] Trial 3 finished with value: 0.9687270586451275 and parameters: {'iterations': 778, 'learning_rate': 0.002458862957591489, 'depth': 6, 'subsample': 0.5532293644121917, 'colsample_bylevel': 0.5854134727534657, 'l2_leaf_reg': 16.679333851474986, 'random_strength': 7.393624819420116}. Best is trial 2 with value: 0.9771271068136104.\n",
"[I 2025-04-15 14:50:18,713] Trial 4 finished with value: 0.9763555190823012 and parameters: {'iterations': 562, 'learning_rate': 0.036725493598596365, 'depth': 5, 'subsample': 0.7722133171879484, 'colsample_bylevel': 0.6532823306754292, 'l2_leaf_reg': 12.030754990066507, 'random_strength': 8.877550003001012}. Best is trial 2 with value: 0.9771271068136104.\n",
"[I 2025-04-15 14:51:17,350] Trial 5 finished with value: 0.968570065072052 and parameters: {'iterations': 962, 'learning_rate': 0.0011370325299877532, 'depth': 5, 'subsample': 0.6728515618578689, 'colsample_bylevel': 0.6826782056061581, 'l2_leaf_reg': 13.623633572161166, 'random_strength': 6.601819735133423}. Best is trial 2 with value: 0.9771271068136104.\n",
"[I 2025-04-15 14:51:57,207] Trial 6 finished with value: 0.9687047584216792 and parameters: {'iterations': 617, 'learning_rate': 0.0015432741560526652, 'depth': 5, 'subsample': 0.7499204041791451, 'colsample_bylevel': 0.7446512678837157, 'l2_leaf_reg': 10.072108741396685, 'random_strength': 7.00923150345308}. Best is trial 2 with value: 0.9771271068136104.\n",
"[I 2025-04-15 14:52:37,654] Trial 7 finished with value: 0.976307350599653 and parameters: {'iterations': 619, 'learning_rate': 0.09249177862479464, 'depth': 5, 'subsample': 0.5961272939306226, 'colsample_bylevel': 0.6454652012357972, 'l2_leaf_reg': 13.952857635588412, 'random_strength': 5.3135323404658195}. Best is trial 2 with value: 0.9771271068136104.\n",
"[I 2025-04-15 14:53:16,002] Trial 8 finished with value: 0.9680910562723839 and parameters: {'iterations': 668, 'learning_rate': 0.0017846267770558202, 'depth': 5, 'subsample': 0.6091299857459994, 'colsample_bylevel': 0.5146530801783137, 'l2_leaf_reg': 6.463177992505614, 'random_strength': 9.83403822639651}. Best is trial 2 with value: 0.9771271068136104.\n",
"[I 2025-04-15 14:54:07,117] Trial 9 finished with value: 0.9701837092407667 and parameters: {'iterations': 899, 'learning_rate': 0.009675766544107381, 'depth': 5, 'subsample': 0.7887519851916923, 'colsample_bylevel': 0.7583643579850647, 'l2_leaf_reg': 12.884965304659517, 'random_strength': 7.548006066067643}. Best is trial 2 with value: 0.9771271068136104.\n",
"[I 2025-04-15 14:54:49,239] Trial 10 finished with value: 0.9754644021533097 and parameters: {'iterations': 777, 'learning_rate': 0.024122661029117063, 'depth': 4, 'subsample': 0.5013447329900989, 'colsample_bylevel': 0.7800307305883676, 'l2_leaf_reg': 5.638932266363305, 'random_strength': 5.942927125428197}. Best is trial 2 with value: 0.9771271068136104.\n",
"[I 2025-04-15 14:55:16,702] Trial 11 finished with value: 0.9764081476096389 and parameters: {'iterations': 508, 'learning_rate': 0.04726537771631546, 'depth': 4, 'subsample': 0.7937595144792454, 'colsample_bylevel': 0.6888257437132431, 'l2_leaf_reg': 9.252398775691933, 'random_strength': 8.42482116769386}. Best is trial 2 with value: 0.9771271068136104.\n",
"[I 2025-04-15 14:55:43,712] Trial 12 finished with value: 0.9769504890439002 and parameters: {'iterations': 520, 'learning_rate': 0.07517500503326581, 'depth': 4, 'subsample': 0.7116563324529114, 'colsample_bylevel': 0.7144947743480992, 'l2_leaf_reg': 8.350160243659916, 'random_strength': 8.168242005926878}. Best is trial 2 with value: 0.9771271068136104.\n",
"[I 2025-04-15 14:56:21,069] Trial 13 finished with value: 0.9774294978435685 and parameters: {'iterations': 703, 'learning_rate': 0.06983698579391724, 'depth': 4, 'subsample': 0.7065367059268508, 'colsample_bylevel': 0.724103178768751, 'l2_leaf_reg': 8.023903029922751, 'random_strength': 7.928311734643061}. Best is trial 13 with value: 0.9774294978435685.\n",
"[I 2025-04-15 14:57:09,353] Trial 14 finished with value: 0.9731246627091203 and parameters: {'iterations': 724, 'learning_rate': 0.015490359934301434, 'depth': 6, 'subsample': 0.6446878985673278, 'colsample_bylevel': 0.7920667146800474, 'l2_leaf_reg': 7.371821145261041, 'random_strength': 6.451885650217026}. Best is trial 13 with value: 0.9774294978435685.\n",
"[I 2025-04-15 14:57:52,357] Trial 15 finished with value: 0.9738400538773399 and parameters: {'iterations': 859, 'learning_rate': 0.014795165553926048, 'depth': 4, 'subsample': 0.665044880376952, 'colsample_bylevel': 0.7290363615996102, 'l2_leaf_reg': 5.2279489954943, 'random_strength': 9.524780688568056}. Best is trial 13 with value: 0.9774294978435685.\n",
"[I 2025-04-15 14:58:30,199] Trial 16 finished with value: 0.9766686142195145 and parameters: {'iterations': 722, 'learning_rate': 0.058843073710673745, 'depth': 4, 'subsample': 0.7070259824991089, 'colsample_bylevel': 0.7046723312898295, 'l2_leaf_reg': 9.886755559850055, 'random_strength': 7.515414918520503}. Best is trial 13 with value: 0.9774294978435685.\n",
"[I 2025-04-15 14:59:29,101] Trial 17 finished with value: 0.9759648191674881 and parameters: {'iterations': 836, 'learning_rate': 0.0201164375942546, 'depth': 6, 'subsample': 0.7464329768357236, 'colsample_bylevel': 0.751756591889216, 'l2_leaf_reg': 11.194529638284472, 'random_strength': 7.890747626459661}. Best is trial 13 with value: 0.9774294978435685.\n",
"[I 2025-04-15 15:00:13,792] Trial 18 finished with value: 0.9691284626671959 and parameters: {'iterations': 941, 'learning_rate': 0.008139386305286263, 'depth': 4, 'subsample': 0.6324467522547773, 'colsample_bylevel': 0.6640606522826965, 'l2_leaf_reg': 7.637155074182216, 'random_strength': 9.118659113863696}. Best is trial 13 with value: 0.9774294978435685.\n",
"[I 2025-04-15 15:01:04,807] Trial 19 finished with value: 0.9772921284671273 and parameters: {'iterations': 812, 'learning_rate': 0.047376913098252045, 'depth': 5, 'subsample': 0.6838122071503733, 'colsample_bylevel': 0.7976712282377681, 'l2_leaf_reg': 15.51559416611185, 'random_strength': 6.860978796602465}. Best is trial 13 with value: 0.9774294978435685.\n"
]
},
{
@ -899,15 +1065,15 @@
"output_type": "stream",
"text": [
"Best Trial:\n",
"AUC: 0.9777185087394575\n",
"AUC: 0.9774294978435685\n",
"Params:\n",
" iterations: 843\n",
" learning_rate: 0.09347955751894015\n",
" iterations: 703\n",
" learning_rate: 0.06983698579391724\n",
" depth: 4\n",
" subsample: 0.5919290139946178\n",
" colsample_bylevel: 0.7823226761425409\n",
" l2_leaf_reg: 8.906630269811362\n",
" random_strength: 8.822275155143554\n"
" subsample: 0.7065367059268508\n",
" colsample_bylevel: 0.724103178768751\n",
" l2_leaf_reg: 8.023903029922751\n",
" random_strength: 7.928311734643061\n"
]
}
],
@ -926,24 +1092,22 @@
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": 14,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0:\ttest: 0.9345234\tbest: 0.9345234 (0)\ttotal: 67.6ms\tremaining: 56.9s\n",
"100:\ttest: 0.9676504\tbest: 0.9677699 (99)\ttotal: 7.48s\tremaining: 55s\n",
"200:\ttest: 0.9763805\tbest: 0.9763805 (200)\ttotal: 14.6s\tremaining: 46.6s\n",
"300:\ttest: 0.9768756\tbest: 0.9770299 (281)\ttotal: 21.1s\tremaining: 38s\n",
"0:\ttest: 0.8633242\tbest: 0.8633242 (0)\ttotal: 105ms\tremaining: 1m 13s\n",
"100:\ttest: 0.9693666\tbest: 0.9696128 (84)\ttotal: 4.78s\tremaining: 28.5s\n",
"Stopped by overfitting detector (50 iterations wait)\n",
"\n",
"bestTest = 0.9770298778\n",
"bestIteration = 281\n",
"bestTest = 0.9696128235\n",
"bestIteration = 84\n",
"\n",
"Shrink model to first 282 iterations.\n",
"Learn AUC: 0.9921 | Test AUC: 0.9770\n"
"Shrink model to first 85 iterations.\n",
"Learn AUC: 0.9742 | Test AUC: 0.9696\n"
]
}
],
@ -989,7 +1153,7 @@
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [