first commit

This commit is contained in:
Jesselyn Mu
2025-07-11 09:05:19 +07:00
parent 1506c2d72c
commit 572cd4c572
22 changed files with 4533 additions and 2226 deletions

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -2,7 +2,33 @@
"cells": [
{
"cell_type": "code",
"execution_count": 20,
"execution_count": 1,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"-0.2470000000000001"
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"values = [-0.005, 0.003, -0.014, 0.003, -0.002, -0.014, -0.014, -0.005, -0.014, -0.014,\n",
" -0.005, -0.005, -0.014, -0.014, -0.014, -0.014, -0.014, -0.014, -0.014, -0.014,\n",
" -0.014, -0.014, -0.014, -0.014, 0.009, -0.002]\n",
"\n",
"# Calculate the total\n",
"total_sum = sum(values)\n",
"total_sum"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
@ -199,7 +225,7 @@
"4 1 "
]
},
"execution_count": 20,
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
@ -3264,7 +3290,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.3"
"version": "3.10.1"
}
},
"nbformat": 4,

View File

@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 7,
"metadata": {},
"outputs": [
{
@ -221,7 +221,7 @@
"[5 rows x 33 columns]"
]
},
"execution_count": 1,
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
@ -234,7 +234,19 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"df = df.drop(columns=['active_work_category', 'work_stability_score', \n",
" 'position_score', 'job_income_position_score',\n",
" 'education_score', 'education_income_ratio',\n",
" 'weighted_satisfaction_performance'])"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
@ -243,7 +255,7 @@
"12288"
]
},
"execution_count": 2,
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
@ -254,7 +266,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 10,
"metadata": {},
"outputs": [
{
@ -266,7 +278,7 @@
"Name: count, dtype: int64"
]
},
"execution_count": 3,
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
@ -277,7 +289,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@ -292,6 +304,261 @@
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['domisili', 'jenis_kelamin', 'marriage_stat', 'dependant', 'education',\n",
" 'absent_90D', 'avg_time_work', 'departemen', 'position', 'income',\n",
" 'total_komp', 'job_satisfaction', 'performance_rating', 'age_years',\n",
" 'active_work', 'income_3_months', 'income_6_months',\n",
" 'total_income_work', 'income_dependant_ratio', 'work_efficiency'],\n",
" dtype='object')"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X.columns"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>domisili</th>\n",
" <th>jenis_kelamin</th>\n",
" <th>marriage_stat</th>\n",
" <th>dependant</th>\n",
" <th>education</th>\n",
" <th>absent_90D</th>\n",
" <th>avg_time_work</th>\n",
" <th>departemen</th>\n",
" <th>position</th>\n",
" <th>income</th>\n",
" <th>...</th>\n",
" <th>total_income_work</th>\n",
" <th>income_dependant_ratio</th>\n",
" <th>work_efficiency</th>\n",
" <th>active_work_category</th>\n",
" <th>work_stability_score</th>\n",
" <th>position_score</th>\n",
" <th>job_income_position_score</th>\n",
" <th>education_score</th>\n",
" <th>education_income_ratio</th>\n",
" <th>weighted_satisfaction_performance</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Kota Jakarta Timur</td>\n",
" <td>Perempuan</td>\n",
" <td>Single</td>\n",
" <td>0</td>\n",
" <td>D2</td>\n",
" <td>4.0</td>\n",
" <td>9.02</td>\n",
" <td>Corporate Strategy &amp; Communications</td>\n",
" <td>Staff</td>\n",
" <td>3.100943e+06</td>\n",
" <td>...</td>\n",
" <td>4.341320e+07</td>\n",
" <td>3.100943e+06</td>\n",
" <td>1.12750</td>\n",
" <td>Mid-term</td>\n",
" <td>2.800000</td>\n",
" <td>1</td>\n",
" <td>3.100943e+06</td>\n",
" <td>3</td>\n",
" <td>1.033648e+06</td>\n",
" <td>1.8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Tangerang</td>\n",
" <td>Laki-laki</td>\n",
" <td>Single</td>\n",
" <td>0</td>\n",
" <td>SLTA</td>\n",
" <td>2.0</td>\n",
" <td>9.80</td>\n",
" <td>Engineering &amp; IT</td>\n",
" <td>Staff</td>\n",
" <td>1.146038e+06</td>\n",
" <td>...</td>\n",
" <td>1.489849e+07</td>\n",
" <td>1.146038e+06</td>\n",
" <td>1.22500</td>\n",
" <td>Mid-term</td>\n",
" <td>4.333333</td>\n",
" <td>1</td>\n",
" <td>1.146038e+06</td>\n",
" <td>1</td>\n",
" <td>1.146038e+06</td>\n",
" <td>2.6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Kabupaten Bekasi</td>\n",
" <td>Laki-laki</td>\n",
" <td>Married</td>\n",
" <td>2</td>\n",
" <td>SLTA</td>\n",
" <td>0.0</td>\n",
" <td>9.45</td>\n",
" <td>Creative &amp; Design</td>\n",
" <td>Manager</td>\n",
" <td>8.013796e+06</td>\n",
" <td>...</td>\n",
" <td>2.003449e+08</td>\n",
" <td>2.671265e+06</td>\n",
" <td>1.18125</td>\n",
" <td>Mid-term</td>\n",
" <td>25.000000</td>\n",
" <td>4</td>\n",
" <td>2.003449e+06</td>\n",
" <td>1</td>\n",
" <td>8.013796e+06</td>\n",
" <td>3.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Kabupaten Bekasi</td>\n",
" <td>Laki-laki</td>\n",
" <td>Married</td>\n",
" <td>3</td>\n",
" <td>SLTA</td>\n",
" <td>0.0</td>\n",
" <td>9.76</td>\n",
" <td>Marketing</td>\n",
" <td>Manager</td>\n",
" <td>1.015002e+07</td>\n",
" <td>...</td>\n",
" <td>2.537505e+08</td>\n",
" <td>2.537505e+06</td>\n",
" <td>1.22000</td>\n",
" <td>Mid-term</td>\n",
" <td>25.000000</td>\n",
" <td>4</td>\n",
" <td>2.537505e+06</td>\n",
" <td>1</td>\n",
" <td>1.015002e+07</td>\n",
" <td>4.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Kota Jakarta Pusat</td>\n",
" <td>Laki-laki</td>\n",
" <td>Married</td>\n",
" <td>1</td>\n",
" <td>SLTA</td>\n",
" <td>8.0</td>\n",
" <td>9.46</td>\n",
" <td>Operations</td>\n",
" <td>Staff</td>\n",
" <td>2.548043e+06</td>\n",
" <td>...</td>\n",
" <td>3.312456e+07</td>\n",
" <td>1.274022e+06</td>\n",
" <td>1.18250</td>\n",
" <td>Mid-term</td>\n",
" <td>1.444444</td>\n",
" <td>1</td>\n",
" <td>2.548043e+06</td>\n",
" <td>1</td>\n",
" <td>2.548043e+06</td>\n",
" <td>1.8</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 27 columns</p>\n",
"</div>"
],
"text/plain": [
" domisili jenis_kelamin marriage_stat dependant education \\\n",
"0 Kota Jakarta Timur Perempuan Single 0 D2 \n",
"1 Tangerang Laki-laki Single 0 SLTA \n",
"2 Kabupaten Bekasi Laki-laki Married 2 SLTA \n",
"3 Kabupaten Bekasi Laki-laki Married 3 SLTA \n",
"4 Kota Jakarta Pusat Laki-laki Married 1 SLTA \n",
"\n",
" absent_90D avg_time_work departemen position \\\n",
"0 4.0 9.02 Corporate Strategy & Communications Staff \n",
"1 2.0 9.80 Engineering & IT Staff \n",
"2 0.0 9.45 Creative & Design Manager \n",
"3 0.0 9.76 Marketing Manager \n",
"4 8.0 9.46 Operations Staff \n",
"\n",
" income ... total_income_work income_dependant_ratio \\\n",
"0 3.100943e+06 ... 4.341320e+07 3.100943e+06 \n",
"1 1.146038e+06 ... 1.489849e+07 1.146038e+06 \n",
"2 8.013796e+06 ... 2.003449e+08 2.671265e+06 \n",
"3 1.015002e+07 ... 2.537505e+08 2.537505e+06 \n",
"4 2.548043e+06 ... 3.312456e+07 1.274022e+06 \n",
"\n",
" work_efficiency active_work_category work_stability_score position_score \\\n",
"0 1.12750 Mid-term 2.800000 1 \n",
"1 1.22500 Mid-term 4.333333 1 \n",
"2 1.18125 Mid-term 25.000000 4 \n",
"3 1.22000 Mid-term 25.000000 4 \n",
"4 1.18250 Mid-term 1.444444 1 \n",
"\n",
" job_income_position_score education_score education_income_ratio \\\n",
"0 3.100943e+06 3 1.033648e+06 \n",
"1 1.146038e+06 1 1.146038e+06 \n",
"2 2.003449e+06 1 8.013796e+06 \n",
"3 2.537505e+06 1 1.015002e+07 \n",
"4 2.548043e+06 1 2.548043e+06 \n",
"\n",
" weighted_satisfaction_performance \n",
"0 1.8 \n",
"1 2.6 \n",
"2 3.0 \n",
"3 4.0 \n",
"4 1.8 \n",
"\n",
"[5 rows x 27 columns]"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X.head()"
]
},
{
"cell_type": "code",
"execution_count": 4,

View File

@ -232,6 +232,18 @@
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"df = df.drop(columns=['active_work_category', 'work_stability_score', \n",
" 'position_score', 'job_income_position_score',\n",
" 'education_score', 'education_income_ratio',\n",
" 'weighted_satisfaction_performance'])"
]
},
{
"cell_type": "code",
"execution_count": 2,
@ -319,18 +331,16 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.model_selection import train_test_split\n",
"\n",
"cat_feature = ['departemen', 'position', 'domisili', 'marriage_stat', 'job_satisfaction', 'performance_rating',\n",
" 'education', 'active_work_category', 'jenis_kelamin']\n",
" 'education', 'jenis_kelamin']\n",
"\n",
"X = df.drop(columns=['churn_status', 'employee_id', 'date_of_birth', 'join_date', 'resign_date', \n",
" 'active_work_months', 'position_score', 'job_income_position_score',\n",
" 'education_score', 'education_income_ratio'])\n",
"X = df.drop(columns=['churn_status', 'employee_id', 'date_of_birth', 'join_date', 'resign_date', 'active_work_months'])\n",
"y = df['churn_status']\n",
"\n",
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)"
@ -434,7 +444,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 7,
"metadata": {},
"outputs": [
{
@ -456,7 +466,7 @@
"\n",
"cat_indices = [X.columns.get_loc(col) for col in cat_feature]\n",
"\n",
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)\n",
"# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)\n",
"\n",
"sm = SMOTENC(categorical_features=cat_indices, random_state=42)\n",
"X_train_res, y_train_res = sm.fit_resample(X_train, y_train)\n",
@ -467,7 +477,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 8,
"metadata": {},
"outputs": [
{
@ -534,7 +544,7 @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": 10,
"metadata": {},
"outputs": [
{
@ -568,7 +578,9 @@
" <th>departemen</th>\n",
" <th>position</th>\n",
" <th>income</th>\n",
" <th>...</th>\n",
" <th>total_komp</th>\n",
" <th>job_satisfaction</th>\n",
" <th>performance_rating</th>\n",
" <th>age_years</th>\n",
" <th>active_work</th>\n",
" <th>income_3_months</th>\n",
@ -576,9 +588,6 @@
" <th>total_income_work</th>\n",
" <th>income_dependant_ratio</th>\n",
" <th>work_efficiency</th>\n",
" <th>active_work_category</th>\n",
" <th>work_stability_score</th>\n",
" <th>weighted_satisfaction_performance</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
@ -594,7 +603,9 @@
" <td>Corporate Strategy &amp; Communications</td>\n",
" <td>Staff</td>\n",
" <td>3.100943e+06</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>Low</td>\n",
" <td>Excellent</td>\n",
" <td>25</td>\n",
" <td>429</td>\n",
" <td>9.302829e+06</td>\n",
@ -602,9 +613,6 @@
" <td>4.341320e+07</td>\n",
" <td>3.100943e+06</td>\n",
" <td>1.12750</td>\n",
" <td>Mid-term</td>\n",
" <td>2.800000</td>\n",
" <td>1.8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
@ -618,7 +626,9 @@
" <td>Engineering &amp; IT</td>\n",
" <td>Staff</td>\n",
" <td>1.146038e+06</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>High</td>\n",
" <td>Good</td>\n",
" <td>26</td>\n",
" <td>410</td>\n",
" <td>3.438114e+06</td>\n",
@ -626,9 +636,6 @@
" <td>1.489849e+07</td>\n",
" <td>1.146038e+06</td>\n",
" <td>1.22500</td>\n",
" <td>Mid-term</td>\n",
" <td>4.333333</td>\n",
" <td>2.6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
@ -642,7 +649,9 @@
" <td>Creative &amp; Design</td>\n",
" <td>Manager</td>\n",
" <td>8.013796e+06</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>High</td>\n",
" <td>Excellent</td>\n",
" <td>43</td>\n",
" <td>776</td>\n",
" <td>2.404139e+07</td>\n",
@ -650,9 +659,6 @@
" <td>2.003449e+08</td>\n",
" <td>2.671265e+06</td>\n",
" <td>1.18125</td>\n",
" <td>Mid-term</td>\n",
" <td>25.000000</td>\n",
" <td>3.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
@ -666,7 +672,9 @@
" <td>Marketing</td>\n",
" <td>Manager</td>\n",
" <td>1.015002e+07</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>Very High</td>\n",
" <td>Outstanding</td>\n",
" <td>43</td>\n",
" <td>778</td>\n",
" <td>3.045007e+07</td>\n",
@ -674,9 +682,6 @@
" <td>2.537505e+08</td>\n",
" <td>2.537505e+06</td>\n",
" <td>1.22000</td>\n",
" <td>Mid-term</td>\n",
" <td>25.000000</td>\n",
" <td>4.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
@ -690,7 +695,9 @@
" <td>Operations</td>\n",
" <td>Staff</td>\n",
" <td>2.548043e+06</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>Low</td>\n",
" <td>Excellent</td>\n",
" <td>36</td>\n",
" <td>405</td>\n",
" <td>7.644129e+06</td>\n",
@ -698,98 +705,257 @@
" <td>3.312456e+07</td>\n",
" <td>1.274022e+06</td>\n",
" <td>1.18250</td>\n",
" <td>Mid-term</td>\n",
" <td>1.444444</td>\n",
" <td>1.8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12283</th>\n",
" <td>Kabupaten Bogor</td>\n",
" <td>Perempuan</td>\n",
" <td>Married</td>\n",
" <td>0</td>\n",
" <td>SLTA</td>\n",
" <td>4.0</td>\n",
" <td>9.40</td>\n",
" <td>HR</td>\n",
" <td>Staff</td>\n",
" <td>1.092339e+06</td>\n",
" <td>0.0</td>\n",
" <td>High</td>\n",
" <td>Excellent</td>\n",
" <td>40</td>\n",
" <td>832</td>\n",
" <td>3.277017e+06</td>\n",
" <td>6.554034e+06</td>\n",
" <td>2.949315e+07</td>\n",
" <td>1.092339e+06</td>\n",
" <td>1.17500</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12284</th>\n",
" <td>Kota Jakarta Barat</td>\n",
" <td>Laki-laki</td>\n",
" <td>Married</td>\n",
" <td>2</td>\n",
" <td>SLTA</td>\n",
" <td>3.0</td>\n",
" <td>9.11</td>\n",
" <td>Corporate Strategy &amp; Communications</td>\n",
" <td>Staff</td>\n",
" <td>1.175199e+06</td>\n",
" <td>0.0</td>\n",
" <td>Medium</td>\n",
" <td>Good</td>\n",
" <td>32</td>\n",
" <td>408</td>\n",
" <td>3.525597e+06</td>\n",
" <td>7.051194e+06</td>\n",
" <td>1.527759e+07</td>\n",
" <td>3.917330e+05</td>\n",
" <td>1.13875</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12285</th>\n",
" <td>Tangerang</td>\n",
" <td>Laki-laki</td>\n",
" <td>Single</td>\n",
" <td>0</td>\n",
" <td>SLTA</td>\n",
" <td>0.0</td>\n",
" <td>9.82</td>\n",
" <td>HR</td>\n",
" <td>Staff</td>\n",
" <td>1.479552e+06</td>\n",
" <td>0.0</td>\n",
" <td>Medium</td>\n",
" <td>Good</td>\n",
" <td>24</td>\n",
" <td>539</td>\n",
" <td>4.438656e+06</td>\n",
" <td>8.877312e+06</td>\n",
" <td>2.515238e+07</td>\n",
" <td>1.479552e+06</td>\n",
" <td>1.22750</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12286</th>\n",
" <td>Tangerang</td>\n",
" <td>Perempuan</td>\n",
" <td>Married</td>\n",
" <td>1</td>\n",
" <td>D1</td>\n",
" <td>5.0</td>\n",
" <td>9.17</td>\n",
" <td>Finance &amp; Accounting</td>\n",
" <td>Staff</td>\n",
" <td>4.655009e+06</td>\n",
" <td>0.0</td>\n",
" <td>Low</td>\n",
" <td>Low</td>\n",
" <td>26</td>\n",
" <td>918</td>\n",
" <td>1.396503e+07</td>\n",
" <td>2.793005e+07</td>\n",
" <td>1.396503e+08</td>\n",
" <td>2.327504e+06</td>\n",
" <td>1.14625</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12287</th>\n",
" <td>Tangerang</td>\n",
" <td>Perempuan</td>\n",
" <td>Married</td>\n",
" <td>2</td>\n",
" <td>SLTA</td>\n",
" <td>7.0</td>\n",
" <td>9.37</td>\n",
" <td>Creative &amp; Design</td>\n",
" <td>Senior</td>\n",
" <td>6.400201e+06</td>\n",
" <td>0.0</td>\n",
" <td>Very High</td>\n",
" <td>Excellent</td>\n",
" <td>47</td>\n",
" <td>559</td>\n",
" <td>1.920060e+07</td>\n",
" <td>3.840121e+07</td>\n",
" <td>1.152036e+08</td>\n",
" <td>2.133400e+06</td>\n",
" <td>1.17125</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 23 columns</p>\n",
"<p>12288 rows × 20 columns</p>\n",
"</div>"
],
"text/plain": [
" domisili jenis_kelamin marriage_stat dependant education \\\n",
"0 Kota Jakarta Timur Perempuan Single 0 D2 \n",
"1 Tangerang Laki-laki Single 0 SLTA \n",
"2 Kabupaten Bekasi Laki-laki Married 2 SLTA \n",
"3 Kabupaten Bekasi Laki-laki Married 3 SLTA \n",
"4 Kota Jakarta Pusat Laki-laki Married 1 SLTA \n",
" domisili jenis_kelamin marriage_stat dependant education \\\n",
"0 Kota Jakarta Timur Perempuan Single 0 D2 \n",
"1 Tangerang Laki-laki Single 0 SLTA \n",
"2 Kabupaten Bekasi Laki-laki Married 2 SLTA \n",
"3 Kabupaten Bekasi Laki-laki Married 3 SLTA \n",
"4 Kota Jakarta Pusat Laki-laki Married 1 SLTA \n",
"... ... ... ... ... ... \n",
"12283 Kabupaten Bogor Perempuan Married 0 SLTA \n",
"12284 Kota Jakarta Barat Laki-laki Married 2 SLTA \n",
"12285 Tangerang Laki-laki Single 0 SLTA \n",
"12286 Tangerang Perempuan Married 1 D1 \n",
"12287 Tangerang Perempuan Married 2 SLTA \n",
"\n",
" absent_90D avg_time_work departemen position \\\n",
"0 4.0 9.02 Corporate Strategy & Communications Staff \n",
"1 2.0 9.80 Engineering & IT Staff \n",
"2 0.0 9.45 Creative & Design Manager \n",
"3 0.0 9.76 Marketing Manager \n",
"4 8.0 9.46 Operations Staff \n",
" absent_90D avg_time_work departemen \\\n",
"0 4.0 9.02 Corporate Strategy & Communications \n",
"1 2.0 9.80 Engineering & IT \n",
"2 0.0 9.45 Creative & Design \n",
"3 0.0 9.76 Marketing \n",
"4 8.0 9.46 Operations \n",
"... ... ... ... \n",
"12283 4.0 9.40 HR \n",
"12284 3.0 9.11 Corporate Strategy & Communications \n",
"12285 0.0 9.82 HR \n",
"12286 5.0 9.17 Finance & Accounting \n",
"12287 7.0 9.37 Creative & Design \n",
"\n",
" income ... age_years active_work income_3_months income_6_months \\\n",
"0 3.100943e+06 ... 25 429 9.302829e+06 1.860566e+07 \n",
"1 1.146038e+06 ... 26 410 3.438114e+06 6.876228e+06 \n",
"2 8.013796e+06 ... 43 776 2.404139e+07 4.808278e+07 \n",
"3 1.015002e+07 ... 43 778 3.045007e+07 6.090013e+07 \n",
"4 2.548043e+06 ... 36 405 7.644129e+06 1.528826e+07 \n",
" position income total_komp job_satisfaction performance_rating \\\n",
"0 Staff 3.100943e+06 0.0 Low Excellent \n",
"1 Staff 1.146038e+06 0.0 High Good \n",
"2 Manager 8.013796e+06 0.0 High Excellent \n",
"3 Manager 1.015002e+07 0.0 Very High Outstanding \n",
"4 Staff 2.548043e+06 0.0 Low Excellent \n",
"... ... ... ... ... ... \n",
"12283 Staff 1.092339e+06 0.0 High Excellent \n",
"12284 Staff 1.175199e+06 0.0 Medium Good \n",
"12285 Staff 1.479552e+06 0.0 Medium Good \n",
"12286 Staff 4.655009e+06 0.0 Low Low \n",
"12287 Senior 6.400201e+06 0.0 Very High Excellent \n",
"\n",
" total_income_work income_dependant_ratio work_efficiency \\\n",
"0 4.341320e+07 3.100943e+06 1.12750 \n",
"1 1.489849e+07 1.146038e+06 1.22500 \n",
"2 2.003449e+08 2.671265e+06 1.18125 \n",
"3 2.537505e+08 2.537505e+06 1.22000 \n",
"4 3.312456e+07 1.274022e+06 1.18250 \n",
" age_years active_work income_3_months income_6_months \\\n",
"0 25 429 9.302829e+06 1.860566e+07 \n",
"1 26 410 3.438114e+06 6.876228e+06 \n",
"2 43 776 2.404139e+07 4.808278e+07 \n",
"3 43 778 3.045007e+07 6.090013e+07 \n",
"4 36 405 7.644129e+06 1.528826e+07 \n",
"... ... ... ... ... \n",
"12283 40 832 3.277017e+06 6.554034e+06 \n",
"12284 32 408 3.525597e+06 7.051194e+06 \n",
"12285 24 539 4.438656e+06 8.877312e+06 \n",
"12286 26 918 1.396503e+07 2.793005e+07 \n",
"12287 47 559 1.920060e+07 3.840121e+07 \n",
"\n",
" active_work_category work_stability_score \\\n",
"0 Mid-term 2.800000 \n",
"1 Mid-term 4.333333 \n",
"2 Mid-term 25.000000 \n",
"3 Mid-term 25.000000 \n",
"4 Mid-term 1.444444 \n",
" total_income_work income_dependant_ratio work_efficiency \n",
"0 4.341320e+07 3.100943e+06 1.12750 \n",
"1 1.489849e+07 1.146038e+06 1.22500 \n",
"2 2.003449e+08 2.671265e+06 1.18125 \n",
"3 2.537505e+08 2.537505e+06 1.22000 \n",
"4 3.312456e+07 1.274022e+06 1.18250 \n",
"... ... ... ... \n",
"12283 2.949315e+07 1.092339e+06 1.17500 \n",
"12284 1.527759e+07 3.917330e+05 1.13875 \n",
"12285 2.515238e+07 1.479552e+06 1.22750 \n",
"12286 1.396503e+08 2.327504e+06 1.14625 \n",
"12287 1.152036e+08 2.133400e+06 1.17125 \n",
"\n",
" weighted_satisfaction_performance \n",
"0 1.8 \n",
"1 2.6 \n",
"2 3.0 \n",
"3 4.0 \n",
"4 1.8 \n",
"\n",
"[5 rows x 23 columns]"
"[12288 rows x 20 columns]"
]
},
"execution_count": 13,
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X.head()"
"X"
]
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0:\ttest: 0.9289412\tbest: 0.9289412 (0)\ttotal: 209ms\tremaining: 3m 28s\n",
"200:\ttest: 0.9741746\tbest: 0.9742290 (183)\ttotal: 18.6s\tremaining: 1m 13s\n",
"400:\ttest: 0.9759996\tbest: 0.9759996 (400)\ttotal: 46.4s\tremaining: 1m 9s\n",
"600:\ttest: 0.9764617\tbest: 0.9765179 (576)\ttotal: 1m 8s\tremaining: 45.2s\n",
"800:\ttest: 0.9769282\tbest: 0.9769282 (800)\ttotal: 1m 27s\tremaining: 21.8s\n",
"999:\ttest: 0.9772636\tbest: 0.9772636 (999)\ttotal: 1m 55s\tremaining: 0us\n",
"0:\ttest: 0.9522383\tbest: 0.9522383 (0)\ttotal: 371ms\tremaining: 6m 10s\n",
"200:\ttest: 0.9749033\tbest: 0.9749426 (188)\ttotal: 15.7s\tremaining: 1m 2s\n",
"400:\ttest: 0.9751700\tbest: 0.9753029 (270)\ttotal: 32.3s\tremaining: 48.2s\n",
"600:\ttest: 0.9757035\tbest: 0.9757052 (599)\ttotal: 47.2s\tremaining: 31.3s\n",
"800:\ttest: 0.9760228\tbest: 0.9760585 (762)\ttotal: 1m 2s\tremaining: 15.5s\n",
"999:\ttest: 0.9761958\tbest: 0.9762119 (990)\ttotal: 1m 17s\tremaining: 0us\n",
"\n",
"bestTest = 0.9772635842\n",
"bestIteration = 999\n",
"\n"
"bestTest = 0.9762119056\n",
"bestIteration = 990\n",
"\n",
"Shrink model to first 991 iterations.\n"
]
},
{
"data": {
"text/plain": [
"<catboost.core.CatBoostClassifier at 0x23afa652260>"
"<catboost.core.CatBoostClassifier at 0x1cb3665ba30>"
]
},
"execution_count": 14,
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
@ -815,7 +981,7 @@
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": 12,
"metadata": {},
"outputs": [
{
@ -864,34 +1030,34 @@
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"[I 2025-03-20 15:00:41,602] A new study created in memory with name: no-name-25d292cb-21f5-4296-b4f8-60550f9419f3\n",
"[I 2025-03-20 15:02:01,024] Trial 0 finished with value: 0.9732495439604305 and parameters: {'iterations': 885, 'learning_rate': 0.014037075669208683, 'depth': 6, 'subsample': 0.7347642926574603, 'colsample_bylevel': 0.628056031091488, 'l2_leaf_reg': 18.77314488035107, 'random_strength': 7.563973089971057}. Best is trial 0 with value: 0.9732495439604305.\n",
"[I 2025-03-20 15:03:17,085] Trial 1 finished with value: 0.9711497549205443 and parameters: {'iterations': 708, 'learning_rate': 0.012363987021937311, 'depth': 6, 'subsample': 0.5278155628504972, 'colsample_bylevel': 0.5923839431004807, 'l2_leaf_reg': 13.672040817303856, 'random_strength': 5.047595540352549}. Best is trial 0 with value: 0.9732495439604305.\n",
"[I 2025-03-20 15:04:48,051] Trial 2 finished with value: 0.9648459277561962 and parameters: {'iterations': 993, 'learning_rate': 0.001016519352919797, 'depth': 6, 'subsample': 0.7618891207779287, 'colsample_bylevel': 0.7088194762415717, 'l2_leaf_reg': 15.900968017238345, 'random_strength': 8.689884293115323}. Best is trial 0 with value: 0.9732495439604305.\n",
"[I 2025-03-20 15:05:54,013] Trial 3 finished with value: 0.9774063056111822 and parameters: {'iterations': 667, 'learning_rate': 0.053616906724399574, 'depth': 6, 'subsample': 0.5295054888915093, 'colsample_bylevel': 0.5919019897109834, 'l2_leaf_reg': 7.4789520770307085, 'random_strength': 9.983783900508625}. Best is trial 3 with value: 0.9774063056111822.\n",
"[I 2025-03-20 15:06:51,248] Trial 4 finished with value: 0.968719922573624 and parameters: {'iterations': 588, 'learning_rate': 0.012727034718753798, 'depth': 6, 'subsample': 0.6426790020391226, 'colsample_bylevel': 0.5191821820085022, 'l2_leaf_reg': 15.153416708004082, 'random_strength': 5.999182374601503}. Best is trial 3 with value: 0.9774063056111822.\n",
"[I 2025-03-20 15:08:16,470] Trial 5 finished with value: 0.9776203877562853 and parameters: {'iterations': 764, 'learning_rate': 0.07266083579909825, 'depth': 5, 'subsample': 0.5191615563488426, 'colsample_bylevel': 0.7302518018135663, 'l2_leaf_reg': 6.949740475447557, 'random_strength': 8.548754890547242}. Best is trial 5 with value: 0.9776203877562853.\n",
"[I 2025-03-20 15:09:10,733] Trial 6 finished with value: 0.9675852872045778 and parameters: {'iterations': 731, 'learning_rate': 0.013438903268682896, 'depth': 4, 'subsample': 0.5131276397700636, 'colsample_bylevel': 0.6859814430568429, 'l2_leaf_reg': 14.057136374487015, 'random_strength': 8.372349610104749}. Best is trial 5 with value: 0.9776203877562853.\n",
"[I 2025-03-20 15:10:01,815] Trial 7 finished with value: 0.977360813155348 and parameters: {'iterations': 642, 'learning_rate': 0.062155964429978755, 'depth': 5, 'subsample': 0.7661053749507193, 'colsample_bylevel': 0.6922388149758989, 'l2_leaf_reg': 5.284164428504138, 'random_strength': 6.232456641121701}. Best is trial 5 with value: 0.9776203877562853.\n",
"[I 2025-03-20 15:10:59,528] Trial 8 finished with value: 0.9653231525379885 and parameters: {'iterations': 754, 'learning_rate': 0.00208109923189855, 'depth': 5, 'subsample': 0.596137479445154, 'colsample_bylevel': 0.7574060768484172, 'l2_leaf_reg': 11.210689475672972, 'random_strength': 7.219680822486467}. Best is trial 5 with value: 0.9776203877562853.\n",
"[I 2025-03-20 15:12:14,806] Trial 9 finished with value: 0.9645587008781827 and parameters: {'iterations': 797, 'learning_rate': 0.002876994974206422, 'depth': 6, 'subsample': 0.7623605250175415, 'colsample_bylevel': 0.5792373869617546, 'l2_leaf_reg': 11.933900864140389, 'random_strength': 9.537789817015767}. Best is trial 5 with value: 0.9776203877562853.\n",
"[I 2025-03-20 15:13:20,035] Trial 10 finished with value: 0.9777185087394575 and parameters: {'iterations': 843, 'learning_rate': 0.09347955751894015, 'depth': 4, 'subsample': 0.5919290139946178, 'colsample_bylevel': 0.7823226761425409, 'l2_leaf_reg': 8.906630269811362, 'random_strength': 8.822275155143554}. Best is trial 10 with value: 0.9777185087394575.\n",
"[I 2025-03-20 15:14:26,950] Trial 11 finished with value: 0.9772716122615548 and parameters: {'iterations': 866, 'learning_rate': 0.09609340816462177, 'depth': 4, 'subsample': 0.588006292400181, 'colsample_bylevel': 0.7921811422710393, 'l2_leaf_reg': 8.754759631995423, 'random_strength': 8.823729815067631}. Best is trial 10 with value: 0.9777185087394575.\n",
"[I 2025-03-20 15:15:31,960] Trial 12 finished with value: 0.9770432579734449 and parameters: {'iterations': 842, 'learning_rate': 0.03150622304414378, 'depth': 4, 'subsample': 0.5815107101504045, 'colsample_bylevel': 0.7526738793511021, 'l2_leaf_reg': 8.96571033386253, 'random_strength': 7.901521206622082}. Best is trial 10 with value: 0.9777185087394575.\n",
"[I 2025-03-20 15:16:19,764] Trial 13 finished with value: 0.9762975385013358 and parameters: {'iterations': 500, 'learning_rate': 0.030813140864715382, 'depth': 5, 'subsample': 0.6807936806371823, 'colsample_bylevel': 0.7992043286169884, 'l2_leaf_reg': 6.54189325170135, 'random_strength': 9.2194946118034}. Best is trial 10 with value: 0.9777185087394575.\n",
"[I 2025-03-20 15:17:30,598] Trial 14 finished with value: 0.9771627871711274 and parameters: {'iterations': 960, 'learning_rate': 0.08725168283234642, 'depth': 4, 'subsample': 0.5611916358409927, 'colsample_bylevel': 0.7407206600062527, 'l2_leaf_reg': 10.877310276695814, 'random_strength': 8.279759834834062}. Best is trial 10 with value: 0.9777185087394575.\n",
"[I 2025-03-20 15:18:55,623] Trial 15 finished with value: 0.9773510010570307 and parameters: {'iterations': 921, 'learning_rate': 0.030478650701436915, 'depth': 5, 'subsample': 0.6426668840891531, 'colsample_bylevel': 0.6580544384365046, 'l2_leaf_reg': 9.275448656004336, 'random_strength': 7.054540552553462}. Best is trial 10 with value: 0.9777185087394575.\n",
"[I 2025-03-20 15:20:03,449] Trial 16 finished with value: 0.9770013335533622 and parameters: {'iterations': 802, 'learning_rate': 0.04717986995915269, 'depth': 4, 'subsample': 0.5502437476216447, 'colsample_bylevel': 0.7293227273801556, 'l2_leaf_reg': 5.910885625677492, 'random_strength': 9.224856294634591}. Best is trial 10 with value: 0.9777185087394575.\n",
"[I 2025-03-20 15:21:08,738] Trial 17 finished with value: 0.9664417317461521 and parameters: {'iterations': 794, 'learning_rate': 0.005786331156808205, 'depth': 5, 'subsample': 0.5027107752205993, 'colsample_bylevel': 0.7639160019272435, 'l2_leaf_reg': 7.568889551395079, 'random_strength': 7.889004164110706}. Best is trial 10 with value: 0.9777185087394575.\n",
"[I 2025-03-20 15:22:13,898] Trial 18 finished with value: 0.9765954694866041 and parameters: {'iterations': 919, 'learning_rate': 0.02240626498044735, 'depth': 4, 'subsample': 0.6106354847964582, 'colsample_bylevel': 0.6625660136271165, 'l2_leaf_reg': 10.544957798217377, 'random_strength': 6.657969347763323}. Best is trial 10 with value: 0.9777185087394575.\n",
"[I 2025-03-20 15:23:05,997] Trial 19 finished with value: 0.9776159277115958 and parameters: {'iterations': 597, 'learning_rate': 0.07175908110255513, 'depth': 5, 'subsample': 0.681492227991095, 'colsample_bylevel': 0.7774154754002688, 'l2_leaf_reg': 7.472076834725921, 'random_strength': 9.99934503292809}. Best is trial 10 with value: 0.9777185087394575.\n"
"[I 2025-04-15 14:46:04,525] A new study created in memory with name: no-name-a82f1532-93fd-444e-a2a9-e64f1ab1581d\n",
"[I 2025-04-15 14:46:47,591] Trial 0 finished with value: 0.9760700762221638 and parameters: {'iterations': 879, 'learning_rate': 0.03194622545083888, 'depth': 4, 'subsample': 0.6984770397557132, 'colsample_bylevel': 0.6107784129933028, 'l2_leaf_reg': 19.31004315136751, 'random_strength': 8.538025613654153}. Best is trial 0 with value: 0.9760700762221638.\n",
"[I 2025-04-15 14:47:49,613] Trial 1 finished with value: 0.968974145120934 and parameters: {'iterations': 987, 'learning_rate': 0.00497466285240473, 'depth': 6, 'subsample': 0.7324064062513642, 'colsample_bylevel': 0.5860390386187008, 'l2_leaf_reg': 13.6700598295073, 'random_strength': 8.68535276828941}. Best is trial 0 with value: 0.9760700762221638.\n",
"[I 2025-04-15 14:48:52,501] Trial 2 finished with value: 0.9771271068136104 and parameters: {'iterations': 958, 'learning_rate': 0.029644496717173407, 'depth': 5, 'subsample': 0.7324841402342017, 'colsample_bylevel': 0.7540959396418829, 'l2_leaf_reg': 7.126960456494959, 'random_strength': 7.4284219069859745}. Best is trial 2 with value: 0.9771271068136104.\n",
"[I 2025-04-15 14:49:44,391] Trial 3 finished with value: 0.9687270586451275 and parameters: {'iterations': 778, 'learning_rate': 0.002458862957591489, 'depth': 6, 'subsample': 0.5532293644121917, 'colsample_bylevel': 0.5854134727534657, 'l2_leaf_reg': 16.679333851474986, 'random_strength': 7.393624819420116}. Best is trial 2 with value: 0.9771271068136104.\n",
"[I 2025-04-15 14:50:18,713] Trial 4 finished with value: 0.9763555190823012 and parameters: {'iterations': 562, 'learning_rate': 0.036725493598596365, 'depth': 5, 'subsample': 0.7722133171879484, 'colsample_bylevel': 0.6532823306754292, 'l2_leaf_reg': 12.030754990066507, 'random_strength': 8.877550003001012}. Best is trial 2 with value: 0.9771271068136104.\n",
"[I 2025-04-15 14:51:17,350] Trial 5 finished with value: 0.968570065072052 and parameters: {'iterations': 962, 'learning_rate': 0.0011370325299877532, 'depth': 5, 'subsample': 0.6728515618578689, 'colsample_bylevel': 0.6826782056061581, 'l2_leaf_reg': 13.623633572161166, 'random_strength': 6.601819735133423}. Best is trial 2 with value: 0.9771271068136104.\n",
"[I 2025-04-15 14:51:57,207] Trial 6 finished with value: 0.9687047584216792 and parameters: {'iterations': 617, 'learning_rate': 0.0015432741560526652, 'depth': 5, 'subsample': 0.7499204041791451, 'colsample_bylevel': 0.7446512678837157, 'l2_leaf_reg': 10.072108741396685, 'random_strength': 7.00923150345308}. Best is trial 2 with value: 0.9771271068136104.\n",
"[I 2025-04-15 14:52:37,654] Trial 7 finished with value: 0.976307350599653 and parameters: {'iterations': 619, 'learning_rate': 0.09249177862479464, 'depth': 5, 'subsample': 0.5961272939306226, 'colsample_bylevel': 0.6454652012357972, 'l2_leaf_reg': 13.952857635588412, 'random_strength': 5.3135323404658195}. Best is trial 2 with value: 0.9771271068136104.\n",
"[I 2025-04-15 14:53:16,002] Trial 8 finished with value: 0.9680910562723839 and parameters: {'iterations': 668, 'learning_rate': 0.0017846267770558202, 'depth': 5, 'subsample': 0.6091299857459994, 'colsample_bylevel': 0.5146530801783137, 'l2_leaf_reg': 6.463177992505614, 'random_strength': 9.83403822639651}. Best is trial 2 with value: 0.9771271068136104.\n",
"[I 2025-04-15 14:54:07,117] Trial 9 finished with value: 0.9701837092407667 and parameters: {'iterations': 899, 'learning_rate': 0.009675766544107381, 'depth': 5, 'subsample': 0.7887519851916923, 'colsample_bylevel': 0.7583643579850647, 'l2_leaf_reg': 12.884965304659517, 'random_strength': 7.548006066067643}. Best is trial 2 with value: 0.9771271068136104.\n",
"[I 2025-04-15 14:54:49,239] Trial 10 finished with value: 0.9754644021533097 and parameters: {'iterations': 777, 'learning_rate': 0.024122661029117063, 'depth': 4, 'subsample': 0.5013447329900989, 'colsample_bylevel': 0.7800307305883676, 'l2_leaf_reg': 5.638932266363305, 'random_strength': 5.942927125428197}. Best is trial 2 with value: 0.9771271068136104.\n",
"[I 2025-04-15 14:55:16,702] Trial 11 finished with value: 0.9764081476096389 and parameters: {'iterations': 508, 'learning_rate': 0.04726537771631546, 'depth': 4, 'subsample': 0.7937595144792454, 'colsample_bylevel': 0.6888257437132431, 'l2_leaf_reg': 9.252398775691933, 'random_strength': 8.42482116769386}. Best is trial 2 with value: 0.9771271068136104.\n",
"[I 2025-04-15 14:55:43,712] Trial 12 finished with value: 0.9769504890439002 and parameters: {'iterations': 520, 'learning_rate': 0.07517500503326581, 'depth': 4, 'subsample': 0.7116563324529114, 'colsample_bylevel': 0.7144947743480992, 'l2_leaf_reg': 8.350160243659916, 'random_strength': 8.168242005926878}. Best is trial 2 with value: 0.9771271068136104.\n",
"[I 2025-04-15 14:56:21,069] Trial 13 finished with value: 0.9774294978435685 and parameters: {'iterations': 703, 'learning_rate': 0.06983698579391724, 'depth': 4, 'subsample': 0.7065367059268508, 'colsample_bylevel': 0.724103178768751, 'l2_leaf_reg': 8.023903029922751, 'random_strength': 7.928311734643061}. Best is trial 13 with value: 0.9774294978435685.\n",
"[I 2025-04-15 14:57:09,353] Trial 14 finished with value: 0.9731246627091203 and parameters: {'iterations': 724, 'learning_rate': 0.015490359934301434, 'depth': 6, 'subsample': 0.6446878985673278, 'colsample_bylevel': 0.7920667146800474, 'l2_leaf_reg': 7.371821145261041, 'random_strength': 6.451885650217026}. Best is trial 13 with value: 0.9774294978435685.\n",
"[I 2025-04-15 14:57:52,357] Trial 15 finished with value: 0.9738400538773399 and parameters: {'iterations': 859, 'learning_rate': 0.014795165553926048, 'depth': 4, 'subsample': 0.665044880376952, 'colsample_bylevel': 0.7290363615996102, 'l2_leaf_reg': 5.2279489954943, 'random_strength': 9.524780688568056}. Best is trial 13 with value: 0.9774294978435685.\n",
"[I 2025-04-15 14:58:30,199] Trial 16 finished with value: 0.9766686142195145 and parameters: {'iterations': 722, 'learning_rate': 0.058843073710673745, 'depth': 4, 'subsample': 0.7070259824991089, 'colsample_bylevel': 0.7046723312898295, 'l2_leaf_reg': 9.886755559850055, 'random_strength': 7.515414918520503}. Best is trial 13 with value: 0.9774294978435685.\n",
"[I 2025-04-15 14:59:29,101] Trial 17 finished with value: 0.9759648191674881 and parameters: {'iterations': 836, 'learning_rate': 0.0201164375942546, 'depth': 6, 'subsample': 0.7464329768357236, 'colsample_bylevel': 0.751756591889216, 'l2_leaf_reg': 11.194529638284472, 'random_strength': 7.890747626459661}. Best is trial 13 with value: 0.9774294978435685.\n",
"[I 2025-04-15 15:00:13,792] Trial 18 finished with value: 0.9691284626671959 and parameters: {'iterations': 941, 'learning_rate': 0.008139386305286263, 'depth': 4, 'subsample': 0.6324467522547773, 'colsample_bylevel': 0.6640606522826965, 'l2_leaf_reg': 7.637155074182216, 'random_strength': 9.118659113863696}. Best is trial 13 with value: 0.9774294978435685.\n",
"[I 2025-04-15 15:01:04,807] Trial 19 finished with value: 0.9772921284671273 and parameters: {'iterations': 812, 'learning_rate': 0.047376913098252045, 'depth': 5, 'subsample': 0.6838122071503733, 'colsample_bylevel': 0.7976712282377681, 'l2_leaf_reg': 15.51559416611185, 'random_strength': 6.860978796602465}. Best is trial 13 with value: 0.9774294978435685.\n"
]
},
{
@ -899,15 +1065,15 @@
"output_type": "stream",
"text": [
"Best Trial:\n",
"AUC: 0.9777185087394575\n",
"AUC: 0.9774294978435685\n",
"Params:\n",
" iterations: 843\n",
" learning_rate: 0.09347955751894015\n",
" iterations: 703\n",
" learning_rate: 0.06983698579391724\n",
" depth: 4\n",
" subsample: 0.5919290139946178\n",
" colsample_bylevel: 0.7823226761425409\n",
" l2_leaf_reg: 8.906630269811362\n",
" random_strength: 8.822275155143554\n"
" subsample: 0.7065367059268508\n",
" colsample_bylevel: 0.724103178768751\n",
" l2_leaf_reg: 8.023903029922751\n",
" random_strength: 7.928311734643061\n"
]
}
],
@ -926,24 +1092,22 @@
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": 14,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0:\ttest: 0.9345234\tbest: 0.9345234 (0)\ttotal: 67.6ms\tremaining: 56.9s\n",
"100:\ttest: 0.9676504\tbest: 0.9677699 (99)\ttotal: 7.48s\tremaining: 55s\n",
"200:\ttest: 0.9763805\tbest: 0.9763805 (200)\ttotal: 14.6s\tremaining: 46.6s\n",
"300:\ttest: 0.9768756\tbest: 0.9770299 (281)\ttotal: 21.1s\tremaining: 38s\n",
"0:\ttest: 0.8633242\tbest: 0.8633242 (0)\ttotal: 105ms\tremaining: 1m 13s\n",
"100:\ttest: 0.9693666\tbest: 0.9696128 (84)\ttotal: 4.78s\tremaining: 28.5s\n",
"Stopped by overfitting detector (50 iterations wait)\n",
"\n",
"bestTest = 0.9770298778\n",
"bestIteration = 281\n",
"bestTest = 0.9696128235\n",
"bestIteration = 84\n",
"\n",
"Shrink model to first 282 iterations.\n",
"Learn AUC: 0.9921 | Test AUC: 0.9770\n"
"Shrink model to first 85 iterations.\n",
"Learn AUC: 0.9742 | Test AUC: 0.9696\n"
]
}
],
@ -989,7 +1153,7 @@
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [

View File

@ -237,6 +237,18 @@
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"df = df.drop(columns=['active_work_category', 'work_stability_score', \n",
" 'position_score', 'job_income_position_score',\n",
" 'education_score', 'education_income_ratio',\n",
" 'weighted_satisfaction_performance'])"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"X = df.drop(columns=['active_work_months','churn_status', 'employee_id', 'date_of_birth', 'join_date', 'resign_date'])\n",
"y = df['active_work_months']\n",
@ -253,7 +265,7 @@
"y_valid = valid_data['active_work_months']\n",
"\n",
"cat_feature = ['departemen', 'position', 'domisili', 'marriage_stat', 'job_satisfaction', 'performance_rating',\n",
" 'education', 'active_work_category', 'jenis_kelamin']"
" 'education', 'jenis_kelamin']"
]
},
{
@ -334,21 +346,21 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0:\tlearn: 14.5009742\ttest: 20.7509764\tbest: 20.7509764 (0)\ttotal: 299ms\tremaining: 4m 58s\n",
"200:\tlearn: 2.3680725\ttest: 3.7582712\tbest: 3.7582712 (200)\ttotal: 23.4s\tremaining: 1m 33s\n",
"400:\tlearn: 0.6074381\ttest: 1.2438678\tbest: 1.2438678 (400)\ttotal: 39s\tremaining: 58.2s\n",
"600:\tlearn: 0.3585168\ttest: 0.6968303\tbest: 0.6968303 (600)\ttotal: 50.6s\tremaining: 33.6s\n",
"800:\tlearn: 0.3105016\ttest: 0.5057286\tbest: 0.5057286 (800)\ttotal: 1m 7s\tremaining: 16.7s\n",
"999:\tlearn: 0.2918692\ttest: 0.4375722\tbest: 0.4375722 (999)\ttotal: 1m 26s\tremaining: 0us\n",
"0:\tlearn: 14.5012046\ttest: 20.7328222\tbest: 20.7328222 (0)\ttotal: 274ms\tremaining: 4m 33s\n",
"200:\tlearn: 2.3174890\ttest: 3.6207810\tbest: 3.6207810 (200)\ttotal: 13.5s\tremaining: 53.6s\n",
"400:\tlearn: 0.5080249\ttest: 0.7918809\tbest: 0.7918809 (400)\ttotal: 27.5s\tremaining: 41.1s\n",
"600:\tlearn: 0.3180844\ttest: 0.3664973\tbest: 0.3664973 (600)\ttotal: 42.6s\tremaining: 28.3s\n",
"800:\tlearn: 0.2992983\ttest: 0.3231799\tbest: 0.3231799 (800)\ttotal: 55.5s\tremaining: 13.8s\n",
"999:\tlearn: 0.2885793\ttest: 0.3135297\tbest: 0.3135297 (999)\ttotal: 1m 7s\tremaining: 0us\n",
"\n",
"bestTest = 0.4375721622\n",
"bestTest = 0.3135296768\n",
"bestIteration = 999\n",
"\n"
]
@ -356,10 +368,10 @@
{
"data": {
"text/plain": [
"<catboost.core.CatBoostRegressor at 0x2204d48bd60>"
"<catboost.core.CatBoostRegressor at 0x26bbda6b3d0>"
]
},
"execution_count": 5,
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
@ -549,29 +561,27 @@
"name": "stderr",
"output_type": "stream",
"text": [
"d:\\Tugas Akhir\\Codingan\\Development\\App\\.venv\\lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
" from .autonotebook import tqdm as notebook_tqdm\n",
"[I 2025-03-20 21:00:35,783] A new study created in memory with name: no-name-90c20206-17ac-4d18-b328-3e83b0b68b02\n",
"[I 2025-03-20 21:01:21,239] Trial 0 finished with value: 8.419168492565904 and parameters: {'iterations': 829, 'learning_rate': 0.0015555213398614615, 'depth': 5, 'subsample': 0.7218038924225667, 'colsample_bylevel': 0.6317251084903674, 'l2_leaf_reg': 9.824596206043214, 'random_strength': 5.86501034445993}. Best is trial 0 with value: 8.419168492565904.\n",
"[I 2025-03-20 21:02:07,180] Trial 1 finished with value: 2.733330716926934 and parameters: {'iterations': 884, 'learning_rate': 0.0053516318860709, 'depth': 5, 'subsample': 0.7097793314114498, 'colsample_bylevel': 0.6435980070119711, 'l2_leaf_reg': 13.121533573607655, 'random_strength': 9.324119789288343}. Best is trial 1 with value: 2.733330716926934.\n",
"[I 2025-03-20 21:02:32,264] Trial 2 finished with value: 1.1897665017142585 and parameters: {'iterations': 635, 'learning_rate': 0.023704951537819915, 'depth': 4, 'subsample': 0.7686389413016427, 'colsample_bylevel': 0.5266671315496564, 'l2_leaf_reg': 15.175290458163712, 'random_strength': 7.30745741132724}. Best is trial 2 with value: 1.1897665017142585.\n",
"[I 2025-03-20 21:03:16,738] Trial 3 finished with value: 1.7154507371626437 and parameters: {'iterations': 782, 'learning_rate': 0.012722641891947339, 'depth': 5, 'subsample': 0.6239927785252992, 'colsample_bylevel': 0.551697952957819, 'l2_leaf_reg': 8.011605642650462, 'random_strength': 7.351190134542403}. Best is trial 2 with value: 1.1897665017142585.\n",
"[I 2025-03-20 21:03:56,278] Trial 4 finished with value: 1.0342169780611064 and parameters: {'iterations': 590, 'learning_rate': 0.026972969679793426, 'depth': 5, 'subsample': 0.7586099689360802, 'colsample_bylevel': 0.6775428452906951, 'l2_leaf_reg': 8.716335425228655, 'random_strength': 8.009471482166916}. Best is trial 4 with value: 1.0342169780611064.\n",
"[I 2025-03-20 21:04:38,974] Trial 5 finished with value: 0.9367588642619448 and parameters: {'iterations': 806, 'learning_rate': 0.025597264277354852, 'depth': 5, 'subsample': 0.6503807167080523, 'colsample_bylevel': 0.6858283170142669, 'l2_leaf_reg': 15.122433690868604, 'random_strength': 9.26015271022034}. Best is trial 5 with value: 0.9367588642619448.\n",
"[I 2025-03-20 21:05:17,061] Trial 6 finished with value: 3.3099767746788875 and parameters: {'iterations': 789, 'learning_rate': 0.005334295156738072, 'depth': 4, 'subsample': 0.6150191027518872, 'colsample_bylevel': 0.5100653940971351, 'l2_leaf_reg': 16.26273857150823, 'random_strength': 9.363327197652792}. Best is trial 5 with value: 0.9367588642619448.\n",
"[I 2025-03-20 21:06:11,133] Trial 7 finished with value: 2.430297164863871 and parameters: {'iterations': 837, 'learning_rate': 0.006452050997341309, 'depth': 5, 'subsample': 0.5569957978664557, 'colsample_bylevel': 0.7974026616845746, 'l2_leaf_reg': 17.640163162948227, 'random_strength': 8.93735519303648}. Best is trial 5 with value: 0.9367588642619448.\n",
"[I 2025-03-20 21:06:40,233] Trial 8 finished with value: 3.059791812580938 and parameters: {'iterations': 590, 'learning_rate': 0.006568464895546362, 'depth': 5, 'subsample': 0.5764687596920144, 'colsample_bylevel': 0.5755754855362132, 'l2_leaf_reg': 8.522648009171846, 'random_strength': 9.538382092752938}. Best is trial 5 with value: 0.9367588642619448.\n",
"[I 2025-03-20 21:07:13,639] Trial 9 finished with value: 7.76817781695066 and parameters: {'iterations': 617, 'learning_rate': 0.002539968080520203, 'depth': 4, 'subsample': 0.6372121192487497, 'colsample_bylevel': 0.6082864452448382, 'l2_leaf_reg': 17.654618698706546, 'random_strength': 9.459556159452063}. Best is trial 5 with value: 0.9367588642619448.\n",
"[I 2025-03-20 21:08:20,357] Trial 10 finished with value: 0.4103763995321103 and parameters: {'iterations': 979, 'learning_rate': 0.09529892832775133, 'depth': 6, 'subsample': 0.5209516193113186, 'colsample_bylevel': 0.7296584784356337, 'l2_leaf_reg': 5.36555480808817, 'random_strength': 5.055795530221549}. Best is trial 10 with value: 0.4103763995321103.\n",
"[I 2025-03-20 21:09:34,792] Trial 11 finished with value: 0.33873805409112234 and parameters: {'iterations': 989, 'learning_rate': 0.07851963900488154, 'depth': 6, 'subsample': 0.5200611359234908, 'colsample_bylevel': 0.7185209948848563, 'l2_leaf_reg': 5.259953652103869, 'random_strength': 5.611812114220319}. Best is trial 11 with value: 0.33873805409112234.\n",
"[I 2025-03-20 21:10:53,294] Trial 12 finished with value: 0.33747220251551263 and parameters: {'iterations': 1000, 'learning_rate': 0.09306028108980487, 'depth': 6, 'subsample': 0.5034416033403175, 'colsample_bylevel': 0.7452683981181829, 'l2_leaf_reg': 5.279315402542746, 'random_strength': 5.294692039250562}. Best is trial 12 with value: 0.33747220251551263.\n",
"[I 2025-03-20 21:11:51,728] Trial 13 finished with value: 1.6267506473342959 and parameters: {'iterations': 996, 'learning_rate': 0.09325178395683269, 'depth': 6, 'subsample': 0.5125699226468535, 'colsample_bylevel': 0.756377904971589, 'l2_leaf_reg': 5.519312813157406, 'random_strength': 6.120809817278766}. Best is trial 12 with value: 0.33747220251551263.\n",
"[I 2025-03-20 21:12:42,167] Trial 14 finished with value: 0.4564504106167195 and parameters: {'iterations': 919, 'learning_rate': 0.052020741738658456, 'depth': 6, 'subsample': 0.5007317413228829, 'colsample_bylevel': 0.727489010605425, 'l2_leaf_reg': 11.083178624062024, 'random_strength': 5.261787256931901}. Best is trial 12 with value: 0.33747220251551263.\n",
"[I 2025-03-20 21:13:32,464] Trial 15 finished with value: 0.6231066699606179 and parameters: {'iterations': 696, 'learning_rate': 0.04926421412297825, 'depth': 6, 'subsample': 0.5528336472517523, 'colsample_bylevel': 0.7710394282985423, 'l2_leaf_reg': 6.189751630816335, 'random_strength': 6.502158303659374}. Best is trial 12 with value: 0.33747220251551263.\n",
"[I 2025-03-20 21:14:53,119] Trial 16 finished with value: 0.4792492732081974 and parameters: {'iterations': 930, 'learning_rate': 0.04640033657256652, 'depth': 6, 'subsample': 0.5859719980036684, 'colsample_bylevel': 0.700113722637731, 'l2_leaf_reg': 7.099786946755341, 'random_strength': 6.688129574746723}. Best is trial 12 with value: 0.33747220251551263.\n",
"[I 2025-03-20 21:15:49,049] Trial 17 finished with value: 1.61841931396652 and parameters: {'iterations': 518, 'learning_rate': 0.015200955659638398, 'depth': 6, 'subsample': 0.534977261704631, 'colsample_bylevel': 0.7246250429139481, 'l2_leaf_reg': 19.80600077744929, 'random_strength': 5.673411846650424}. Best is trial 12 with value: 0.33747220251551263.\n",
"[I 2025-03-20 21:17:36,811] Trial 18 finished with value: 0.5548288731588591 and parameters: {'iterations': 938, 'learning_rate': 0.07194118095780964, 'depth': 6, 'subsample': 0.6665507151821397, 'colsample_bylevel': 0.790138958827564, 'l2_leaf_reg': 12.301321434830928, 'random_strength': 6.796168129707886}. Best is trial 12 with value: 0.33747220251551263.\n",
"[I 2025-03-20 21:18:43,121] Trial 19 finished with value: 0.6231757616191587 and parameters: {'iterations': 719, 'learning_rate': 0.03682071614516647, 'depth': 6, 'subsample': 0.5866796565002883, 'colsample_bylevel': 0.7504213556109071, 'l2_leaf_reg': 10.442003723130012, 'random_strength': 8.36360703538023}. Best is trial 12 with value: 0.33747220251551263.\n"
"[I 2025-04-15 15:08:11,750] A new study created in memory with name: no-name-7d5ebf34-fc55-4b59-b90c-b22b64311951\n",
"[I 2025-04-15 15:08:48,891] Trial 0 finished with value: 0.646062322615293 and parameters: {'iterations': 898, 'learning_rate': 0.03145083099289301, 'depth': 4, 'subsample': 0.6444105892248361, 'colsample_bylevel': 0.6186096575774811, 'l2_leaf_reg': 17.64514783498297, 'random_strength': 9.88786820068755}. Best is trial 0 with value: 0.646062322615293.\n",
"[I 2025-04-15 15:09:26,486] Trial 1 finished with value: 0.9184440015716631 and parameters: {'iterations': 897, 'learning_rate': 0.011291887138100785, 'depth': 4, 'subsample': 0.7769524548266592, 'colsample_bylevel': 0.5647743432086197, 'l2_leaf_reg': 7.76546189160343, 'random_strength': 7.668666969032969}. Best is trial 0 with value: 0.646062322615293.\n",
"[I 2025-04-15 15:09:52,174] Trial 2 finished with value: 5.984384729372321 and parameters: {'iterations': 598, 'learning_rate': 0.004160874913367698, 'depth': 4, 'subsample': 0.5118790113991489, 'colsample_bylevel': 0.5110086574656876, 'l2_leaf_reg': 18.661041022397058, 'random_strength': 5.943307589760212}. Best is trial 0 with value: 0.646062322615293.\n",
"[I 2025-04-15 15:10:26,575] Trial 3 finished with value: 9.228750640503423 and parameters: {'iterations': 682, 'learning_rate': 0.0021975421219896157, 'depth': 5, 'subsample': 0.6755358151480091, 'colsample_bylevel': 0.6596729110914161, 'l2_leaf_reg': 14.736656536586944, 'random_strength': 7.136912007339907}. Best is trial 0 with value: 0.646062322615293.\n",
"[I 2025-04-15 15:11:10,104] Trial 4 finished with value: 0.6227136479365861 and parameters: {'iterations': 895, 'learning_rate': 0.019358429409891725, 'depth': 5, 'subsample': 0.6733355260936932, 'colsample_bylevel': 0.6148187038915378, 'l2_leaf_reg': 12.647440499487868, 'random_strength': 9.155074074386231}. Best is trial 4 with value: 0.6227136479365861.\n",
"[I 2025-04-15 15:11:42,996] Trial 5 finished with value: 2.612658773597153 and parameters: {'iterations': 607, 'learning_rate': 0.0064697585951560485, 'depth': 6, 'subsample': 0.5164557295530797, 'colsample_bylevel': 0.6841029537714532, 'l2_leaf_reg': 11.459664006812375, 'random_strength': 5.575562702191829}. Best is trial 4 with value: 0.6227136479365861.\n",
"[I 2025-04-15 15:12:09,055] Trial 6 finished with value: 5.0563199381852675 and parameters: {'iterations': 634, 'learning_rate': 0.0041378039430250224, 'depth': 4, 'subsample': 0.6296012316767893, 'colsample_bylevel': 0.6928068433595623, 'l2_leaf_reg': 18.60249289606864, 'random_strength': 6.29108320180428}. Best is trial 4 with value: 0.6227136479365861.\n",
"[I 2025-04-15 15:12:48,070] Trial 7 finished with value: 6.064226235109285 and parameters: {'iterations': 659, 'learning_rate': 0.003194121199679894, 'depth': 6, 'subsample': 0.6850485984896206, 'colsample_bylevel': 0.7100729641171083, 'l2_leaf_reg': 11.897665097253151, 'random_strength': 5.260194361969255}. Best is trial 4 with value: 0.6227136479365861.\n",
"[I 2025-04-15 15:13:18,641] Trial 8 finished with value: 9.453945919462235 and parameters: {'iterations': 700, 'learning_rate': 0.0020817037350174448, 'depth': 4, 'subsample': 0.6203198807137237, 'colsample_bylevel': 0.5266772331906592, 'l2_leaf_reg': 9.86926120392112, 'random_strength': 5.474003236157858}. Best is trial 4 with value: 0.6227136479365861.\n",
"[I 2025-04-15 15:14:05,709] Trial 9 finished with value: 0.49267024172790747 and parameters: {'iterations': 687, 'learning_rate': 0.052164593502382166, 'depth': 6, 'subsample': 0.6660663261019623, 'colsample_bylevel': 0.7314001039861265, 'l2_leaf_reg': 17.594770060423073, 'random_strength': 9.339230639028976}. Best is trial 9 with value: 0.49267024172790747.\n",
"[I 2025-04-15 15:14:39,848] Trial 10 finished with value: 0.5500268252840363 and parameters: {'iterations': 509, 'learning_rate': 0.07967431030832754, 'depth': 6, 'subsample': 0.7602272787841288, 'colsample_bylevel': 0.770229071536223, 'l2_leaf_reg': 15.503851488383855, 'random_strength': 8.492987824560748}. Best is trial 9 with value: 0.49267024172790747.\n",
"[I 2025-04-15 15:15:11,853] Trial 11 finished with value: 0.49023595998871994 and parameters: {'iterations': 500, 'learning_rate': 0.08779241979856825, 'depth': 6, 'subsample': 0.796075504979686, 'colsample_bylevel': 0.7976372716940351, 'l2_leaf_reg': 14.93516913511941, 'random_strength': 8.423535620874652}. Best is trial 11 with value: 0.49023595998871994.\n",
"[I 2025-04-15 15:16:03,033] Trial 12 finished with value: 0.49819783655018374 and parameters: {'iterations': 793, 'learning_rate': 0.09384534188388771, 'depth': 6, 'subsample': 0.7184900016067328, 'colsample_bylevel': 0.7836196128120625, 'l2_leaf_reg': 15.663609062290409, 'random_strength': 8.589899472344362}. Best is trial 11 with value: 0.49023595998871994.\n",
"[I 2025-04-15 15:16:29,620] Trial 13 finished with value: 0.4846695605629031 and parameters: {'iterations': 537, 'learning_rate': 0.042877612832129756, 'depth': 5, 'subsample': 0.5706634178949412, 'colsample_bylevel': 0.7465651513848619, 'l2_leaf_reg': 5.218951437778482, 'random_strength': 9.92877520468897}. Best is trial 13 with value: 0.4846695605629031.\n",
"[I 2025-04-15 15:16:57,316] Trial 14 finished with value: 0.7077219444540906 and parameters: {'iterations': 504, 'learning_rate': 0.034174948663442034, 'depth': 5, 'subsample': 0.569558755228507, 'colsample_bylevel': 0.7511709728692211, 'l2_leaf_reg': 7.512089049291371, 'random_strength': 7.737784610626449}. Best is trial 13 with value: 0.4846695605629031.\n",
"[I 2025-04-15 15:17:22,465] Trial 15 finished with value: 0.8736557180076023 and parameters: {'iterations': 560, 'learning_rate': 0.017043234915075773, 'depth': 5, 'subsample': 0.5741018674613553, 'colsample_bylevel': 0.7991676085324451, 'l2_leaf_reg': 5.982967530770232, 'random_strength': 9.756085101353683}. Best is trial 13 with value: 0.4846695605629031.\n",
"[I 2025-04-15 15:18:03,448] Trial 16 finished with value: 0.45211315741628616 and parameters: {'iterations': 779, 'learning_rate': 0.049661228877482096, 'depth': 5, 'subsample': 0.7989351952288142, 'colsample_bylevel': 0.7407581801522412, 'l2_leaf_reg': 5.003342383992051, 'random_strength': 8.391196012503668}. Best is trial 16 with value: 0.45211315741628616.\n",
"[I 2025-04-15 15:18:44,012] Trial 17 finished with value: 12.445283924362869 and parameters: {'iterations': 791, 'learning_rate': 0.0011590379392432504, 'depth': 5, 'subsample': 0.5835792160594921, 'colsample_bylevel': 0.7376020551248276, 'l2_leaf_reg': 5.088499987791439, 'random_strength': 7.132621480885446}. Best is trial 16 with value: 0.45211315741628616.\n",
"[I 2025-04-15 15:19:38,776] Trial 18 finished with value: 0.4335016501614797 and parameters: {'iterations': 970, 'learning_rate': 0.04163009683311559, 'depth': 5, 'subsample': 0.7311095765931528, 'colsample_bylevel': 0.6587798610446518, 'l2_leaf_reg': 7.70099272097691, 'random_strength': 9.134434266536488}. Best is trial 18 with value: 0.4335016501614797.\n",
"[I 2025-04-15 15:20:27,679] Trial 19 finished with value: 0.639097289338391 and parameters: {'iterations': 979, 'learning_rate': 0.012183824359018583, 'depth': 5, 'subsample': 0.7371061668104383, 'colsample_bylevel': 0.6302275501729142, 'l2_leaf_reg': 8.50453575427487, 'random_strength': 9.030709992017943}. Best is trial 18 with value: 0.4335016501614797.\n"
]
},
{
@ -579,8 +589,8 @@
"output_type": "stream",
"text": [
"Best trial:\n",
" RMSE: 0.33747220251551263\n",
" Params: {'iterations': 1000, 'learning_rate': 0.09306028108980487, 'depth': 6, 'subsample': 0.5034416033403175, 'colsample_bylevel': 0.7452683981181829, 'l2_leaf_reg': 5.279315402542746, 'random_strength': 5.294692039250562}\n"
" RMSE: 0.4335016501614797\n",
" Params: {'iterations': 970, 'learning_rate': 0.04163009683311559, 'depth': 5, 'subsample': 0.7311095765931528, 'colsample_bylevel': 0.6587798610446518, 'l2_leaf_reg': 7.70099272097691, 'random_strength': 9.134434266536488}\n"
]
}
],
@ -588,6 +598,7 @@
"import optuna\n",
"from catboost import CatBoostRegressor\n",
"from sklearn.metrics import mean_squared_error\n",
"import numpy as np\n",
"\n",
"# Fungsi objective untuk Optuna\n",
"def objective(trial):\n",
@ -639,18 +650,17 @@
"name": "stdout",
"output_type": "stream",
"text": [
"0:\tlearn: 13.4115622\ttest: 19.0660791\tbest: 19.0660791 (0)\ttotal: 193ms\tremaining: 3m 13s\n",
"200:\tlearn: 0.3985082\ttest: 0.5382811\tbest: 0.5382373 (199)\ttotal: 21.7s\tremaining: 1m 26s\n",
"400:\tlearn: 0.2915505\ttest: 0.4082137\tbest: 0.4080268 (398)\ttotal: 38.8s\tremaining: 57.9s\n",
"600:\tlearn: 0.2452564\ttest: 0.3555501\tbest: 0.3555501 (600)\ttotal: 55.4s\tremaining: 36.8s\n",
"800:\tlearn: 0.2201339\ttest: 0.3435762\tbest: 0.3435629 (786)\ttotal: 1m 16s\tremaining: 18.9s\n",
"999:\tlearn: 0.2028099\ttest: 0.3375486\tbest: 0.3374722 (998)\ttotal: 1m 40s\tremaining: 0us\n",
"0:\tlearn: 14.2026549\ttest: 20.4309858\tbest: 20.4309858 (0)\ttotal: 111ms\tremaining: 1m 47s\n",
"200:\tlearn: 0.7177922\ttest: 1.1667817\tbest: 1.1667817 (200)\ttotal: 10.7s\tremaining: 40.8s\n",
"400:\tlearn: 0.4239619\ttest: 0.5853924\tbest: 0.5853924 (400)\ttotal: 22.1s\tremaining: 31.4s\n",
"600:\tlearn: 0.3515389\ttest: 0.5013024\tbest: 0.5012549 (597)\ttotal: 32.9s\tremaining: 20.2s\n",
"800:\tlearn: 0.3162819\ttest: 0.4551766\tbest: 0.4551445 (798)\ttotal: 44.8s\tremaining: 9.45s\n",
"969:\tlearn: 0.2948077\ttest: 0.4335017\tbest: 0.4335017 (969)\ttotal: 53.6s\tremaining: 0us\n",
"\n",
"bestTest = 0.3374722025\n",
"bestIteration = 998\n",
"bestTest = 0.4335016502\n",
"bestIteration = 969\n",
"\n",
"Shrink model to first 999 iterations.\n",
"Final RMSE: 0.33747220251551263\n"
"Final RMSE: 0.4335016501614797\n"
]
}
],
@ -950,7 +960,7 @@
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": 10,
"metadata": {},
"outputs": [
{

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -13,7 +13,7 @@ import streamlit.components.v1 as components
import time
import json
reg_model = pickle.load(open('regression_model_final.sav', 'rb'))
reg_model = pickle.load(open('regression_model_final_1year.sav', 'rb'))
class_model = pickle.load(open('clasification_final_model_smote.sav', 'rb'))
train_file_path = 'X_train.csv'
@ -81,34 +81,6 @@ def process_employee_data(df):
df["income_dependant_ratio"] = df["income"] / (df["dependant"] + 1)
df["work_efficiency"] = df["avg_time_work"] / 8
def categorize_work_duration_months(months):
if months < 12:
return "Short-term"
elif 12 <= months <= 36:
return "Mid-term"
else:
return "Long-term"
df['active_work_category'] = df['active_work_months'].apply(categorize_work_duration_months)
# Work Stability Score
df['work_stability_score'] = df['active_work_months'] / (df['absent_90D'] + 1)
# Job Income to Position Score
position_score_mapping = {'Junior': 2, 'Staff': 1, 'Senior': 3, 'Manager': 4}
df['position_score'] = df['position'].map(position_score_mapping)
df['job_income_position_score'] = df['income'] / df['position_score']
# Education-Adjusted Income
education_score_mapping = {'SLTA': 1, 'D1': 2, 'D2': 3, 'D3': 4, 'S1': 5, 'S2': 6, 'S3': 7}
df['education_score'] = df['education'].map(education_score_mapping)
df['education_income_ratio'] = df['income'] / df['education_score']
# Weighted Satisfaction-Performance Score
df['weighted_satisfaction_performance'] = (
0.6 * df['job_satisfaction'] + 0.4 * df['performance_rating']
)
job_satisfaction_mapping = {1.0: 'Low', 2.0: 'Medium', 3.0: 'High', 4.0: 'Very High'}
df['job_satisfaction'] = df['job_satisfaction'].map(job_satisfaction_mapping)
@ -383,7 +355,7 @@ def show_prediction():
# Kolom kategori
cat_feature = ['departemen', 'position', 'domisili', 'marriage_stat', 'job_satisfaction',
'performance_rating', 'education', 'active_work_category', 'jenis_kelamin']
'performance_rating', 'education', 'jenis_kelamin']
X_test_class = df[expected_columns_class]
X_test_reg = df[expected_columns_reg]

Binary file not shown.