{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" employee_id | \n",
" domisili | \n",
" jenis_kelamin | \n",
" date_of_birth | \n",
" join_date | \n",
" resign_date | \n",
" marriage_stat | \n",
" dependant | \n",
" education | \n",
" absent_90D | \n",
" ... | \n",
" active_work_category | \n",
" work_stability_score | \n",
" married_dependent_ratio | \n",
" position_score | \n",
" job_income_position_score | \n",
" education_score | \n",
" education_income_ratio | \n",
" weighted_satisfaction_performance | \n",
" resign_risk_indicator | \n",
" adjusted_work_time | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" EM10510 | \n",
" Kota Jakarta Utara | \n",
" Laki-laki | \n",
" 1983-09-11 | \n",
" 2021-02-09 | \n",
" 2023-06-22 | \n",
" Married | \n",
" 1 | \n",
" SLTA | \n",
" 9.0 | \n",
" ... | \n",
" Mid-term | \n",
" 2.800000 | \n",
" 2 | \n",
" 1 | \n",
" 1418218.0 | \n",
" 1 | \n",
" 1.418218e+06 | \n",
" 2.2 | \n",
" Medium | \n",
" 9.246870 | \n",
"
\n",
" \n",
" 1 | \n",
" EM4322 | \n",
" Kabupaten Bekasi | \n",
" Perempuan | \n",
" 1987-03-22 | \n",
" 2022-02-28 | \n",
" 2023-04-04 | \n",
" Married | \n",
" 1 | \n",
" SLTA | \n",
" 0.0 | \n",
" ... | \n",
" Mid-term | \n",
" 13.000000 | \n",
" 2 | \n",
" 1 | \n",
" 1060575.0 | \n",
" 1 | \n",
" 1.060575e+06 | \n",
" 1.6 | \n",
" Medium | \n",
" 9.650000 | \n",
"
\n",
" \n",
" 2 | \n",
" EM1637 | \n",
" Kota Jakarta Barat | \n",
" Laki-laki | \n",
" 1970-04-27 | \n",
" 2020-12-23 | \n",
" 2023-03-25 | \n",
" Married | \n",
" 4 | \n",
" D2 | \n",
" 4.0 | \n",
" ... | \n",
" Mid-term | \n",
" 5.400000 | \n",
" 5 | \n",
" 1 | \n",
" 4885136.0 | \n",
" 3 | \n",
" 1.628379e+06 | \n",
" 1.0 | \n",
" Medium | \n",
" 9.813826 | \n",
"
\n",
" \n",
" 3 | \n",
" EM14613 | \n",
" Kota Jakarta Pusat | \n",
" Laki-laki | \n",
" 1988-06-10 | \n",
" 2022-11-21 | \n",
" 2024-03-23 | \n",
" Married | \n",
" 1 | \n",
" D3 | \n",
" 2.0 | \n",
" ... | \n",
" Mid-term | \n",
" 5.333333 | \n",
" 2 | \n",
" 1 | \n",
" 4602479.0 | \n",
" 4 | \n",
" 1.150620e+06 | \n",
" 2.4 | \n",
" Medium | \n",
" 9.756440 | \n",
"
\n",
" \n",
" 4 | \n",
" EM1084 | \n",
" Kabupaten Bogor | \n",
" Perempuan | \n",
" 1977-05-25 | \n",
" 2021-06-07 | \n",
" 2023-07-21 | \n",
" Married | \n",
" 3 | \n",
" SLTA | \n",
" 0.0 | \n",
" ... | \n",
" Mid-term | \n",
" 25.000000 | \n",
" 4 | \n",
" 1 | \n",
" 1066966.0 | \n",
" 1 | \n",
" 1.066966e+06 | \n",
" 2.6 | \n",
" Medium | \n",
" 9.080000 | \n",
"
\n",
" \n",
"
\n",
"
5 rows × 37 columns
\n",
"
"
],
"text/plain": [
" employee_id domisili jenis_kelamin date_of_birth join_date \\\n",
"0 EM10510 Kota Jakarta Utara Laki-laki 1983-09-11 2021-02-09 \n",
"1 EM4322 Kabupaten Bekasi Perempuan 1987-03-22 2022-02-28 \n",
"2 EM1637 Kota Jakarta Barat Laki-laki 1970-04-27 2020-12-23 \n",
"3 EM14613 Kota Jakarta Pusat Laki-laki 1988-06-10 2022-11-21 \n",
"4 EM1084 Kabupaten Bogor Perempuan 1977-05-25 2021-06-07 \n",
"\n",
" resign_date marriage_stat dependant education absent_90D ... \\\n",
"0 2023-06-22 Married 1 SLTA 9.0 ... \n",
"1 2023-04-04 Married 1 SLTA 0.0 ... \n",
"2 2023-03-25 Married 4 D2 4.0 ... \n",
"3 2024-03-23 Married 1 D3 2.0 ... \n",
"4 2023-07-21 Married 3 SLTA 0.0 ... \n",
"\n",
" active_work_category work_stability_score married_dependent_ratio \\\n",
"0 Mid-term 2.800000 2 \n",
"1 Mid-term 13.000000 2 \n",
"2 Mid-term 5.400000 5 \n",
"3 Mid-term 5.333333 2 \n",
"4 Mid-term 25.000000 4 \n",
"\n",
" position_score job_income_position_score education_score \\\n",
"0 1 1418218.0 1 \n",
"1 1 1060575.0 1 \n",
"2 1 4885136.0 3 \n",
"3 1 4602479.0 4 \n",
"4 1 1066966.0 1 \n",
"\n",
" education_income_ratio weighted_satisfaction_performance \\\n",
"0 1.418218e+06 2.2 \n",
"1 1.060575e+06 1.6 \n",
"2 1.628379e+06 1.0 \n",
"3 1.150620e+06 2.4 \n",
"4 1.066966e+06 2.6 \n",
"\n",
" resign_risk_indicator adjusted_work_time \n",
"0 Medium 9.246870 \n",
"1 Medium 9.650000 \n",
"2 Medium 9.813826 \n",
"3 Medium 9.756440 \n",
"4 Medium 9.080000 \n",
"\n",
"[5 rows x 37 columns]"
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"df = pd.read_csv(\"D:/Tugas Akhir/Codingan/Notebook - Playground/preprocessed_data_train_3.csv\")\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"13770"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(df)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" churn_status | \n",
" Count | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 0 | \n",
" 10696 | \n",
"
\n",
" \n",
" 1 | \n",
" 1 | \n",
" 3074 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" churn_status Count\n",
"0 0 10696\n",
"1 1 3074"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"churn = df['churn_status']\n",
"exploded_churn = churn.explode()\n",
"\n",
"churn_count = exploded_churn.value_counts().reset_index()\n",
"churn_count.columns = ['churn_status', 'Count']\n",
"churn_count"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" employee_id | \n",
" domisili | \n",
" jenis_kelamin | \n",
" date_of_birth | \n",
" join_date | \n",
" resign_date | \n",
" marriage_stat | \n",
" dependant | \n",
" education | \n",
" absent_90D | \n",
" ... | \n",
" active_work_category | \n",
" work_stability_score | \n",
" married_dependent_ratio | \n",
" position_score | \n",
" job_income_position_score | \n",
" education_score | \n",
" education_income_ratio | \n",
" weighted_satisfaction_performance | \n",
" resign_risk_indicator | \n",
" adjusted_work_time | \n",
"
\n",
" \n",
" \n",
" \n",
" 12683 | \n",
" EM12967 | \n",
" Kota Jakarta Utara | \n",
" Laki-laki | \n",
" 1998-03-13 | \n",
" 2020-05-03 | \n",
" 2024-07-04 | \n",
" Single | \n",
" 0 | \n",
" SLTA | \n",
" 7.0 | \n",
" ... | \n",
" Long-term | \n",
" 6.250000 | \n",
" 1 | \n",
" 1 | \n",
" 1776619.0 | \n",
" 1 | \n",
" 1776619.0 | \n",
" 1.6 | \n",
" Low | \n",
" 9.884603 | \n",
"
\n",
" \n",
" 11885 | \n",
" EM8515 | \n",
" Tangerang | \n",
" Laki-laki | \n",
" 2000-03-31 | \n",
" 2022-08-23 | \n",
" 2024-10-31 | \n",
" Single | \n",
" 0 | \n",
" SLTA | \n",
" 0.0 | \n",
" ... | \n",
" Mid-term | \n",
" 26.000000 | \n",
" 1 | \n",
" 1 | \n",
" 1104966.0 | \n",
" 1 | \n",
" 1104966.0 | \n",
" 2.6 | \n",
" Medium | \n",
" 9.260000 | \n",
"
\n",
" \n",
" 13621 | \n",
" EM9679 | \n",
" Kota Bekasi | \n",
" Perempuan | \n",
" 1975-07-01 | \n",
" 2021-12-06 | \n",
" 2024-10-31 | \n",
" Married | \n",
" 2 | \n",
" S1 | \n",
" 5.0 | \n",
" ... | \n",
" Mid-term | \n",
" 5.833333 | \n",
" 3 | \n",
" 2 | \n",
" 2519458.0 | \n",
" 5 | \n",
" 1007783.2 | \n",
" 2.0 | \n",
" Medium | \n",
" 9.335163 | \n",
"
\n",
" \n",
" 8399 | \n",
" EM15269 | \n",
" Kabupaten Bekasi | \n",
" Laki-laki | \n",
" 1988-01-19 | \n",
" 2022-11-01 | \n",
" 2024-10-31 | \n",
" Married | \n",
" 2 | \n",
" S1 | \n",
" 2.0 | \n",
" ... | \n",
" Mid-term | \n",
" 8.000000 | \n",
" 3 | \n",
" 2 | \n",
" 3015150.5 | \n",
" 5 | \n",
" 1206060.2 | \n",
" 1.0 | \n",
" Medium | \n",
" 9.740976 | \n",
"
\n",
" \n",
" 5525 | \n",
" EM2598 | \n",
" Tangerang | \n",
" Laki-laki | \n",
" 1996-03-11 | \n",
" 2021-06-08 | \n",
" 2024-10-31 | \n",
" Single | \n",
" 0 | \n",
" D3 | \n",
" 4.0 | \n",
" ... | \n",
" Long-term | \n",
" 8.200000 | \n",
" 1 | \n",
" 1 | \n",
" 4488998.0 | \n",
" 4 | \n",
" 1122249.5 | \n",
" 1.0 | \n",
" Low | \n",
" 9.120106 | \n",
"
\n",
" \n",
"
\n",
"
5 rows × 37 columns
\n",
"
"
],
"text/plain": [
" employee_id domisili jenis_kelamin date_of_birth join_date \\\n",
"12683 EM12967 Kota Jakarta Utara Laki-laki 1998-03-13 2020-05-03 \n",
"11885 EM8515 Tangerang Laki-laki 2000-03-31 2022-08-23 \n",
"13621 EM9679 Kota Bekasi Perempuan 1975-07-01 2021-12-06 \n",
"8399 EM15269 Kabupaten Bekasi Laki-laki 1988-01-19 2022-11-01 \n",
"5525 EM2598 Tangerang Laki-laki 1996-03-11 2021-06-08 \n",
"\n",
" resign_date marriage_stat dependant education absent_90D ... \\\n",
"12683 2024-07-04 Single 0 SLTA 7.0 ... \n",
"11885 2024-10-31 Single 0 SLTA 0.0 ... \n",
"13621 2024-10-31 Married 2 S1 5.0 ... \n",
"8399 2024-10-31 Married 2 S1 2.0 ... \n",
"5525 2024-10-31 Single 0 D3 4.0 ... \n",
"\n",
" active_work_category work_stability_score married_dependent_ratio \\\n",
"12683 Long-term 6.250000 1 \n",
"11885 Mid-term 26.000000 1 \n",
"13621 Mid-term 5.833333 3 \n",
"8399 Mid-term 8.000000 3 \n",
"5525 Long-term 8.200000 1 \n",
"\n",
" position_score job_income_position_score education_score \\\n",
"12683 1 1776619.0 1 \n",
"11885 1 1104966.0 1 \n",
"13621 2 2519458.0 5 \n",
"8399 2 3015150.5 5 \n",
"5525 1 4488998.0 4 \n",
"\n",
" education_income_ratio weighted_satisfaction_performance \\\n",
"12683 1776619.0 1.6 \n",
"11885 1104966.0 2.6 \n",
"13621 1007783.2 2.0 \n",
"8399 1206060.2 1.0 \n",
"5525 1122249.5 1.0 \n",
"\n",
" resign_risk_indicator adjusted_work_time \n",
"12683 Low 9.884603 \n",
"11885 Medium 9.260000 \n",
"13621 Medium 9.335163 \n",
"8399 Medium 9.740976 \n",
"5525 Low 9.120106 \n",
"\n",
"[5 rows x 37 columns]"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data = pd.DataFrame()\n",
"for index, row in churn_count.iterrows():\n",
" churn = row['churn_status'] \n",
" count = row['Count']\n",
" if count > 3074:\n",
" filtered_data = df[df['churn_status'] == churn].sample(3074)\n",
" data = pd.concat([data, filtered_data])\n",
"\n",
"for index, row in churn_count.iterrows():\n",
" churn = row['churn_status'] \n",
" count = row['Count']\n",
" if count <= 3074:\n",
" filtered_data = df[df['churn_status'] == churn]\n",
" data = pd.concat([data, filtered_data])\n",
"\n",
"data.head()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.model_selection import train_test_split\n",
"\n",
"cat_feature = ['departemen', 'position', 'domisili', 'marriage_stat', 'job_satisfaction', 'performance_rating',\n",
" 'education', 'active_work_category', 'resign_risk_indicator', 'jenis_kelamin']\n",
"\n",
"X = data.drop(columns=['churn_status', 'employee_id', 'date_of_birth', 'join_date', 'resign_date', 'active_work_months'])\n",
"y = data['churn_status']\n",
"\n",
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" domisili | \n",
" jenis_kelamin | \n",
" marriage_stat | \n",
" dependant | \n",
" education | \n",
" absent_90D | \n",
" avg_time_work | \n",
" departemen | \n",
" position | \n",
" income | \n",
" ... | \n",
" active_work_category | \n",
" work_stability_score | \n",
" married_dependent_ratio | \n",
" position_score | \n",
" job_income_position_score | \n",
" education_score | \n",
" education_income_ratio | \n",
" weighted_satisfaction_performance | \n",
" resign_risk_indicator | \n",
" adjusted_work_time | \n",
"
\n",
" \n",
" \n",
" \n",
" 12683 | \n",
" Kota Jakarta Utara | \n",
" Laki-laki | \n",
" Single | \n",
" 0 | \n",
" SLTA | \n",
" 7.0 | \n",
" 9.90 | \n",
" Creative & Design | \n",
" Staff | \n",
" 1776619 | \n",
" ... | \n",
" Long-term | \n",
" 6.250000 | \n",
" 1 | \n",
" 1 | \n",
" 1776619.0 | \n",
" 1 | \n",
" 1776619.0 | \n",
" 1.6 | \n",
" Low | \n",
" 9.884603 | \n",
"
\n",
" \n",
" 11885 | \n",
" Tangerang | \n",
" Laki-laki | \n",
" Single | \n",
" 0 | \n",
" SLTA | \n",
" 0.0 | \n",
" 9.26 | \n",
" Operations | \n",
" Staff | \n",
" 1104966 | \n",
" ... | \n",
" Mid-term | \n",
" 26.000000 | \n",
" 1 | \n",
" 1 | \n",
" 1104966.0 | \n",
" 1 | \n",
" 1104966.0 | \n",
" 2.6 | \n",
" Medium | \n",
" 9.260000 | \n",
"
\n",
" \n",
" 13621 | \n",
" Kota Bekasi | \n",
" Perempuan | \n",
" Married | \n",
" 2 | \n",
" S1 | \n",
" 5.0 | \n",
" 9.35 | \n",
" Engineering & IT | \n",
" Junior | \n",
" 5038916 | \n",
" ... | \n",
" Mid-term | \n",
" 5.833333 | \n",
" 3 | \n",
" 2 | \n",
" 2519458.0 | \n",
" 5 | \n",
" 1007783.2 | \n",
" 2.0 | \n",
" Medium | \n",
" 9.335163 | \n",
"
\n",
" \n",
" 8399 | \n",
" Kabupaten Bekasi | \n",
" Laki-laki | \n",
" Married | \n",
" 2 | \n",
" S1 | \n",
" 2.0 | \n",
" 9.75 | \n",
" HR | \n",
" Junior | \n",
" 6030301 | \n",
" ... | \n",
" Mid-term | \n",
" 8.000000 | \n",
" 3 | \n",
" 2 | \n",
" 3015150.5 | \n",
" 5 | \n",
" 1206060.2 | \n",
" 1.0 | \n",
" Medium | \n",
" 9.740976 | \n",
"
\n",
" \n",
" 5525 | \n",
" Tangerang | \n",
" Laki-laki | \n",
" Single | \n",
" 0 | \n",
" D3 | \n",
" 4.0 | \n",
" 9.13 | \n",
" Service & Support | \n",
" Staff | \n",
" 4488998 | \n",
" ... | \n",
" Long-term | \n",
" 8.200000 | \n",
" 1 | \n",
" 1 | \n",
" 4488998.0 | \n",
" 4 | \n",
" 1122249.5 | \n",
" 1.0 | \n",
" Low | \n",
" 9.120106 | \n",
"
\n",
" \n",
"
\n",
"
5 rows × 31 columns
\n",
"
"
],
"text/plain": [
" domisili jenis_kelamin marriage_stat dependant education \\\n",
"12683 Kota Jakarta Utara Laki-laki Single 0 SLTA \n",
"11885 Tangerang Laki-laki Single 0 SLTA \n",
"13621 Kota Bekasi Perempuan Married 2 S1 \n",
"8399 Kabupaten Bekasi Laki-laki Married 2 S1 \n",
"5525 Tangerang Laki-laki Single 0 D3 \n",
"\n",
" absent_90D avg_time_work departemen position income ... \\\n",
"12683 7.0 9.90 Creative & Design Staff 1776619 ... \n",
"11885 0.0 9.26 Operations Staff 1104966 ... \n",
"13621 5.0 9.35 Engineering & IT Junior 5038916 ... \n",
"8399 2.0 9.75 HR Junior 6030301 ... \n",
"5525 4.0 9.13 Service & Support Staff 4488998 ... \n",
"\n",
" active_work_category work_stability_score married_dependent_ratio \\\n",
"12683 Long-term 6.250000 1 \n",
"11885 Mid-term 26.000000 1 \n",
"13621 Mid-term 5.833333 3 \n",
"8399 Mid-term 8.000000 3 \n",
"5525 Long-term 8.200000 1 \n",
"\n",
" position_score job_income_position_score education_score \\\n",
"12683 1 1776619.0 1 \n",
"11885 1 1104966.0 1 \n",
"13621 2 2519458.0 5 \n",
"8399 2 3015150.5 5 \n",
"5525 1 4488998.0 4 \n",
"\n",
" education_income_ratio weighted_satisfaction_performance \\\n",
"12683 1776619.0 1.6 \n",
"11885 1104966.0 2.6 \n",
"13621 1007783.2 2.0 \n",
"8399 1206060.2 1.0 \n",
"5525 1122249.5 1.0 \n",
"\n",
" resign_risk_indicator adjusted_work_time \n",
"12683 Low 9.884603 \n",
"11885 Medium 9.260000 \n",
"13621 Medium 9.335163 \n",
"8399 Medium 9.740976 \n",
"5525 Low 9.120106 \n",
"\n",
"[5 rows x 31 columns]"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X.head()"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [],
"source": [
"X.to_csv(r\"D:\\Tugas Akhir\\Codingan\\Development\\App\\X_train.csv\", index=False)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0:\ttest: 0.9491030\tbest: 0.9491030 (0)\ttotal: 266ms\tremaining: 4m 26s\n",
"200:\ttest: 0.9793245\tbest: 0.9793245 (200)\ttotal: 14.5s\tremaining: 57.8s\n",
"400:\ttest: 0.9820451\tbest: 0.9820530 (398)\ttotal: 27s\tremaining: 40.3s\n",
"600:\ttest: 0.9827616\tbest: 0.9827616 (597)\ttotal: 40.5s\tremaining: 26.9s\n",
"800:\ttest: 0.9831529\tbest: 0.9831820 (788)\ttotal: 54.9s\tremaining: 13.6s\n",
"999:\ttest: 0.9837425\tbest: 0.9837425 (999)\ttotal: 1m 9s\tremaining: 0us\n",
"\n",
"bestTest = 0.9837424813\n",
"bestIteration = 999\n",
"\n"
]
},
{
"data": {
"text/plain": [
""
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from catboost import CatBoostClassifier\n",
"import pandas as pd\n",
"\n",
"model = CatBoostClassifier(\n",
" iterations=1000,\n",
" learning_rate=0.01,\n",
" depth=6,\n",
" cat_features= cat_feature,\n",
" loss_function='Logloss',\n",
" eval_metric='AUC',\n",
" scale_pos_weight=len(y_train[y_train == 0]) / len(y_train[y_train == 1]),\n",
" verbose=200\n",
")\n",
"\n",
"# Melatih model\n",
"model.fit(X_train, y_train, eval_set=(X_test, y_test), use_best_model=True)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"d:\\Tugas Akhir\\Codingan\\Notebook - Playground\\.venv\\lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
" from .autonotebook import tqdm as notebook_tqdm\n"
]
}
],
"source": [
"import optuna\n",
"from catboost import CatBoostClassifier\n",
"from sklearn.metrics import roc_auc_score\n",
"\n",
"# Fungsi objective untuk Optuna\n",
"def objective(trial):\n",
" # Definisikan parameter yang akan dioptimasi\n",
" params = {\n",
" 'iterations': trial.suggest_int('iterations', 500, 1000),\n",
" 'learning_rate': trial.suggest_float('learning_rate', 0.001, 0.1, log=True),\n",
" 'depth': trial.suggest_int('depth', 4, 6),\n",
" 'subsample': trial.suggest_float('subsample', 0.5, 0.8),\n",
" 'colsample_bylevel': trial.suggest_float('colsample_bylevel', 0.5, 0.8),\n",
" 'l2_leaf_reg': trial.suggest_float('l2_leaf_reg', 5, 20),\n",
" 'random_strength': trial.suggest_float('random_strength', 5, 10),\n",
" 'cat_features': cat_feature,\n",
" 'loss_function': 'Logloss',\n",
" 'random_state': 42,\n",
" 'verbose': 0\n",
" }\n",
"\n",
" # Inisialisasi model dengan parameter yang dioptimasi\n",
" model = CatBoostClassifier(**params)\n",
"\n",
" # Melatih model dengan validasi\n",
" model.fit(X_train, y_train, eval_set=(X_test, y_test), use_best_model=True)\n",
"\n",
" # Prediksi probabilitas untuk menghitung AUC\n",
" y_pred = model.predict_proba(X_test)[:, 1]\n",
" auc = roc_auc_score(y_test, y_pred)\n",
"\n",
" return auc # Mengembalikan AUC sebagai skor yang ingin dimaksimalkan"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"[I 2025-01-31 18:51:48,706] A new study created in memory with name: no-name-64301d1e-ebb0-4383-9c80-96d0c991cd74\n",
"[I 2025-01-31 18:52:32,766] Trial 0 finished with value: 0.9773838323749091 and parameters: {'iterations': 898, 'learning_rate': 0.011053171335873363, 'depth': 5, 'subsample': 0.6792060501802821, 'colsample_bylevel': 0.677770309507521, 'l2_leaf_reg': 6.799321957299509, 'random_strength': 9.022785536036583}. Best is trial 0 with value: 0.9773838323749091.\n",
"[I 2025-01-31 18:53:04,401] Trial 1 finished with value: 0.9764399497653513 and parameters: {'iterations': 751, 'learning_rate': 0.010856162412518163, 'depth': 4, 'subsample': 0.7382693336863855, 'colsample_bylevel': 0.7186692583510685, 'l2_leaf_reg': 8.362893889778585, 'random_strength': 8.702993428810624}. Best is trial 0 with value: 0.9773838323749091.\n",
"[I 2025-01-31 18:54:04,581] Trial 2 finished with value: 0.9846493489325138 and parameters: {'iterations': 923, 'learning_rate': 0.04798765627548158, 'depth': 6, 'subsample': 0.6754895839217451, 'colsample_bylevel': 0.5237363809797543, 'l2_leaf_reg': 19.582992922585383, 'random_strength': 8.441609405121403}. Best is trial 2 with value: 0.9846493489325138.\n",
"[I 2025-01-31 18:54:58,991] Trial 3 finished with value: 0.9753136360631899 and parameters: {'iterations': 955, 'learning_rate': 0.0015368102043846466, 'depth': 6, 'subsample': 0.7011449309209402, 'colsample_bylevel': 0.7103505124550098, 'l2_leaf_reg': 5.113987964193796, 'random_strength': 8.460971358767354}. Best is trial 2 with value: 0.9846493489325138.\n",
"[I 2025-01-31 18:55:25,136] Trial 4 finished with value: 0.9841575781611475 and parameters: {'iterations': 578, 'learning_rate': 0.063678671058667, 'depth': 4, 'subsample': 0.7677253729534457, 'colsample_bylevel': 0.7531320019655664, 'l2_leaf_reg': 15.93900442556415, 'random_strength': 5.432113965360059}. Best is trial 2 with value: 0.9846493489325138.\n",
"[I 2025-01-31 18:56:16,998] Trial 5 finished with value: 0.9776138541873224 and parameters: {'iterations': 826, 'learning_rate': 0.009797116924664805, 'depth': 6, 'subsample': 0.5890878538611122, 'colsample_bylevel': 0.7373385731110815, 'l2_leaf_reg': 15.73191814912146, 'random_strength': 6.050835373364055}. Best is trial 2 with value: 0.9846493489325138.\n",
"[I 2025-01-31 18:57:15,482] Trial 6 finished with value: 0.9749619935223743 and parameters: {'iterations': 809, 'learning_rate': 0.001699677627520717, 'depth': 6, 'subsample': 0.578445828928962, 'colsample_bylevel': 0.7436210936117759, 'l2_leaf_reg': 11.117190568598893, 'random_strength': 7.04383990081959}. Best is trial 2 with value: 0.9846493489325138.\n",
"[I 2025-01-31 18:57:54,127] Trial 7 finished with value: 0.9816352700112367 and parameters: {'iterations': 861, 'learning_rate': 0.01541861782108474, 'depth': 4, 'subsample': 0.7886446490601242, 'colsample_bylevel': 0.6735700671825722, 'l2_leaf_reg': 11.919032864668194, 'random_strength': 9.492143464663224}. Best is trial 2 with value: 0.9846493489325138.\n",
"[I 2025-01-31 18:58:51,816] Trial 8 finished with value: 0.9852865357921872 and parameters: {'iterations': 769, 'learning_rate': 0.06982641965149956, 'depth': 6, 'subsample': 0.6814518799518184, 'colsample_bylevel': 0.7775983370027374, 'l2_leaf_reg': 8.960252840750547, 'random_strength': 9.846274339599894}. Best is trial 8 with value: 0.9852865357921872.\n",
"[I 2025-01-31 18:59:44,308] Trial 9 finished with value: 0.9754141053605658 and parameters: {'iterations': 746, 'learning_rate': 0.0016137224415552842, 'depth': 6, 'subsample': 0.5993549736218431, 'colsample_bylevel': 0.6989790297541257, 'l2_leaf_reg': 13.553876556590211, 'random_strength': 5.990982802533768}. Best is trial 8 with value: 0.9852865357921872.\n",
"[I 2025-01-31 19:00:15,989] Trial 10 finished with value: 0.9838826095578029 and parameters: {'iterations': 597, 'learning_rate': 0.03144100490697419, 'depth': 5, 'subsample': 0.5363364194930027, 'colsample_bylevel': 0.7978652144122366, 'l2_leaf_reg': 8.249537524112156, 'random_strength': 9.984348987802527}. Best is trial 8 with value: 0.9852865357921872.\n",
"[I 2025-01-31 19:01:10,331] Trial 11 finished with value: 0.9841760856632956 and parameters: {'iterations': 974, 'learning_rate': 0.09965915584325698, 'depth': 5, 'subsample': 0.6574603869223254, 'colsample_bylevel': 0.5501601205848138, 'l2_leaf_reg': 19.76070753486162, 'random_strength': 7.755479575146688}. Best is trial 8 with value: 0.9852865357921872.\n",
"[I 2025-01-31 19:01:54,913] Trial 12 finished with value: 0.9840386013616234 and parameters: {'iterations': 691, 'learning_rate': 0.03424349599719855, 'depth': 6, 'subsample': 0.7224175773823622, 'colsample_bylevel': 0.5929425691182608, 'l2_leaf_reg': 19.25404560213221, 'random_strength': 7.759900759284147}. Best is trial 8 with value: 0.9852865357921872.\n",
"[I 2025-01-31 19:02:25,479] Trial 13 finished with value: 0.9840068742150836 and parameters: {'iterations': 502, 'learning_rate': 0.038817996889561834, 'depth': 6, 'subsample': 0.6330719275681617, 'colsample_bylevel': 0.5095236622667528, 'l2_leaf_reg': 9.778730963469318, 'random_strength': 9.883633709104314}. Best is trial 8 with value: 0.9852865357921872.\n",
"[I 2025-01-31 19:03:12,496] Trial 14 finished with value: 0.9760645118646308 and parameters: {'iterations': 913, 'learning_rate': 0.004711726870893525, 'depth': 5, 'subsample': 0.6341455516910446, 'colsample_bylevel': 0.6189316091763777, 'l2_leaf_reg': 14.590796785510822, 'random_strength': 8.280974259767127}. Best is trial 8 with value: 0.9852865357921872.\n",
"[I 2025-01-31 19:03:54,780] Trial 15 finished with value: 0.983739837398374 and parameters: {'iterations': 684, 'learning_rate': 0.0770812480415403, 'depth': 6, 'subsample': 0.6841267228305123, 'colsample_bylevel': 0.6244873598756205, 'l2_leaf_reg': 17.527072672878234, 'random_strength': 6.997647156643534}. Best is trial 8 with value: 0.9852865357921872.\n",
"[I 2025-01-31 19:04:35,490] Trial 16 finished with value: 0.9834939520126909 and parameters: {'iterations': 797, 'learning_rate': 0.02527170220411846, 'depth': 5, 'subsample': 0.5006836923455774, 'colsample_bylevel': 0.5022479394722035, 'l2_leaf_reg': 10.283699043522013, 'random_strength': 9.139463380029865}. Best is trial 8 with value: 0.9852865357921872.\n",
"[I 2025-01-31 19:05:33,238] Trial 17 finished with value: 0.984297706391698 and parameters: {'iterations': 911, 'learning_rate': 0.051380928005540824, 'depth': 6, 'subsample': 0.7343100864781796, 'colsample_bylevel': 0.7831407839963198, 'l2_leaf_reg': 17.57103102855441, 'random_strength': 9.394666221253614}. Best is trial 8 with value: 0.9852865357921872.\n",
"[I 2025-01-31 19:06:12,201] Trial 18 finished with value: 0.9830497719611342 and parameters: {'iterations': 668, 'learning_rate': 0.01944108063159131, 'depth': 6, 'subsample': 0.6583734530240242, 'colsample_bylevel': 0.5684416729202474, 'l2_leaf_reg': 12.834044527259307, 'random_strength': 8.062187605442572}. Best is trial 8 with value: 0.9852865357921872.\n",
"[I 2025-01-31 19:06:52,557] Trial 19 finished with value: 0.9760327847180911 and parameters: {'iterations': 748, 'learning_rate': 0.004860150067425841, 'depth': 5, 'subsample': 0.6132382429039442, 'colsample_bylevel': 0.5408464943257619, 'l2_leaf_reg': 8.671812981324692, 'random_strength': 7.2908917904657535}. Best is trial 8 with value: 0.9852865357921872.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Best Trial:\n",
"AUC: 0.9852865357921872\n",
"Params:\n",
" iterations: 769\n",
" learning_rate: 0.06982641965149956\n",
" depth: 6\n",
" subsample: 0.6814518799518184\n",
" colsample_bylevel: 0.7775983370027374\n",
" l2_leaf_reg: 8.960252840750547\n",
" random_strength: 9.846274339599894\n"
]
}
],
"source": [
"# Buat studi Optuna untuk memaksimalkan AUC\n",
"study = optuna.create_study(direction='maximize')\n",
"study.optimize(objective, n_trials=20) # Lakukan 20 percobaan\n",
"\n",
"# Tampilkan hasil terbaik\n",
"print(\"Best Trial:\")\n",
"print(f\"AUC: {study.best_trial.value}\")\n",
"print(\"Params:\")\n",
"for key, value in study.best_trial.params.items():\n",
" print(f\" {key}: {value}\")"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0:\tlearn: 0.5922554\ttest: 0.5897562\tbest: 0.5897562 (0)\ttotal: 108ms\tremaining: 1m 22s\n",
"200:\tlearn: 0.1113078\ttest: 0.1422183\tbest: 0.1422137 (199)\ttotal: 13.2s\tremaining: 37.4s\n",
"Stopped by overfitting detector (50 iterations wait)\n",
"\n",
"bestTest = 0.1357173793\n",
"bestIteration = 347\n",
"\n",
"Shrink model to first 348 iterations.\n",
"Final AUC: 0.9844008196179522\n"
]
}
],
"source": [
"# Ambil parameter terbaik dari Optuna\n",
"best_params = study.best_trial.params\n",
"\n",
"# Tambahkan parameter tetap (yang tidak dioptimasi)\n",
"best_params.update({\n",
" 'loss_function': 'Logloss', # Gunakan Logloss sebagai loss function\n",
" 'cat_features': cat_feature,\n",
" 'random_state': 42,\n",
" 'verbose': 200, # Aktifkan output verbose\n",
" 'od_type': 'Iter',\n",
" 'od_wait': 50\n",
"})\n",
"\n",
"# Latih model dengan parameter terbaik\n",
"final_model = CatBoostClassifier(**best_params)\n",
"final_model.fit(X_train, y_train, eval_set=(X_test, y_test), use_best_model=True)\n",
"\n",
"# Evaluasi model final\n",
"y_pred = final_model.predict_proba(X_test)[:, 1]\n",
"final_auc = roc_auc_score(y_test, y_pred)\n",
"print(f\"Final AUC: {final_auc}\")"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CatBoost Classification model saved to 'clasification_model.sav'\n"
]
}
],
"source": [
"import pickle\n",
"\n",
"with open('clasification_model.sav', 'wb') as f:\n",
" pickle.dump(final_model, f)\n",
"print(\"CatBoost Classification model saved to 'clasification_model.sav'\")"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Final Training Logloss: 0.07900948412964129\n",
"Final Validation Logloss: 0.1360854668410178\n"
]
}
],
"source": [
"evals_result = final_model.get_evals_result()\n",
"\n",
"# Menampilkan skor terakhir\n",
"train_score = evals_result['learn']['Logloss'][-1]\n",
"val_score = evals_result['validation']['Logloss'][-1]\n",
"\n",
"print(f\"Final Training Logloss: {train_score}\")\n",
"print(f\"Final Validation Logloss: {val_score}\")"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "",
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import matplotlib.pyplot as plt\n",
"\n",
"# Ambil skor training dan validation dari evals_result\n",
"train_logloss = evals_result['learn']['Logloss']\n",
"val_logloss = evals_result['validation']['Logloss']\n",
"\n",
"# Plot learning curve\n",
"plt.figure(figsize=(10, 6))\n",
"plt.plot(train_logloss, label='Training Logloss')\n",
"plt.plot(val_logloss, label='Validation Logloss')\n",
"plt.xlabel('Iteration')\n",
"plt.ylabel('Logloss')\n",
"plt.title('Learning Curve')\n",
"plt.legend()\n",
"plt.grid()\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0:\ttotal: 53.9ms\tremaining: 53.8s\n",
"200:\ttotal: 15.8s\tremaining: 1m 2s\n",
"400:\ttotal: 32.7s\tremaining: 48.9s\n",
"600:\ttotal: 48.3s\tremaining: 32.1s\n",
"800:\ttotal: 1m 3s\tremaining: 15.8s\n",
"999:\ttotal: 1m 18s\tremaining: 0us\n",
"0:\ttotal: 67.1ms\tremaining: 1m 7s\n",
"200:\ttotal: 16.2s\tremaining: 1m 4s\n",
"400:\ttotal: 30.7s\tremaining: 45.8s\n",
"600:\ttotal: 42.6s\tremaining: 28.2s\n",
"800:\ttotal: 56.8s\tremaining: 14.1s\n",
"999:\ttotal: 1m 10s\tremaining: 0us\n",
"0:\ttotal: 115ms\tremaining: 1m 54s\n",
"200:\ttotal: 13.6s\tremaining: 54.1s\n",
"400:\ttotal: 26.6s\tremaining: 39.7s\n",
"600:\ttotal: 39.4s\tremaining: 26.1s\n",
"800:\ttotal: 52.8s\tremaining: 13.1s\n",
"999:\ttotal: 1m 6s\tremaining: 0us\n",
"0:\ttotal: 55.5ms\tremaining: 55.4s\n",
"200:\ttotal: 13.7s\tremaining: 54.6s\n",
"400:\ttotal: 26.3s\tremaining: 39.3s\n",
"600:\ttotal: 38.2s\tremaining: 25.4s\n",
"800:\ttotal: 50.6s\tremaining: 12.6s\n",
"999:\ttotal: 1m 3s\tremaining: 0us\n",
"0:\ttotal: 58.5ms\tremaining: 58.4s\n",
"200:\ttotal: 12.5s\tremaining: 49.6s\n",
"400:\ttotal: 25.3s\tremaining: 37.8s\n",
"600:\ttotal: 37.7s\tremaining: 25.1s\n",
"800:\ttotal: 51.5s\tremaining: 12.8s\n",
"999:\ttotal: 1m 6s\tremaining: 0us\n",
"0:\ttotal: 86.8ms\tremaining: 1m 26s\n",
"200:\ttotal: 11.9s\tremaining: 47.4s\n",
"400:\ttotal: 26.5s\tremaining: 39.6s\n",
"600:\ttotal: 39.9s\tremaining: 26.5s\n",
"800:\ttotal: 52.7s\tremaining: 13.1s\n",
"999:\ttotal: 1m 7s\tremaining: 0us\n",
"0:\ttotal: 108ms\tremaining: 1m 47s\n",
"200:\ttotal: 13.3s\tremaining: 53s\n",
"400:\ttotal: 27.2s\tremaining: 40.7s\n",
"600:\ttotal: 39.7s\tremaining: 26.4s\n",
"800:\ttotal: 53.8s\tremaining: 13.4s\n",
"999:\ttotal: 1m 8s\tremaining: 0us\n",
"0:\ttotal: 61.8ms\tremaining: 1m 1s\n",
"200:\ttotal: 13.3s\tremaining: 52.8s\n",
"400:\ttotal: 27.4s\tremaining: 40.9s\n",
"600:\ttotal: 41.3s\tremaining: 27.4s\n",
"800:\ttotal: 54.1s\tremaining: 13.4s\n",
"999:\ttotal: 1m 8s\tremaining: 0us\n",
"0:\ttotal: 61.2ms\tremaining: 1m 1s\n",
"200:\ttotal: 14.1s\tremaining: 56.2s\n",
"400:\ttotal: 28.4s\tremaining: 42.4s\n",
"600:\ttotal: 42.1s\tremaining: 28s\n",
"800:\ttotal: 56.9s\tremaining: 14.1s\n",
"999:\ttotal: 1m 11s\tremaining: 0us\n",
"0:\ttotal: 61.5ms\tremaining: 1m 1s\n",
"200:\ttotal: 14.3s\tremaining: 56.9s\n",
"400:\ttotal: 26.4s\tremaining: 39.5s\n",
"600:\ttotal: 38.8s\tremaining: 25.7s\n",
"800:\ttotal: 52.1s\tremaining: 12.9s\n",
"999:\ttotal: 1m 5s\tremaining: 0us\n"
]
},
{
"data": {
"image/png": "",
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Accuracy Scores for each fold: [0.95772358 0.95121951 0.9495935 0.96585366 0.95772358 0.94796748\n",
" 0.95284553 0.9398374 0.96416938 0.95602606]\n",
"Mean Accuracy: 0.95\n",
"Standard Deviation: 0.01\n"
]
}
],
"source": [
"from sklearn.model_selection import cross_val_score, StratifiedKFold\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"\n",
"# Fungsi untuk menghitung skor cross-validation dan visualisasi\n",
"def cross_validate_and_visualize_accuracy(model, X, y, cv=10):\n",
" # Stratified K-Fold untuk mempertahankan distribusi label\n",
" skf = StratifiedKFold(n_splits=cv, shuffle=True, random_state=42)\n",
"\n",
" # Hitung skor cross-validation dengan metrik akurasi\n",
" scores = cross_val_score(model, X, y, scoring='accuracy', cv=skf)\n",
"\n",
" # Rata-rata dan standar deviasi\n",
" mean_score = np.mean(scores)\n",
" std_score = np.std(scores)\n",
"\n",
" # Visualisasi hasil cross-validation\n",
" plt.figure(figsize=(8, 5))\n",
" plt.plot(range(1, cv + 1), scores, marker='o', linestyle='-', color='b', label='Fold Score')\n",
" plt.axhline(y=mean_score, color='r', linestyle='--', label=f'Mean Accuracy: {mean_score:.2f}')\n",
" plt.fill_between(range(1, cv + 1), mean_score - std_score, mean_score + std_score, color='r', alpha=0.2, label='±1 Std Dev')\n",
" plt.title('Cross-Validation Scores (Accuracy)')\n",
" plt.xlabel('Fold')\n",
" plt.ylabel('Accuracy')\n",
" plt.legend()\n",
" plt.grid()\n",
" plt.show()\n",
"\n",
" # Cetak hasil skor\n",
" print(f'Accuracy Scores for each fold: {scores}')\n",
" print(f'Mean Accuracy: {mean_score:.2f}')\n",
" print(f'Standard Deviation: {std_score:.2f}')\n",
"\n",
"# Contoh penggunaan\n",
"# Ganti model dengan model Anda, misalnya `model`\n",
"cross_validate_and_visualize_accuracy(model, X, y, cv=10)"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0:\tlearn: 0.5888696\ttotal: 136ms\tremaining: 1m 44s\n",
"200:\tlearn: 0.1175942\ttotal: 12s\tremaining: 34s\n",
"400:\tlearn: 0.0930919\ttotal: 25.9s\tremaining: 23.8s\n",
"600:\tlearn: 0.0725604\ttotal: 40.8s\tremaining: 11.4s\n",
"768:\tlearn: 0.0585621\ttotal: 52.3s\tremaining: 0us\n",
"0:\tlearn: 0.5886265\ttotal: 73.6ms\tremaining: 56.5s\n",
"200:\tlearn: 0.1198413\ttotal: 13.5s\tremaining: 38.1s\n",
"400:\tlearn: 0.0881541\ttotal: 27.7s\tremaining: 25.5s\n",
"600:\tlearn: 0.0662039\ttotal: 41.6s\tremaining: 11.6s\n",
"768:\tlearn: 0.0527943\ttotal: 53.4s\tremaining: 0us\n",
"0:\tlearn: 0.5897432\ttotal: 60.9ms\tremaining: 46.7s\n",
"200:\tlearn: 0.1123212\ttotal: 12.1s\tremaining: 34.3s\n",
"400:\tlearn: 0.0833892\ttotal: 25.9s\tremaining: 23.8s\n",
"600:\tlearn: 0.0613133\ttotal: 40.7s\tremaining: 11.4s\n",
"768:\tlearn: 0.0478552\ttotal: 52.5s\tremaining: 0us\n",
"0:\tlearn: 0.5899386\ttotal: 56.5ms\tremaining: 43.4s\n",
"200:\tlearn: 0.1199977\ttotal: 12.3s\tremaining: 34.7s\n",
"400:\tlearn: 0.0868800\ttotal: 26.2s\tremaining: 24.1s\n",
"600:\tlearn: 0.0666446\ttotal: 40.4s\tremaining: 11.3s\n",
"768:\tlearn: 0.0538801\ttotal: 52.7s\tremaining: 0us\n",
"0:\tlearn: 0.5884433\ttotal: 59.2ms\tremaining: 45.5s\n",
"200:\tlearn: 0.1198657\ttotal: 11.1s\tremaining: 31.5s\n",
"400:\tlearn: 0.0896701\ttotal: 25.1s\tremaining: 23s\n",
"600:\tlearn: 0.0667894\ttotal: 38.6s\tremaining: 10.8s\n",
"768:\tlearn: 0.0541702\ttotal: 49.8s\tremaining: 0us\n",
"0:\tlearn: 0.5885144\ttotal: 94.7ms\tremaining: 1m 12s\n",
"200:\tlearn: 0.1134256\ttotal: 10.7s\tremaining: 30.2s\n",
"400:\tlearn: 0.0799281\ttotal: 24.6s\tremaining: 22.5s\n",
"600:\tlearn: 0.0591933\ttotal: 38.8s\tremaining: 10.8s\n",
"768:\tlearn: 0.0468819\ttotal: 49.5s\tremaining: 0us\n",
"0:\tlearn: 0.5893625\ttotal: 30.6ms\tremaining: 23.5s\n",
"200:\tlearn: 0.1135640\ttotal: 9.38s\tremaining: 26.5s\n",
"400:\tlearn: 0.0816812\ttotal: 25.6s\tremaining: 23.5s\n",
"600:\tlearn: 0.0613164\ttotal: 38.7s\tremaining: 10.8s\n",
"768:\tlearn: 0.0484291\ttotal: 49.8s\tremaining: 0us\n",
"0:\tlearn: 0.5876099\ttotal: 68.4ms\tremaining: 52.5s\n",
"200:\tlearn: 0.1100839\ttotal: 11.6s\tremaining: 32.8s\n",
"400:\tlearn: 0.0779947\ttotal: 24.8s\tremaining: 22.8s\n",
"600:\tlearn: 0.0553376\ttotal: 37.9s\tremaining: 10.6s\n",
"768:\tlearn: 0.0420982\ttotal: 49.3s\tremaining: 0us\n",
"0:\tlearn: 0.5793230\ttotal: 53.9ms\tremaining: 41.4s\n",
"200:\tlearn: 0.1217541\ttotal: 11.3s\tremaining: 32.1s\n",
"400:\tlearn: 0.0942607\ttotal: 25.7s\tremaining: 23.6s\n",
"600:\tlearn: 0.0723347\ttotal: 40.6s\tremaining: 11.4s\n",
"768:\tlearn: 0.0577402\ttotal: 53.2s\tremaining: 0us\n",
"0:\tlearn: 0.5793558\ttotal: 78.5ms\tremaining: 1m\n",
"200:\tlearn: 0.1156503\ttotal: 13s\tremaining: 36.8s\n",
"400:\tlearn: 0.0851988\ttotal: 27.6s\tremaining: 25.3s\n",
"600:\tlearn: 0.0650702\ttotal: 42.9s\tremaining: 12s\n",
"768:\tlearn: 0.0549370\ttotal: 54.7s\tremaining: 0us\n"
]
},
{
"data": {
"image/png": "",
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Accuracy Scores for each fold: [0.95772358 0.95447154 0.94796748 0.96097561 0.95447154 0.95284553\n",
" 0.9495935 0.94308943 0.96579805 0.95439739]\n",
"Mean Accuracy: 0.95\n",
"Standard Deviation: 0.01\n"
]
}
],
"source": [
"cross_validate_and_visualize_accuracy(final_model, X, y, cv=10)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "",
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Training Metrics:\n",
"Accuracy: 0.97\n",
"Precision: 0.94\n",
"Recall: 0.99\n",
"F1 Score: 0.97\n",
"------------------------------\n"
]
},
{
"data": {
"image/png": "",
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Testing Metrics:\n",
"Accuracy: 0.95\n",
"Precision: 0.93\n",
"Recall: 0.98\n",
"F1 Score: 0.95\n",
"------------------------------\n"
]
}
],
"source": [
"from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score\n",
"import seaborn as sns\n",
"import matplotlib.pyplot as plt\n",
"\n",
"# Fungsi untuk menampilkan confusion matrix dan metrik evaluasi\n",
"def evaluate_model(y_true, y_pred, dataset_name):\n",
" # Confusion matrix\n",
" cm = confusion_matrix(y_true, y_pred)\n",
" \n",
" # Plot confusion matrix\n",
" plt.figure(figsize=(6, 4))\n",
" sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=['Not Churn', 'Churn'], yticklabels=['Not Churn', 'Churn'])\n",
" plt.xlabel('Predicted')\n",
" plt.ylabel('Actual')\n",
" plt.title(f'Confusion Matrix ({dataset_name})')\n",
" plt.show()\n",
" \n",
" # Hitung metrik evaluasi\n",
" accuracy = accuracy_score(y_true, y_pred)\n",
" precision = precision_score(y_true, y_pred, zero_division=0)\n",
" recall = recall_score(y_true, y_pred, zero_division=0)\n",
" f1 = f1_score(y_true, y_pred, zero_division=0)\n",
" \n",
" print(f'{dataset_name} Metrics:')\n",
" print(f'Accuracy: {accuracy:.2f}')\n",
" print(f'Precision: {precision:.2f}')\n",
" print(f'Recall: {recall:.2f}')\n",
" print(f'F1 Score: {f1:.2f}')\n",
" print('-' * 30)\n",
"\n",
"# Prediksi untuk data training dan testing\n",
"y_train_pred = final_model.predict(X_train)\n",
"y_test_pred = final_model.predict(X_test)\n",
"\n",
"# Evaluasi untuk data training\n",
"evaluate_model(y_train, y_train_pred, 'Training')\n",
"\n",
"# Evaluasi untuk data testing\n",
"evaluate_model(y_test, y_test_pred, 'Testing')"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "",
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Training Metrics:\n",
"Accuracy: 0.96\n",
"Precision: 0.94\n",
"Recall: 0.99\n",
"F1 Score: 0.96\n",
"------------------------------\n"
]
},
{
"data": {
"image/png": "",
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Testing Metrics:\n",
"Accuracy: 0.95\n",
"Precision: 0.92\n",
"Recall: 0.98\n",
"F1 Score: 0.95\n",
"------------------------------\n"
]
}
],
"source": [
"y_train_pred = model.predict(X_train)\n",
"y_test_pred = model.predict(X_test)\n",
"\n",
"# Evaluasi untuk data training\n",
"evaluate_model(y_train, y_train_pred, 'Training')\n",
"\n",
"# Evaluasi untuk data testing\n",
"evaluate_model(y_test, y_test_pred, 'Testing')"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Final Training Logloss: 0.10264213391001191\n",
"Final Validation Logloss: 0.14014741622356627\n"
]
},
{
"data": {
"image/png": "",
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"evals_result = model.get_evals_result()\n",
"\n",
"# Menampilkan skor terakhir\n",
"train_score = evals_result['learn']['Logloss'][-1]\n",
"val_score = evals_result['validation']['Logloss'][-1]\n",
"\n",
"print(f\"Final Training Logloss: {train_score}\")\n",
"print(f\"Final Validation Logloss: {val_score}\")\n",
"\n",
"import matplotlib.pyplot as plt\n",
"\n",
"# Ambil skor training dan validation dari evals_result\n",
"train_logloss = evals_result['learn']['Logloss']\n",
"val_logloss = evals_result['validation']['Logloss']\n",
"\n",
"# Plot learning curve\n",
"plt.figure(figsize=(10, 6))\n",
"plt.plot(train_logloss, label='Training Logloss')\n",
"plt.plot(val_logloss, label='Validation Logloss')\n",
"plt.xlabel('Iteration')\n",
"plt.ylabel('Logloss')\n",
"plt.title('Learning Curve')\n",
"plt.legend()\n",
"plt.grid()\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" employee_id | \n",
" domisili | \n",
" jenis_kelamin | \n",
" date_of_birth | \n",
" join_date | \n",
" resign_date | \n",
" marriage_stat | \n",
" dependant | \n",
" education | \n",
" absent_90D | \n",
" ... | \n",
" active_work_category | \n",
" work_stability_score | \n",
" married_dependent_ratio | \n",
" position_score | \n",
" job_income_position_score | \n",
" education_score | \n",
" education_income_ratio | \n",
" weighted_satisfaction_performance | \n",
" resign_risk_indicator | \n",
" adjusted_work_time | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" EM0120 | \n",
" Tangerang | \n",
" Laki-laki | \n",
" 1990-02-18 | \n",
" 2023-01-11 | \n",
" 2024-01-30 | \n",
" Married | \n",
" 1 | \n",
" SLTA | \n",
" 11.0 | \n",
" ... | \n",
" Mid-term | \n",
" 1.000000 | \n",
" 2 | \n",
" 1 | \n",
" 1140915.0 | \n",
" 1 | \n",
" 1140915.0 | \n",
" 1.4 | \n",
" Medium | \n",
" 9.393432 | \n",
"
\n",
" \n",
" 1 | \n",
" EM13985 | \n",
" Kepulauan Seribu | \n",
" Perempuan | \n",
" 1987-02-01 | \n",
" 2022-09-26 | \n",
" 2023-11-08 | \n",
" Married | \n",
" 1 | \n",
" SLTA | \n",
" 0.0 | \n",
" ... | \n",
" Mid-term | \n",
" 13.000000 | \n",
" 2 | \n",
" 1 | \n",
" 2103348.0 | \n",
" 1 | \n",
" 2103348.0 | \n",
" 1.8 | \n",
" Medium | \n",
" 9.300000 | \n",
"
\n",
" \n",
" 2 | \n",
" EM2560 | \n",
" Tangerang | \n",
" Perempuan | \n",
" 1999-11-01 | \n",
" 2023-01-05 | \n",
" 2024-05-04 | \n",
" Single | \n",
" 0 | \n",
" SLTA | \n",
" 10.0 | \n",
" ... | \n",
" Mid-term | \n",
" 1.454545 | \n",
" 1 | \n",
" 1 | \n",
" 2145814.0 | \n",
" 1 | \n",
" 2145814.0 | \n",
" 1.6 | \n",
" Medium | \n",
" 9.205670 | \n",
"
\n",
" \n",
" 3 | \n",
" EM0343 | \n",
" Kabupaten Bekasi | \n",
" Laki-laki | \n",
" 1990-10-12 | \n",
" 2022-02-01 | \n",
" 2023-07-17 | \n",
" Single | \n",
" 0 | \n",
" SLTA | \n",
" 1.0 | \n",
" ... | \n",
" Mid-term | \n",
" 8.500000 | \n",
" 1 | \n",
" 1 | \n",
" 2331081.0 | \n",
" 1 | \n",
" 2331081.0 | \n",
" 2.6 | \n",
" Medium | \n",
" 9.154017 | \n",
"
\n",
" \n",
" 4 | \n",
" EM14458 | \n",
" Kabupaten Bogor | \n",
" Perempuan | \n",
" 1996-04-24 | \n",
" 2022-10-23 | \n",
" 2023-12-30 | \n",
" Married | \n",
" 1 | \n",
" SLTA | \n",
" 12.0 | \n",
" ... | \n",
" Mid-term | \n",
" 1.076923 | \n",
" 2 | \n",
" 1 | \n",
" 1798725.0 | \n",
" 1 | \n",
" 1798725.0 | \n",
" 2.6 | \n",
" Medium | \n",
" 9.706741 | \n",
"
\n",
" \n",
"
\n",
"
5 rows × 37 columns
\n",
"
"
],
"text/plain": [
" employee_id domisili jenis_kelamin date_of_birth join_date \\\n",
"0 EM0120 Tangerang Laki-laki 1990-02-18 2023-01-11 \n",
"1 EM13985 Kepulauan Seribu Perempuan 1987-02-01 2022-09-26 \n",
"2 EM2560 Tangerang Perempuan 1999-11-01 2023-01-05 \n",
"3 EM0343 Kabupaten Bekasi Laki-laki 1990-10-12 2022-02-01 \n",
"4 EM14458 Kabupaten Bogor Perempuan 1996-04-24 2022-10-23 \n",
"\n",
" resign_date marriage_stat dependant education absent_90D ... \\\n",
"0 2024-01-30 Married 1 SLTA 11.0 ... \n",
"1 2023-11-08 Married 1 SLTA 0.0 ... \n",
"2 2024-05-04 Single 0 SLTA 10.0 ... \n",
"3 2023-07-17 Single 0 SLTA 1.0 ... \n",
"4 2023-12-30 Married 1 SLTA 12.0 ... \n",
"\n",
" active_work_category work_stability_score married_dependent_ratio \\\n",
"0 Mid-term 1.000000 2 \n",
"1 Mid-term 13.000000 2 \n",
"2 Mid-term 1.454545 1 \n",
"3 Mid-term 8.500000 1 \n",
"4 Mid-term 1.076923 2 \n",
"\n",
" position_score job_income_position_score education_score \\\n",
"0 1 1140915.0 1 \n",
"1 1 2103348.0 1 \n",
"2 1 2145814.0 1 \n",
"3 1 2331081.0 1 \n",
"4 1 1798725.0 1 \n",
"\n",
" education_income_ratio weighted_satisfaction_performance \\\n",
"0 1140915.0 1.4 \n",
"1 2103348.0 1.8 \n",
"2 2145814.0 1.6 \n",
"3 2331081.0 2.6 \n",
"4 1798725.0 2.6 \n",
"\n",
" resign_risk_indicator adjusted_work_time \n",
"0 Medium 9.393432 \n",
"1 Medium 9.300000 \n",
"2 Medium 9.205670 \n",
"3 Medium 9.154017 \n",
"4 Medium 9.706741 \n",
"\n",
"[5 rows x 37 columns]"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_test = pd.read_csv('D:/Tugas Akhir/Codingan/Notebook - Playground/preprocessed_data_test_5.csv')\n",
"df_test.head()"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"churn_status\n",
"1 809\n",
"0 798\n",
"Name: count, dtype: int64"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_test['churn_status'].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1607"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(df_test)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Accuracy: 0.9477286869943995\n",
"Precision: 0.9142857142857143\n",
"Recall: 0.9888751545117429\n",
"F1 Score: 0.9501187648456056\n"
]
}
],
"source": [
"X_test = df_test.drop(['churn_status', 'employee_id', 'date_of_birth', 'join_date', 'resign_date', 'active_work_months'], axis=1)\n",
"\n",
"cat_feature = ['departemen', 'position', 'domisili', 'marriage_stat', 'job_satisfaction', 'performance_rating',\n",
" 'education', 'active_work_category', 'resign_risk_indicator', 'jenis_kelamin']\n",
"\n",
"y_pred = final_model.predict(X_test)\n",
"\n",
"X_test['predicted_churn'] = y_pred\n",
"\n",
"accuracy = accuracy_score(df_test['churn_status'], y_pred)\n",
"precision = precision_score(df_test['churn_status'], y_pred, zero_division=0)\n",
"recall = recall_score(df_test['churn_status'], y_pred, zero_division=0)\n",
"f1 = f1_score(df_test['churn_status'], y_pred, zero_division=0)\n",
"\n",
"print(\"Accuracy:\", accuracy)\n",
"print(\"Precision:\", precision)\n",
"print(\"Recall:\", recall)\n",
"print(\"F1 Score:\", f1)"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" employee_id | \n",
" domisili | \n",
" jenis_kelamin | \n",
" date_of_birth | \n",
" join_date | \n",
" resign_date | \n",
" marriage_stat | \n",
" dependant | \n",
" education | \n",
" absent_90D | \n",
" ... | \n",
" active_work_category | \n",
" work_stability_score | \n",
" married_dependent_ratio | \n",
" position_score | \n",
" job_income_position_score | \n",
" education_score | \n",
" education_income_ratio | \n",
" weighted_satisfaction_performance | \n",
" resign_risk_indicator | \n",
" adjusted_work_time | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" EM0406 | \n",
" Kota Jakarta Utara | \n",
" Laki-laki | \n",
" 1975-01-07 | \n",
" 2021-09-25 | \n",
" 2023-12-07 | \n",
" Married | \n",
" 3 | \n",
" SLTA | \n",
" 3.0 | \n",
" ... | \n",
" Mid-term | \n",
" 6.500000 | \n",
" 4 | \n",
" 1 | \n",
" 1578410.0 | \n",
" 1 | \n",
" 1578410.00 | \n",
" 1.0 | \n",
" Medium | \n",
" 9.797428 | \n",
"
\n",
" \n",
" 1 | \n",
" EM1772 | \n",
" Kabupaten Bogor | \n",
" Perempuan | \n",
" 1993-04-18 | \n",
" 2021-02-23 | \n",
" 2023-06-24 | \n",
" Married | \n",
" 1 | \n",
" SLTA | \n",
" 2.0 | \n",
" ... | \n",
" Mid-term | \n",
" 9.333333 | \n",
" 2 | \n",
" 1 | \n",
" 2003154.0 | \n",
" 1 | \n",
" 2003154.00 | \n",
" 2.2 | \n",
" Medium | \n",
" 9.342582 | \n",
"
\n",
" \n",
" 2 | \n",
" EM7996 | \n",
" Tangerang | \n",
" Laki-laki | \n",
" 1998-02-12 | \n",
" 2023-05-04 | \n",
" 2024-06-29 | \n",
" Single | \n",
" 0 | \n",
" SLTA | \n",
" 5.0 | \n",
" ... | \n",
" Mid-term | \n",
" 2.333333 | \n",
" 1 | \n",
" 1 | \n",
" 1394384.0 | \n",
" 1 | \n",
" 1394384.00 | \n",
" 3.0 | \n",
" Medium | \n",
" 9.551975 | \n",
"
\n",
" \n",
" 3 | \n",
" EM13978 | \n",
" Kota Jakarta Barat | \n",
" Perempuan | \n",
" 1982-12-26 | \n",
" 2021-09-11 | \n",
" 2023-04-03 | \n",
" Married | \n",
" 0 | \n",
" D3 | \n",
" 0.0 | \n",
" ... | \n",
" Mid-term | \n",
" 18.000000 | \n",
" 1 | \n",
" 1 | \n",
" 4151999.0 | \n",
" 4 | \n",
" 1037999.75 | \n",
" 2.6 | \n",
" Medium | \n",
" 9.180000 | \n",
"
\n",
" \n",
" 4 | \n",
" EM9860 | \n",
" Tangerang | \n",
" Perempuan | \n",
" 1997-03-26 | \n",
" 2023-06-20 | \n",
" 2024-10-02 | \n",
" Single | \n",
" 0 | \n",
" SLTA | \n",
" 15.0 | \n",
" ... | \n",
" Mid-term | \n",
" 0.937500 | \n",
" 1 | \n",
" 1 | \n",
" 1560817.0 | \n",
" 1 | \n",
" 1560817.00 | \n",
" 2.6 | \n",
" Medium | \n",
" 9.414301 | \n",
"
\n",
" \n",
"
\n",
"
5 rows × 37 columns
\n",
"
"
],
"text/plain": [
" employee_id domisili jenis_kelamin date_of_birth join_date \\\n",
"0 EM0406 Kota Jakarta Utara Laki-laki 1975-01-07 2021-09-25 \n",
"1 EM1772 Kabupaten Bogor Perempuan 1993-04-18 2021-02-23 \n",
"2 EM7996 Tangerang Laki-laki 1998-02-12 2023-05-04 \n",
"3 EM13978 Kota Jakarta Barat Perempuan 1982-12-26 2021-09-11 \n",
"4 EM9860 Tangerang Perempuan 1997-03-26 2023-06-20 \n",
"\n",
" resign_date marriage_stat dependant education absent_90D ... \\\n",
"0 2023-12-07 Married 3 SLTA 3.0 ... \n",
"1 2023-06-24 Married 1 SLTA 2.0 ... \n",
"2 2024-06-29 Single 0 SLTA 5.0 ... \n",
"3 2023-04-03 Married 0 D3 0.0 ... \n",
"4 2024-10-02 Single 0 SLTA 15.0 ... \n",
"\n",
" active_work_category work_stability_score married_dependent_ratio \\\n",
"0 Mid-term 6.500000 4 \n",
"1 Mid-term 9.333333 2 \n",
"2 Mid-term 2.333333 1 \n",
"3 Mid-term 18.000000 1 \n",
"4 Mid-term 0.937500 1 \n",
"\n",
" position_score job_income_position_score education_score \\\n",
"0 1 1578410.0 1 \n",
"1 1 2003154.0 1 \n",
"2 1 1394384.0 1 \n",
"3 1 4151999.0 4 \n",
"4 1 1560817.0 1 \n",
"\n",
" education_income_ratio weighted_satisfaction_performance \\\n",
"0 1578410.00 1.0 \n",
"1 2003154.00 2.2 \n",
"2 1394384.00 3.0 \n",
"3 1037999.75 2.6 \n",
"4 1560817.00 2.6 \n",
"\n",
" resign_risk_indicator adjusted_work_time \n",
"0 Medium 9.797428 \n",
"1 Medium 9.342582 \n",
"2 Medium 9.551975 \n",
"3 Medium 9.180000 \n",
"4 Medium 9.414301 \n",
"\n",
"[5 rows x 37 columns]"
]
},
"execution_count": 33,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_test = pd.read_csv('D:/Tugas Akhir/Codingan/Notebook - Playground/preprocessed_data_test_7.csv')\n",
"df_test.head()"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"churn_status\n",
"1 161\n",
"Name: count, dtype: int64"
]
},
"execution_count": 34,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_test['churn_status'].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"161"
]
},
"execution_count": 35,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(df_test)"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Accuracy: 1.0\n",
"Precision: 1.0\n",
"Recall: 1.0\n",
"F1 Score: 1.0\n"
]
}
],
"source": [
"X_test = df_test.drop(['churn_status', 'employee_id', 'date_of_birth', 'join_date', 'resign_date', 'active_work_months'], axis=1)\n",
"\n",
"cat_feature = ['departemen', 'position', 'domisili', 'marriage_stat', 'job_satisfaction', 'performance_rating',\n",
" 'education', 'active_work_category', 'resign_risk_indicator', 'jenis_kelamin']\n",
"\n",
"y_pred = final_model.predict(X_test)\n",
"\n",
"X_test['predicted_churn'] = y_pred\n",
"\n",
"accuracy = accuracy_score(df_test['churn_status'], y_pred)\n",
"precision = precision_score(df_test['churn_status'], y_pred, zero_division=0)\n",
"recall = recall_score(df_test['churn_status'], y_pred, zero_division=0)\n",
"f1 = f1_score(df_test['churn_status'], y_pred, zero_division=0)\n",
"\n",
"print(\"Accuracy:\", accuracy)\n",
"print(\"Precision:\", precision)\n",
"print(\"Recall:\", recall)\n",
"print(\"F1 Score:\", f1)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" employee_id | \n",
" domisili | \n",
" jenis_kelamin | \n",
" date_of_birth | \n",
" join_date | \n",
" resign_date | \n",
" marriage_stat | \n",
" dependant | \n",
" education | \n",
" absent_90D | \n",
" ... | \n",
" active_work_category | \n",
" work_stability_score | \n",
" married_dependent_ratio | \n",
" position_score | \n",
" job_income_position_score | \n",
" education_score | \n",
" education_income_ratio | \n",
" weighted_satisfaction_performance | \n",
" resign_risk_indicator | \n",
" adjusted_work_time | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" EM0012 | \n",
" Tangerang | \n",
" Laki-laki | \n",
" 1970-12-21 | \n",
" 2023-02-23 | \n",
" 2024-08-07 | \n",
" Married | \n",
" 3 | \n",
" D3 | \n",
" 2.0 | \n",
" ... | \n",
" Mid-term | \n",
" 5.666667 | \n",
" 4 | \n",
" 1 | \n",
" 4708861.0 | \n",
" 4 | \n",
" 1.177215e+06 | \n",
" 1.4 | \n",
" Medium | \n",
" 9.857106 | \n",
"
\n",
" \n",
" 1 | \n",
" EM0026 | \n",
" Kota Depok | \n",
" Laki-laki | \n",
" 1986-11-14 | \n",
" 2022-04-17 | \n",
" 2024-08-04 | \n",
" Married | \n",
" 2 | \n",
" SLTA | \n",
" 4.0 | \n",
" ... | \n",
" Mid-term | \n",
" 5.600000 | \n",
" 3 | \n",
" 1 | \n",
" 1430853.0 | \n",
" 1 | \n",
" 1.430853e+06 | \n",
" 1.0 | \n",
" Medium | \n",
" 9.694593 | \n",
"
\n",
" \n",
" 2 | \n",
" EM0041 | \n",
" Kota Jakarta Barat | \n",
" Laki-laki | \n",
" 1983-03-16 | \n",
" 2023-06-15 | \n",
" 2024-09-06 | \n",
" Divorce | \n",
" 3 | \n",
" SLTA | \n",
" 7.0 | \n",
" ... | \n",
" Mid-term | \n",
" 1.750000 | \n",
" 1 | \n",
" 1 | \n",
" 1379381.0 | \n",
" 1 | \n",
" 1.379381e+06 | \n",
" 2.4 | \n",
" Medium | \n",
" 9.059429 | \n",
"
\n",
" \n",
" 3 | \n",
" EM0053 | \n",
" Kota Jakarta Timur | \n",
" Laki-laki | \n",
" 1979-07-13 | \n",
" 2023-07-11 | \n",
" 2024-09-21 | \n",
" Single | \n",
" 0 | \n",
" SLTA | \n",
" 1.0 | \n",
" ... | \n",
" Mid-term | \n",
" 7.000000 | \n",
" 1 | \n",
" 1 | \n",
" 1911583.0 | \n",
" 1 | \n",
" 1.911583e+06 | \n",
" 1.0 | \n",
" Medium | \n",
" 9.842189 | \n",
"
\n",
" \n",
" 4 | \n",
" EM0057 | \n",
" Kota Jakarta Barat | \n",
" Perempuan | \n",
" 2000-03-13 | \n",
" 2022-07-14 | \n",
" 2024-08-29 | \n",
" Single | \n",
" 0 | \n",
" D2 | \n",
" 8.0 | \n",
" ... | \n",
" Mid-term | \n",
" 2.777778 | \n",
" 1 | \n",
" 1 | \n",
" 3724157.0 | \n",
" 3 | \n",
" 1.241386e+06 | \n",
" 2.0 | \n",
" Medium | \n",
" 9.047730 | \n",
"
\n",
" \n",
" 5 | \n",
" EM0058 | \n",
" Tangerang | \n",
" Perempuan | \n",
" 1996-04-23 | \n",
" 2023-07-18 | \n",
" 2024-09-26 | \n",
" Single | \n",
" 0 | \n",
" SLTA | \n",
" 9.0 | \n",
" ... | \n",
" Mid-term | \n",
" 1.400000 | \n",
" 1 | \n",
" 1 | \n",
" 2229928.0 | \n",
" 1 | \n",
" 2.229928e+06 | \n",
" 1.4 | \n",
" Medium | \n",
" 9.114481 | \n",
"
\n",
" \n",
" 6 | \n",
" EM0064 | \n",
" Kota Jakarta Utara | \n",
" Perempuan | \n",
" 1987-04-20 | \n",
" 2022-07-25 | \n",
" 2024-08-23 | \n",
" Married | \n",
" 3 | \n",
" SLTA | \n",
" 0.0 | \n",
" ... | \n",
" Mid-term | \n",
" 25.000000 | \n",
" 4 | \n",
" 1 | \n",
" 1257855.0 | \n",
" 1 | \n",
" 1.257855e+06 | \n",
" 2.2 | \n",
" Medium | \n",
" 9.320000 | \n",
"
\n",
" \n",
" 7 | \n",
" EM0180 | \n",
" Kota Jakarta Utara | \n",
" Perempuan | \n",
" 2000-06-25 | \n",
" 2022-09-04 | \n",
" 2024-10-07 | \n",
" Single | \n",
" 0 | \n",
" D2 | \n",
" 7.0 | \n",
" ... | \n",
" Mid-term | \n",
" 3.125000 | \n",
" 1 | \n",
" 1 | \n",
" 3034058.0 | \n",
" 3 | \n",
" 1.011353e+06 | \n",
" 2.2 | \n",
" Medium | \n",
" 9.091639 | \n",
"
\n",
" \n",
" 8 | \n",
" EM0259 | \n",
" Kepulauan Seribu | \n",
" Laki-laki | \n",
" 1993-10-04 | \n",
" 2023-06-08 | \n",
" 2024-08-29 | \n",
" Single | \n",
" 0 | \n",
" D1 | \n",
" 4.0 | \n",
" ... | \n",
" Mid-term | \n",
" 2.800000 | \n",
" 1 | \n",
" 1 | \n",
" 4513378.0 | \n",
" 2 | \n",
" 2.256689e+06 | \n",
" 2.6 | \n",
" Medium | \n",
" 9.479833 | \n",
"
\n",
" \n",
" 9 | \n",
" EM0263 | \n",
" Kabupaten Bogor | \n",
" Laki-laki | \n",
" 1995-02-15 | \n",
" 2022-06-15 | \n",
" 2024-07-27 | \n",
" Married | \n",
" 1 | \n",
" SLTA | \n",
" 13.0 | \n",
" ... | \n",
" Mid-term | \n",
" 1.785714 | \n",
" 2 | \n",
" 1 | \n",
" 1599099.0 | \n",
" 1 | \n",
" 1.599099e+06 | \n",
" 1.6 | \n",
" Medium | \n",
" 9.077272 | \n",
"
\n",
" \n",
" 10 | \n",
" EM0268 | \n",
" Kota Jakarta Timur | \n",
" Laki-laki | \n",
" 1984-09-25 | \n",
" 2023-07-04 | \n",
" 2024-09-21 | \n",
" Married | \n",
" 5 | \n",
" SLTA | \n",
" 4.0 | \n",
" ... | \n",
" Mid-term | \n",
" 2.800000 | \n",
" 6 | \n",
" 1 | \n",
" 2869178.0 | \n",
" 1 | \n",
" 2.869178e+06 | \n",
" 1.0 | \n",
" Medium | \n",
" 9.599453 | \n",
"
\n",
" \n",
" 11 | \n",
" EM0274 | \n",
" Kota Bogor | \n",
" Laki-laki | \n",
" 1995-07-09 | \n",
" 2023-07-15 | \n",
" 2024-10-02 | \n",
" Married | \n",
" 0 | \n",
" D1 | \n",
" 5.0 | \n",
" ... | \n",
" Mid-term | \n",
" 2.333333 | \n",
" 1 | \n",
" 1 | \n",
" 3040879.0 | \n",
" 2 | \n",
" 1.520440e+06 | \n",
" 1.6 | \n",
" Medium | \n",
" 9.781063 | \n",
"
\n",
" \n",
" 12 | \n",
" EM0360 | \n",
" Tangerang | \n",
" Perempuan | \n",
" 1979-08-13 | \n",
" 2022-04-17 | \n",
" 2024-09-19 | \n",
" Married | \n",
" 3 | \n",
" D3 | \n",
" 7.0 | \n",
" ... | \n",
" Mid-term | \n",
" 3.625000 | \n",
" 4 | \n",
" 1 | \n",
" 4658718.0 | \n",
" 4 | \n",
" 1.164680e+06 | \n",
" 3.0 | \n",
" Medium | \n",
" 9.494477 | \n",
"
\n",
" \n",
" 13 | \n",
" EM0368 | \n",
" Kota Jakarta Timur | \n",
" Laki-laki | \n",
" 1979-12-25 | \n",
" 2022-05-25 | \n",
" 2024-08-02 | \n",
" Married | \n",
" 1 | \n",
" D1 | \n",
" 8.0 | \n",
" ... | \n",
" Mid-term | \n",
" 2.888889 | \n",
" 2 | \n",
" 1 | \n",
" 3326206.0 | \n",
" 2 | \n",
" 1.663103e+06 | \n",
" 2.6 | \n",
" Medium | \n",
" 9.786442 | \n",
"
\n",
" \n",
" 14 | \n",
" EM0384 | \n",
" Kota Jakarta Timur | \n",
" Laki-laki | \n",
" 1976-08-11 | \n",
" 2022-07-05 | \n",
" 2024-09-21 | \n",
" Married | \n",
" 1 | \n",
" D3 | \n",
" 4.0 | \n",
" ... | \n",
" Mid-term | \n",
" 5.200000 | \n",
" 2 | \n",
" 1 | \n",
" 3215076.0 | \n",
" 4 | \n",
" 8.037690e+05 | \n",
" 2.0 | \n",
" Medium | \n",
" 9.773272 | \n",
"
\n",
" \n",
" 15 | \n",
" EM0388 | \n",
" Kota Jakarta Timur | \n",
" Laki-laki | \n",
" 1970-11-15 | \n",
" 2023-07-10 | \n",
" 2024-09-07 | \n",
" Married | \n",
" 2 | \n",
" SLTA | \n",
" 12.0 | \n",
" ... | \n",
" Mid-term | \n",
" 1.076923 | \n",
" 3 | \n",
" 1 | \n",
" 1178459.0 | \n",
" 1 | \n",
" 1.178459e+06 | \n",
" 1.6 | \n",
" Medium | \n",
" 9.072831 | \n",
"
\n",
" \n",
" 16 | \n",
" EM0398 | \n",
" Kota Jakarta Timur | \n",
" Laki-laki | \n",
" 1999-05-03 | \n",
" 2023-09-01 | \n",
" 2024-10-12 | \n",
" Single | \n",
" 0 | \n",
" SLTA | \n",
" 0.0 | \n",
" ... | \n",
" Mid-term | \n",
" 13.000000 | \n",
" 1 | \n",
" 1 | \n",
" 1527441.0 | \n",
" 1 | \n",
" 1.527441e+06 | \n",
" 3.0 | \n",
" Medium | \n",
" 9.390000 | \n",
"
\n",
" \n",
" 17 | \n",
" EM0481 | \n",
" Kabupaten Bekasi | \n",
" Perempuan | \n",
" 1997-12-24 | \n",
" 2022-06-08 | \n",
" 2024-09-27 | \n",
" Single | \n",
" 0 | \n",
" SLTA | \n",
" 2.0 | \n",
" ... | \n",
" Mid-term | \n",
" 9.333333 | \n",
" 1 | \n",
" 1 | \n",
" 2890639.0 | \n",
" 1 | \n",
" 2.890639e+06 | \n",
" 1.6 | \n",
" Medium | \n",
" 9.562408 | \n",
"
\n",
" \n",
" 18 | \n",
" EM0483 | \n",
" Kabupaten Bogor | \n",
" Perempuan | \n",
" 1975-08-05 | \n",
" 2023-06-14 | \n",
" 2024-07-25 | \n",
" Married | \n",
" 1 | \n",
" SLTA | \n",
" 12.0 | \n",
" ... | \n",
" Mid-term | \n",
" 1.000000 | \n",
" 2 | \n",
" 1 | \n",
" 1193560.0 | \n",
" 1 | \n",
" 1.193560e+06 | \n",
" 2.6 | \n",
" Medium | \n",
" 9.313570 | \n",
"
\n",
" \n",
" 19 | \n",
" EM0491 | \n",
" Kota Jakarta Timur | \n",
" Perempuan | \n",
" 1969-04-10 | \n",
" 2023-06-28 | \n",
" 2024-09-14 | \n",
" Married | \n",
" 5 | \n",
" SLTA | \n",
" 5.0 | \n",
" ... | \n",
" Mid-term | \n",
" 2.333333 | \n",
" 6 | \n",
" 1 | \n",
" 2048458.0 | \n",
" 1 | \n",
" 2.048458e+06 | \n",
" 1.6 | \n",
" Medium | \n",
" 9.372688 | \n",
"
\n",
" \n",
" 20 | \n",
" EM0493 | \n",
" Tangerang | \n",
" Perempuan | \n",
" 1996-08-05 | \n",
" 2023-07-04 | \n",
" 2024-07-25 | \n",
" Married | \n",
" 1 | \n",
" SLTA | \n",
" 7.0 | \n",
" ... | \n",
" Mid-term | \n",
" 1.500000 | \n",
" 2 | \n",
" 1 | \n",
" 1267701.0 | \n",
" 1 | \n",
" 1.267701e+06 | \n",
" 1.4 | \n",
" Medium | \n",
" 9.190102 | \n",
"
\n",
" \n",
" 21 | \n",
" EM0499 | \n",
" Kota Jakarta Pusat | \n",
" Laki-laki | \n",
" 1990-10-23 | \n",
" 2022-07-20 | \n",
" 2024-09-25 | \n",
" Married | \n",
" 1 | \n",
" SLTA | \n",
" 13.0 | \n",
" ... | \n",
" Mid-term | \n",
" 1.857143 | \n",
" 2 | \n",
" 1 | \n",
" 1544522.0 | \n",
" 1 | \n",
" 1.544522e+06 | \n",
" 2.6 | \n",
" Medium | \n",
" 9.825135 | \n",
"
\n",
" \n",
" 22 | \n",
" EM0504 | \n",
" Kabupaten Bekasi | \n",
" Laki-laki | \n",
" 2000-04-19 | \n",
" 2023-08-01 | \n",
" 2024-09-21 | \n",
" Single | \n",
" 0 | \n",
" SLTA | \n",
" 3.0 | \n",
" ... | \n",
" Mid-term | \n",
" 3.250000 | \n",
" 1 | \n",
" 1 | \n",
" 1486463.0 | \n",
" 1 | \n",
" 1.486463e+06 | \n",
" 1.8 | \n",
" Medium | \n",
" 9.705073 | \n",
"
\n",
" \n",
" 23 | \n",
" EM0509 | \n",
" Kota Jakarta Timur | \n",
" Laki-laki | \n",
" 1992-11-23 | \n",
" 2023-08-15 | \n",
" 2024-10-02 | \n",
" Married | \n",
" 1 | \n",
" SLTA | \n",
" 15.0 | \n",
" ... | \n",
" Mid-term | \n",
" 0.812500 | \n",
" 2 | \n",
" 1 | \n",
" 1214155.0 | \n",
" 1 | \n",
" 1.214155e+06 | \n",
" 1.0 | \n",
" Medium | \n",
" 9.733698 | \n",
"
\n",
" \n",
" 24 | \n",
" EM0520 | \n",
" Kota Jakarta Timur | \n",
" Perempuan | \n",
" 2000-09-11 | \n",
" 2022-09-12 | \n",
" 2024-10-13 | \n",
" Single | \n",
" 0 | \n",
" SLTA | \n",
" 13.0 | \n",
" ... | \n",
" Mid-term | \n",
" 1.785714 | \n",
" 1 | \n",
" 1 | \n",
" 1098601.0 | \n",
" 1 | \n",
" 1.098601e+06 | \n",
" 2.6 | \n",
" Medium | \n",
" 9.087215 | \n",
"
\n",
" \n",
" 25 | \n",
" EM0590 | \n",
" Kota Jakarta Pusat | \n",
" Perempuan | \n",
" 1980-06-14 | \n",
" 2023-04-13 | \n",
" 2024-08-29 | \n",
" Married | \n",
" 0 | \n",
" D3 | \n",
" 5.0 | \n",
" ... | \n",
" Mid-term | \n",
" 2.666667 | \n",
" 1 | \n",
" 1 | \n",
" 4646268.0 | \n",
" 4 | \n",
" 1.161567e+06 | \n",
" 1.4 | \n",
" Medium | \n",
" 9.108286 | \n",
"
\n",
" \n",
" 26 | \n",
" EM0597 | \n",
" Kabupaten Bekasi | \n",
" Perempuan | \n",
" 1980-11-30 | \n",
" 2023-05-15 | \n",
" 2024-09-21 | \n",
" Married | \n",
" 3 | \n",
" D1 | \n",
" 3.0 | \n",
" ... | \n",
" Mid-term | \n",
" 4.000000 | \n",
" 4 | \n",
" 1 | \n",
" 3975285.0 | \n",
" 2 | \n",
" 1.987642e+06 | \n",
" 1.6 | \n",
" Medium | \n",
" 9.580014 | \n",
"
\n",
" \n",
" 27 | \n",
" EM0602 | \n",
" Kota Bekasi | \n",
" Perempuan | \n",
" 1990-07-28 | \n",
" 2023-05-29 | \n",
" 2024-08-26 | \n",
" Single | \n",
" 0 | \n",
" D2 | \n",
" 9.0 | \n",
" ... | \n",
" Mid-term | \n",
" 1.500000 | \n",
" 1 | \n",
" 1 | \n",
" 3496995.0 | \n",
" 3 | \n",
" 1.165665e+06 | \n",
" 2.2 | \n",
" Medium | \n",
" 9.188379 | \n",
"
\n",
" \n",
" 28 | \n",
" EM0606 | \n",
" Kota Bogor | \n",
" Laki-laki | \n",
" 1987-08-01 | \n",
" 2023-06-12 | \n",
" 2024-08-07 | \n",
" Divorce | \n",
" 0 | \n",
" SLTA | \n",
" 7.0 | \n",
" ... | \n",
" Mid-term | \n",
" 1.750000 | \n",
" 1 | \n",
" 1 | \n",
" 2928866.0 | \n",
" 1 | \n",
" 2.928866e+06 | \n",
" 2.4 | \n",
" Medium | \n",
" 9.357764 | \n",
"
\n",
" \n",
" 29 | \n",
" EM0621 | \n",
" Kabupaten Bekasi | \n",
" Laki-laki | \n",
" 2000-05-14 | \n",
" 2022-07-25 | \n",
" 2024-09-07 | \n",
" Married | \n",
" 0 | \n",
" SLTA | \n",
" 13.0 | \n",
" ... | \n",
" Mid-term | \n",
" 1.785714 | \n",
" 1 | \n",
" 1 | \n",
" 1374872.0 | \n",
" 1 | \n",
" 1.374872e+06 | \n",
" 1.6 | \n",
" Medium | \n",
" 9.107099 | \n",
"
\n",
" \n",
" 30 | \n",
" EM0626 | \n",
" Kota Depok | \n",
" Laki-laki | \n",
" 1978-08-28 | \n",
" 2023-08-14 | \n",
" 2024-09-21 | \n",
" Married | \n",
" 2 | \n",
" SLTA | \n",
" 10.0 | \n",
" ... | \n",
" Mid-term | \n",
" 1.181818 | \n",
" 3 | \n",
" 1 | \n",
" 2436465.0 | \n",
" 1 | \n",
" 2.436465e+06 | \n",
" 3.0 | \n",
" Medium | \n",
" 9.250325 | \n",
"
\n",
" \n",
" 31 | \n",
" EM0638 | \n",
" Kabupaten Bogor | \n",
" Laki-laki | \n",
" 1991-06-05 | \n",
" 2023-09-11 | \n",
" 2024-10-02 | \n",
" Single | \n",
" 0 | \n",
" SLTA | \n",
" 10.0 | \n",
" ... | \n",
" Mid-term | \n",
" 1.090909 | \n",
" 1 | \n",
" 1 | \n",
" 1191009.0 | \n",
" 1 | \n",
" 1.191009e+06 | \n",
" 2.2 | \n",
" Medium | \n",
" 9.134709 | \n",
"
\n",
" \n",
" 32 | \n",
" EM0640 | \n",
" Tangerang | \n",
" Laki-laki | \n",
" 1986-05-10 | \n",
" 2023-09-25 | \n",
" 2024-10-22 | \n",
" Married | \n",
" 1 | \n",
" SLTA | \n",
" 15.0 | \n",
" ... | \n",
" Mid-term | \n",
" 0.812500 | \n",
" 2 | \n",
" 1 | \n",
" 1106988.0 | \n",
" 1 | \n",
" 1.106988e+06 | \n",
" 2.2 | \n",
" Medium | \n",
" 9.220359 | \n",
"
\n",
" \n",
" 33 | \n",
" EM0722 | \n",
" Kota Jakarta Selatan | \n",
" Perempuan | \n",
" 1978-05-29 | \n",
" 2022-07-10 | \n",
" 2024-10-26 | \n",
" Married | \n",
" 3 | \n",
" D2 | \n",
" 7.0 | \n",
" ... | \n",
" Mid-term | \n",
" 3.375000 | \n",
" 4 | \n",
" 1 | \n",
" 3502617.0 | \n",
" 3 | \n",
" 1.167539e+06 | \n",
" 1.8 | \n",
" Medium | \n",
" 9.133624 | \n",
"
\n",
" \n",
" 34 | \n",
" EM0726 | \n",
" Kabupaten Bekasi | \n",
" Laki-laki | \n",
" 1980-09-08 | \n",
" 2023-07-20 | \n",
" 2024-08-31 | \n",
" Married | \n",
" 2 | \n",
" SLTA | \n",
" 11.0 | \n",
" ... | \n",
" Mid-term | \n",
" 1.083333 | \n",
" 3 | \n",
" 1 | \n",
" 1592248.0 | \n",
" 1 | \n",
" 1.592248e+06 | \n",
" 1.0 | \n",
" Medium | \n",
" 9.668318 | \n",
"
\n",
" \n",
" 35 | \n",
" EM0728 | \n",
" Kota Bekasi | \n",
" Laki-laki | \n",
" 1983-04-20 | \n",
" 2023-07-25 | \n",
" 2024-09-11 | \n",
" Divorce | \n",
" 2 | \n",
" SLTA | \n",
" 0.0 | \n",
" ... | \n",
" Mid-term | \n",
" 13.000000 | \n",
" 1 | \n",
" 1 | \n",
" 1798264.0 | \n",
" 1 | \n",
" 1.798264e+06 | \n",
" 2.0 | \n",
" Medium | \n",
" 9.720000 | \n",
"
\n",
" \n",
" 36 | \n",
" EM0730 | \n",
" Kota Jakarta Timur | \n",
" Perempuan | \n",
" 1978-08-22 | \n",
" 2023-08-03 | \n",
" 2024-09-14 | \n",
" Married | \n",
" 3 | \n",
" SLTA | \n",
" 0.0 | \n",
" ... | \n",
" Mid-term | \n",
" 13.000000 | \n",
" 4 | \n",
" 1 | \n",
" 1658463.0 | \n",
" 1 | \n",
" 1.658463e+06 | \n",
" 1.6 | \n",
" Medium | \n",
" 9.150000 | \n",
"
\n",
" \n",
" 37 | \n",
" EM0732 | \n",
" Kota Jakarta Selatan | \n",
" Laki-laki | \n",
" 1981-03-17 | \n",
" 2022-08-04 | \n",
" 2024-09-13 | \n",
" Married | \n",
" 2 | \n",
" SLTA | \n",
" 8.0 | \n",
" ... | \n",
" Mid-term | \n",
" 2.777778 | \n",
" 3 | \n",
" 1 | \n",
" 1461380.0 | \n",
" 1 | \n",
" 1.461380e+06 | \n",
" 2.4 | \n",
" Medium | \n",
" 9.695420 | \n",
"
\n",
" \n",
" 38 | \n",
" EM0733 | \n",
" Kota Jakarta Utara | \n",
" Laki-laki | \n",
" 1975-05-11 | \n",
" 2023-08-04 | \n",
" 2024-09-18 | \n",
" Married | \n",
" 2 | \n",
" SLTA | \n",
" 6.0 | \n",
" ... | \n",
" Mid-term | \n",
" 1.857143 | \n",
" 3 | \n",
" 1 | \n",
" 2041027.0 | \n",
" 1 | \n",
" 2.041027e+06 | \n",
" 1.8 | \n",
" Medium | \n",
" 9.550811 | \n",
"
\n",
" \n",
" 39 | \n",
" EM0736 | \n",
" Kota Depok | \n",
" Laki-laki | \n",
" 1999-12-24 | \n",
" 2022-08-08 | \n",
" 2024-09-30 | \n",
" Single | \n",
" 0 | \n",
" D3 | \n",
" 4.0 | \n",
" ... | \n",
" Mid-term | \n",
" 5.200000 | \n",
" 1 | \n",
" 1 | \n",
" 4568518.0 | \n",
" 4 | \n",
" 1.142130e+06 | \n",
" 1.0 | \n",
" Medium | \n",
" 9.773272 | \n",
"
\n",
" \n",
" 40 | \n",
" EM0741 | \n",
" Kota Jakarta Timur | \n",
" Laki-laki | \n",
" 1997-01-24 | \n",
" 2023-08-21 | \n",
" 2024-09-23 | \n",
" Single | \n",
" 0 | \n",
" D1 | \n",
" 6.0 | \n",
" ... | \n",
" Mid-term | \n",
" 1.857143 | \n",
" 1 | \n",
" 1 | \n",
" 3317052.0 | \n",
" 2 | \n",
" 1.658526e+06 | \n",
" 2.4 | \n",
" Medium | \n",
" 9.371734 | \n",
"
\n",
" \n",
" 41 | \n",
" EM0819 | \n",
" Kota Depok | \n",
" Laki-laki | \n",
" 1998-10-15 | \n",
" 2022-05-09 | \n",
" 2024-10-11 | \n",
" Single | \n",
" 0 | \n",
" D1 | \n",
" 5.0 | \n",
" ... | \n",
" Mid-term | \n",
" 4.833333 | \n",
" 1 | \n",
" 1 | \n",
" 3966514.0 | \n",
" 2 | \n",
" 1.983257e+06 | \n",
" 2.6 | \n",
" Medium | \n",
" 9.342076 | \n",
"
\n",
" \n",
" 42 | \n",
" EM0837 | \n",
" Kota Jakarta Selatan | \n",
" Perempuan | \n",
" 1991-10-25 | \n",
" 2023-07-03 | \n",
" 2024-09-15 | \n",
" Married | \n",
" 1 | \n",
" D1 | \n",
" 7.0 | \n",
" ... | \n",
" Mid-term | \n",
" 1.750000 | \n",
" 2 | \n",
" 1 | \n",
" 3765986.0 | \n",
" 2 | \n",
" 1.882993e+06 | \n",
" 3.0 | \n",
" Medium | \n",
" 9.327930 | \n",
"
\n",
" \n",
" 43 | \n",
" EM0845 | \n",
" Kota Jakarta Timur | \n",
" Laki-laki | \n",
" 1998-10-17 | \n",
" 2022-07-24 | \n",
" 2024-08-23 | \n",
" Single | \n",
" 0 | \n",
" SLTA | \n",
" 14.0 | \n",
" ... | \n",
" Mid-term | \n",
" 1.666667 | \n",
" 1 | \n",
" 1 | \n",
" 1258904.0 | \n",
" 1 | \n",
" 1.258904e+06 | \n",
" 2.2 | \n",
" Medium | \n",
" 9.798676 | \n",
"
\n",
" \n",
" 44 | \n",
" EM0865 | \n",
" Kota Depok | \n",
" Laki-laki | \n",
" 1996-10-04 | \n",
" 2023-09-05 | \n",
" 2024-09-27 | \n",
" Single | \n",
" 0 | \n",
" SLTA | \n",
" 0.0 | \n",
" ... | \n",
" Mid-term | \n",
" 12.000000 | \n",
" 1 | \n",
" 1 | \n",
" 1126688.0 | \n",
" 1 | \n",
" 1.126688e+06 | \n",
" 1.4 | \n",
" Medium | \n",
" 9.400000 | \n",
"
\n",
" \n",
" 45 | \n",
" EM0868 | \n",
" Kabupaten Bogor | \n",
" Laki-laki | \n",
" 1977-06-10 | \n",
" 2023-09-07 | \n",
" 2024-10-26 | \n",
" Married | \n",
" 5 | \n",
" SLTA | \n",
" 0.0 | \n",
" ... | \n",
" Mid-term | \n",
" 13.000000 | \n",
" 6 | \n",
" 1 | \n",
" 1144246.0 | \n",
" 1 | \n",
" 1.144246e+06 | \n",
" 1.4 | \n",
" Medium | \n",
" 9.140000 | \n",
"
\n",
" \n",
" 46 | \n",
" EM0930 | \n",
" Kota Depok | \n",
" Laki-laki | \n",
" 1970-08-04 | \n",
" 2023-03-08 | \n",
" 2024-09-18 | \n",
" Married | \n",
" 2 | \n",
" D3 | \n",
" 2.0 | \n",
" ... | \n",
" Mid-term | \n",
" 6.000000 | \n",
" 3 | \n",
" 1 | \n",
" 3918148.0 | \n",
" 4 | \n",
" 9.795370e+05 | \n",
" 2.4 | \n",
" Medium | \n",
" 9.478291 | \n",
"
\n",
" \n",
" 47 | \n",
" EM0933 | \n",
" Kota Jakarta Timur | \n",
" Laki-laki | \n",
" 1981-10-31 | \n",
" 2022-03-20 | \n",
" 2024-09-08 | \n",
" Married | \n",
" 1 | \n",
" SLTA | \n",
" 7.0 | \n",
" ... | \n",
" Mid-term | \n",
" 3.750000 | \n",
" 2 | \n",
" 1 | \n",
" 2490863.0 | \n",
" 1 | \n",
" 2.490863e+06 | \n",
" 2.0 | \n",
" Medium | \n",
" 9.106338 | \n",
"
\n",
" \n",
" 48 | \n",
" EM0957 | \n",
" Kota Jakarta Selatan | \n",
" Perempuan | \n",
" 1998-11-24 | \n",
" 2022-07-05 | \n",
" 2024-10-31 | \n",
" Married | \n",
" 2 | \n",
" SLTA | \n",
" 10.0 | \n",
" ... | \n",
" Mid-term | \n",
" 2.545455 | \n",
" 3 | \n",
" 1 | \n",
" 2615137.0 | \n",
" 1 | \n",
" 2.615137e+06 | \n",
" 2.0 | \n",
" Medium | \n",
" 9.342793 | \n",
"
\n",
" \n",
" 49 | \n",
" EM0967 | \n",
" Kabupaten Bogor | \n",
" Laki-laki | \n",
" 1996-02-01 | \n",
" 2023-08-07 | \n",
" 2024-10-03 | \n",
" Single | \n",
" 0 | \n",
" SLTA | \n",
" 7.0 | \n",
" ... | \n",
" Mid-term | \n",
" 1.750000 | \n",
" 1 | \n",
" 1 | \n",
" 1745824.0 | \n",
" 1 | \n",
" 1.745824e+06 | \n",
" 1.4 | \n",
" Medium | \n",
" 9.208596 | \n",
"
\n",
" \n",
"
\n",
"
50 rows × 37 columns
\n",
"
"
],
"text/plain": [
" employee_id domisili jenis_kelamin date_of_birth join_date \\\n",
"0 EM0012 Tangerang Laki-laki 1970-12-21 2023-02-23 \n",
"1 EM0026 Kota Depok Laki-laki 1986-11-14 2022-04-17 \n",
"2 EM0041 Kota Jakarta Barat Laki-laki 1983-03-16 2023-06-15 \n",
"3 EM0053 Kota Jakarta Timur Laki-laki 1979-07-13 2023-07-11 \n",
"4 EM0057 Kota Jakarta Barat Perempuan 2000-03-13 2022-07-14 \n",
"5 EM0058 Tangerang Perempuan 1996-04-23 2023-07-18 \n",
"6 EM0064 Kota Jakarta Utara Perempuan 1987-04-20 2022-07-25 \n",
"7 EM0180 Kota Jakarta Utara Perempuan 2000-06-25 2022-09-04 \n",
"8 EM0259 Kepulauan Seribu Laki-laki 1993-10-04 2023-06-08 \n",
"9 EM0263 Kabupaten Bogor Laki-laki 1995-02-15 2022-06-15 \n",
"10 EM0268 Kota Jakarta Timur Laki-laki 1984-09-25 2023-07-04 \n",
"11 EM0274 Kota Bogor Laki-laki 1995-07-09 2023-07-15 \n",
"12 EM0360 Tangerang Perempuan 1979-08-13 2022-04-17 \n",
"13 EM0368 Kota Jakarta Timur Laki-laki 1979-12-25 2022-05-25 \n",
"14 EM0384 Kota Jakarta Timur Laki-laki 1976-08-11 2022-07-05 \n",
"15 EM0388 Kota Jakarta Timur Laki-laki 1970-11-15 2023-07-10 \n",
"16 EM0398 Kota Jakarta Timur Laki-laki 1999-05-03 2023-09-01 \n",
"17 EM0481 Kabupaten Bekasi Perempuan 1997-12-24 2022-06-08 \n",
"18 EM0483 Kabupaten Bogor Perempuan 1975-08-05 2023-06-14 \n",
"19 EM0491 Kota Jakarta Timur Perempuan 1969-04-10 2023-06-28 \n",
"20 EM0493 Tangerang Perempuan 1996-08-05 2023-07-04 \n",
"21 EM0499 Kota Jakarta Pusat Laki-laki 1990-10-23 2022-07-20 \n",
"22 EM0504 Kabupaten Bekasi Laki-laki 2000-04-19 2023-08-01 \n",
"23 EM0509 Kota Jakarta Timur Laki-laki 1992-11-23 2023-08-15 \n",
"24 EM0520 Kota Jakarta Timur Perempuan 2000-09-11 2022-09-12 \n",
"25 EM0590 Kota Jakarta Pusat Perempuan 1980-06-14 2023-04-13 \n",
"26 EM0597 Kabupaten Bekasi Perempuan 1980-11-30 2023-05-15 \n",
"27 EM0602 Kota Bekasi Perempuan 1990-07-28 2023-05-29 \n",
"28 EM0606 Kota Bogor Laki-laki 1987-08-01 2023-06-12 \n",
"29 EM0621 Kabupaten Bekasi Laki-laki 2000-05-14 2022-07-25 \n",
"30 EM0626 Kota Depok Laki-laki 1978-08-28 2023-08-14 \n",
"31 EM0638 Kabupaten Bogor Laki-laki 1991-06-05 2023-09-11 \n",
"32 EM0640 Tangerang Laki-laki 1986-05-10 2023-09-25 \n",
"33 EM0722 Kota Jakarta Selatan Perempuan 1978-05-29 2022-07-10 \n",
"34 EM0726 Kabupaten Bekasi Laki-laki 1980-09-08 2023-07-20 \n",
"35 EM0728 Kota Bekasi Laki-laki 1983-04-20 2023-07-25 \n",
"36 EM0730 Kota Jakarta Timur Perempuan 1978-08-22 2023-08-03 \n",
"37 EM0732 Kota Jakarta Selatan Laki-laki 1981-03-17 2022-08-04 \n",
"38 EM0733 Kota Jakarta Utara Laki-laki 1975-05-11 2023-08-04 \n",
"39 EM0736 Kota Depok Laki-laki 1999-12-24 2022-08-08 \n",
"40 EM0741 Kota Jakarta Timur Laki-laki 1997-01-24 2023-08-21 \n",
"41 EM0819 Kota Depok Laki-laki 1998-10-15 2022-05-09 \n",
"42 EM0837 Kota Jakarta Selatan Perempuan 1991-10-25 2023-07-03 \n",
"43 EM0845 Kota Jakarta Timur Laki-laki 1998-10-17 2022-07-24 \n",
"44 EM0865 Kota Depok Laki-laki 1996-10-04 2023-09-05 \n",
"45 EM0868 Kabupaten Bogor Laki-laki 1977-06-10 2023-09-07 \n",
"46 EM0930 Kota Depok Laki-laki 1970-08-04 2023-03-08 \n",
"47 EM0933 Kota Jakarta Timur Laki-laki 1981-10-31 2022-03-20 \n",
"48 EM0957 Kota Jakarta Selatan Perempuan 1998-11-24 2022-07-05 \n",
"49 EM0967 Kabupaten Bogor Laki-laki 1996-02-01 2023-08-07 \n",
"\n",
" resign_date marriage_stat dependant education absent_90D ... \\\n",
"0 2024-08-07 Married 3 D3 2.0 ... \n",
"1 2024-08-04 Married 2 SLTA 4.0 ... \n",
"2 2024-09-06 Divorce 3 SLTA 7.0 ... \n",
"3 2024-09-21 Single 0 SLTA 1.0 ... \n",
"4 2024-08-29 Single 0 D2 8.0 ... \n",
"5 2024-09-26 Single 0 SLTA 9.0 ... \n",
"6 2024-08-23 Married 3 SLTA 0.0 ... \n",
"7 2024-10-07 Single 0 D2 7.0 ... \n",
"8 2024-08-29 Single 0 D1 4.0 ... \n",
"9 2024-07-27 Married 1 SLTA 13.0 ... \n",
"10 2024-09-21 Married 5 SLTA 4.0 ... \n",
"11 2024-10-02 Married 0 D1 5.0 ... \n",
"12 2024-09-19 Married 3 D3 7.0 ... \n",
"13 2024-08-02 Married 1 D1 8.0 ... \n",
"14 2024-09-21 Married 1 D3 4.0 ... \n",
"15 2024-09-07 Married 2 SLTA 12.0 ... \n",
"16 2024-10-12 Single 0 SLTA 0.0 ... \n",
"17 2024-09-27 Single 0 SLTA 2.0 ... \n",
"18 2024-07-25 Married 1 SLTA 12.0 ... \n",
"19 2024-09-14 Married 5 SLTA 5.0 ... \n",
"20 2024-07-25 Married 1 SLTA 7.0 ... \n",
"21 2024-09-25 Married 1 SLTA 13.0 ... \n",
"22 2024-09-21 Single 0 SLTA 3.0 ... \n",
"23 2024-10-02 Married 1 SLTA 15.0 ... \n",
"24 2024-10-13 Single 0 SLTA 13.0 ... \n",
"25 2024-08-29 Married 0 D3 5.0 ... \n",
"26 2024-09-21 Married 3 D1 3.0 ... \n",
"27 2024-08-26 Single 0 D2 9.0 ... \n",
"28 2024-08-07 Divorce 0 SLTA 7.0 ... \n",
"29 2024-09-07 Married 0 SLTA 13.0 ... \n",
"30 2024-09-21 Married 2 SLTA 10.0 ... \n",
"31 2024-10-02 Single 0 SLTA 10.0 ... \n",
"32 2024-10-22 Married 1 SLTA 15.0 ... \n",
"33 2024-10-26 Married 3 D2 7.0 ... \n",
"34 2024-08-31 Married 2 SLTA 11.0 ... \n",
"35 2024-09-11 Divorce 2 SLTA 0.0 ... \n",
"36 2024-09-14 Married 3 SLTA 0.0 ... \n",
"37 2024-09-13 Married 2 SLTA 8.0 ... \n",
"38 2024-09-18 Married 2 SLTA 6.0 ... \n",
"39 2024-09-30 Single 0 D3 4.0 ... \n",
"40 2024-09-23 Single 0 D1 6.0 ... \n",
"41 2024-10-11 Single 0 D1 5.0 ... \n",
"42 2024-09-15 Married 1 D1 7.0 ... \n",
"43 2024-08-23 Single 0 SLTA 14.0 ... \n",
"44 2024-09-27 Single 0 SLTA 0.0 ... \n",
"45 2024-10-26 Married 5 SLTA 0.0 ... \n",
"46 2024-09-18 Married 2 D3 2.0 ... \n",
"47 2024-09-08 Married 1 SLTA 7.0 ... \n",
"48 2024-10-31 Married 2 SLTA 10.0 ... \n",
"49 2024-10-03 Single 0 SLTA 7.0 ... \n",
"\n",
" active_work_category work_stability_score married_dependent_ratio \\\n",
"0 Mid-term 5.666667 4 \n",
"1 Mid-term 5.600000 3 \n",
"2 Mid-term 1.750000 1 \n",
"3 Mid-term 7.000000 1 \n",
"4 Mid-term 2.777778 1 \n",
"5 Mid-term 1.400000 1 \n",
"6 Mid-term 25.000000 4 \n",
"7 Mid-term 3.125000 1 \n",
"8 Mid-term 2.800000 1 \n",
"9 Mid-term 1.785714 2 \n",
"10 Mid-term 2.800000 6 \n",
"11 Mid-term 2.333333 1 \n",
"12 Mid-term 3.625000 4 \n",
"13 Mid-term 2.888889 2 \n",
"14 Mid-term 5.200000 2 \n",
"15 Mid-term 1.076923 3 \n",
"16 Mid-term 13.000000 1 \n",
"17 Mid-term 9.333333 1 \n",
"18 Mid-term 1.000000 2 \n",
"19 Mid-term 2.333333 6 \n",
"20 Mid-term 1.500000 2 \n",
"21 Mid-term 1.857143 2 \n",
"22 Mid-term 3.250000 1 \n",
"23 Mid-term 0.812500 2 \n",
"24 Mid-term 1.785714 1 \n",
"25 Mid-term 2.666667 1 \n",
"26 Mid-term 4.000000 4 \n",
"27 Mid-term 1.500000 1 \n",
"28 Mid-term 1.750000 1 \n",
"29 Mid-term 1.785714 1 \n",
"30 Mid-term 1.181818 3 \n",
"31 Mid-term 1.090909 1 \n",
"32 Mid-term 0.812500 2 \n",
"33 Mid-term 3.375000 4 \n",
"34 Mid-term 1.083333 3 \n",
"35 Mid-term 13.000000 1 \n",
"36 Mid-term 13.000000 4 \n",
"37 Mid-term 2.777778 3 \n",
"38 Mid-term 1.857143 3 \n",
"39 Mid-term 5.200000 1 \n",
"40 Mid-term 1.857143 1 \n",
"41 Mid-term 4.833333 1 \n",
"42 Mid-term 1.750000 2 \n",
"43 Mid-term 1.666667 1 \n",
"44 Mid-term 12.000000 1 \n",
"45 Mid-term 13.000000 6 \n",
"46 Mid-term 6.000000 3 \n",
"47 Mid-term 3.750000 2 \n",
"48 Mid-term 2.545455 3 \n",
"49 Mid-term 1.750000 1 \n",
"\n",
" position_score job_income_position_score education_score \\\n",
"0 1 4708861.0 4 \n",
"1 1 1430853.0 1 \n",
"2 1 1379381.0 1 \n",
"3 1 1911583.0 1 \n",
"4 1 3724157.0 3 \n",
"5 1 2229928.0 1 \n",
"6 1 1257855.0 1 \n",
"7 1 3034058.0 3 \n",
"8 1 4513378.0 2 \n",
"9 1 1599099.0 1 \n",
"10 1 2869178.0 1 \n",
"11 1 3040879.0 2 \n",
"12 1 4658718.0 4 \n",
"13 1 3326206.0 2 \n",
"14 1 3215076.0 4 \n",
"15 1 1178459.0 1 \n",
"16 1 1527441.0 1 \n",
"17 1 2890639.0 1 \n",
"18 1 1193560.0 1 \n",
"19 1 2048458.0 1 \n",
"20 1 1267701.0 1 \n",
"21 1 1544522.0 1 \n",
"22 1 1486463.0 1 \n",
"23 1 1214155.0 1 \n",
"24 1 1098601.0 1 \n",
"25 1 4646268.0 4 \n",
"26 1 3975285.0 2 \n",
"27 1 3496995.0 3 \n",
"28 1 2928866.0 1 \n",
"29 1 1374872.0 1 \n",
"30 1 2436465.0 1 \n",
"31 1 1191009.0 1 \n",
"32 1 1106988.0 1 \n",
"33 1 3502617.0 3 \n",
"34 1 1592248.0 1 \n",
"35 1 1798264.0 1 \n",
"36 1 1658463.0 1 \n",
"37 1 1461380.0 1 \n",
"38 1 2041027.0 1 \n",
"39 1 4568518.0 4 \n",
"40 1 3317052.0 2 \n",
"41 1 3966514.0 2 \n",
"42 1 3765986.0 2 \n",
"43 1 1258904.0 1 \n",
"44 1 1126688.0 1 \n",
"45 1 1144246.0 1 \n",
"46 1 3918148.0 4 \n",
"47 1 2490863.0 1 \n",
"48 1 2615137.0 1 \n",
"49 1 1745824.0 1 \n",
"\n",
" education_income_ratio weighted_satisfaction_performance \\\n",
"0 1.177215e+06 1.4 \n",
"1 1.430853e+06 1.0 \n",
"2 1.379381e+06 2.4 \n",
"3 1.911583e+06 1.0 \n",
"4 1.241386e+06 2.0 \n",
"5 2.229928e+06 1.4 \n",
"6 1.257855e+06 2.2 \n",
"7 1.011353e+06 2.2 \n",
"8 2.256689e+06 2.6 \n",
"9 1.599099e+06 1.6 \n",
"10 2.869178e+06 1.0 \n",
"11 1.520440e+06 1.6 \n",
"12 1.164680e+06 3.0 \n",
"13 1.663103e+06 2.6 \n",
"14 8.037690e+05 2.0 \n",
"15 1.178459e+06 1.6 \n",
"16 1.527441e+06 3.0 \n",
"17 2.890639e+06 1.6 \n",
"18 1.193560e+06 2.6 \n",
"19 2.048458e+06 1.6 \n",
"20 1.267701e+06 1.4 \n",
"21 1.544522e+06 2.6 \n",
"22 1.486463e+06 1.8 \n",
"23 1.214155e+06 1.0 \n",
"24 1.098601e+06 2.6 \n",
"25 1.161567e+06 1.4 \n",
"26 1.987642e+06 1.6 \n",
"27 1.165665e+06 2.2 \n",
"28 2.928866e+06 2.4 \n",
"29 1.374872e+06 1.6 \n",
"30 2.436465e+06 3.0 \n",
"31 1.191009e+06 2.2 \n",
"32 1.106988e+06 2.2 \n",
"33 1.167539e+06 1.8 \n",
"34 1.592248e+06 1.0 \n",
"35 1.798264e+06 2.0 \n",
"36 1.658463e+06 1.6 \n",
"37 1.461380e+06 2.4 \n",
"38 2.041027e+06 1.8 \n",
"39 1.142130e+06 1.0 \n",
"40 1.658526e+06 2.4 \n",
"41 1.983257e+06 2.6 \n",
"42 1.882993e+06 3.0 \n",
"43 1.258904e+06 2.2 \n",
"44 1.126688e+06 1.4 \n",
"45 1.144246e+06 1.4 \n",
"46 9.795370e+05 2.4 \n",
"47 2.490863e+06 2.0 \n",
"48 2.615137e+06 2.0 \n",
"49 1.745824e+06 1.4 \n",
"\n",
" resign_risk_indicator adjusted_work_time \n",
"0 Medium 9.857106 \n",
"1 Medium 9.694593 \n",
"2 Medium 9.059429 \n",
"3 Medium 9.842189 \n",
"4 Medium 9.047730 \n",
"5 Medium 9.114481 \n",
"6 Medium 9.320000 \n",
"7 Medium 9.091639 \n",
"8 Medium 9.479833 \n",
"9 Medium 9.077272 \n",
"10 Medium 9.599453 \n",
"11 Medium 9.781063 \n",
"12 Medium 9.494477 \n",
"13 Medium 9.786442 \n",
"14 Medium 9.773272 \n",
"15 Medium 9.072831 \n",
"16 Medium 9.390000 \n",
"17 Medium 9.562408 \n",
"18 Medium 9.313570 \n",
"19 Medium 9.372688 \n",
"20 Medium 9.190102 \n",
"21 Medium 9.825135 \n",
"22 Medium 9.705073 \n",
"23 Medium 9.733698 \n",
"24 Medium 9.087215 \n",
"25 Medium 9.108286 \n",
"26 Medium 9.580014 \n",
"27 Medium 9.188379 \n",
"28 Medium 9.357764 \n",
"29 Medium 9.107099 \n",
"30 Medium 9.250325 \n",
"31 Medium 9.134709 \n",
"32 Medium 9.220359 \n",
"33 Medium 9.133624 \n",
"34 Medium 9.668318 \n",
"35 Medium 9.720000 \n",
"36 Medium 9.150000 \n",
"37 Medium 9.695420 \n",
"38 Medium 9.550811 \n",
"39 Medium 9.773272 \n",
"40 Medium 9.371734 \n",
"41 Medium 9.342076 \n",
"42 Medium 9.327930 \n",
"43 Medium 9.798676 \n",
"44 Medium 9.400000 \n",
"45 Medium 9.140000 \n",
"46 Medium 9.478291 \n",
"47 Medium 9.106338 \n",
"48 Medium 9.342793 \n",
"49 Medium 9.208596 \n",
"\n",
"[50 rows x 37 columns]"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"\n",
"df_test = pd.read_csv('D:\\Tugas Akhir\\Codingan\\Development\\Data\\data_testing_resign_6.csv')\n",
"df_test"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"employee_id 0\n",
"domisili 0\n",
"jenis_kelamin 0\n",
"date_of_birth 0\n",
"join_date 0\n",
"resign_date 0\n",
"marriage_stat 0\n",
"dependant 0\n",
"education 0\n",
"absent_90D 0\n",
"avg_time_work 0\n",
"departemen 0\n",
"position 0\n",
"income 0\n",
"total_komp 0\n",
"job_satisfaction 0\n",
"performance_rating 0\n",
"churn_status 0\n",
"age_years 0\n",
"active_work 0\n",
"active_work_months 0\n",
"income_3_months 0\n",
"income_6_months 0\n",
"total_income_work 0\n",
"absence_ratio 0\n",
"income_dependant_ratio 0\n",
"work_efficiency 0\n",
"active_work_category 0\n",
"work_stability_score 0\n",
"married_dependent_ratio 0\n",
"position_score 0\n",
"job_income_position_score 0\n",
"education_score 0\n",
"education_income_ratio 0\n",
"weighted_satisfaction_performance 0\n",
"resign_risk_indicator 0\n",
"adjusted_work_time 0\n",
"dtype: int64"
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_test.isna().sum()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"import pickle\n",
"\n",
"# Security note: pickle.load can execute arbitrary code, so only load model files you trust.\n",
"# The context manager closes the file handle as soon as the model has been read\n",
"# (the previous bare open() call left it open).\n",
"with open('clasification_model.sav', 'rb') as model_file:\n",
"    final_model = pickle.load(model_file)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['departemen', 'position', 'income', 'domisili', 'marriage_stat', 'dependant', 'education', 'absent_90D', 'avg_time_work', 'total_komp', 'job_satisfaction', 'performance_rating', 'age_years', 'active_work', 'active_work_months', 'income_3_months', 'income_6_months', 'total_income_work', 'absence_ratio', 'income_dependant_ratio', 'work_efficiency', 'active_work_category', 'work_stability_score', 'married_dependent_ratio', 'position_score', 'job_income_position_score', 'education_score', 'education_income_ratio', 'weighted_satisfaction_performance', 'resign_risk_indicator', 'adjusted_work_time']\n"
]
}
],
"source": [
"expected_columns = final_model.feature_names_\n",
"print(expected_columns)"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Accuracy: 1.0\n",
"Precision: 1.0\n",
"Recall: 1.0\n",
"F1 Score: 1.0\n"
]
}
],
"source": [
"from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score\n",
"from catboost import Pool\n",
"\n",
"# Drop kolom yang tidak relevan\n",
"X_test = df_test.drop(['churn_status', 'employee_id', 'date_of_birth', 'join_date', 'resign_date', 'active_work_months'], axis=1)\n",
"\n",
"# Konversi semua kolom kategori ke string\n",
"cat_feature = ['departemen', 'position', 'domisili', 'marriage_stat', 'job_satisfaction', \n",
" 'performance_rating', 'education', 'active_work_category', 'resign_risk_indicator', 'jenis_kelamin']\n",
"\n",
"# Pastikan semua fitur kategori adalah string\n",
"for col in cat_feature:\n",
" if col in X_test.columns:\n",
" X_test[col] = X_test[col].astype(str)\n",
"\n",
"# Buat Pool untuk data uji\n",
"test_pool = Pool(data=X_test, cat_features=cat_feature)\n",
"\n",
"# Prediksi dengan model menggunakan Pool\n",
"y_pred = final_model.predict(test_pool)\n",
"\n",
"# Evaluasi\n",
"accuracy = accuracy_score(df_test['churn_status'], y_pred)\n",
"precision = precision_score(df_test['churn_status'], y_pred, zero_division=0)\n",
"recall = recall_score(df_test['churn_status'], y_pred, zero_division=0)\n",
"f1 = f1_score(df_test['churn_status'], y_pred, zero_division=0)\n",
"\n",
"print(\"Accuracy:\", accuracy)\n",
"print(\"Precision:\", precision)\n",
"print(\"Recall:\", recall)\n",
"print(\"F1 Score:\", f1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" income | \n",
" dependant | \n",
" absent_90D | \n",
" avg_time_work | \n",
" total_komp | \n",
" churn_status | \n",
" age_years | \n",
" active_work | \n",
" active_work_months | \n",
" income_3_months | \n",
" ... | \n",
" income_dependant_ratio | \n",
" work_efficiency | \n",
" work_stability_score | \n",
" married_dependent_ratio | \n",
" position_score | \n",
" job_income_position_score | \n",
" education_score | \n",
" education_income_ratio | \n",
" weighted_satisfaction_performance | \n",
" adjusted_work_time | \n",
"
\n",
" \n",
" \n",
" \n",
" count | \n",
" 8.120000e+02 | \n",
" 812.000000 | \n",
" 812.000000 | \n",
" 812.0 | \n",
" 812.000000 | \n",
" 812.0 | \n",
" 812.000000 | \n",
" 812.000000 | \n",
" 812.000000 | \n",
" 8.120000e+02 | \n",
" ... | \n",
" 8.120000e+02 | \n",
" 812.000 | \n",
" 812.000000 | \n",
" 812.000000 | \n",
" 812.000000 | \n",
" 8.120000e+02 | \n",
" 812.000000 | \n",
" 8.120000e+02 | \n",
" 812.000000 | \n",
" 812.000000 | \n",
"
\n",
" \n",
" mean | \n",
" 2.704077e+06 | \n",
" 1.443350 | \n",
" 5.703202 | \n",
" 9.0 | \n",
" 0.116995 | \n",
" 1.0 | \n",
" 37.488916 | \n",
" 601.076355 | \n",
" 19.556650 | \n",
" 8.112230e+06 | \n",
" ... | \n",
" 1.494665e+06 | \n",
" 1.125 | \n",
" 5.940580 | \n",
" 2.387931 | \n",
" 1.008621 | \n",
" 2.676485e+06 | \n",
" 1.838670 | \n",
" 1.650454e+06 | \n",
" 2.018966 | \n",
" 8.967059 | \n",
"
\n",
" \n",
" std | \n",
" 1.217016e+06 | \n",
" 1.272101 | \n",
" 4.420202 | \n",
" 0.0 | \n",
" 0.543866 | \n",
" 0.0 | \n",
" 8.914092 | \n",
" 195.811635 | \n",
" 6.545148 | \n",
" 3.651049e+06 | \n",
" ... | \n",
" 1.103701e+06 | \n",
" 0.000 | \n",
" 6.556615 | \n",
" 1.283876 | \n",
" 0.092504 | \n",
" 1.165877e+06 | \n",
" 1.136691 | \n",
" 5.549310e+05 | \n",
" 0.580476 | \n",
" 0.029627 | \n",
"
\n",
" \n",
" min | \n",
" 1.015570e+06 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 9.0 | \n",
" 0.000000 | \n",
" 1.0 | \n",
" 1.000000 | \n",
" 365.000000 | \n",
" 12.000000 | \n",
" 3.046710e+06 | \n",
" ... | \n",
" 1.907077e+05 | \n",
" 1.125 | \n",
" 0.705882 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.015570e+06 | \n",
" 1.000000 | \n",
" 7.524518e+05 | \n",
" 1.000000 | \n",
" 8.866790 | \n",
"
\n",
" \n",
" 25% | \n",
" 1.570747e+06 | \n",
" 0.000000 | \n",
" 2.000000 | \n",
" 9.0 | \n",
" 0.000000 | \n",
" 1.0 | \n",
" 30.000000 | \n",
" 428.000000 | \n",
" 14.000000 | \n",
" 4.712242e+06 | \n",
" ... | \n",
" 6.466449e+05 | \n",
" 1.125 | \n",
" 1.854396 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.570747e+06 | \n",
" 1.000000 | \n",
" 1.200502e+06 | \n",
" 1.600000 | \n",
" 8.950040 | \n",
"
\n",
" \n",
" 50% | \n",
" 2.561418e+06 | \n",
" 1.000000 | \n",
" 5.000000 | \n",
" 9.0 | \n",
" 0.000000 | \n",
" 1.0 | \n",
" 37.000000 | \n",
" 496.500000 | \n",
" 16.000000 | \n",
" 7.684256e+06 | \n",
" ... | \n",
" 1.168337e+06 | \n",
" 1.125 | \n",
" 3.200000 | \n",
" 2.000000 | \n",
" 1.000000 | \n",
" 2.561418e+06 | \n",
" 1.000000 | \n",
" 1.548386e+06 | \n",
" 2.000000 | \n",
" 8.973353 | \n",
"
\n",
" \n",
" 75% | \n",
" 3.692924e+06 | \n",
" 2.000000 | \n",
" 9.000000 | \n",
" 9.0 | \n",
" 0.000000 | \n",
" 1.0 | \n",
" 44.000000 | \n",
" 798.000000 | \n",
" 26.000000 | \n",
" 1.107877e+07 | \n",
" ... | \n",
" 1.879129e+06 | \n",
" 1.125 | \n",
" 7.000000 | \n",
" 3.000000 | \n",
" 1.000000 | \n",
" 3.679853e+06 | \n",
" 3.000000 | \n",
" 2.048790e+06 | \n",
" 2.400000 | \n",
" 8.991674 | \n",
"
\n",
" \n",
" max | \n",
" 7.855813e+06 | \n",
" 6.000000 | \n",
" 16.000000 | \n",
" 9.0 | \n",
" 12.000000 | \n",
" 1.0 | \n",
" 57.000000 | \n",
" 1095.000000 | \n",
" 36.000000 | \n",
" 2.356744e+07 | \n",
" ... | \n",
" 7.855813e+06 | \n",
" 1.125 | \n",
" 33.000000 | \n",
" 7.000000 | \n",
" 2.000000 | \n",
" 4.982560e+06 | \n",
" 5.000000 | \n",
" 2.996378e+06 | \n",
" 3.000000 | \n",
" 9.000000 | \n",
"
\n",
" \n",
"
\n",
"
8 rows × 23 columns
\n",
"
"
],
"text/plain": [
" income dependant absent_90D avg_time_work total_komp \\\n",
"count 8.120000e+02 812.000000 812.000000 812.0 812.000000 \n",
"mean 2.704077e+06 1.443350 5.703202 9.0 0.116995 \n",
"std 1.217016e+06 1.272101 4.420202 0.0 0.543866 \n",
"min 1.015570e+06 0.000000 0.000000 9.0 0.000000 \n",
"25% 1.570747e+06 0.000000 2.000000 9.0 0.000000 \n",
"50% 2.561418e+06 1.000000 5.000000 9.0 0.000000 \n",
"75% 3.692924e+06 2.000000 9.000000 9.0 0.000000 \n",
"max 7.855813e+06 6.000000 16.000000 9.0 12.000000 \n",
"\n",
" churn_status age_years active_work active_work_months \\\n",
"count 812.0 812.000000 812.000000 812.000000 \n",
"mean 1.0 37.488916 601.076355 19.556650 \n",
"std 0.0 8.914092 195.811635 6.545148 \n",
"min 1.0 1.000000 365.000000 12.000000 \n",
"25% 1.0 30.000000 428.000000 14.000000 \n",
"50% 1.0 37.000000 496.500000 16.000000 \n",
"75% 1.0 44.000000 798.000000 26.000000 \n",
"max 1.0 57.000000 1095.000000 36.000000 \n",
"\n",
" income_3_months ... income_dependant_ratio work_efficiency \\\n",
"count 8.120000e+02 ... 8.120000e+02 812.000 \n",
"mean 8.112230e+06 ... 1.494665e+06 1.125 \n",
"std 3.651049e+06 ... 1.103701e+06 0.000 \n",
"min 3.046710e+06 ... 1.907077e+05 1.125 \n",
"25% 4.712242e+06 ... 6.466449e+05 1.125 \n",
"50% 7.684256e+06 ... 1.168337e+06 1.125 \n",
"75% 1.107877e+07 ... 1.879129e+06 1.125 \n",
"max 2.356744e+07 ... 7.855813e+06 1.125 \n",
"\n",
" work_stability_score married_dependent_ratio position_score \\\n",
"count 812.000000 812.000000 812.000000 \n",
"mean 5.940580 2.387931 1.008621 \n",
"std 6.556615 1.283876 0.092504 \n",
"min 0.705882 1.000000 1.000000 \n",
"25% 1.854396 1.000000 1.000000 \n",
"50% 3.200000 2.000000 1.000000 \n",
"75% 7.000000 3.000000 1.000000 \n",
"max 33.000000 7.000000 2.000000 \n",
"\n",
" job_income_position_score education_score education_income_ratio \\\n",
"count 8.120000e+02 812.000000 8.120000e+02 \n",
"mean 2.676485e+06 1.838670 1.650454e+06 \n",
"std 1.165877e+06 1.136691 5.549310e+05 \n",
"min 1.015570e+06 1.000000 7.524518e+05 \n",
"25% 1.570747e+06 1.000000 1.200502e+06 \n",
"50% 2.561418e+06 1.000000 1.548386e+06 \n",
"75% 3.679853e+06 3.000000 2.048790e+06 \n",
"max 4.982560e+06 5.000000 2.996378e+06 \n",
"\n",
" weighted_satisfaction_performance adjusted_work_time \n",
"count 812.000000 812.000000 \n",
"mean 2.018966 8.967059 \n",
"std 0.580476 0.029627 \n",
"min 1.000000 8.866790 \n",
"25% 1.600000 8.950040 \n",
"50% 2.000000 8.973353 \n",
"75% 2.400000 8.991674 \n",
"max 3.000000 9.000000 \n",
"\n",
"[8 rows x 23 columns]"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_test.describe()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Feature | \n",
" Importance | \n",
"
\n",
" \n",
" \n",
" \n",
" 21 | \n",
" active_work_category | \n",
" 54.255294 | \n",
"
\n",
" \n",
" 13 | \n",
" active_work | \n",
" 7.777737 | \n",
"
\n",
" \n",
" 1 | \n",
" position | \n",
" 7.155448 | \n",
"
\n",
" \n",
" 14 | \n",
" active_work_months | \n",
" 4.230350 | \n",
"
\n",
" \n",
" 24 | \n",
" position_score | \n",
" 3.667581 | \n",
"
\n",
" \n",
" 26 | \n",
" education_score | \n",
" 3.106486 | \n",
"
\n",
" \n",
" 16 | \n",
" income_6_months | \n",
" 3.089712 | \n",
"
\n",
" \n",
" 6 | \n",
" education | \n",
" 2.826131 | \n",
"
\n",
" \n",
" 2 | \n",
" income | \n",
" 2.385703 | \n",
"
\n",
" \n",
" 17 | \n",
" total_income_work | \n",
" 2.048091 | \n",
"
\n",
" \n",
" 15 | \n",
" income_3_months | \n",
" 1.777421 | \n",
"
\n",
" \n",
" 29 | \n",
" resign_risk_indicator | \n",
" 1.190863 | \n",
"
\n",
" \n",
" 25 | \n",
" job_income_position_score | \n",
" 1.018409 | \n",
"
\n",
" \n",
" 12 | \n",
" age_years | \n",
" 0.762253 | \n",
"
\n",
" \n",
" 19 | \n",
" income_dependant_ratio | \n",
" 0.684176 | \n",
"
\n",
" \n",
" 4 | \n",
" marriage_stat | \n",
" 0.508788 | \n",
"
\n",
" \n",
" 3 | \n",
" domisili | \n",
" 0.435528 | \n",
"
\n",
" \n",
" 23 | \n",
" married_dependent_ratio | \n",
" 0.357841 | \n",
"
\n",
" \n",
" 27 | \n",
" education_income_ratio | \n",
" 0.335609 | \n",
"
\n",
" \n",
" 22 | \n",
" work_stability_score | \n",
" 0.309915 | \n",
"
\n",
" \n",
" 30 | \n",
" adjusted_work_time | \n",
" 0.308920 | \n",
"
\n",
" \n",
" 28 | \n",
" weighted_satisfaction_performance | \n",
" 0.283635 | \n",
"
\n",
" \n",
" 18 | \n",
" absence_ratio | \n",
" 0.282780 | \n",
"
\n",
" \n",
" 0 | \n",
" departemen | \n",
" 0.259452 | \n",
"
\n",
" \n",
" 5 | \n",
" dependant | \n",
" 0.241856 | \n",
"
\n",
" \n",
" 7 | \n",
" absent_90D | \n",
" 0.223434 | \n",
"
\n",
" \n",
" 11 | \n",
" performance_rating | \n",
" 0.206559 | \n",
"
\n",
" \n",
" 10 | \n",
" job_satisfaction | \n",
" 0.148087 | \n",
"
\n",
" \n",
" 9 | \n",
" total_komp | \n",
" 0.119943 | \n",
"
\n",
" \n",
" 8 | \n",
" avg_time_work | \n",
" 0.001582 | \n",
"
\n",
" \n",
" 20 | \n",
" work_efficiency | \n",
" 0.000416 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Feature Importance\n",
"21 active_work_category 54.255294\n",
"13 active_work 7.777737\n",
"1 position 7.155448\n",
"14 active_work_months 4.230350\n",
"24 position_score 3.667581\n",
"26 education_score 3.106486\n",
"16 income_6_months 3.089712\n",
"6 education 2.826131\n",
"2 income 2.385703\n",
"17 total_income_work 2.048091\n",
"15 income_3_months 1.777421\n",
"29 resign_risk_indicator 1.190863\n",
"25 job_income_position_score 1.018409\n",
"12 age_years 0.762253\n",
"19 income_dependant_ratio 0.684176\n",
"4 marriage_stat 0.508788\n",
"3 domisili 0.435528\n",
"23 married_dependent_ratio 0.357841\n",
"27 education_income_ratio 0.335609\n",
"22 work_stability_score 0.309915\n",
"30 adjusted_work_time 0.308920\n",
"28 weighted_satisfaction_performance 0.283635\n",
"18 absence_ratio 0.282780\n",
"0 departemen 0.259452\n",
"5 dependant 0.241856\n",
"7 absent_90D 0.223434\n",
"11 performance_rating 0.206559\n",
"10 job_satisfaction 0.148087\n",
"9 total_komp 0.119943\n",
"8 avg_time_work 0.001582\n",
"20 work_efficiency 0.000416"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Rank the training columns by the importance scores reported by `model`, highest first.\n",
"# NOTE(review): a later cell pickles `final_model`; confirm `model` is the estimator intended here.\n",
"feature_names = list(X_train.columns)\n",
"feature_importance = model.get_feature_importance()\n",
"\n",
"feature_importance_df = pd.DataFrame(\n",
"    {'Feature': feature_names, 'Importance': feature_importance}\n",
").sort_values('Importance', ascending=False)\n",
"feature_importance_df"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CatBoost Classification model saved to 'clasification_model.sav'\n"
]
}
],
"source": [
"import pickle\n",
"\n",
"# Serialize `final_model` to disk with pickle (binary mode).\n",
"# NOTE(review): the file name is spelled 'clasification'; left unchanged in case other code loads it by this exact name.\n",
"# Pickle files can execute arbitrary code when loaded, so only load this file from a trusted location.\n",
"with open('clasification_model.sav', mode='wb') as model_file:\n",
"    pickle.Pickler(model_file).dump(final_model)\n",
"print(\"CatBoost Classification model saved to 'clasification_model.sav'\")"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Collecting streamlit-option-menu\n",
" Using cached streamlit_option_menu-0.4.0-py3-none-any.whl.metadata (2.5 kB)\n",
"Collecting streamlit>=1.36 (from streamlit-option-menu)\n",
" Downloading streamlit-1.41.1-py2.py3-none-any.whl.metadata (8.5 kB)\n",
"Requirement already satisfied: altair<6,>=4.0 in c:\\users\\jesselyn mu\\anaconda3\\lib\\site-packages (from streamlit>=1.36->streamlit-option-menu) (5.2.0)\n",
"Requirement already satisfied: blinker<2,>=1.0.0 in c:\\users\\jesselyn mu\\anaconda3\\lib\\site-packages (from streamlit>=1.36->streamlit-option-menu) (1.7.0)\n",
"Requirement already satisfied: cachetools<6,>=4.0 in c:\\users\\jesselyn mu\\anaconda3\\lib\\site-packages (from streamlit>=1.36->streamlit-option-menu) (4.2.2)\n",
"Requirement already satisfied: click<9,>=7.0 in c:\\users\\jesselyn mu\\anaconda3\\lib\\site-packages (from streamlit>=1.36->streamlit-option-menu) (7.1.2)\n",
"Collecting numpy<3,>=1.23 (from streamlit>=1.36->streamlit-option-menu)\n",
" Downloading numpy-2.0.2-cp39-cp39-win_amd64.whl.metadata (59 kB)\n",
"Requirement already satisfied: packaging<25,>=20 in c:\\users\\jesselyn mu\\anaconda3\\lib\\site-packages (from streamlit>=1.36->streamlit-option-menu) (23.1)\n",
"Requirement already satisfied: pandas<3,>=1.4.0 in c:\\users\\jesselyn mu\\anaconda3\\lib\\site-packages (from streamlit>=1.36->streamlit-option-menu) (1.4.2)\n",
"Requirement already satisfied: pillow<12,>=7.1.0 in c:\\users\\jesselyn mu\\anaconda3\\lib\\site-packages (from streamlit>=1.36->streamlit-option-menu) (10.0.1)\n",
"Requirement already satisfied: protobuf<6,>=3.20 in c:\\users\\jesselyn mu\\anaconda3\\lib\\site-packages (from streamlit>=1.36->streamlit-option-menu) (3.20.3)\n",
"Requirement already satisfied: pyarrow>=7.0 in c:\\users\\jesselyn mu\\anaconda3\\lib\\site-packages (from streamlit>=1.36->streamlit-option-menu) (14.0.2)\n",
"Requirement already satisfied: requests<3,>=2.27 in c:\\users\\jesselyn mu\\anaconda3\\lib\\site-packages (from streamlit>=1.36->streamlit-option-menu) (2.31.0)\n",
"Requirement already satisfied: rich<14,>=10.14.0 in c:\\users\\jesselyn mu\\anaconda3\\lib\\site-packages (from streamlit>=1.36->streamlit-option-menu) (13.7.0)\n",
"Requirement already satisfied: tenacity<10,>=8.1.0 in c:\\users\\jesselyn mu\\anaconda3\\lib\\site-packages (from streamlit>=1.36->streamlit-option-menu) (8.2.2)\n",
"Requirement already satisfied: toml<2,>=0.10.1 in c:\\users\\jesselyn mu\\anaconda3\\lib\\site-packages (from streamlit>=1.36->streamlit-option-menu) (0.10.2)\n",
"Requirement already satisfied: typing-extensions<5,>=4.3.0 in c:\\users\\jesselyn mu\\anaconda3\\lib\\site-packages (from streamlit>=1.36->streamlit-option-menu) (4.7.1)\n",
"Requirement already satisfied: watchdog<7,>=2.1.5 in c:\\users\\jesselyn mu\\anaconda3\\lib\\site-packages (from streamlit>=1.36->streamlit-option-menu) (2.1.6)\n",
"Requirement already satisfied: gitpython!=3.1.19,<4,>=3.0.7 in c:\\users\\jesselyn mu\\anaconda3\\lib\\site-packages (from streamlit>=1.36->streamlit-option-menu) (3.1.41)\n",
"Requirement already satisfied: pydeck<1,>=0.8.0b4 in c:\\users\\jesselyn mu\\anaconda3\\lib\\site-packages (from streamlit>=1.36->streamlit-option-menu) (0.8.1b0)\n",
"Requirement already satisfied: tornado<7,>=6.0.3 in c:\\users\\jesselyn mu\\anaconda3\\lib\\site-packages (from streamlit>=1.36->streamlit-option-menu) (6.3.3)\n",
"Requirement already satisfied: jinja2 in c:\\users\\jesselyn mu\\anaconda3\\lib\\site-packages (from altair<6,>=4.0->streamlit>=1.36->streamlit-option-menu) (3.1.2)\n",
"Requirement already satisfied: jsonschema>=3.0 in c:\\users\\jesselyn mu\\anaconda3\\lib\\site-packages (from altair<6,>=4.0->streamlit>=1.36->streamlit-option-menu) (3.2.0)\n",
"Requirement already satisfied: toolz in c:\\users\\jesselyn mu\\anaconda3\\lib\\site-packages (from altair<6,>=4.0->streamlit>=1.36->streamlit-option-menu) (0.12.0)\n",
"Requirement already satisfied: gitdb<5,>=4.0.1 in c:\\users\\jesselyn mu\\anaconda3\\lib\\site-packages (from gitpython!=3.1.19,<4,>=3.0.7->streamlit>=1.36->streamlit-option-menu) (4.0.11)\n",
"Requirement already satisfied: python-dateutil>=2.8.1 in c:\\users\\jesselyn mu\\anaconda3\\lib\\site-packages (from pandas<3,>=1.4.0->streamlit>=1.36->streamlit-option-menu) (2.8.2)\n",
"Requirement already satisfied: pytz>=2020.1 in c:\\users\\jesselyn mu\\anaconda3\\lib\\site-packages (from pandas<3,>=1.4.0->streamlit>=1.36->streamlit-option-menu) (2023.3.post1)\n",
"Requirement already satisfied: charset-normalizer<4,>=2 in c:\\users\\jesselyn mu\\anaconda3\\lib\\site-packages (from requests<3,>=2.27->streamlit>=1.36->streamlit-option-menu) (2.0.4)\n",
"Requirement already satisfied: idna<4,>=2.5 in c:\\users\\jesselyn mu\\anaconda3\\lib\\site-packages (from requests<3,>=2.27->streamlit>=1.36->streamlit-option-menu) (2.10)\n",
"Requirement already satisfied: urllib3<3,>=1.21.1 in c:\\users\\jesselyn mu\\anaconda3\\lib\\site-packages (from requests<3,>=2.27->streamlit>=1.36->streamlit-option-menu) (1.26.16)\n",
"Requirement already satisfied: certifi>=2017.4.17 in c:\\users\\jesselyn mu\\anaconda3\\lib\\site-packages (from requests<3,>=2.27->streamlit>=1.36->streamlit-option-menu) (2024.6.2)\n",
"Requirement already satisfied: markdown-it-py>=2.2.0 in c:\\users\\jesselyn mu\\anaconda3\\lib\\site-packages (from rich<14,>=10.14.0->streamlit>=1.36->streamlit-option-menu) (3.0.0)\n",
"Requirement already satisfied: pygments<3.0.0,>=2.13.0 in c:\\users\\jesselyn mu\\anaconda3\\lib\\site-packages (from rich<14,>=10.14.0->streamlit>=1.36->streamlit-option-menu) (2.15.1)\n",
"Requirement already satisfied: smmap<6,>=3.0.1 in c:\\users\\jesselyn mu\\anaconda3\\lib\\site-packages (from gitdb<5,>=4.0.1->gitpython!=3.1.19,<4,>=3.0.7->streamlit>=1.36->streamlit-option-menu) (5.0.1)\n",
"Requirement already satisfied: MarkupSafe>=2.0 in c:\\users\\jesselyn mu\\anaconda3\\lib\\site-packages (from jinja2->altair<6,>=4.0->streamlit>=1.36->streamlit-option-menu) (2.1.1)\n",
"Requirement already satisfied: attrs>=17.4.0 in c:\\users\\jesselyn mu\\anaconda3\\lib\\site-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit>=1.36->streamlit-option-menu) (23.1.0)\n",
"Requirement already satisfied: pyrsistent>=0.14.0 in c:\\users\\jesselyn mu\\anaconda3\\lib\\site-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit>=1.36->streamlit-option-menu) (0.18.0)\n",
"Requirement already satisfied: setuptools in c:\\users\\jesselyn mu\\anaconda3\\lib\\site-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit>=1.36->streamlit-option-menu) (68.0.0)\n",
"Requirement already satisfied: six>=1.11.0 in c:\\users\\jesselyn mu\\anaconda3\\lib\\site-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit>=1.36->streamlit-option-menu) (1.16.0)\n",
"Requirement already satisfied: mdurl~=0.1 in c:\\users\\jesselyn mu\\anaconda3\\lib\\site-packages (from markdown-it-py>=2.2.0->rich<14,>=10.14.0->streamlit>=1.36->streamlit-option-menu) (0.1.2)\n",
"Downloading streamlit_option_menu-0.4.0-py3-none-any.whl (829 kB)\n",
" ---------------------------------------- 0.0/829.3 kB ? eta -:--:--\n",
" ------------------------- -------------- 524.3/829.3 kB 4.2 MB/s eta 0:00:01\n",
" ---------------------------------------- 829.3/829.3 kB 3.3 MB/s eta 0:00:00\n",
"Downloading streamlit-1.41.1-py2.py3-none-any.whl (9.1 MB)\n",
" ---------------------------------------- 0.0/9.1 MB ? eta -:--:--\n",
" --- ------------------------------------ 0.8/9.1 MB 4.8 MB/s eta 0:00:02\n",
" ----- ---------------------------------- 1.3/9.1 MB 3.5 MB/s eta 0:00:03\n",
" -------- ------------------------------- 1.8/9.1 MB 4.0 MB/s eta 0:00:02\n",
" ------------- -------------------------- 3.1/9.1 MB 3.9 MB/s eta 0:00:02\n",
" ----------------- ---------------------- 3.9/9.1 MB 4.1 MB/s eta 0:00:02\n",
" -------------------- ------------------- 4.7/9.1 MB 3.8 MB/s eta 0:00:02\n",
" ----------------------- ---------------- 5.2/9.1 MB 3.7 MB/s eta 0:00:02\n",
" -------------------------- ------------- 6.0/9.1 MB 3.7 MB/s eta 0:00:01\n",
" ---------------------------- ----------- 6.6/9.1 MB 3.5 MB/s eta 0:00:01\n",
" --------------------------------- ------ 7.6/9.1 MB 3.6 MB/s eta 0:00:01\n",
" -------------------------------------- - 8.7/9.1 MB 3.7 MB/s eta 0:00:01\n",
" ---------------------------------------- 9.1/9.1 MB 3.7 MB/s eta 0:00:00\n",
"Downloading numpy-2.0.2-cp39-cp39-win_amd64.whl (15.9 MB)\n",
" ---------------------------------------- 0.0/15.9 MB ? eta -:--:--\n",
" - -------------------------------------- 0.5/15.9 MB 3.3 MB/s eta 0:00:05\n",
" ---- ----------------------------------- 1.8/15.9 MB 5.0 MB/s eta 0:00:03\n",
" ------- -------------------------------- 3.1/15.9 MB 5.4 MB/s eta 0:00:03\n",
" ---------- ----------------------------- 4.2/15.9 MB 5.5 MB/s eta 0:00:03\n",
" ------------- -------------------------- 5.5/15.9 MB 5.6 MB/s eta 0:00:02\n",
" ---------------- ----------------------- 6.6/15.9 MB 5.6 MB/s eta 0:00:02\n",
" ------------------- -------------------- 7.9/15.9 MB 5.5 MB/s eta 0:00:02\n",
" --------------------- ------------------ 8.7/15.9 MB 5.3 MB/s eta 0:00:02\n",
" ------------------------ --------------- 9.7/15.9 MB 5.2 MB/s eta 0:00:02\n",
" --------------------------- ------------ 11.0/15.9 MB 5.3 MB/s eta 0:00:01\n",
" ----------------------------- ---------- 11.8/15.9 MB 5.1 MB/s eta 0:00:01\n",
" ------------------------------ --------- 12.3/15.9 MB 4.9 MB/s eta 0:00:01\n",
" --------------------------------- ------ 13.4/15.9 MB 4.9 MB/s eta 0:00:01\n",
" ---------------------------------- ----- 13.9/15.9 MB 4.8 MB/s eta 0:00:01\n",
" -------------------------------------- - 15.2/15.9 MB 4.8 MB/s eta 0:00:01\n",
" ---------------------------------------- 15.9/15.9 MB 4.7 MB/s eta 0:00:00\n",
"Installing collected packages: numpy, streamlit, streamlit-option-menu\n",
" Attempting uninstall: numpy\n",
" Found existing installation: numpy 1.22.4\n",
" Uninstalling numpy-1.22.4:\n",
" Successfully uninstalled numpy-1.22.4\n",
" Attempting uninstall: streamlit\n",
" Found existing installation: streamlit 1.31.0\n",
" Uninstalling streamlit-1.31.0:\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"WARNING: Ignoring invalid distribution -pencv-python (c:\\users\\jesselyn mu\\anaconda3\\lib\\site-packages)\n",
"WARNING: Ignoring invalid distribution -treamlit (c:\\users\\jesselyn mu\\anaconda3\\lib\\site-packages)\n",
"WARNING: Ignoring invalid distribution -pencv-python (c:\\users\\jesselyn mu\\anaconda3\\lib\\site-packages)\n",
"WARNING: Ignoring invalid distribution -treamlit (c:\\users\\jesselyn mu\\anaconda3\\lib\\site-packages)\n",
" WARNING: Failed to remove contents in a temporary directory 'C:\\Users\\Jesselyn Mu\\anaconda3\\Lib\\site-packages\\~umpy'.\n",
" You can safely remove it manually.\n",
"ERROR: Could not install packages due to an OSError: [WinError 32] The process cannot access the file because it is being used by another process: 'c:\\\\users\\\\jesselyn mu\\\\anaconda3\\\\scripts\\\\streamlit.exe'\n",
"Consider using the `--user` option or check the permissions.\n",
"\n",
"\n",
"[notice] A new release of pip is available: 23.2.1 -> 24.3.1\n",
"[notice] To update, run: python.exe -m pip install --upgrade pip\n"
]
}
],
"source": [
"# Install the menu widget for the Streamlit app, pinned to the version resolved in this cell's recorded output.\n",
"# The unpinned install replaced numpy 1.22.4 with 2.0.2 next to pandas 1.4.2, so numpy is kept below 2 here.\n",
"# Stop any running Streamlit process first: the recorded run failed because streamlit.exe was in use.\n",
"%pip install -q streamlit-option-menu==0.4.0 \"numpy<2\""
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.16"
}
},
"nbformat": 4,
"nbformat_minor": 2
}