first commit

This commit is contained in:
Jesselyn Mu
2025-07-11 09:05:19 +07:00
parent 1506c2d72c
commit 572cd4c572
22 changed files with 4533 additions and 2226 deletions

View File

@@ -13,7 +13,7 @@ import streamlit.components.v1 as components
import time
import json
# Unpickle the regression and classification model objects from local .sav files.
# NOTE(review): `reg_model` is assigned twice in this view. This looks like the
# before/after pair of a diff hunk (original file name, then the "_1year" one).
# If both lines really execute, the second assignment wins -- confirm which is live.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file, so
# these .sav files must come from a trusted source. The file objects returned by
# open() are never explicitly closed here.
reg_model = pickle.load(open('regression_model_final.sav', 'rb'))
reg_model = pickle.load(open('regression_model_final_1year.sav', 'rb'))
# The file name is spelled 'clasification' (single "s"); it has to match the file on disk.
class_model = pickle.load(open('clasification_final_model_smote.sav', 'rb'))
# Path to a CSV file; how it is consumed is not visible in this excerpt.
train_file_path = 'X_train.csv'
@@ -81,34 +81,6 @@ def process_employee_data(df):
df["income_dependant_ratio"] = df["income"] / (df["dependant"] + 1)
df["work_efficiency"] = df["avg_time_work"] / 8
def categorize_work_duration_months(months):
    """Map a work duration in months to a duration label.

    Returns "Short-term" for values below 12, "Mid-term" for values from
    12 through 36 inclusive, and "Long-term" for everything else.
    """
    if months < 12:
        return "Short-term"
    # Anything that fails both range checks (values above 36, but also NaN,
    # which compares False against everything) ends up as "Long-term".
    return "Mid-term" if 12 <= months <= 36 else "Long-term"
df['active_work_category'] = df['active_work_months'].apply(categorize_work_duration_months)
# Work Stability Score
df['work_stability_score'] = df['active_work_months'] / (df['absent_90D'] + 1)
# Job Income to Position Score
position_score_mapping = {'Junior': 2, 'Staff': 1, 'Senior': 3, 'Manager': 4}
df['position_score'] = df['position'].map(position_score_mapping)
df['job_income_position_score'] = df['income'] / df['position_score']
# Education-Adjusted Income
education_score_mapping = {'SLTA': 1, 'D1': 2, 'D2': 3, 'D3': 4, 'S1': 5, 'S2': 6, 'S3': 7}
df['education_score'] = df['education'].map(education_score_mapping)
df['education_income_ratio'] = df['income'] / df['education_score']
# Weighted Satisfaction-Performance Score
df['weighted_satisfaction_performance'] = (
0.6 * df['job_satisfaction'] + 0.4 * df['performance_rating']
)
job_satisfaction_mapping = {1.0: 'Low', 2.0: 'Medium', 3.0: 'High', 4.0: 'Very High'}
df['job_satisfaction'] = df['job_satisfaction'].map(job_satisfaction_mapping)
@@ -383,7 +355,7 @@ def show_prediction():
# Kolom kategori
cat_feature = ['departemen', 'position', 'domisili', 'marriage_stat', 'job_satisfaction',
'performance_rating', 'education', 'active_work_category', 'jenis_kelamin']
'performance_rating', 'education', 'jenis_kelamin']
X_test_class = df[expected_columns_class]
X_test_reg = df[expected_columns_reg]