334 lines
10 KiB
Python
334 lines
10 KiB
Python
#2_jurnaling.py
|
|
import streamlit as st
|
|
import numpy as np
|
|
import pickle
|
|
import tensorflow as tf
|
|
import matplotlib.pyplot as plt
|
|
import pandas as pd
|
|
import gdown
|
|
import os
|
|
import re
|
|
from Sastrawi.Stemmer.StemmerFactory import StemmerFactory
|
|
from nltk.corpus import stopwords
|
|
|
|
import nltk
|
|
# Streamlit re-executes this script from the top on every interaction, so only
# call the NLTK downloader when the stopword corpus cannot be found locally.
try:
    nltk.data.find('corpora/stopwords')
except LookupError:
    nltk.download('stopwords')

# Page-level Streamlit configuration.
st.set_page_config(
    page_title="Analisis Journaling",
    layout="wide",
    initial_sidebar_state="collapsed"
)

# Remote locations (Google Drive) of the model and tokenizer artifacts.
MODEL_URL = "https://drive.google.com/uc?id=1ArBDPUBcPMdsUzH_dKwdjVEK-DyXG9qz"
TOKENIZER_URL = "https://drive.google.com/uc?id=1YVdwW-58y1Jie01MOjkd-4nY3bWFmt0E"

# Local paths the artifacts are downloaded to and loaded from.
MODEL_PATH = "model/Pemodelan_GRU_Valid.keras"
TOKENIZER_PATH = "model/tokenizer_Valid.pkl"

# Make sure the download directory exists before anything is written to it.
os.makedirs("model", exist_ok=True)
|
|
|
|
@st.cache_resource
def load_model():
    """Load the Keras model stored at MODEL_PATH, downloading it first if absent.

    Returns:
        The object returned by ``tf.keras.models.load_model``, or ``None`` when
        downloading or loading raises. In that case an error and a warning are
        shown in the Streamlit page.
    """
    loaded_model = None
    try:
        needs_download = not os.path.exists(MODEL_PATH)
        if needs_download:
            st.info("Mengunduh model dari Google Drive...")
            gdown.download(MODEL_URL, MODEL_PATH, quiet=False)
        loaded_model = tf.keras.models.load_model(MODEL_PATH)
    except Exception as e:
        # Any failure is reported in the UI; the caller receives None.
        st.error(f"Gagal memuat model: {str(e)}")
        st.warning("Model tidak ditemukan. Menggunakan data demo.")
    return loaded_model
|
|
|
|
@st.cache_resource
def load_tokenizer():
    """Load the pickled tokenizer at TOKENIZER_PATH, downloading it first if absent.

    Returns:
        The unpickled object, or ``None`` when downloading or unpickling raises.
        In that case an error and a warning are shown in the Streamlit page.
    """
    loaded_tokenizer = None
    try:
        needs_download = not os.path.exists(TOKENIZER_PATH)
        if needs_download:
            st.info("Mengunduh tokenizer dari Google Drive...")
            gdown.download(TOKENIZER_URL, TOKENIZER_PATH, quiet=False)
        # NOTE(review): pickle.load executes arbitrary code from the file; this
        # is only safe as long as the file behind TOKENIZER_URL is trusted.
        with open(TOKENIZER_PATH, "rb") as handle:
            loaded_tokenizer = pickle.load(handle)
    except Exception as e:
        # Any failure is reported in the UI; the caller receives None.
        st.error(f"Gagal memuat tokenizer: {str(e)}")
        st.warning("Tokenizer tidak ditemukan. Menggunakan data demo.")
    return loaded_tokenizer
|
|
|
|
# Shared inference resources; either value is None when its loader failed.
model, tokenizer = load_model(), load_tokenizer()

# Sequence length passed as `maxlen` to pad_sequences in analyze_text.
MAXLEN = 14

# Text-normalisation helpers used by clean_text: the Indonesian stopword set
# from NLTK and a Sastrawi stemmer.
indonesian_stopwords = stopwords.words('indonesian')
stop_words = set(indonesian_stopwords)
stemmer_factory = StemmerFactory()
stemmer = stemmer_factory.create_stemmer()
|
|
|
|
# Stylesheet for the whole page: header banners, button appearance, and the
# label classes referenced by the HTML snippets emitted further down.
PAGE_CSS = """
<style>
.main-header {
    color: white;
    background-color: #1E1E5A;
    padding: 1.5rem;
    text-align: center;
    border-radius: 10px 10px 0 0;
    margin-bottom: 0;
    font-size: 2rem;
    font-weight: bold;
}
.sub-header {
    color: white;
    background-color: #1E1E5A;
    font-size: 1rem;
    padding: 0.7rem;
    text-align: center;
    border-radius: 0 0 10px 10px;
    margin-bottom: 2rem;
    letter-spacing: 1px;
}
.text-input-container {
    background-color: #f7f7f7;
    padding: 1rem;
    border-radius: 10px;
    margin-bottom: 1.5rem;
    box-shadow: 0 2px 5px rgba(0,0,0,0.1);
}
.stButton > button {
    background-color: #007bff;
    color: white;
    padding: 10px 24px;
    border-radius: 8px;
    font-size: 1rem;
    border: none;
    cursor: pointer;
    transition: 0.3s;
    display: block;
    margin: 0 auto;
    width: 100%;
}
.stButton > button:hover {
    background-color: #0056b3;
    box-shadow: 0 4px 8px rgba(0,0,0,0.1);
}
.result-container {
    background-color: white;
    border-radius: 10px;
    padding: 1.5rem;
    box-shadow: 0 3px 10px rgba(0,0,0,0.1);
    margin-top: 2rem;
    margin-bottom: 2rem;
    animation: fadeIn 0.5s;
}
.custom-button-container {
    margin-top: 20px;
    margin-bottom: 20px;
}
@keyframes fadeIn {
    0% { opacity: 0; }
    100% { opacity: 1; }
}
.emotion-label {
    font-weight: bold;
    font-size: 1.1rem;
    margin-bottom: 0.5rem;
}
.emotion-score {
    margin-left: 10px;
    color: #555;
}
.sentiment-label {
    font-weight: bold;
    font-size: 1.1rem;
    margin-top: 1rem;
    margin-bottom: 0.5rem;
}
.chart-title {
    text-align: center;
    font-weight: bold;
    margin-bottom: 1rem;
    color: #333;
}
.divider {
    margin-top: 1.5rem;
    margin-bottom: 1.5rem;
    border-top: 1px solid #eee;
}
</style>
"""

# unsafe_allow_html is required so the <style> element is passed through as HTML.
st.markdown(PAGE_CSS, unsafe_allow_html=True)
|
|
|
|
# Page title and subtitle banners (styled by the .main-header / .sub-header CSS).
st.markdown('<div class="main-header">Analisis Jurnaling</div>', unsafe_allow_html=True)
st.markdown('<div class="sub-header">TULISKAN EKSPRESIMU DENGAN KATA-KATA</div>', unsafe_allow_html=True)

# Journal text entry. Streamlit discourages empty widget labels for
# accessibility reasons, so a real label is supplied and hidden visually.
text_input = st.text_area(
    "Isi jurnal",
    height=200,
    placeholder="Tuliskan isi jurnal anda di sini...",
    label_visibility="collapsed",
)

# Three columns with the button in the wider middle one to centre it.
col1, col2, col3 = st.columns([1, 2, 1])
with col2:
    st.markdown('<div class="custom-button-container">', unsafe_allow_html=True)
    analyze_button = st.button("Analisis Teks", key="analyze", use_container_width=True)
    st.markdown('</div>', unsafe_allow_html=True)
|
|
|
|
def clean_text(text):
    """Normalise raw text before it is tokenized.

    Steps, in order: lowercase; remove substrings matching ``http\\S+``; remove
    every character that is not an ASCII letter or whitespace; collapse
    whitespace runs to a single space and trim; drop words found in
    ``stop_words``; stem the remaining words with ``stemmer``.

    Args:
        text: The raw input string.

    Returns:
        The processed words joined by single spaces (may be an empty string).
    """
    normalized = text.lower()
    substitutions = (
        (r'http\S+', ''),       # URLs
        (r'[^a-zA-Z\s]', ''),   # anything that is not a letter or whitespace
        (r'\s+', ' '),          # runs of whitespace
    )
    for pattern, replacement in substitutions:
        normalized = re.sub(pattern, replacement, normalized)
    normalized = normalized.strip()

    stemmed_words = []
    for token in normalized.split():
        if token in stop_words:
            continue
        stemmed_words.append(stemmer.stem(token))
    return ' '.join(stemmed_words)
|
|
|
|
def analyze_text(text):
    """Score ``text`` against the seven emotion labels.

    Args:
        text: The raw journal text entered by the user.

    Returns:
        A dict with three keys:
        ``"emotions"`` (label -> float score), ``"dominant_emotion"`` (label
        string) and ``"text"`` (the unmodified input). Three outcomes exist:

        * ``model`` or ``tokenizer`` is ``None``: fixed demo scores are
          returned with ``"netral"`` as the dominant emotion.
        * The tokenizer produces an empty sequence for the cleaned text: a
          Streamlit warning is shown, every score is ``0.0`` and the dominant
          emotion is ``"tidak_dikenali"``.
        * Otherwise: scores come from ``model.predict`` and the dominant
          emotion is the label at the arg-max index.
    """
    if model is None or tokenizer is None:
        # Fallback used when the artifacts could not be loaded.
        demo_scores = {
            "marah": 0.01,
            "sedih": 0.02,
            "jijik": 0.048,
            "takut": 0.01,
            "bahagia": 0.01,
            "netral": 0.945,
            "terkejut": 0.005
        }
        return {"emotions": demo_scores, "dominant_emotion": "netral", "text": text}

    # Output index -> emotion label.
    label_mapping = {0: "marah", 1: "sedih", 2: "bahagia", 3: "takut", 4: "jijik", 5: "netral", 6: "terkejut"}

    sequences = tokenizer.texts_to_sequences([clean_text(text)])

    if not sequences[0]:
        # Nothing in the cleaned text was mapped to a token id.
        st.warning("Teks tidak mengandung kata yang dikenali oleh model. Coba gunakan kata-kata yang lebih umum.")
        return {
            "emotions": dict.fromkeys(label_mapping.values(), 0.0),
            "dominant_emotion": "tidak_dikenali",
            "text": text
        }

    padded = tf.keras.preprocessing.sequence.pad_sequences(sequences, maxlen=MAXLEN, padding='post')
    prediction = model.predict(padded, verbose=0)

    top_index = np.argmax(prediction, axis=1)[0]
    dominant = label_mapping[top_index]

    # Only one text was submitted, so row 0 holds its scores.
    score_row = prediction[0]
    scores_by_label = {name: float(score_row[idx]) for idx, name in label_mapping.items()}

    return {"emotions": scores_by_label, "dominant_emotion": dominant, "text": text}
|
|
|
|
# Run the analysis when the button was pressed in this script run.
# The result is stored in session state so it survives later reruns. No
# explicit st.rerun() is issued: the result section that follows reads the
# session state during this same run, and forcing a rerun would discard any
# warning analyze_text displayed (e.g. for text with no recognised words).
if analyze_button:
    # Treat whitespace-only input the same as empty input.
    if text_input.strip():
        st.session_state.text_analysis_result = analyze_text(text_input)
    else:
        st.warning("Silakan masukkan teks terlebih dahulu.")
|
|
|
|
def _render_percentage_bars(labels, values, colors, xlabel, figsize):
    """Render a horizontal bar chart of percentage values in the Streamlit page.

    Args:
        labels: Category names, one per bar.
        values: Bar lengths on a 0-100 scale.
        colors: One colour per bar.
        xlabel: Label for the x axis.
        figsize: ``(width, height)`` passed to ``plt.subplots``.

    The figure is always closed after rendering; pyplot otherwise keeps every
    figure it creates alive, so they would pile up across script reruns.
    """
    fig, ax = plt.subplots(figsize=figsize)
    try:
        bars = ax.barh(labels, values, color=colors, height=0.5)

        # Print the numeric value just to the right of each bar.
        for bar in bars:
            width = bar.get_width()
            ax.text(width + 1, bar.get_y() + bar.get_height() / 2, f'{width:.1f}%',
                    va='center', fontweight='bold')

        ax.set_xlim(0, 100)
        ax.set_xlabel(xlabel)
        ax.spines['top'].set_visible(False)
        ax.spines['right'].set_visible(False)
        ax.spines['bottom'].set_color('#DDDDDD')
        ax.spines['left'].set_color('#DDDDDD')
        ax.tick_params(bottom=False, left=False)
        ax.set_axisbelow(True)
        ax.grid(axis='x', linestyle='-', alpha=0.2)

        st.pyplot(fig)
    finally:
        plt.close(fig)


# Show the most recent analysis result, if one is stored in session state.
if 'text_analysis_result' in st.session_state:
    result = st.session_state.text_analysis_result

    st.markdown("### Hasil:")
    st.markdown("#### Emosi Yang Terdeteksi:")

    emotion_colors = {
        "marah": "#E53935",
        "sedih": "#7986CB",
        "jijik": "#8BC34A",
        "takut": "#FFB74D",
        "bahagia": "#4CAF50",
        "netral": "#9E9E9E",
        "terkejut": "#1E88E5"
    }

    # Three highest-scoring emotions, highest first.
    top_emotions = sorted(result["emotions"].items(), key=lambda x: x[1], reverse=True)[:3]

    for emotion, score in top_emotions:
        emotion_name = emotion.capitalize()
        score_percent = score * 100
        color = emotion_colors.get(emotion, "#FFFFFF")

        st.markdown(
            f'<div class="emotion-label" style="color:{color};">{emotion_name} <span class="emotion-score">{score_percent:.1f}%</span></div>',
            unsafe_allow_html=True
        )

    st.markdown('<div class="chart-title">Top 3 Emosi</div>', unsafe_allow_html=True)

    _render_percentage_bars(
        labels=[name.capitalize() for name, _ in top_emotions],
        values=[score * 100 for _, score in top_emotions],
        colors=[emotion_colors.get(name, "#1E88E5") for name, _ in top_emotions],
        xlabel='Confidence (%)',
        figsize=(10, 4),
    )

    # Aggregate the per-emotion scores into two sentiment buckets.
    positive_emotions = ["bahagia", "netral", "terkejut"]
    negative_emotions = ["marah", "sedih", "jijik", "takut"]

    positive_score = sum(result["emotions"][e] for e in positive_emotions) * 100
    negative_score = sum(result["emotions"][e] for e in negative_emotions) * 100

    st.markdown('<div class="divider"></div>', unsafe_allow_html=True)

    st.markdown(
        f'<div class="sentiment-label" style="color:#4CAF50;">Positive Sentiment <span class="emotion-score">{positive_score:.1f}%</span></div>',
        unsafe_allow_html=True
    )

    st.markdown(
        f'<div class="sentiment-label" style="color:#E53935;">Negative Sentiment <span class="emotion-score">{negative_score:.1f}%</span></div>',
        unsafe_allow_html=True
    )

    _render_percentage_bars(
        labels=["Positive", "Negative"],
        values=[positive_score, negative_score],
        colors=["#4CAF50", "#E53935"],
        xlabel='Sentiment Score (%)',
        figsize=(10, 2),
    )

    st.markdown('<div class="custom-button-container">', unsafe_allow_html=True)
    multimodal_button = st.button("Lihat Hasil Multimodal", key="multimodal", use_container_width=True)
    st.markdown('</div>', unsafe_allow_html=True)

    # Navigate to the multimodal results page on click.
    if multimodal_button:
        st.switch_page("pages/3_hasil.py")