Nima Kamali Lassem committed
Commit e4afdb3 · Parent(s): 9db40e8
all
Browse files:
- app.py +76 -0
- not.py +86 -0
- requirements.txt +7 -0
- tokenizer_config.json +1 -0
- vocab.txt +0 -0
app.py
ADDED
@@ -0,0 +1,76 @@
+import pandas as pd
+import numpy as np
+import tensorflow as tf
+from transformers.models.bert import BertTokenizer
+from transformers import TFBertModel
+import streamlit as st
+import pandas as pd
+from transformers import TFAutoModel
+
+
+
+# Hard-coded training history (12 epochs) for the dashboard chart.
+hist_loss = [0.1971, 0.0732, 0.0465, 0.0319, 0.0232, 0.0167, 0.0127, 0.0094, 0.0073, 0.0058, 0.0049, 0.0042]
+hist_acc = [0.9508, 0.9811, 0.9878, 0.9914, 0.9936, 0.9954, 0.9965, 0.9973, 0.9978, 0.9983, 0.9986, 0.9988]
+hist_val_acc = [0.9804, 0.9891, 0.9927, 0.9956, 0.9981, 0.998, 0.9991, 0.9997, 0.9991, 0.9998, 0.9998, 0.9998]
+hist_val_loss = [0.0759, 0.0454, 0.028, 0.015, 0.0063, 0.0064, 0.004, 0.0011, 0.0021, 0.00064548, 0.0010, 0.00042896]
+Epochs = [i for i in range(1, 13)]
+# Scale to percentages for display.
+hist_loss[:] = [x * 100 for x in hist_loss]
+hist_acc[:] = [x * 100 for x in hist_acc]
+hist_val_acc[:] = [x * 100 for x in hist_val_acc]
+hist_val_loss[:] = [x * 100 for x in hist_val_loss]
+d = {'val_acc': hist_val_acc, 'acc': hist_acc, 'loss': hist_loss, 'val_loss': hist_val_loss, 'Epochs': Epochs}
+chart_data = pd.DataFrame(d)
+chart_data.index = range(1, 13)
+
+@st.cache(suppress_st_warning=True, allow_output_mutation=True)
+def load_model(show_spinner=True):
+    yorum_model = TFAutoModel.from_pretrained("NimaKL/tc32_test")
+    tokenizer = BertTokenizer.from_pretrained('NimaKL/tc32_test')
+    return yorum_model, tokenizer
+
+st.set_page_config(layout='wide', initial_sidebar_state='expanded')
+col1, col2 = st.columns(2)
+with col1:
+    st.title("TC32 Multi-Class Text Classification")
+    st.subheader('Model Loss and Accuracy')
+    st.area_chart(chart_data)
+    yorum_model, tokenizer = load_model()
+
+
+with col2:
+    st.title("Sınıfı bulmak için bir şikayet girin.")  # "Enter a complaint to find its class."
+    st.subheader("Şikayet")  # "Complaint"
+    text = st.text_area('', height=240)
+    aButton = st.button('Ara')  # "Search"
+
+def prepare_data(input_text, tokenizer):
+    token = tokenizer.encode_plus(
+        input_text,
+        max_length=256,
+        truncation=True,
+        padding='max_length',
+        add_special_tokens=True,
+        return_tensors='tf'
+    )
+    return {
+        'input_ids': tf.cast(token.input_ids, tf.float64),
+        'attention_mask': tf.cast(token.attention_mask, tf.float64)
+    }
+
+def make_prediction(model, processed_data, classes=['Alışveriş', 'Anne-Bebek', 'Beyaz Eşya', 'Bilgisayar', 'Cep Telefonu', 'Eğitim', 'Elektronik', 'Emlak ve İnşaat', 'Enerji', 'Etkinlik ve Organizasyon', 'Finans', 'Gıda', 'Giyim', 'Hizmet', 'İçecek', 'İnternet', 'Kamu', 'Kargo-Nakliyat', 'Kozmetik', 'Küçük Ev Aletleri', 'Medya', 'Mekan ve Eğlence', 'Mobilya - Ev Tekstili', 'Mücevher Saat Gözlük', 'Mutfak Araç Gereç', 'Otomotiv', 'Sağlık', 'Sigorta', 'Spor', 'Temizlik', 'Turizm', 'Ulaşım']):
+    probs = model.predict(processed_data)[0]
+    return classes[np.argmax(probs)]
+
+
+if text or aButton:
+    with col2:
+        with st.spinner('Wait for it...'):
+            processed_data = prepare_data(text, tokenizer)
+            result = make_prediction(yorum_model, processed_data=processed_data)
+        description = '<table style="border-collapse: collapse;"><tr><div style="height: 62px;"></div></tr><tr><p style="border-width: medium; border-color: #aa5e70; border-radius: 10px; padding-top: 1px; padding-left: 20px; background: #20212a; font-family: Courier New; color: white; font-size: 36px; font-weight: bold;">' + result + '</p></tr></table>'
+        st.markdown(description, unsafe_allow_html=True)
+    with col1:
+        st.success("Tahmin başarıyla tamamlandı!")  # "Prediction completed successfully!"
+
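For reference, a minimal sketch of the same pipeline outside Streamlit, assuming the NimaKL/tc32_test checkpoint returns one score per class from predict() as make_prediction above expects; the sample complaint is illustrative, not part of the commit:

    import numpy as np
    import tensorflow as tf
    from transformers import TFAutoModel
    from transformers.models.bert import BertTokenizer

    # Same model and tokenizer IDs as load_model() in app.py.
    model = TFAutoModel.from_pretrained("NimaKL/tc32_test")
    tokenizer = BertTokenizer.from_pretrained("NimaKL/tc32_test")

    # Tokenize one complaint exactly as prepare_data() does.
    token = tokenizer.encode_plus(
        "Kargom iki haftadır elime ulaşmadı.",  # sample: "My shipment has not reached me in two weeks."
        max_length=256, truncation=True, padding="max_length",
        add_special_tokens=True, return_tensors="tf",
    )
    inputs = {
        "input_ids": tf.cast(token.input_ids, tf.float64),            # float casts kept from app.py;
        "attention_mask": tf.cast(token.attention_mask, tf.float64),  # BERT inputs are normally int32
    }
    probs = model.predict(inputs)[0]
    print(np.argmax(probs))  # index into the 32-category list in make_prediction()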
not.py
ADDED
@@ -0,0 +1,86 @@
+import os
+import shutil
+if not os.path.exists('variables'):  # stage checkpoint files into the expected folders
+    os.makedirs('variables')
+    shutil.move('variables.data-00000-of-00001', 'variables')
+    shutil.move('variables.index', 'variables')
+if not os.path.exists('tokenizer'):
+    os.makedirs('tokenizer')
+    shutil.move('tokenizer_config.json', 'tokenizer')
+    shutil.move('vocab.txt', 'tokenizer')
+
+import pandas as pd
+import numpy as np
+import tensorflow as tf
+from transformers.models.bert import BertTokenizer
+from transformers import TFBertModel
+import streamlit as st
+import pandas as pd
+from transformers import TFAutoModel
+
+
+# Hard-coded training history (12 epochs) for the dashboard chart.
+hist_loss = [0.1971, 0.0732, 0.0465, 0.0319, 0.0232, 0.0167, 0.0127, 0.0094, 0.0073, 0.0058, 0.0049, 0.0042]
+hist_acc = [0.9508, 0.9811, 0.9878, 0.9914, 0.9936, 0.9954, 0.9965, 0.9973, 0.9978, 0.9983, 0.9986, 0.9988]
+hist_val_acc = [0.9804, 0.9891, 0.9927, 0.9956, 0.9981, 0.998, 0.9991, 0.9997, 0.9991, 0.9998, 0.9998, 0.9998]
+hist_val_loss = [0.0759, 0.0454, 0.028, 0.015, 0.0063, 0.0064, 0.004, 0.0011, 0.0021, 0.00064548, 0.0010, 0.00042896]
+Epochs = [i for i in range(1, 13)]
+# Scale to percentages for display.
+hist_loss[:] = [x * 100 for x in hist_loss]
+hist_acc[:] = [x * 100 for x in hist_acc]
+hist_val_acc[:] = [x * 100 for x in hist_val_acc]
+hist_val_loss[:] = [x * 100 for x in hist_val_loss]
+d = {'val_acc': hist_val_acc, 'acc': hist_acc, 'loss': hist_loss, 'val_loss': hist_val_loss, 'Epochs': Epochs}
+chart_data = pd.DataFrame(d)
+chart_data.index = range(1, 13)
+
+@st.cache(suppress_st_warning=True, allow_output_mutation=True)
+def load_model(show_spinner=True):
+    yorum_model = tf.keras.models.load_model(os.curdir)
+    tokenizer = BertTokenizer.from_pretrained('NimaKL/TC32')
+    return yorum_model, tokenizer
+
+st.set_page_config(layout='wide', initial_sidebar_state='expanded')
+col1, col2 = st.columns(2)
+with col1:
+    st.title("TC32 Multi-Class Text Classification")
+    st.subheader('Model Loss and Accuracy')
+    st.area_chart(chart_data)
+    yorum_model, tokenizer = load_model()
+
+
+
+with col2:
+    st.title("Sınıfı bulmak için bir şikayet girin.")  # "Enter a complaint to find its class."
+    st.subheader("Şikayet")  # "Complaint"
+    text = st.text_area('', height=240)
+    aButton = st.button('Ara')  # "Search"
+
+def prepare_data(input_text, tokenizer):
+    token = tokenizer.encode_plus(
+        input_text,
+        max_length=256,
+        truncation=True,
+        padding='max_length',
+        add_special_tokens=True,
+        return_tensors='tf'
+    )
+    return {
+        'input_ids': tf.cast(token.input_ids, tf.float64),
+        'attention_mask': tf.cast(token.attention_mask, tf.float64)
+    }
+
+def make_prediction(model, processed_data, classes=['Alışveriş', 'Anne-Bebek', 'Beyaz Eşya', 'Bilgisayar', 'Cep Telefonu', 'Eğitim', 'Elektronik', 'Emlak ve İnşaat', 'Enerji', 'Etkinlik ve Organizasyon', 'Finans', 'Gıda', 'Giyim', 'Hizmet', 'İçecek', 'İnternet', 'Kamu', 'Kargo-Nakliyat', 'Kozmetik', 'Küçük Ev Aletleri', 'Medya', 'Mekan ve Eğlence', 'Mobilya - Ev Tekstili', 'Mücevher Saat Gözlük', 'Mutfak Araç Gereç', 'Otomotiv', 'Sağlık', 'Sigorta', 'Spor', 'Temizlik', 'Turizm', 'Ulaşım']):
+    probs = model.predict(processed_data)[0]
+    return classes[np.argmax(probs)]
+
+
+if text or aButton:
+    with col2:
+        with st.spinner('Wait for it...'):
+            processed_data = prepare_data(text, tokenizer)
+            result = make_prediction(yorum_model, processed_data=processed_data)
+        description = '<table style="border-collapse: collapse;"><tr><div style="height: 62px;"></div></tr><tr><p style="border-width: medium; border-color: #aa5e70; border-radius: 10px; padding-top: 1px; padding-left: 20px; background: #20212a; font-family: Courier New; color: white; font-size: 36px; font-weight: bold;">' + result + '</p></tr></table>'
+        st.markdown(description, unsafe_allow_html=True)
+    with col1:
+        st.success("Tahmin başarıyla tamamlandı!")  # "Prediction completed successfully!"
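The setup block at the top of not.py appears to stage files downloaded flat into the working directory into the layout tf.keras.models.load_model(os.curdir) expects. A sketch of the resulting tree, assuming a saved_model.pb is already present (TensorFlow's SavedModel format requires it alongside the variables/ directory):

    ./
    ├── saved_model.pb                       (assumed present, not moved by not.py)
    ├── variables/
    │   ├── variables.data-00000-of-00001
    │   └── variables.index
    └── tokenizer/
        ├── tokenizer_config.json
        └── vocab.txt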
requirements.txt
ADDED
@@ -0,0 +1,7 @@
+numpy==1.23.4
+tensorflow==2.10.0
+transformers==4.23.1
+pandas==1.5.1
+streamlit==1.14.0
+
+
tokenizer_config.json
ADDED
@@ -0,0 +1 @@
+{"do_lower_case": false, "max_len": 512, "init_inputs": []}
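The config keeps casing ("do_lower_case": false), consistent with a cased Turkish BERT base. A quick check that the tokenizer directory staged by not.py loads locally (the path and sample word are illustrative):

    from transformers.models.bert import BertTokenizer

    # Load from the local 'tokenizer' directory assembled by not.py
    # (expects vocab.txt and tokenizer_config.json inside).
    tok = BertTokenizer.from_pretrained('tokenizer')
    print(tok.tokenize('Şikayet'))  # casing preserved, since do_lower_case is false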
vocab.txt
ADDED
The diff for this file is too large to render.
See raw diff