{ "cells": [ { "cell_type": "markdown", "id": "d10bfa50537af75f", "metadata": {}, "source": [ "## Experiment exp027-2\n", "xlm-roberta-large, Batch Size: 32, Learning Rate: 2e-5, Warmup Steps: 500" ] }, { "cell_type": "code", "execution_count": 51, "id": "9748a35a024779ae", "metadata": { "ExecuteTime": { "end_time": "2025-06-27T22:06:52.194727Z", "start_time": "2025-06-27T22:06:52.191088Z" } }, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "from sklearn.model_selection import train_test_split\n", "from transformers import (\n", " AutoTokenizer,\n", " BertForTokenClassification,\n", " AutoModelForTokenClassification\n", ")\n", "import torch\n", "import os\n", "\n", "os.environ[\"CUDA_DEVICE_ORDER\"] = \"PCI_BUS_ID\"\n", "os.environ[\"CUDA_VISIBLE_DEVICES\"] = '1'" ] }, { "cell_type": "code", "execution_count": 56, "id": "4ae3d9e4c556a288", "metadata": { "ExecuteTime": { "end_time": "2025-06-27T22:07:26.334867Z", "start_time": "2025-06-27T22:07:26.325629Z" } }, "outputs": [], "source": [ "test_comments_spans = pd.read_csv(\"./submissions/task2-predicted.csv\")" ] }, { "cell_type": "code", "execution_count": 57, "id": "156c9b1c48a954b4", "metadata": { "ExecuteTime": { "end_time": "2025-06-27T22:07:30.302897Z", "start_time": "2025-06-27T22:07:30.290021Z" } }, "outputs": [ { "data": { "text/html": [ "
| \n", " | document | \n", "comment_id | \n", "type | \n", "start | \n", "end | \n", "
|---|---|---|---|---|---|
| 0 | \n", "NDY-004 | \n", "2 | \n", "compliment | \n", "0 | \n", "21 | \n", "
| 1 | \n", "NDY-004 | \n", "4 | \n", "affection declaration | \n", "0 | \n", "19 | \n", "
| 2 | \n", "NDY-004 | \n", "5 | \n", "affection declaration | \n", "0 | \n", "25 | \n", "
| 3 | \n", "NDY-004 | \n", "5 | \n", "affection declaration | \n", "26 | \n", "56 | \n", "
| 4 | \n", "NDY-004 | \n", "5 | \n", "positive feedback | \n", "57 | \n", "71 | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
| 5498 | \n", "NDY-203 | \n", "526 | \n", "affection declaration | \n", "0 | \n", "17 | \n", "
| 5499 | \n", "NDY-203 | \n", "526 | \n", "positive feedback | \n", "30 | \n", "59 | \n", "
| 5500 | \n", "NDY-203 | \n", "526 | \n", "positive feedback | \n", "64 | \n", "104 | \n", "
| 5501 | \n", "NDY-203 | \n", "526 | \n", "affection declaration | \n", "105 | \n", "106 | \n", "
| 5502 | \n", "NDY-203 | \n", "526 | \n", "affection declaration | \n", "105 | \n", "114 | \n", "
5503 rows × 5 columns
\n", "| \n", " | document | \n", "comment_id | \n", "comment | \n", "predicted_labels | \n", "predicted_probs | \n", "offset_mapping | \n", "text_tokens | \n", "predicted_spans | \n", "
|---|---|---|---|---|---|---|---|---|
| 0 | \n", "NDY-004 | \n", "1 | \n", "Lol i love lochis | \n", "[0, 0, 0, 0, 0, 0, 0, 0] | \n", "[[0.99999654, 1.7456429e-07, 1.6115715e-07, 1.... | \n", "[[0, 0], [0, 1], [1, 3], [4, 5], [6, 10], [11,... | \n", "[▁L, ol, ▁i, ▁love, ▁loc, his] | \n", "[] | \n", "
| 1 | \n", "NDY-004 | \n", "2 | \n", "ihr singt voll gut :) | \n", "[0, 2, 12, 12, 12, 12, 12, 0] | \n", "[[0.9999976, 1.1218729e-07, 1.239344e-07, 1.50... | \n", "[[0, 0], [0, 3], [4, 8], [8, 9], [10, 14], [15... | \n", "[▁ihr, ▁sing, t, ▁voll, ▁gut, ▁:)] | \n", "[{'type': 'compliment', 'start': 0, 'end': 21,... | \n", "
| 2 | \n", "NDY-004 | \n", "3 | \n", "Junge fick dich | \n", "[0, 0, 0, 0, 0, 0] | \n", "[[0.9999981, 5.8623616e-08, 1.05891374e-07, 1.... | \n", "[[0, 0], [0, 4], [4, 5], [6, 10], [11, 15], [0... | \n", "[▁Jung, e, ▁fick, ▁dich] | \n", "[] | \n", "
| 3 | \n", "NDY-004 | \n", "4 | \n", "Ihr seit die besten | \n", "[0, 3, 13, 13, 13, 0] | \n", "[[0.99999774, 1.6417343e-07, 1.384722e-07, 1.1... | \n", "[[0, 0], [0, 3], [4, 8], [9, 12], [13, 19], [0... | \n", "[▁Ihr, ▁seit, ▁die, ▁besten] | \n", "[{'type': 'affection declaration', 'start': 0,... | \n", "
| 4 | \n", "NDY-004 | \n", "5 | \n", "ihr seit die ALLER besten ich finde euch soooo... | \n", "[0, 3, 13, 13, 13, 13, 13, 3, 13, 13, 13, 13, ... | \n", "[[0.99999785, 1.2960982e-07, 1.4320104e-07, 1.... | \n", "[[0, 0], [0, 3], [4, 8], [9, 12], [13, 17], [1... | \n", "[▁ihr, ▁seit, ▁die, ▁ALLE, R, ▁besten, ▁ich, ▁... | \n", "[{'type': 'affection declaration', 'start': 0,... | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
| 9224 | \n", "NDY-203 | \n", "522 | \n", "hihi kannst du mich grüßen 💕 👋 😍 Achso wusstes... | \n", "[0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 11, 0, 11, 11, ... | \n", "[[0.99999774, 1.8107521e-07, 1.0220851e-07, 9.... | \n", "[[0, 0], [0, 4], [5, 11], [12, 14], [15, 19], ... | \n", "[▁hihi, ▁kannst, ▁du, ▁mich, ▁gr, üß, en, ▁, 💕... | \n", "[{'type': 'positive feedback', 'start': 27, 'e... | \n", "
| 9225 | \n", "NDY-203 | \n", "523 | \n", "#Glocke aktiviert 👑 Ich liebe deine Videos 💍 💎... | \n", "[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 11, 11, 11, 11,... | \n", "[[0.9999976, 1.1908668e-07, 8.492378e-08, 6.60... | \n", "[[0, 0], [0, 1], [1, 2], [2, 6], [6, 7], [8, 1... | \n", "[▁#, G, lock, e, ▁aktiv, iert, ▁, 👑, ▁Ich, ▁li... | \n", "[{'type': 'positive feedback', 'start': 20, 'e... | \n", "
| 9226 | \n", "NDY-203 | \n", "524 | \n", "Bist die beste ❤ Bitte Grüße mich 💕 ❤ 😘 😍 | \n", "[0, 3, 13, 13, 13, 13, 0, 0, 0, 1, 1, 11, 11, ... | \n", "[[0.9999974, 2.1362885e-07, 1.2580301e-07, 9.5... | \n", "[[0, 0], [0, 3], [3, 4], [5, 8], [9, 14], [15,... | \n", "[▁Bis, t, ▁die, ▁beste, ▁❤, ▁Bitte, ▁Grüße, ▁m... | \n", "[{'type': 'affection declaration', 'start': 0,... | \n", "
| 9227 | \n", "NDY-203 | \n", "525 | \n", "Hi Bonny ❤️ War letztens auf'm Flughafen , und... | \n", "[0, 0, 0, 0, 1, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0,... | \n", "[[0.99999523, 6.63842e-07, 2.0147786e-07, 1.16... | \n", "[[0, 0], [0, 2], [3, 6], [6, 8], [9, 10], [10,... | \n", "[▁Hi, ▁Bon, ny, ▁❤, ️, ▁War, ▁letzten, s, ▁auf... | \n", "[{'type': 'positive feedback', 'start': 9, 'en... | \n", "
| 9228 | \n", "NDY-203 | \n", "526 | \n", "du bist die beste ich bin neu ich hab dich sof... | \n", "[0, 3, 13, 13, 13, 0, 0, 0, 1, 11, 11, 11, 11,... | \n", "[[0.999997, 3.4811254e-07, 7.750037e-08, 7.272... | \n", "[[0, 0], [0, 2], [3, 7], [8, 11], [12, 17], [1... | \n", "[▁du, ▁bist, ▁die, ▁beste, ▁ich, ▁bin, ▁neu, ▁... | \n", "[{'type': 'affection declaration', 'start': 0,... | \n", "
9229 rows × 8 columns
\n", "