Classifying Banking Intent from Customer Queries¶
Introduction¶
This project compares two text classification models that predict customer intent from real-world banking queries. The end-to-end workflow demonstrates foundational deep learning techniques alongside modern transformer-based fine-tuning for natural language tasks.
We'll show and compare two language models:
- A baseline Multi-Layer Perceptron (MLP) classifier.
- A fine-tuned RoBERTa model using LoRA.
This comparison highlights the trade-off between training speed and model performance: while we expect the MLP to train significantly faster, we also expect the fine-tuned RoBERTa to achieve superior classification performance.
Project Goals¶
- Explore and visualize the Banking77 dataset.
- Implement a baseline MLP classifier.
- Fine-tune a pre-trained RoBERTa model using LoRA.
- Compare model performances (overall accuracy, mean F1-scores).
Dataset¶
Banking77 is a text dataset of banking customer queries, each labeled with the intent of a specific banking action. Queries are short, natural-language texts that customers might type into a chatbot.
- 77 unique intents (classification labels).
- 13,083 queries (10,003 for training and 3,080 for testing).
- Source: https://huggingface.co/datasets/PolyAI/banking77
Example query:
{
'label': 11, # integer label corresponding to "card_arrival" intent
'text': 'I am still waiting on my card?'
}
Import libraries and set up workspace¶
# Import essential libraries for data handling, visualization, and machine learning
import pandas as pd
import numpy as np
import datasets
from wordcloud import WordCloud
import matplotlib.pyplot as plt
import seaborn as sns
# Preprocessing and metrics from sklearn
from sklearn.preprocessing import LabelEncoder
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import accuracy_score, f1_score, classification_report
# PyTorch for neural network implementation
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
# Hugging Face tools for transformers and PEFT (LoRA)
from peft import LoraConfig, get_peft_model, TaskType
from transformers import AutoTokenizer, AutoModelForSequenceClassification, DataCollatorWithPadding, Trainer, TrainingArguments
import warnings
# Suppress warnings for cleaner output
warnings.filterwarnings("ignore")
# Configure pandas to display all rows and columns
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
Load and verify data¶
Load the training and testing data to verify their existence and check for data quality issues, such as missing queries or intent labels.
# Load training dataset
train_df = pd.read_csv("./datasets/banking77_train.csv")
# Display the first few rows of the training data
train_df.head()
| text | category | |
|---|---|---|
| 0 | I am still waiting on my card? | card_arrival |
| 1 | What can I do if my card still hasn't arrived ... | card_arrival |
| 2 | I have been waiting over a week. Is the card s... | card_arrival |
| 3 | Can I track my card while it is in the process... | card_arrival |
| 4 | How do I know if I will get my card, or if it ... | card_arrival |
# Load testing dataset
test_df = pd.read_csv("./datasets/banking77_test.csv")
# Display the first few rows of the testing data
test_df.head()
| text | category | |
|---|---|---|
| 0 | How do I locate my card? | card_arrival |
| 1 | I still have not received my new card, I order... | card_arrival |
| 2 | I ordered a card but it has not arrived. Help ... | card_arrival |
| 3 | Is there a way to know when my card will arrive? | card_arrival |
| 4 | My card has not arrived yet. | card_arrival |
# Summary statistics for training set to check for anomalies
print(train_df.describe())
                                  text                   category
count                            10003                      10003
unique                           10003                         77
top     I am still waiting on my card?  card_payment_fee_charged
freq                                 1                        187
# Check for any missing values in the training set
print(train_df.isna().sum())
text        0
category    0
dtype: int64
# Summary statistics for test set to ensure consistency
print(test_df.describe())
                            text      category
count                       3080          3080
unique                      3080            77
top     How do I locate my card?  card_arrival
freq                           1            40
# Check for missing values in the test set
print(test_df.isna().sum())
text        0
category    0
dtype: int64
Distribution of class intent labels¶
Let's visualize the distribution of intent labels (counts for each class).
# Plot the distribution of categories to visualize class balance
plt.figure(figsize=(16,4))
train_df["category"].value_counts().plot(kind='bar')
plt.title("Distribution of Intent Labels")
plt.xlabel("category")
plt.ylabel("Count")
plt.show()
The distribution shows class imbalance among the intent labels, particularly in the right tail; we may need to address this later if our models underperform on the rarer classes.
Explore and visualize data¶
The top 10 most occurring intents have a few common themes:
- Fee-related complaints: payment fees, transfer fees, and withdrawal charges.
- Transaction errors: charged twice, wrong amount received, failed transactions.
- Balance update issues: balance not updated after deposits or transfers.
# Calculate text length for each query in training set
train_df["text_length"] = train_df.text.apply(len)
# Aggregate text length metrics by category
text_length_dist = train_df.groupby("category")["text_length"].agg(['count', 'min', 'median', 'max'])
# Display top 10 categories by query count
print("=" * 18 + " Word Length - Top 10 Intent Categories " + "=" * 18)
print(text_length_dist.sort_values("count", ascending=False).head(10))
print()
================== Word Length - Top 10 Intent Categories ==================
count min median max
category
card_payment_fee_charged 187 23 53.0 213
direct_debit_payment_not_recognised 182 17 60.0 268
balance_not_updated_after_cheque_or_cash_deposit 181 25 65.0 202
wrong_amount_of_cash_received 180 15 54.5 254
cash_withdrawal_charge 177 19 51.0 255
transaction_charged_twice 175 19 51.0 339
declined_cash_withdrawal 173 22 49.0 207
transfer_fee_charged 172 18 55.0 409
transfer_not_received_by_recipient 171 20 60.0 268
balance_not_updated_after_bank_transfer 171 23 58.0 202
# Display bottom 10 categories by query count
print("=" * 6 + " Word Length - Bottom 10 Intent Categories " + "=" * 6)
print(text_length_dist.sort_values("count", ascending=False).tail(10))
====== Word Length - Bottom 10 Intent Categories ======
count min median max
category
get_disposable_virtual_card 97 22 43.0 156
top_up_limits 97 13 33.0 80
receiving_money 95 22 49.0 103
atm_support 87 16 35.0 72
compromised_card 86 30 61.5 321
lost_or_stolen_card 82 18 39.0 210
card_swallowed 61 15 41.0 141
card_acceptance 59 20 34.0 58
virtual_card_not_working 41 28 44.0 123
contactless_not_working 35 20 48.0 143
The 10 least occurring intents, on the other hand, cover various issues with a physical or virtual card: lost or stolen card, card swallowed, virtual card not working, contactless not working.
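The class counts above give a quick way to quantify the imbalance. Using the largest and smallest training-class counts from the tables above:

```python
# Largest and smallest training-class counts, taken from the tables above
largest = 187   # card_payment_fee_charged
smallest = 35   # contactless_not_working
imbalance_ratio = largest / smallest
print(f"Imbalance ratio: {imbalance_ratio:.1f}x")  # roughly 5.3x
```

A roughly 5x gap is moderate; severe imbalance (orders of magnitude) would more strongly warrant resampling or class weighting.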
# Plot text length distribution by intent category
plt.figure(figsize=(16, 16))
sns.boxplot(data=train_df, y="category", x="text_length")
plt.title('Text Length Distribution by Intent')
plt.xlabel('Text Length (characters)')
plt.ylabel('Intent Category')
plt.tight_layout()
plt.show()
Looking at the text length distribution:
- Intents are fairly uniform in their medians and interquartile ranges, suggesting that customers use concise language when reporting issues, asking questions, or making requests.
- Outliers are present in almost all intents, suggesting that some customers provide extensive detail in their text inputs.
Word Clouds¶
Next, we'll look at word frequency for the 10 largest intent categories to see if there are any observable patterns between word choice and intent category.
# Get the top 10 categories by frequency
top_10_intents = train_df['category'].value_counts().nlargest(10).index
fig, axes = plt.subplots(2, 5, figsize=(25, 12))
axes = axes.flatten()
for i, intent in enumerate(top_10_intents):
    # Filter and join text for the intent
    words = " ".join(train_df[train_df.category == intent]['text'])
    # Generate word cloud
    wordcloud = WordCloud(width=600, height=600, background_color='white', max_words=20).generate(words)
    # Plot on the corresponding axis
    axes[i].imshow(wordcloud, interpolation='bilinear')
    axes[i].set_title(f"Category: {intent}")
    axes[i].axis("off")
plt.tight_layout()
plt.show()
There are several interesting themes in these word clouds:
- Strong lexical alignment with intent labels: dominant words closely mirror the intent name itself.
- Strong vocabulary overlap across intents: words like `charged`, `transfer`, `fee`, and `money` are very common.
- Fine-grained distinctions: some intents differ by a few critical words, like `cash` vs `card`.
These observations suggest that distinguishing intents may depend more on specific keywords than on deep banking knowledge, so an MLP might achieve decent accuracy. However, while intent categories are keyword-rich, the heavy vocabulary overlap limits the MLP's ability to separate classes, presenting an opportunity for a fine-tuned RoBERTa to excel by using its contextual embeddings.
Multi-Layer Perceptron (MLP)¶
First, we create a Multi-Layer Perceptron (MLP) to serve as a baseline text classifier.
The purpose of this baseline is to:
- Create a benchmark to compare against more advanced models (e.g., transformers).
- Provide a strong non-contextual reference point for text classification performance.
Encode Class Labels with LabelEncoder¶
- Each unique intent category is mapped to a unique integer ID.
- Fit on the training dataset and apply to the test dataset.
encoder = LabelEncoder()
y_train = encoder.fit_transform(train_df.category.astype(str).values)
y_test = encoder.transform(test_df.category.astype(str).values)
num_classes = len(encoder.classes_)
#Confirm 77 classes encoded
print(num_classes)
77
Extract and Vectorize Text Features with TfidfVectorizer¶
To train the MLP, we'll represent each text as TF-IDF (Term Frequency-Inverse Document Frequency) features.
- TF-IDF converts each text into a vector representing how important each word is in that text relative to the full corpus.
Preprocessing steps:
- Fit and transform on the training dataset.
- Transform the testing dataset using learned features from the training dataset.
tfidf = TfidfVectorizer(
lowercase=True,
ngram_range = (1,2),
min_df = 2,
max_df = 0.95,
max_features = 50000,
strip_accents = "unicode"
)
X_train = tfidf.fit_transform(train_df["text"].astype(str).values)
X_test = tfidf.transform(test_df["text"].astype(str).values)
input_size = X_train.shape[1]
print("Input Size: ", input_size)
Input Size: 10292
Create dataset class for PyTorch processing¶
Because PyTorch models expect data to be provided through a Dataset class, we define a custom dataset class that:
- Wraps sparse TF-IDF feature matrices.
- Converts sparse rows into dense tensors.
- Returns input–label pairs for training.
class TfidfDataset(Dataset):
    def __init__(self, X_sparse, y):
        self.X = X_sparse
        self.y = y
    def __len__(self):
        return self.X.shape[0]
    def __getitem__(self, idx):
        x = self.X[idx].toarray().astype(np.float32).squeeze(0)
        y = np.int64(self.y[idx])
        return torch.from_numpy(x), torch.tensor(y)
train_ds = TfidfDataset(X_train, y_train)
test_ds = TfidfDataset(X_test, y_test)
Create DataLoaders¶
train_loader = DataLoader(train_ds, batch_size=256, shuffle=True)
test_loader = DataLoader(test_ds, batch_size=256, shuffle=False)
Build MLP Architecture¶
For our baseline we'll use a simple architecture consisting of:
- An input layer matching the dimensionality of the TF-IDF vectors.
- Two hidden layers with ReLU activations to introduce non-linearities.
- An output layer that produces predicted outputs (logits) for each of the 77 intent classes.
class SimpleMLP(nn.Module):
    def __init__(self, input_size: int, hidden_size=256, output_size=77):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, output_size)
        self.relu = nn.ReLU()
    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.fc3(x)
        return x
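To get a rough sense of model size (assuming the TF-IDF input size of 10,292 printed above), the MLP's parameter count works out to about 2.7M:

```python
input_size, hidden_size, output_size = 10292, 256, 77

# Each nn.Linear layer has (in_features * out_features) weights plus out_features biases
fc1 = input_size * hidden_size + hidden_size
fc2 = hidden_size * hidden_size + hidden_size
fc3 = hidden_size * output_size + output_size
total = fc1 + fc2 + fc3
print(f"{total:,} trainable parameters")  # 2,720,589
```

Almost all of the parameters sit in the first layer, since its input dimension is the full TF-IDF vocabulary size.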
Instantiate MLP, Loss Function, and Optimizer¶
We'll create an instance of the MLP and initialize the following:
- Cross-Entropy Loss, suited for multi-class classification.
- The AdamW optimizer, which updates model parameters using adaptive learning rates with weight decay.
# Set device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")
model = SimpleMLP(
input_size=input_size,
hidden_size = 256,
output_size=num_classes).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3, weight_decay=1e-4)
Using device: cuda
Train the MLP on the Training Set¶
Next, we create a training function for the MLP that includes:
- Training parameters (number of epochs).
- Looping through the training batches.
- Calculating the loss through the forward pass.
- Updating weight values through the backward pass.
- Calculating training performance metrics per epoch.
# Set random seed
torch.manual_seed(42)
def train_model(model, train_loader, loss_fn, optimizer, num_epochs, device):
    for epoch in range(num_epochs):
        epoch_loss = 0
        correct = 0
        total = 0
        num_batches = 0
        for batch_X, batch_y in train_loader:
            batch_X = batch_X.to(device)
            batch_y = batch_y.to(device)
            # Forward pass
            logits = model(batch_X)
            loss = loss_fn(logits, batch_y)
            # Backward pass
            optimizer.zero_grad(set_to_none=True)
            loss.backward()
            optimizer.step()
            # Accumulate training predictions
            epoch_loss += loss.item()
            num_batches += 1
            preds = logits.argmax(dim=1)
            correct += (preds == batch_y).sum().item()
            total += batch_y.size(0)
        # Per-epoch metrics
        avg_loss = epoch_loss / num_batches
        accuracy = correct / total
        print(
            f"Epoch [{epoch + 1}/{num_epochs}] | "
            f"Loss: {avg_loss:.4f} | "
            f"Accuracy: {accuracy:.4f}"
        )
# Train the model
train_model(model=model,
train_loader=train_loader,
loss_fn=criterion,
optimizer=optimizer,
num_epochs=10,
device=device)
Epoch [1/10] | Loss: 4.2509 | Accuracy: 0.0753
Epoch [2/10] | Loss: 3.3632 | Accuracy: 0.3640
Epoch [3/10] | Loss: 1.5487 | Accuracy: 0.7505
Epoch [4/10] | Loss: 0.5909 | Accuracy: 0.9001
Epoch [5/10] | Loss: 0.2978 | Accuracy: 0.9485
Epoch [6/10] | Loss: 0.1804 | Accuracy: 0.9708
Epoch [7/10] | Loss: 0.1162 | Accuracy: 0.9827
Epoch [8/10] | Loss: 0.0836 | Accuracy: 0.9906
Epoch [9/10] | Loss: 0.0615 | Accuracy: 0.9931
Epoch [10/10] | Loss: 0.0450 | Accuracy: 0.9956
Evaluate MLP on the Testing Dataset¶
Now we'll generate predictions on the testing set.
def predict(model, dataloader, device):
    model.eval()
    all_predictions = []
    all_labels = []
    with torch.no_grad():
        for batch_X, batch_y in dataloader:
            batch_X = batch_X.to(device)
            batch_y = batch_y.to(device)
            logits = model(batch_X)
            preds = logits.argmax(dim=1)
            all_predictions.append(preds.cpu().numpy())
            all_labels.append(batch_y.cpu().numpy())
    y_pred = np.concatenate(all_predictions)
    y_true = np.concatenate(all_labels)
    return y_pred, y_true
# Generate test predictions
mlp_preds, mlp_true = predict(
model=model,
dataloader=test_loader,
device=device)
Classification Report: MLP¶
Using our predictions, we can create a classification report that evaluates, for each intent class:
- Precision
- Recall
- F1-Score
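Note that because each test class here has exactly 40 samples, macro and weighted F1 will nearly coincide; on an imbalanced test set they can differ. A toy example of the distinction:

```python
from sklearn.metrics import f1_score

# Toy labels: class 0 has 3 samples, class 1 has only 1
y_true = [0, 0, 0, 1]
y_pred = [0, 0, 1, 1]

macro = f1_score(y_true, y_pred, average="macro")        # unweighted mean of per-class F1
weighted = f1_score(y_true, y_pred, average="weighted")  # mean weighted by class support
print(round(macro, 4), round(weighted, 4))  # 0.7333 0.7667
```

Here the majority class scores higher (F1 = 0.8 vs 0.6667), so weighting by support pulls the average up.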
mlp_accuracy = accuracy_score(mlp_true, mlp_preds)
mlp_macro_f1 = f1_score(mlp_true, mlp_preds, average='macro')
mlp_weighted_f1 = f1_score(mlp_true, mlp_preds, average='weighted')
mlp_report = classification_report(mlp_true, mlp_preds, target_names=encoder.classes_)
print("=== Baseline MLP Performance ===")
print(f"Overall Accuracy Score: {mlp_accuracy:.4f}")
print(f"Macro F1: {mlp_macro_f1:.4f}")
print(f"Weighted F1: {mlp_weighted_f1:.4f}")
print("\nClassification report:")
print(mlp_report)
=== Baseline MLP Performance ===
Overall Accuracy Score: 0.8779
Macro F1: 0.8782
Weighted F1: 0.8782
Classification report:
precision recall f1-score support
Refund_not_showing_up 0.93 0.95 0.94 40
activate_my_card 0.93 0.97 0.95 40
age_limit 0.98 1.00 0.99 40
apple_pay_or_google_pay 1.00 0.97 0.99 40
atm_support 0.88 0.93 0.90 40
automatic_top_up 1.00 0.90 0.95 40
balance_not_updated_after_bank_transfer 0.69 0.68 0.68 40
balance_not_updated_after_cheque_or_cash_deposit 0.92 0.90 0.91 40
beneficiary_not_allowed 0.95 0.97 0.96 40
cancel_transfer 0.86 0.95 0.90 40
card_about_to_expire 0.98 1.00 0.99 40
card_acceptance 0.71 0.88 0.79 40
card_arrival 0.85 0.85 0.85 40
card_delivery_estimate 0.92 0.82 0.87 40
card_linking 0.97 0.90 0.94 40
card_not_working 0.63 0.90 0.74 40
card_payment_fee_charged 0.83 0.88 0.85 40
card_payment_not_recognised 0.87 0.82 0.85 40
card_payment_wrong_exchange_rate 0.88 0.93 0.90 40
card_swallowed 0.92 0.85 0.88 40
cash_withdrawal_charge 1.00 0.93 0.96 40
cash_withdrawal_not_recognised 0.88 0.93 0.90 40
change_pin 0.90 0.95 0.93 40
compromised_card 0.86 0.78 0.82 40
contactless_not_working 0.76 0.70 0.73 40
country_support 0.90 0.90 0.90 40
declined_card_payment 0.73 0.90 0.81 40
declined_cash_withdrawal 0.82 0.90 0.86 40
declined_transfer 0.96 0.62 0.76 40
direct_debit_payment_not_recognised 0.83 0.88 0.85 40
disposable_card_limits 0.92 0.90 0.91 40
edit_personal_details 0.98 1.00 0.99 40
exchange_charge 0.95 0.93 0.94 40
exchange_rate 0.90 0.95 0.93 40
exchange_via_app 0.89 0.97 0.93 40
extra_charge_on_statement 0.92 0.88 0.90 40
failed_transfer 0.69 0.85 0.76 40
fiat_currency_support 0.97 0.78 0.86 40
get_disposable_virtual_card 0.87 0.85 0.86 40
get_physical_card 0.92 0.88 0.90 40
getting_spare_card 0.95 0.90 0.92 40
getting_virtual_card 0.90 0.88 0.89 40
lost_or_stolen_card 0.87 0.85 0.86 40
lost_or_stolen_phone 0.95 0.97 0.96 40
order_physical_card 0.87 0.85 0.86 40
passcode_forgotten 1.00 0.93 0.96 40
pending_card_payment 0.95 0.93 0.94 40
pending_cash_withdrawal 1.00 0.97 0.99 40
pending_top_up 0.89 0.82 0.86 40
pending_transfer 0.79 0.75 0.77 40
pin_blocked 0.94 0.82 0.88 40
receiving_money 0.88 0.93 0.90 40
request_refund 1.00 0.90 0.95 40
reverted_card_payment? 0.85 0.97 0.91 40
supported_cards_and_currencies 0.78 0.97 0.87 40
terminate_account 0.93 0.95 0.94 40
top_up_by_bank_transfer_charge 0.91 0.72 0.81 40
top_up_by_card_charge 0.90 0.93 0.91 40
top_up_by_cash_or_cheque 0.94 0.80 0.86 40
top_up_failed 0.70 0.93 0.80 40
top_up_limits 0.91 0.97 0.94 40
top_up_reverted 0.97 0.85 0.91 40
topping_up_by_card 0.85 0.72 0.78 40
transaction_charged_twice 0.93 1.00 0.96 40
transfer_fee_charged 0.77 0.90 0.83 40
transfer_into_account 0.91 0.72 0.81 40
transfer_not_received_by_recipient 0.68 0.80 0.74 40
transfer_timing 0.88 0.75 0.81 40
unable_to_verify_identity 0.91 0.72 0.81 40
verify_my_identity 0.82 0.57 0.68 40
verify_source_of_funds 0.87 1.00 0.93 40
verify_top_up 0.97 0.93 0.95 40
virtual_card_not_working 0.97 0.72 0.83 40
visa_or_mastercard 0.97 0.93 0.95 40
why_verify_identity 0.61 0.95 0.75 40
wrong_amount_of_cash_received 0.95 0.93 0.94 40
wrong_exchange_rate_for_cash_withdrawal 0.92 0.85 0.88 40
accuracy 0.88 3080
macro avg 0.89 0.88 0.88 3080
weighted avg 0.89 0.88 0.88 3080
Fine-Tuning the RoBERTa Transformer with LoRA¶
Next, we'll fine-tune a pretrained RoBERTa transformer using LoRA (Low-Rank Adaptation) in an attempt to improve our ability to predict intent.
- RoBERTa provides contextualized token embeddings, enabling the model to capture semantic differences between intent classes that our baseline MLP may have struggled with.
- LoRA trains small adapter matrices instead of the full model, significantly reducing the computational cost of fine-tuning while preserving performance.
- We expect fine-tuning to still be slower than training the MLP, but expect LoRA-RoBERTa's classification performance to be better.
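The core idea of LoRA can be sketched in a few lines of NumPy (a conceptual illustration, not the PEFT implementation): the frozen weight W is perturbed by a low-rank product scaled by alpha/r, and only the small factors A and B are trained.

```python
import numpy as np

d, r, alpha = 768, 32, 64               # dimensions matching the LoRA config used below
rng = np.random.default_rng(0)
W = rng.standard_normal((d, d))         # frozen pretrained weight (never updated)
A = rng.standard_normal((r, d)) * 0.01  # trainable low-rank factor
B = np.zeros((d, r))                    # initialized to zero, so the update starts as a no-op
delta_W = (alpha / r) * (B @ A)         # rank-r update
W_eff = W + delta_W                     # effective weight used in the forward pass
print(A.size + B.size, "trainable vs", W.size, "frozen")  # 49152 trainable vs 589824 frozen
```

For each adapted 768x768 projection, LoRA trains roughly 12x fewer parameters than full fine-tuning of that matrix.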
train = pd.read_csv("datasets/banking77_train.csv")
test = pd.read_csv("datasets/banking77_test.csv")
train.head()
| text | category | |
|---|---|---|
| 0 | I am still waiting on my card? | card_arrival |
| 1 | What can I do if my card still hasn't arrived ... | card_arrival |
| 2 | I have been waiting over a week. Is the card s... | card_arrival |
| 3 | Can I track my card while it is in the process... | card_arrival |
| 4 | How do I know if I will get my card, or if it ... | card_arrival |
Encode Class Labels with LabelEncoder¶
train = train.dropna(subset=["text", "category"]).copy()
test = test.dropna(subset=["text", "category"]).copy()
label_encoder = LabelEncoder()
train["labels"] = label_encoder.fit_transform(train["category"].astype(str))
test["labels"] = label_encoder.transform(test["category"].astype(str))
num_classes = len(label_encoder.classes_)
print("num_classes:", num_classes)
training_set = datasets.Dataset.from_pandas(train[["text", "labels"]], preserve_index=False)
test_set = datasets.Dataset.from_pandas(test[["text", "labels"]], preserve_index=False)
num_classes: 77
Tokenization Using RoBERTa Tokenizer¶
Here we use the RoBERTa tokenizer (`roberta-base`) to convert raw text queries into subword tokens:
- Truncate text to cap sequence length.
- Set the maximum token length to 256 tokens.
- Apply dynamic padding using `DataCollatorWithPadding`, which pads sequences to the longest sequence within each batch.
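A minimal sketch of what dynamic padding does (illustrative only; `DataCollatorWithPadding` handles this, plus tensor conversion, for us). RoBERTa's pad token id is 1.

```python
def pad_batch(batch, pad_id=1):
    # Pad each sequence to the longest sequence in THIS batch, not a global maximum
    max_len = max(len(seq) for seq in batch)
    input_ids = [seq + [pad_id] * (max_len - len(seq)) for seq in batch]
    attention_mask = [[1] * len(seq) + [0] * (max_len - len(seq)) for seq in batch]
    return input_ids, attention_mask

ids, mask = pad_batch([[0, 100, 2], [0, 100, 200, 300, 2]])
print(ids)   # [[0, 100, 2, 1, 1], [0, 100, 200, 300, 2]]
print(mask)  # [[1, 1, 1, 0, 0], [1, 1, 1, 1, 1]]
```

Padding per batch rather than to a global maximum keeps short-query batches small and speeds up training.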
tokenizer = AutoTokenizer.from_pretrained("roberta-base")
def tokenize_function(examples):
    return tokenizer(examples["text"], truncation=True, max_length=256)
tokenized_training_set = training_set.map(tokenize_function, batched=True)
tokenized_test_set = test_set.map(tokenize_function, batched=True)
cols_to_keep = {"input_ids", "attention_mask", "labels"}
tokenized_training_set = tokenized_training_set.remove_columns(
[c for c in tokenized_training_set.column_names if c not in cols_to_keep]
)
tokenized_test_set = tokenized_test_set.remove_columns(
[c for c in tokenized_test_set.column_names if c not in cols_to_keep]
)
# Data collator
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
RoBERTa Transformer Fine-Tuning and Evaluation¶
Next, we configure the fine-tuning process using Hugging Face's TrainingArguments.
Set Up the Fine-Tuning Configuration¶
training_args = TrainingArguments(
output_dir="./temp_results",
save_strategy="no",
logging_dir="./logs",
eval_strategy="epoch",
logging_strategy="epoch",
report_to="none",
learning_rate=2e-4,
lr_scheduler_type="cosine",
warmup_ratio=0.1,
num_train_epochs=10,
per_device_train_batch_size=16,
per_device_eval_batch_size=32,
seed=42,
)
warmup_ratio is deprecated and will be removed in v5.2. Use `warmup_steps` instead.
`logging_dir` is deprecated and will be removed in v5.2. Please set `TENSORBOARD_LOGGING_DIR` instead.
Configure LoRA for Parameter-Efficient Fine-Tuning¶
Next, we configure and apply LoRA to efficiently fine-tune RoBERTa using the PEFT library.
torch.manual_seed(42)
model = AutoModelForSequenceClassification.from_pretrained(
"roberta-base",
num_labels=num_classes
).to(device)
lora_config = LoraConfig(
task_type=TaskType.SEQ_CLS,
r=32,
lora_alpha=64,
lora_dropout=0.05,
bias="none",
target_modules=["query", "key", "value"],
)
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()
model.to(device)
RobertaForSequenceClassification LOAD REPORT from: roberta-base
Key | Status |
--------------------------------+------------+-
lm_head.layer_norm.weight | UNEXPECTED |
lm_head.layer_norm.bias | UNEXPECTED |
lm_head.dense.bias | UNEXPECTED |
roberta.embeddings.position_ids | UNEXPECTED |
lm_head.dense.weight | UNEXPECTED |
lm_head.bias | UNEXPECTED |
classifier.out_proj.weight | MISSING |
classifier.out_proj.bias | MISSING |
classifier.dense.weight | MISSING |
classifier.dense.bias | MISSING |
Notes:
- UNEXPECTED :can be ignored when loading from different task/architecture; not ok if you expect identical arch.
- MISSING :those params were newly initialized because missing from the checkpoint. Consider training on your downstream task.
trainable params: 2,419,277 || all params: 127,124,122 || trainable%: 1.9031
PeftModelForSequenceClassification(
(base_model): LoraModel(
(model): RobertaForSequenceClassification(
(roberta): RobertaModel(
(embeddings): RobertaEmbeddings(
(word_embeddings): Embedding(50265, 768, padding_idx=1)
(token_type_embeddings): Embedding(1, 768)
(LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
(position_embeddings): Embedding(514, 768, padding_idx=1)
)
(encoder): RobertaEncoder(
(layer): ModuleList(
(0-11): 12 x RobertaLayer(
(attention): RobertaAttention(
(self): RobertaSelfAttention(
(query): lora.Linear(
(base_layer): Linear(in_features=768, out_features=768, bias=True)
(lora_dropout): ModuleDict(
(default): Dropout(p=0.05, inplace=False)
)
(lora_A): ModuleDict(
(default): Linear(in_features=768, out_features=32, bias=False)
)
(lora_B): ModuleDict(
(default): Linear(in_features=32, out_features=768, bias=False)
)
(lora_embedding_A): ParameterDict()
(lora_embedding_B): ParameterDict()
(lora_magnitude_vector): ModuleDict()
)
(key): lora.Linear(
(base_layer): Linear(in_features=768, out_features=768, bias=True)
(lora_dropout): ModuleDict(
(default): Dropout(p=0.05, inplace=False)
)
(lora_A): ModuleDict(
(default): Linear(in_features=768, out_features=32, bias=False)
)
(lora_B): ModuleDict(
(default): Linear(in_features=32, out_features=768, bias=False)
)
(lora_embedding_A): ParameterDict()
(lora_embedding_B): ParameterDict()
(lora_magnitude_vector): ModuleDict()
)
(value): lora.Linear(
(base_layer): Linear(in_features=768, out_features=768, bias=True)
(lora_dropout): ModuleDict(
(default): Dropout(p=0.05, inplace=False)
)
(lora_A): ModuleDict(
(default): Linear(in_features=768, out_features=32, bias=False)
)
(lora_B): ModuleDict(
(default): Linear(in_features=32, out_features=768, bias=False)
)
(lora_embedding_A): ParameterDict()
(lora_embedding_B): ParameterDict()
(lora_magnitude_vector): ModuleDict()
)
(dropout): Dropout(p=0.1, inplace=False)
)
(output): RobertaSelfOutput(
(dense): Linear(in_features=768, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
)
(intermediate): RobertaIntermediate(
(dense): Linear(in_features=768, out_features=3072, bias=True)
(intermediate_act_fn): GELUActivation()
)
(output): RobertaOutput(
(dense): Linear(in_features=3072, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
)
)
)
)
(classifier): ModulesToSaveWrapper(
(original_module): RobertaClassificationHead(
(dense): Linear(in_features=768, out_features=768, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
(out_proj): Linear(in_features=768, out_features=77, bias=True)
)
(modules_to_save): ModuleDict(
(default): RobertaClassificationHead(
(dense): Linear(in_features=768, out_features=768, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
(out_proj): Linear(in_features=768, out_features=77, bias=True)
)
)
)
)
)
)
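The trainable-parameter count printed above (2,419,277) can be reproduced by hand: each of the 36 LoRA-adapted projections (query/key/value across 12 layers) contributes two rank-32 factors, and the classification head (trained via modules_to_save, as shown in the printout) is also updated.

```python
d, r, layers = 768, 32, 12
num_classes = 77

# q/k/v in each of 12 layers, with an A (r x d) and B (d x r) factor each
lora = 3 * layers * 2 * d * r
# Classification head: dense (d x d + bias) plus out_proj (d x num_classes + bias)
head = (d * d + d) + (d * num_classes + num_classes)
total = lora + head
print(f"{total:,}")  # 2,419,277
```

So only about 1.9% of the 127M total parameters are trained, matching print_trainable_parameters().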
Fine-Tune and Evaluate the LoRA-RoBERTa Model¶
torch.manual_seed(42)
trainer = Trainer(
model=model,
args=training_args,
train_dataset=tokenized_training_set,
eval_dataset=tokenized_test_set,
processing_class=tokenizer,
data_collator=data_collator,
)
trainer.train()
eval_metrics = trainer.evaluate()
print(eval_metrics)
| Epoch | Training Loss | Validation Loss |
|---|---|---|
| 1 | 2.867155 | 0.820623 |
| 2 | 0.588901 | 0.377281 |
| 3 | 0.340138 | 0.305643 |
| 4 | 0.246103 | 0.275706 |
| 5 | 0.183561 | 0.265762 |
| 6 | 0.139964 | 0.271889 |
| 7 | 0.110447 | 0.259594 |
| 8 | 0.087138 | 0.250638 |
| 9 | 0.076306 | 0.251792 |
| 10 | 0.070241 | 0.250426 |
{'eval_loss': 0.25042590498924255, 'eval_runtime': 2.2513, 'eval_samples_per_second': 1368.102, 'eval_steps_per_second': 43.086, 'epoch': 10.0}
Save the Fine-Tuned LoRA-RoBERTa¶
After fine-tuning completes, we merge the LoRA adapters into the base model and save the merged model along with the tokenizer.
save_dir = "./finetuned_roberta_lora_model"
model = model.merge_and_unload()
model.save_pretrained(save_dir)
tokenizer.save_pretrained(save_dir)
('./finetuned_roberta_lora_model\\tokenizer_config.json',
'./finetuned_roberta_lora_model\\tokenizer.json')
Load the Fine-Tuned LoRA-RoBERTa¶
path = "./finetuned_roberta_lora_model"
tokenizer = AutoTokenizer.from_pretrained(path)
model = AutoModelForSequenceClassification.from_pretrained(path).to(device)
model.eval()
RobertaForSequenceClassification(
(roberta): RobertaModel(
(embeddings): RobertaEmbeddings(
(word_embeddings): Embedding(50265, 768, padding_idx=1)
(token_type_embeddings): Embedding(1, 768)
(LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
(position_embeddings): Embedding(514, 768, padding_idx=1)
)
(encoder): RobertaEncoder(
(layer): ModuleList(
(0-11): 12 x RobertaLayer(
(attention): RobertaAttention(
(self): RobertaSelfAttention(
(query): Linear(in_features=768, out_features=768, bias=True)
(key): Linear(in_features=768, out_features=768, bias=True)
(value): Linear(in_features=768, out_features=768, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
)
(output): RobertaSelfOutput(
(dense): Linear(in_features=768, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
)
(intermediate): RobertaIntermediate(
(dense): Linear(in_features=768, out_features=3072, bias=True)
(intermediate_act_fn): GELUActivation()
)
(output): RobertaOutput(
(dense): Linear(in_features=3072, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
)
)
)
)
(classifier): RobertaClassificationHead(
(dense): Linear(in_features=768, out_features=768, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
(out_proj): Linear(in_features=768, out_features=77, bias=True)
)
)
Evaluate the Fine-Tuned LoRA-RoBERTa on the Testing Dataset¶
Next, we generate predictions on the testing set from the fine-tuned LoRA-RoBERTa model, which we'll use to evaluate it and compare it against the MLP baseline.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
prediction_args = TrainingArguments(
output_dir="./temp_predictions",
per_device_eval_batch_size=32,
report_to="none",
)
predictor = Trainer(
model=model,
args=prediction_args,
processing_class=tokenizer,
data_collator=data_collator,
)
# Generate testing predictions
roberta_pred_output = predictor.predict(tokenized_test_set)
roberta_y_pred = np.argmax(roberta_pred_output.predictions, axis=1)
roberta_y_true = roberta_pred_output.label_ids
Classification Report: Fine-Tuned LoRA-RoBERTa¶
lora_roberta_accuracy = accuracy_score(roberta_y_true, roberta_y_pred)
lora_roberta_macro_f1 = f1_score(roberta_y_true, roberta_y_pred, average='macro')
lora_roberta_weighted_f1 = f1_score(roberta_y_true, roberta_y_pred, average='weighted')
lora_roberta_report = classification_report(roberta_y_true, roberta_y_pred, target_names=label_encoder.classes_)
print("=== Finetuned RoBERTa + LoRA Performance ===")
print(f"Overall Accuracy Score: {lora_roberta_accuracy:.4f}")
print(f"Macro F1: {lora_roberta_macro_f1:.4f}")
print(f"Weighted F1: {lora_roberta_weighted_f1:.4f}")
print("\nClassification report:")
print(lora_roberta_report)
=== Finetuned RoBERTa + LoRA Performance ===
Overall Accuracy Score: 0.9367
Macro F1: 0.9367
Weighted F1: 0.9367
Classification report:
precision recall f1-score support
Refund_not_showing_up 0.97 0.95 0.96 40
activate_my_card 0.97 0.97 0.97 40
age_limit 1.00 1.00 1.00 40
apple_pay_or_google_pay 1.00 1.00 1.00 40
atm_support 0.98 1.00 0.99 40
automatic_top_up 1.00 0.97 0.99 40
balance_not_updated_after_bank_transfer 0.82 0.78 0.79 40
balance_not_updated_after_cheque_or_cash_deposit 1.00 0.90 0.95 40
beneficiary_not_allowed 0.88 0.88 0.88 40
cancel_transfer 1.00 0.97 0.99 40
card_about_to_expire 0.97 0.97 0.97 40
card_acceptance 0.97 0.93 0.95 40
card_arrival 0.90 0.88 0.89 40
card_delivery_estimate 0.90 0.93 0.91 40
card_linking 1.00 1.00 1.00 40
card_not_working 0.88 0.95 0.92 40
card_payment_fee_charged 0.88 0.95 0.92 40
card_payment_not_recognised 0.92 0.90 0.91 40
card_payment_wrong_exchange_rate 0.97 0.95 0.96 40
card_swallowed 0.97 0.88 0.92 40
cash_withdrawal_charge 0.95 0.95 0.95 40
cash_withdrawal_not_recognised 0.88 0.95 0.92 40
change_pin 0.93 1.00 0.96 40
compromised_card 0.90 0.93 0.91 40
contactless_not_working 1.00 0.93 0.96 40
country_support 0.93 1.00 0.96 40
declined_card_payment 0.81 0.95 0.87 40
declined_cash_withdrawal 0.82 1.00 0.90 40
declined_transfer 0.97 0.75 0.85 40
direct_debit_payment_not_recognised 0.94 0.85 0.89 40
disposable_card_limits 0.93 0.93 0.93 40
edit_personal_details 1.00 1.00 1.00 40
exchange_charge 1.00 0.90 0.95 40
exchange_rate 0.91 1.00 0.95 40
exchange_via_app 0.91 0.97 0.94 40
extra_charge_on_statement 0.95 0.97 0.96 40
failed_transfer 0.88 0.93 0.90 40
fiat_currency_support 0.90 0.93 0.91 40
get_disposable_virtual_card 0.94 0.85 0.89 40
get_physical_card 0.97 0.97 0.97 40
getting_spare_card 0.97 0.97 0.97 40
getting_virtual_card 0.83 0.97 0.90 40
lost_or_stolen_card 0.84 0.95 0.89 40
lost_or_stolen_phone 0.97 0.95 0.96 40
order_physical_card 0.92 0.90 0.91 40
passcode_forgotten 0.98 1.00 0.99 40
pending_card_payment 0.97 0.95 0.96 40
pending_cash_withdrawal 0.97 0.95 0.96 40
pending_top_up 0.93 0.95 0.94 40
pending_transfer 0.86 0.80 0.83 40
pin_blocked 0.97 0.90 0.94 40
receiving_money 0.93 0.93 0.93 40
request_refund 0.93 0.97 0.95 40
reverted_card_payment? 0.86 0.90 0.88 40
supported_cards_and_currencies 0.88 0.95 0.92 40
terminate_account 0.98 1.00 0.99 40
top_up_by_bank_transfer_charge 0.90 0.95 0.93 40
top_up_by_card_charge 0.95 0.95 0.95 40
top_up_by_cash_or_cheque 0.93 0.95 0.94 40
top_up_failed 0.93 0.93 0.93 40
top_up_limits 1.00 1.00 1.00 40
top_up_reverted 0.97 0.85 0.91 40
topping_up_by_card 0.89 0.82 0.86 40
transaction_charged_twice 0.95 1.00 0.98 40
transfer_fee_charged 1.00 0.90 0.95 40
transfer_into_account 0.92 0.90 0.91 40
transfer_not_received_by_recipient 0.82 0.90 0.86 40
transfer_timing 0.86 0.90 0.88 40
unable_to_verify_identity 1.00 0.95 0.97 40
verify_my_identity 0.95 0.95 0.95 40
verify_source_of_funds 1.00 1.00 1.00 40
verify_top_up 1.00 1.00 1.00 40
virtual_card_not_working 1.00 0.88 0.93 40
visa_or_mastercard 1.00 0.90 0.95 40
why_verify_identity 0.93 0.97 0.95 40
wrong_amount_of_cash_received 1.00 0.93 0.96 40
wrong_exchange_rate_for_cash_withdrawal 0.97 0.95 0.96 40
accuracy 0.94 3080
macro avg 0.94 0.94 0.94 3080
weighted avg 0.94 0.94 0.94 3080
Compare Performances¶
Next we compare the performance metrics of the baseline MLP against the fine-tuned LoRA-RoBERTa by re-generating the classification reports (as dictionaries) and building a comparison DataFrame.
mlp_report_dict = classification_report(mlp_true, mlp_preds, target_names=encoder.classes_, output_dict=True)
roberta_report_dict = classification_report(roberta_y_true, roberta_y_pred, target_names=label_encoder.classes_, output_dict=True)
# Extract per-class metrics from classification reports
def extract_class_metrics(report_dict):
class_metrics = {}
for intent, metrics in report_dict.items():
# Skip aggregate metrics (accuracy, macro avg, weighted avg)
if intent not in ['accuracy', 'macro avg', 'weighted avg']:
class_metrics[intent] = {
'precision': metrics['precision'],
'recall': metrics['recall'],
'f1-score': metrics['f1-score'],
'support': metrics['support']
}
return pd.DataFrame(class_metrics).T
# Create DataFrames for each model
mlp_df = extract_class_metrics(mlp_report_dict)
roberta_df = extract_class_metrics(roberta_report_dict)
# Create comparison DataFrame (optionally add recall/precision scores)
comparison_df = pd.DataFrame({
'MLP_Precision': mlp_df['precision'],
'RoBERTa_Precision': roberta_df['precision'],
'MLP_Recall': mlp_df['recall'],
'RoBERTa_Recall': roberta_df['recall'],
'MLP_F1': mlp_df['f1-score'],
'RoBERTa_F1': roberta_df['f1-score'],
'Precision_Diff': roberta_df['precision'] - mlp_df['precision'],
'Recall_Diff': roberta_df['recall'] - mlp_df['recall'],
'F1_Diff': roberta_df['f1-score'] - mlp_df['f1-score'],
})
# Sort by F1 difference
comparison_df = comparison_df.sort_values('F1_Diff', ascending=False)
comparison_df.head(10)
| | MLP_Precision | RoBERTa_Precision | MLP_Recall | RoBERTa_Recall | MLP_F1 | RoBERTa_F1 | Precision_Diff | Recall_Diff | F1_Diff |
|---|---|---|---|---|---|---|---|---|---|
| verify_my_identity | 0.821429 | 0.950000 | 0.575 | 0.950 | 0.676471 | 0.950000 | 0.128571 | 0.375 | 0.273529 |
| contactless_not_working | 0.756757 | 1.000000 | 0.700 | 0.925 | 0.727273 | 0.961039 | 0.243243 | 0.225 | 0.233766 |
| why_verify_identity | 0.612903 | 0.928571 | 0.950 | 0.975 | 0.745098 | 0.951220 | 0.315668 | 0.025 | 0.206121 |
| card_not_working | 0.631579 | 0.883721 | 0.900 | 0.950 | 0.742268 | 0.915663 | 0.252142 | 0.050 | 0.173395 |
| unable_to_verify_identity | 0.906250 | 1.000000 | 0.725 | 0.950 | 0.805556 | 0.974359 | 0.093750 | 0.225 | 0.168803 |
| card_acceptance | 0.714286 | 0.973684 | 0.875 | 0.925 | 0.786517 | 0.948718 | 0.259398 | 0.050 | 0.162201 |
| failed_transfer | 0.693878 | 0.880952 | 0.850 | 0.925 | 0.764045 | 0.902439 | 0.187075 | 0.075 | 0.138394 |
| top_up_failed | 0.698113 | 0.925000 | 0.925 | 0.925 | 0.795699 | 0.925000 | 0.226887 | 0.000 | 0.129301 |
| transfer_not_received_by_recipient | 0.680851 | 0.818182 | 0.800 | 0.900 | 0.735632 | 0.857143 | 0.137331 | 0.100 | 0.121511 |
| top_up_by_bank_transfer_charge | 0.906250 | 0.904762 | 0.725 | 0.950 | 0.805556 | 0.926829 | -0.001488 | 0.225 | 0.121274 |
print("====================== 10 Highest F1 Diff ======================")
print(comparison_df[['MLP_F1', 'RoBERTa_F1', 'F1_Diff']].head(10))
print()
print("==================== 10 Lowest F1 Diff =====================")
print(comparison_df[['MLP_F1', 'RoBERTa_F1', 'F1_Diff']].tail(10))
====================== 10 Highest F1 Diff ======================
MLP_F1 RoBERTa_F1 F1_Diff
verify_my_identity 0.676471 0.950000 0.273529
contactless_not_working 0.727273 0.961039 0.233766
why_verify_identity 0.745098 0.951220 0.206121
card_not_working 0.742268 0.915663 0.173395
unable_to_verify_identity 0.805556 0.974359 0.168803
card_acceptance 0.786517 0.948718 0.162201
failed_transfer 0.764045 0.902439 0.138394
top_up_failed 0.795699 0.925000 0.129301
transfer_not_received_by_recipient 0.735632 0.857143 0.121511
top_up_by_bank_transfer_charge 0.805556 0.926829 0.121274
==================== 10 Lowest F1 Diff =====================
MLP_F1 RoBERTa_F1 F1_Diff
getting_virtual_card 0.886076 0.896552 0.010476
request_refund 0.947368 0.951220 0.003851
top_up_reverted 0.906667 0.906667 0.000000
lost_or_stolen_phone 0.962963 0.962025 -0.000938
visa_or_mastercard 0.948718 0.947368 -0.001350
cash_withdrawal_charge 0.961039 0.950000 -0.011039
card_about_to_expire 0.987654 0.975000 -0.012654
pending_cash_withdrawal 0.987342 0.962025 -0.025316
reverted_card_payment? 0.906977 0.878049 -0.028928
beneficiary_not_allowed 0.962963 0.875000 -0.087963
Performance Comparison: MLP vs. the Fine-tuned LoRA-RoBERTa¶
Both models achieve fairly strong performance on the Banking77 dataset.
The MLP establishes a strong baseline with an overall accuracy of 87.79%. As expected, the fine-tuned LoRA-RoBERTa outperforms the MLP across all performance metrics, and the gains in overall accuracy and macro/weighted F1 are substantial:
| Model | Accuracy | Macro F1 | Weighted F1 |
|---|---|---|---|
| MLP (baseline) | 0.8779 | 0.8782 | 0.8782 |
| LoRA-RoBERTa | 0.9367 | 0.9367 | 0.9367 |
Macro F1 Scores¶
Since Banking77 is a multi-class task (77 intents) with a balanced testing set (40 samples per class), macro F1 is our primary metric for comparing models. Macro F1 is the unweighted average of the per-class F1 scores, weighting each class equally regardless of its frequency.
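Macro F1 can be verified directly from the per-class scores. A minimal sketch with toy labels (hypothetical, not Banking77 data), also computing weighted F1 to show how the two averages diverge on imbalanced data:

```python
import numpy as np
from sklearn.metrics import f1_score

# Toy imbalanced labels: class 0 is common, class 2 is rare
y_true = [0, 0, 0, 0, 1, 1, 2]
y_pred = [0, 0, 0, 1, 1, 1, 0]

# One F1 score per class, regardless of class frequency
per_class = f1_score(y_true, y_pred, average=None)

# Macro F1 is simply the unweighted mean of those per-class scores
macro = per_class.mean()
assert np.isclose(macro, f1_score(y_true, y_pred, average='macro'))

# Weighted F1, by contrast, scales each class by its support
weighted = f1_score(y_true, y_pred, average='weighted')
print(f"macro={macro:.4f}, weighted={weighted:.4f}")
```

On Banking77's balanced testing set (40 samples per class), macro and weighted F1 coincide, which is why the two scores above are both 0.9367.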
The fine-tuned LoRA-RoBERTa achieves a 5.85-percentage-point gain in macro F1 over the MLP (93.67% vs. 87.82%), with improvement spread across nearly all 77 intent classes.
This matters in production, where:
- Less frequent but high-stakes queries such as compromised_card and lost_or_stolen_card are identified more reliably.
- Customers get a consistent experience across all types of banking queries.
Intent-Level F1 Score Performance¶
The largest F1 score improvements center on two themes:
Identity Verification
- verify_my_identity: LoRA-RoBERTa (95.00%) outperforming the MLP (67.65%).
- why_verify_identity: LoRA-RoBERTa (95.12%) outperforming the MLP (74.51%).
- unable_to_verify_identity: LoRA-RoBERTa (97.44%) outperforming the MLP (80.56%).
Card Issues
- contactless_not_working: LoRA-RoBERTa (96.10%) outperforming the MLP (72.73%).
- card_not_working: LoRA-RoBERTa (91.57%) outperforming the MLP (74.23%).
Observing intents where F1 scores did not improve:
- Six intents had small F1 losses (<3%) when moving from the MLP to the LoRA-RoBERTa model, while one intent (beneficiary_not_allowed) saw a larger loss (~8.8%).
- Even so, both models perform well on each of these seven intents (all F1 scores ≥ 87.5%).
- The minor MLP advantages are likely due to random variation, slight overfitting, or the added complexity of the LoRA-RoBERTa model.
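The regressions described above can be isolated programmatically from the comparison table. A minimal sketch, run here on a small hypothetical slice of F1 differences rather than the notebook's full comparison_df:

```python
import pandas as pd

# Hypothetical per-intent F1 differences (RoBERTa minus MLP)
comparison = pd.DataFrame(
    {'F1_Diff': [0.27, 0.00, -0.011, -0.088]},
    index=['verify_my_identity', 'top_up_reverted',
           'cash_withdrawal_charge', 'beneficiary_not_allowed'],
)

# Intents where the fine-tuned model lost ground
regressions = comparison[comparison['F1_Diff'] < 0]

# Split small (<3%) from larger (>=3%) losses
small = regressions[regressions['F1_Diff'] > -0.03]
large = regressions[regressions['F1_Diff'] <= -0.03]
print(list(small.index))  # ['cash_withdrawal_charge']
print(list(large.index))  # ['beneficiary_not_allowed']
```

Applying the same filter to the full comparison_df reproduces the six small regressions and the single larger one.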
Summary¶
- RoBERTa + LoRA achieved 93.67% accuracy and 93.67% macro F1, strong performance on the 77-class Banking77 classification task.
- The baseline MLP achieved 87.79% accuracy and 87.82% macro F1, demonstrating that a simpler traditional architecture can still deliver solid results.
- Despite the longer training time, the 5.85-percentage-point gain in macro F1 justifies the additional complexity of transformer-based models for production deployment.