Wednesday, January 21, 2026

A simple example of fine-tuning an embedding model

from sentence_transformers import SentenceTransformer, losses

from torch.utils.data import DataLoader


def prepare_training_data():
    """Return the domain-specific (query, document) pairs used for training.

    Each element is a 2-tuple of strings: the query text first, followed by
    the document text paired with it.
    """
    training_pairs = [
        ("What is EBITDA?", "EBITDA (Earnings Before Interest, Taxes..."),
        ("Explain capital expenditure", "Capital expenditure (CapEx) refers to..."),
        # ... thousands more pairs
    ]
    return training_pairs

def fine_tune_model(
    *,
    base_model: str = 'all-MiniLM-L6-v2',
    output_path: str = './fine_tuned_financial_model',
    epochs: int = 3,
    batch_size: int = 16,
    warmup_steps: int = 100,
):
    """Fine-tune a sentence-embedding model on the domain query/document pairs.

    Loads ``base_model``, trains it on the pairs returned by
    ``prepare_training_data()`` using ``MultipleNegativesRankingLoss``, saves
    the result to ``output_path`` and returns the trained model.

    Args:
        base_model: Name or path passed to ``SentenceTransformer`` to load the
            starting checkpoint.
        output_path: Directory the fine-tuned model is saved to.
        epochs: Number of passes over the training data.
        batch_size: Number of pairs per training batch.
        warmup_steps: Learning-rate warmup steps passed to ``model.fit``.

    Returns:
        The fine-tuned ``SentenceTransformer`` instance.

    Raises:
        ValueError: If ``prepare_training_data()`` returns no pairs.
    """
    # Imported here so the function is self-contained: the file-level import
    # only brings in SentenceTransformer and losses.
    from sentence_transformers import InputExample

    # Load base model
    model = SentenceTransformer(base_model)

    # Prepare training data. model.fit() collates each batch by reading the
    # `.texts` attribute of every item, so the raw (query, document) tuples
    # must be wrapped in InputExample; plain tuples fail at collation time.
    train_examples = [
        InputExample(texts=[query, document])
        for query, document in prepare_training_data()
    ]
    if not train_examples:
        # Fail with a clear message instead of the sampler's generic error.
        raise ValueError("prepare_training_data() returned no training pairs")

    train_dataloader = DataLoader(
        train_examples, shuffle=True, batch_size=batch_size
    )

    # Define loss function. This loss uses the other documents in a batch as
    # negatives for each query, so only positive pairs are needed.
    train_loss = losses.MultipleNegativesRankingLoss(model)

    # Train
    model.fit(
        train_objectives=[(train_dataloader, train_loss)],
        epochs=epochs,
        warmup_steps=warmup_steps,
    )

    model.save(output_path)
    return model

# Use fine-tuned model: load it from the same directory that
# fine_tune_model() writes with model.save(). Nothing in this snippet calls
# fine_tune_model(), so it must have been run beforehand for the path to exist.

embedding_model = SentenceTransformer('./fine_tuned_financial_model')


No comments:

Post a Comment