Model Info

This is still in development stage and a stable model will be released soon

MTEB (Eng v2)

MTEB English / Models Param. Mean(Task) Mean(Type) Class. Clust. Pair Class. Rerank. Retri. STS Summ.
multilingual-e5-large-instruct 0.6B 65.53 61.21 75.54 49.89 86.24 48.74 53.47 84.72 29.89
NV-Embed-v2 7.8B 69.81 65.00 87.19 47.66 88.69 49.61 62.84 83.82 35.21
GritLM-7B 7.2B 67.07 63.22 81.25 50.82 87.29 49.59 54.95 83.03 35.65
gte-Qwen2-1.5B-instruct 1.5B 67.20 63.26 85.84 53.54 87.52 49.25 50.25 82.51 33.94
stella_en_1.5B_v5 1.5B 69.43 65.32 89.38 57.06 88.02 50.19 52.42 83.27 36.91
gte-Qwen2-7B-instruct 7.6B 70.72 65.77 88.52 58.97 85.9 50.47 58.09 82.69 35.74
gemini-embedding-exp-03-07 - 73.3 67.67 90.05 59.39 87.7 48.59 64.35 85.29 38.28
Qwen3-Embedding-0.6B 0.6B 70.70 64.88 85.76 54.05 84.37 48.18 61.83 86.57 33.43
Qwen3-Embedding-4B 4B 74.60 68.10 89.84 57.51 87.01 50.76 68.46 88.72 34.39
Qwen3-Embedding-8B 8B 75.22 68.71 90.43 58.57 87.52 51.56 69.44 88.58 34.83
Tarka-Embedding-V1-Beta 0.350M 65.30 59.63 84.89 53.26 74.17 45.75 51.80 79.04 28.50

Sentence Transformers Usage

# Requires transformers>=4.51.0
# Requires sentence-transformers>=2.7.0

from sentence_transformers import SentenceTransformer

# Load the model
model = SentenceTransformer("Tarka-Labs/Tarka-Embedding-V1-Beta")

# We recommend enabling flash_attention_2 for better acceleration and memory saving,
# together with setting `padding_side` to "left":
# model = SentenceTransformer(
#     "Tarka-Labs/Tarka-Embedding-V1-Beta",
#     model_kwargs={"attn_implementation": "flash_attention_2", "device_map": "auto"},
#     tokenizer_kwargs={"padding_side": "left"},
# )

# The queries and documents to embed
queries = [
    "What is the capital of China?",
    "Explain gravity",
]
documents = [
    "The capital of China is Beijing.",
    "Gravity is a force that attracts two bodies towards each other. It gives weight to physical objects and is responsible for the movement of planets around the sun.",
]

# Encode the queries and documents. Note that queries benefit from using a prompt
# Here we use the prompt called "query" stored under `model.prompts`, but you can
# also pass your own prompt via the `prompt` argument
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)

# Compute the (cosine) similarity between the query and document embeddings
similarity = model.similarity(query_embeddings, document_embeddings)
print(similarity)

# tensor([[0.9079, 0.3945],
#       [0.3406, 0.7091]])

Transformers Usage

# Requires transformers>=4.51.0

import torch
import torch.nn.functional as F

from torch import Tensor
from transformers import AutoTokenizer, AutoModel


def last_token_pool(last_hidden_states: Tensor,
                 attention_mask: Tensor) -> Tensor:
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    else:
        sequence_lengths = attention_mask.sum(dim=1) - 1
        batch_size = last_hidden_states.shape[0]
        return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]


def get_detailed_instruct(task_description: str, query: str) -> str:
    return f'Instruct: {task_description}\nQuery:{query}'

# Each query must come with a one-sentence instruction that describes the task
task = 'Given a web search query, retrieve relevant passages that answer the query'

queries = [
    get_detailed_instruct(task, 'What is the capital of China?'),
    get_detailed_instruct(task, 'Explain gravity')
]
# No need to add instruction for retrieval documents
documents = [
    "The capital of China is Beijing.",
    "Gravity is a force that attracts two bodies towards each other. It gives weight to physical objects and is responsible for the movement of planets around the sun."
]
input_texts = queries + documents

tokenizer = AutoTokenizer.from_pretrained('Tarka-Labs/Tarka-Embedding-V1-Beta', padding_side='left')
model = AutoModel.from_pretrained('Tarka-Labs/Tarka-Embedding-V1-Beta')

# We recommend enabling flash_attention_2 for better acceleration and memory saving.
# model = AutoModel.from_pretrained('Tarka-Labs/Tarka-Embedding-V1-Beta', attn_implementation="flash_attention_2", torch_dtype=torch.float16).cuda()

max_length = 8192

# Tokenize the input texts
batch_dict = tokenizer(
    input_texts,
    padding=True,
    truncation=True,
    max_length=max_length,
    return_tensors="pt",
)
batch_dict.to(model.device)
outputs = model(**batch_dict)
embeddings = last_token_pool(outputs.last_hidden_state, batch_dict['attention_mask'])

# normalize embeddings
embeddings = F.normalize(embeddings, p=2, dim=1)
scores = (embeddings[:2] @ embeddings[2:].T)
print(scores.tolist())
# [[0.8379101753234863, 0.2661917507648468], [0.20272496342658997, 0.7120912075042725]]

vLLM Usage

# Requires vllm>=0.8.5
import torch
import vllm
from vllm import LLM

def get_detailed_instruct(task_description: str, query: str) -> str:
    return f'Instruct: {task_description}\nQuery:{query}'

# Each query must come with a one-sentence instruction that describes the task
task = 'Given a web search query, retrieve relevant passages that answer the query'

queries = [
    get_detailed_instruct(task, 'What is the capital of China?'),
    get_detailed_instruct(task, 'Explain gravity')
]
# No need to add instruction for retrieval documents
documents = [
    "The capital of China is Beijing.",
    "Gravity is a force that attracts two bodies towards each other. It gives weight to physical objects and is responsible for the movement of planets around the sun."
]
input_texts = queries + documents

model = LLM(model="Tarka-Labs/Tarka-Embedding-V1-Beta", task="embed")

outputs = model.embed(input_texts)
embeddings = torch.tensor([o.outputs.embedding for o in outputs])
scores = (embeddings[:2] @ embeddings[2:].T)
print(scores.tolist())
# [[0.8431930541992188, 0.2617242932319641], [0.19912847876548767, 0.6638712286949158]]
Downloads last month
20
Inference Providers NEW
This model isn't deployed by any Inference Provider. ๐Ÿ™‹ Ask for provider support