redis/langcache-colbert-v1

@@ -5,7 +5,8 @@ license: apache-2.0
 tags:
 - colbert
 - PyLate
-- feature-extractiontext-classification
 - sentence-pair-classification
 - semantic-similarity
 - semantic-search
@@ -30,8 +31,8 @@ This is a [PyLate](https://github.com/lightonai/pylate) model finetuned from [li
 ### Model Description
 - **Model Type:** PyLate model
 - **Base model:** [lightonai/GTE-ModernColBERT-v1](https://huggingface.co/lightonai/GTE-ModernColBERT-v1) <!-- at revision 6605e431bed9b582d3eff7699911d2b64e8ccd3f -->
-- **Document Length:** 300 tokens
-- **Query Length:** 32 tokens
 - **Output Dimensionality:** 128 dimensions
 - **Similarity Function:** MaxSim
 - **Training Dataset:**
@@ -49,7 +50,7 @@ This is a [PyLate](https://github.com/lightonai/pylate) model finetuned from [li
 ```
 ColBERT(
-  (0): Transformer({'max_seq_length': 31, 'do_lower_case': False, 'architecture': 'ModernBertModel'})
   (1): Dense({'in_features': 768, 'out_features': 128, 'bias': False, 'activation_function': 'torch.nn.modules.linear.Identity', 'use_residual': False})
 )
 ```
@@ -224,10 +225,10 @@ You can finetune this model on your own dataset.
 * Size: 1,452,533 training samples
 * Columns: <code>anchor</code>, <code>positive</code>, and <code>negative_1</code>
 * Approximate statistics based on the first 1000 samples:
-  |         | anchor                                                                           | positive                                                                          | negative_1                                                                        |
-  |:--------|:---------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|
-  | type    | string                                                                           | string                                                                            | string                                                                            |
-  | details | <ul><li>min: 9 tokens</li><li>mean: 27.0 tokens</li><li>max: 32 tokens</li></ul> | <ul><li>min: 8 tokens</li><li>mean: 26.87 tokens</li><li>max: 32 tokens</li></ul> | <ul><li>min: 5 tokens</li><li>mean: 23.11 tokens</li><li>max: 32 tokens</li></ul> |
 * Samples:
   | anchor                                                                                                                                   | positive                                                                                                                                 | negative_1                                                                               |
   |:-----------------------------------------------------------------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------------|
@@ -244,10 +245,10 @@ You can finetune this model on your own dataset.
 * Size: 110,066 evaluation samples
 * Columns: <code>anchor</code>, <code>positive</code>, and <code>negative_1</code>
 * Approximate statistics based on the first 1000 samples:
-  |         | anchor                                                                            | positive                                                                          | negative_1                                                                        |
-  |:--------|:----------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|
-  | type    | string                                                                            | string                                                                            | string                                                                            |
-  | details | <ul><li>min: 5 tokens</li><li>mean: 24.87 tokens</li><li>max: 32 tokens</li></ul> | <ul><li>min: 5 tokens</li><li>mean: 24.55 tokens</li><li>max: 32 tokens</li></ul> | <ul><li>min: 6 tokens</li><li>mean: 19.51 tokens</li><li>max: 32 tokens</li></ul> |
 * Samples:
   | anchor                                                                                        | positive                                                                                      | negative_1                                                                                     |
   |:----------------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------------------|

 tags:
 - colbert
 - PyLate
+- feature-extraction
+- text-classification
 - sentence-pair-classification
 - semantic-similarity
 - semantic-search
 ### Model Description
 - **Model Type:** PyLate model
 - **Base model:** [lightonai/GTE-ModernColBERT-v1](https://huggingface.co/lightonai/GTE-ModernColBERT-v1) <!-- at revision 6605e431bed9b582d3eff7699911d2b64e8ccd3f -->
+- **Document Length:** 512 tokens
+- **Query Length:** 512 tokens
 - **Output Dimensionality:** 128 dimensions
 - **Similarity Function:** MaxSim
 - **Training Dataset:**
 ```
 ColBERT(
+  (0): Transformer({'max_seq_length': 511, 'do_lower_case': False, 'architecture': 'ModernBertModel'})
   (1): Dense({'in_features': 768, 'out_features': 128, 'bias': False, 'activation_function': 'torch.nn.modules.linear.Identity', 'use_residual': False})
 )
 ```
 * Size: 1,452,533 training samples
 * Columns: <code>anchor</code>, <code>positive</code>, and <code>negative_1</code>
 * Approximate statistics based on the first 1000 samples:
+  |         | anchor                                                                            | positive                                                                          | negative_1                                                                        |
+  |:--------|:----------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|
+  | type    | string                                                                            | string                                                                            | string                                                                            |
+  | details | <ul><li>min: 9 tokens</li><li>mean: 28.67 tokens</li><li>max: 79 tokens</li></ul> | <ul><li>min: 8 tokens</li><li>mean: 28.51 tokens</li><li>max: 57 tokens</li></ul> | <ul><li>min: 5 tokens</li><li>mean: 24.02 tokens</li><li>max: 50 tokens</li></ul> |
 * Samples:
   | anchor                                                                                                                                   | positive                                                                                                                                 | negative_1                                                                               |
   |:-----------------------------------------------------------------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------------|
 * Size: 110,066 evaluation samples
 * Columns: <code>anchor</code>, <code>positive</code>, and <code>negative_1</code>
 * Approximate statistics based on the first 1000 samples:
+  |         | anchor                                                                             | positive                                                                           | negative_1                                                                        |
+  |:--------|:-----------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|
+  | type    | string                                                                             | string                                                                             | string                                                                            |
+  | details | <ul><li>min: 5 tokens</li><li>mean: 26.68 tokens</li><li>max: 104 tokens</li></ul> | <ul><li>min: 5 tokens</li><li>mean: 26.34 tokens</li><li>max: 104 tokens</li></ul> | <ul><li>min: 6 tokens</li><li>mean: 20.39 tokens</li><li>max: 69 tokens</li></ul> |
 * Samples:
   | anchor                                                                                        | positive                                                                                      | negative_1                                                                                     |
   |:----------------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------------------|

config_sentence_transformers.json CHANGED Viewed

@@ -12,8 +12,8 @@
   "similarity_fn_name": "MaxSim",
   "query_prefix": "[Q] ",
   "document_prefix": "[D] ",
-  "query_length": 32,
-  "document_length": 300,
   "attend_to_expansion_tokens": false,
   "skiplist_words": [
     "!",

   "similarity_fn_name": "MaxSim",
   "query_prefix": "[Q] ",
   "document_prefix": "[D] ",
+  "query_length": 512,
+  "document_length": 512,
   "attend_to_expansion_tokens": false,
   "skiplist_words": [
     "!",

sentence_bert_config.json CHANGED Viewed

@@ -1,4 +1,4 @@
 {
-    "max_seq_length": 31,
     "do_lower_case": false
 }

 {
+    "max_seq_length": 511,
     "do_lower_case": false
 }

tokenizer.json CHANGED Viewed

@@ -2,7 +2,7 @@
   "version": "1.0",
   "truncation": {
     "direction": "Right",
-    "max_length": 31,
     "strategy": "LongestFirst",
     "stride": 0
   },

   "version": "1.0",
   "truncation": {
     "direction": "Right",
+    "max_length": 511,
     "strategy": "LongestFirst",
     "stride": 0
   },