deathbyknowledge committed
Commit 9aab04e · verified · Parent: 9bd752f

Upload tokenizer

.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
chat_template.jinja ADDED
@@ -0,0 +1,13 @@
+ {%- if messages[0]['role'] == 'system' %}
+ {{- '<|im_start|>system\n' + messages[0]['content'] + '<|im_end|>\n' }}
+ {%- else %}
+ {{- '<|im_start|>system\nThe assistant is AFM-4.5B, trained by Arcee AI, with 4.5 billion parameters. AFM is a deeply thoughtful, helpful assistant. The assistant is having a conversation with the user. The assistant\'s responses are calm, intelligent, and personable, always aiming to truly understand the user\'s intent. AFM thinks aloud, step by step, when solving problems or forming explanations, much like a careful, reflective thinker would. The assistant helps with sincerity and depth. If a topic invites introspection, curiosity, or broader insight, the assistant allows space for reflection — be open to nuance and complexity. The assistant is not robotic or overly formal; it speaks like a wise, thoughtful companion who cares about clarity and the human experience. If a topic is uncertain or depends on subjective interpretation, AFM explains the possibilities thoughtfully.<|im_end|>\n' }}
+ {%- endif %}
+ {%- for message in messages %}
+ {%- if not (message.role == 'system' and loop.first) %}
+ {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>\n' }}
+ {%- endif %}
+ {%- endfor %}
+ {%- if messages[-1]['role'] != 'assistant' %}
+ {{- '<|im_start|>assistant\n' }}
+ {%- endif %}
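
The template implements the ChatML convention: pass a caller-supplied system message through unchanged, otherwise fall back to the built-in AFM-4.5B system prompt, wrap every turn in <|im_start|>/<|im_end|> markers, and append an open assistant header whenever the last message is not already from the assistant. Below is a minimal rendering sketch, assuming the template file sits in a local checkout; it uses the standalone jinja2 package, which suffices here because the template needs no transformers-specific extensions.

# Sketch: render chat_template.jinja with plain Jinja2.
# Assumes the file is in the current directory.
from jinja2 import Environment, StrictUndefined

env = Environment(undefined=StrictUndefined)  # fail loudly on missing variables
with open("chat_template.jinja") as f:
    template = env.from_string(f.read())

messages = [{"role": "user", "content": "What is a tokenizer?"}]
print(template.render(messages=messages))
# No system message was supplied, so the default AFM prompt is injected:
#   <|im_start|>system
#   The assistant is AFM-4.5B, trained by Arcee AI, ...<|im_end|>
#   <|im_start|>user
#   What is a tokenizer?<|im_end|>
#   <|im_start|>assistant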
special_tokens_map.json ADDED
@@ -0,0 +1,23 @@
+ {
+   "bos_token": {
+     "content": "<|begin_of_text|>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "<|im_end|>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": {
+     "content": "<|finetune_right_pad_id|>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
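
These entries pin down which strings transformers treats as the BOS, EOS, and padding tokens; lstrip, rstrip, normalized, and single_word are all false, so each token matches only as an exact, unmodified string. A quick sketch of how the map surfaces on a loaded tokenizer (the repo id below is a placeholder, not taken from this commit):

# Sketch: special_tokens_map.json surfaces as tokenizer attributes.
# "user/AFM-4.5B" is a placeholder repo id.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("user/AFM-4.5B")
assert tok.bos_token == "<|begin_of_text|>"
assert tok.eos_token == "<|im_end|>"
assert tok.pad_token == "<|finetune_right_pad_id|>"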
tokenizer.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d48708c6021027e8fc6d5342e1498111d8e87aae8903319d3ead1fbdfc4a9125
+ size 17158115
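
Because of the .gitattributes rule added above, tokenizer.json is stored through Git LFS, so the repository itself tracks only this three-line pointer: the LFS spec version, the SHA-256 of the real file, and its size (about 17 MB). A sketch of fetching the real file and checking it against the pointer, again with a placeholder repo id:

# Sketch: download the LFS-backed tokenizer.json and verify it against
# the pointer's oid and size. "user/AFM-4.5B" is a placeholder repo id.
import hashlib
import os
from huggingface_hub import hf_hub_download

path = hf_hub_download(repo_id="user/AFM-4.5B", filename="tokenizer.json")
with open(path, "rb") as f:
    digest = hashlib.sha256(f.read()).hexdigest()
assert digest == "d48708c6021027e8fc6d5342e1498111d8e87aae8903319d3ead1fbdfc4a9125"
assert os.path.getsize(path) == 17158115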
tokenizer_config.json ADDED
@@ -0,0 +1,55 @@
+ {
+   "added_tokens_decoder": {
+     "128000": {
+       "content": "<|begin_of_text|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "128001": {
+       "content": "<|end_of_text|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "128002": {
+       "content": "<|im_start|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "128003": {
+       "content": "<|im_end|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "128004": {
+       "content": "<|finetune_right_pad_id|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "bos_token": "<|begin_of_text|>",
+   "clean_up_tokenization_spaces": true,
+   "eos_token": "<|im_end|>",
+   "extra_special_tokens": {},
+   "model_input_names": [
+     "input_ids",
+     "attention_mask"
+   ],
+   "model_max_length": 65536,
+   "pad_token": "<|finetune_right_pad_id|>",
+   "tokenizer_class": "PreTrainedTokenizerFast"
+ }
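
The config registers five special tokens at IDs 128000 through 128004 on top of the base vocabulary, caps sequences at a model_max_length of 65536, and loads everything through the fast (Rust-backed) PreTrainedTokenizerFast class. A sketch of what those settings look like at encode time, with the same placeholder repo id:

# Sketch: special-token IDs and padding behavior under this config.
# "user/AFM-4.5B" is a placeholder repo id.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("user/AFM-4.5B")
assert tok.convert_tokens_to_ids("<|begin_of_text|>") == 128000
assert tok.convert_tokens_to_ids("<|im_end|>") == 128003
assert tok.pad_token_id == 128004
assert tok.model_max_length == 65536

# padding=True pads the shorter sequence with <|finetune_right_pad_id|>
# (ID 128004); the attention_mask zeroes out the padded positions.
batch = tok(["hi", "a longer example sentence"], padding=True)
print(batch["input_ids"][0], batch["attention_mask"][0])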