diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..52373fe24473b1aa44333d318f578ae6bf04b49b 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/config.json b/config.json new file mode 100644 index 0000000000000000000000000000000000000000..363208fced8415af83c73a9ba7b548ef7dfc672f --- /dev/null +++ b/config.json @@ -0,0 +1,40 @@ +{ + "architectures": [ + "Cohere2Model" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 5, + "cache_implementation": "hybrid", + "eos_token_id": 255001, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 12288, + "initializer_range": 0.02, + "intermediate_size": 36864, + "layer_norm_eps": 1e-05, + "logit_scale": 0.25, + "max_position_embeddings": 262144, + "model_type": "cohere2", + "num_attention_heads": 96, + "num_hidden_layers": 64, + "num_key_value_heads": 8, + "order_of_interleaved_layers": "local_attn_first", + "pad_token_id": 0, + "position_embedding_type": "rope_gptj", + "rope_scaling": null, + "rope_theta": 50000, + "rotary_pct": 1.0, + "sliding_window": 4096, + "sliding_window_pattern": 4, + "torch_dtype": "bfloat16", + "transformers_version": "4.50.0.dev0", + "unsloth_fixed": true, + "unsloth_version": "2025.3.18", + "use_cache": true, + "use_embedding_sharing": true, + "use_gated_activation": true, + "use_parallel_block": true, + "use_parallel_embedding": true, + "vocab_size": 256000 +} \ No newline at end of file diff --git a/model-00001-of-00049.safetensors b/model-00001-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..51830b008b9f67bbac9ce5fccf98fecca050cb22 --- /dev/null +++ b/model-00001-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8b510a976376c61d1387063aec33104a962b451416d69d901089758bb00d9e8 +size 6291456144 diff --git a/model-00002-of-00049.safetensors b/model-00002-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5b1661d425c3665d0eb5ce15feb30ee295088a39 --- /dev/null +++ b/model-00002-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ee4d6108410f26afb7f9bf191c775a1c2d1802f42ac09dbe3dd705e92ff33bc +size 4932527624 diff --git a/model-00003-of-00049.safetensors b/model-00003-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a47f4016f4200d797ef67b6afc9a83be96660848 --- /dev/null +++ b/model-00003-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b945a46b4d21d462aa69d62ef3ebea5255c4f23acd0cd81689436abda8cabc69 +size 4278215728 diff --git a/model-00004-of-00049.safetensors b/model-00004-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3a6e76031f24860f344bb76439bb7a41b75ac08c --- /dev/null +++ b/model-00004-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ff60fae9e444c8d13fa463ca77c27a3fb470b4b832c78bb984aa0e21df3ecbc +size 4932552312 diff --git a/model-00005-of-00049.safetensors b/model-00005-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..12e70ae149334ce5ab1c02313813134d941352d7 --- /dev/null +++ b/model-00005-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87d8ce0025b4d2dedb56513a20589ebb380f969d28fcb8d551183c50d54cdd23 +size 4278215728 diff --git a/model-00006-of-00049.safetensors b/model-00006-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a25b7e8e97013f7c283307361be08f678ab166fe --- /dev/null +++ b/model-00006-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c7330fbcd7b03c1dfcad709c762a20eeee1bedff8213b146d7798f745857e31 +size 4278215728 diff --git a/model-00007-of-00049.safetensors b/model-00007-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..77a4a08c566234e8a52319369cd2e0bae216b6fe --- /dev/null +++ b/model-00007-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b6283b23450d1ef0383a62c3b48e9ca271969a656457847d1c70f1b30c65605 +size 4932552312 diff --git a/model-00008-of-00049.safetensors b/model-00008-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..def4c7f3bc8887bc2abdad247fc5d0a0ca5d92c0 --- /dev/null +++ b/model-00008-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac5944eda2cbe29452347a682cc2c4c2b7d7936ccf0ad1dea65d13bc5ef138a8 +size 4278215728 diff --git a/model-00009-of-00049.safetensors b/model-00009-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d14f4e6e887ea2777db31f334712d5fe10b670c9 --- /dev/null +++ b/model-00009-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:215cafb7d6b1c87c036e5841f9a993983d1d36ea5f85eebb1f4e5eddec33ab6b +size 4278215744 diff --git a/model-00010-of-00049.safetensors b/model-00010-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2be741ac54988f67ec34d0ee833b9119be0e5e34 --- /dev/null +++ b/model-00010-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:670be36e373c95efd2514c79779dc2b89bb59dc690c7c0a708611b876dc22eb6 +size 4932552328 diff --git a/model-00011-of-00049.safetensors b/model-00011-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a24491c188187069e64c303b9bdac3a410f4727b --- /dev/null +++ b/model-00011-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8962ae6eb195dbf12c5cce1e2417bb2e80e2c9cbdbc4736ea80431438903fac3 +size 4278215736 diff --git a/model-00012-of-00049.safetensors b/model-00012-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..961cd62c93140be700ef4d4ae64841aa0964f2f8 --- /dev/null +++ b/model-00012-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a75ad5cffd3818834a65c17d1faa3be77d34793c30f15ed2208435a14de439e +size 4278215736 diff --git a/model-00013-of-00049.safetensors b/model-00013-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0a3181736e0683435b3452e6a69dfc0d85e301d7 --- /dev/null +++ b/model-00013-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77ffdb83ffdb30ff49e59070d5b8fa5775df11cd0245bf7b3c3279b757365ee4 +size 4932552328 diff --git a/model-00014-of-00049.safetensors b/model-00014-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2a9fdcd40fda130fc5ee842724668951d89bd04f --- /dev/null +++ b/model-00014-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1abf708cfda881b946ca58012d818c7b7dbde534415b2b72e4058fbe2686e19 +size 4278215736 diff --git a/model-00015-of-00049.safetensors b/model-00015-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a1f81f4fb873081b7fe70b006eb410fa18e51612 --- /dev/null +++ b/model-00015-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:614c02d2f8b0d05c3d833eb8de3f93fdf5a58dcdbcb9b33853e772971db4d21d +size 4278215736 diff --git a/model-00016-of-00049.safetensors b/model-00016-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0e146c891548ad4269c9cf5db141c82558109353 --- /dev/null +++ b/model-00016-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e9964277a4d1d8e37cc17eac504495b454b4a1054359f3075480fade054be7e +size 4932552328 diff --git a/model-00017-of-00049.safetensors b/model-00017-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d587a3b4243d90157723dec92e7964ec72252d78 --- /dev/null +++ b/model-00017-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d5b6546a58ba6b3bea16afa950fc361917195ceef0eb04c52d934cf22be9568 +size 4278215736 diff --git a/model-00018-of-00049.safetensors b/model-00018-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..25d40d866bacb539bf7aeb91531ce19166761484 --- /dev/null +++ b/model-00018-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e8f133cd5293a2e7284400233cf0211aa4439e1f995162b2c0ccd003ba97164 +size 4278215736 diff --git a/model-00019-of-00049.safetensors b/model-00019-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..aca011f17766f641f69bfc4c63948f616e272665 --- /dev/null +++ b/model-00019-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6246a2a5a174aa5318897d28c3fb5f1ddc29fad7f16ecea5c8bc010ab87e45da +size 4932552328 diff --git a/model-00020-of-00049.safetensors b/model-00020-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..49794485533a5bc8a963adffd0679ae7514de00b --- /dev/null +++ b/model-00020-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0708c9750b66db3d61a6341612f3497df056ceef455574f144298c207799bda1 +size 4278215736 diff --git a/model-00021-of-00049.safetensors b/model-00021-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6fdbec6a50426d5d598b5968503c1c87b6dada66 --- /dev/null +++ b/model-00021-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b19914180674fbd94c5f9efb1ab4ad23b31f5946a6306059e123319f70f28d1 +size 4278215736 diff --git a/model-00022-of-00049.safetensors b/model-00022-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..96715eaa3f789432594bd9f94d1c9d88d3e42c0d --- /dev/null +++ b/model-00022-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f158dc29f92ec25ac99d062a90d396459f58ce9853e70875f9084371cf10ab59 +size 4932552328 diff --git a/model-00023-of-00049.safetensors b/model-00023-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..404c4d0a510ba848c72231cf009861883cd0d7bb --- /dev/null +++ b/model-00023-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:522e659c533b0bf5fc65899fb322283bc1ce0efdc63f89a193916853a81ccb57 +size 4278215736 diff --git a/model-00024-of-00049.safetensors b/model-00024-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a76351bb79d9acd9896dd3b6d359f58ff50106a3 --- /dev/null +++ b/model-00024-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:563b8fe9be0b2a7cce3e5f814b426db4200e0ec59c110f70f4ed6da5f9e83c9f +size 4278215736 diff --git a/model-00025-of-00049.safetensors b/model-00025-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..edc906616f74d3434a1a7a13da800b82a6d78b4d --- /dev/null +++ b/model-00025-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c247318570d39ff19f9943aa2ad2bb04795744a23bd6225117444d317d2879f +size 4932552328 diff --git a/model-00026-of-00049.safetensors b/model-00026-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c43dab5f951214afa0be45c65bda1c09c66a6524 --- /dev/null +++ b/model-00026-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21ac488b1c2bedf418e1a5d3ae57de524a46cd0ebaa75debd15624a6537d6bb0 +size 4278215736 diff --git a/model-00027-of-00049.safetensors b/model-00027-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..542a4ef1ec54d162512195de35fe8a005df0bf63 --- /dev/null +++ b/model-00027-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:993698106aac5f41693dd24b8cb41aba538465e120f036f5c6b6b70fd4604957 +size 4278215736 diff --git a/model-00028-of-00049.safetensors b/model-00028-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b20f5e5aa343d90209a66cd2e7919ae594765815 --- /dev/null +++ b/model-00028-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86b919cd73988eb8307dceebccb843ac53c300f21acf9021be0963ab2c75060f +size 4932552328 diff --git a/model-00029-of-00049.safetensors b/model-00029-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..92e5128bb6be6acceb6eefe59fca91f39d4bd5ef --- /dev/null +++ b/model-00029-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:794c7f079b996e136ad7289e5a6e0391a394806ef1cfe1291318f697c25117ec +size 4278215736 diff --git a/model-00030-of-00049.safetensors b/model-00030-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3a7b6be32a77069caf11eb8497785c7b7c87e6f3 --- /dev/null +++ b/model-00030-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:191a21628c2f79d145fb6433a9e54970b9da5361e5c2ef6b391d4e7837f5597a +size 4278215736 diff --git a/model-00031-of-00049.safetensors b/model-00031-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f3914fe146890abd9d1dbdd4f0c94e5ea9cff2fa --- /dev/null +++ b/model-00031-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6879c30ee17f41da17f4fe5a5a63ed8a94eb3e152365c4708ba938fe134aef7f +size 4932552328 diff --git a/model-00032-of-00049.safetensors b/model-00032-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7a46b8ccdb87885a999764da4a76cb34769fec6c --- /dev/null +++ b/model-00032-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8cd0a11c38c35cb25537299dd2134371397a4ec8ea58834307211e0fa0f59e45 +size 4278215736 diff --git a/model-00033-of-00049.safetensors b/model-00033-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8df0d6f0c8c5e4c8cef8549e0344e99d82ef5342 --- /dev/null +++ b/model-00033-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a69d8380303ee44a141da7663bdd0d3450859c8c3d23d241ac0a4e0df0c40fb1 +size 4278215736 diff --git a/model-00034-of-00049.safetensors b/model-00034-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2f344f2574a963aebfdad350d8b27daa6fce14a1 --- /dev/null +++ b/model-00034-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17abf9665eb0d94d85db4861f8cddcc349b43413265714f3a139fb3cdfb26d0e +size 4932552328 diff --git a/model-00035-of-00049.safetensors b/model-00035-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7f4e681c792fc58f7468aec11170ff8440695ee2 --- /dev/null +++ b/model-00035-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14011308805a1afc5c90d70d6960c163dd5794d97e9003a6cc113c67e75d94df +size 4278215736 diff --git a/model-00036-of-00049.safetensors b/model-00036-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..00914cb1161e5798b9956f2dec5a052b5044e3a5 --- /dev/null +++ b/model-00036-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e26649bcc957f593131f272785d4bc3a0f8e091f2cd26b554a577d0ac4080df +size 4278215736 diff --git a/model-00037-of-00049.safetensors b/model-00037-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..16c7e2b32aee73740c8aa422e291385e519993fd --- /dev/null +++ b/model-00037-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf1dbbdbbeb1a8efd19c9b7c341618a45cbd8380d3f4faf8a6beb75e6b9aaff2 +size 4932552328 diff --git a/model-00038-of-00049.safetensors b/model-00038-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..72249a2eef0df27f789f8c760512e105929f08f0 --- /dev/null +++ b/model-00038-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35b01ab759be29632dd244006903fb7dcc04f624c4fe695ecc3069264038f944 +size 4278215736 diff --git a/model-00039-of-00049.safetensors b/model-00039-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2e1c6e99ba9fa95582ebcf38f8f3179d47a73248 --- /dev/null +++ b/model-00039-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26588dcab9870f762679b07cd234f19ea6fd3f5e7422b97e0f868e2f7ad6ae49 +size 4278215736 diff --git a/model-00040-of-00049.safetensors b/model-00040-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3d4288c7c326f597d5f5544dd92e66345d0dbe53 --- /dev/null +++ b/model-00040-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12e52a9050c007879bc127d9d1efe24358abced40d4844d517980adf22aaf706 +size 4932552328 diff --git a/model-00041-of-00049.safetensors b/model-00041-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d739f6776a5120467a047d4127b35b605509e309 --- /dev/null +++ b/model-00041-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6f6959108a28c5599cbb975b1f3a46eac68d7aecc750337a63a3b645896daf6 +size 4278215736 diff --git a/model-00042-of-00049.safetensors b/model-00042-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..809a781a0a3e2e3c3a5d40f801eb60fa2fbd0857 --- /dev/null +++ b/model-00042-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4fb5533884a81a9153eebf9cf05a21172b86564b3bc45285e72db8eb0d01d8a3 +size 4278215736 diff --git a/model-00043-of-00049.safetensors b/model-00043-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a25f4d3c3d5d60ead2e560b7e87dcf2d791e85f0 --- /dev/null +++ b/model-00043-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49e7314eccfa70a7a9e8a958ff2b8bf9a7fe78246bbe0416344c70389810d782 +size 4932552328 diff --git a/model-00044-of-00049.safetensors b/model-00044-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0ca4774741c0ceffcf8f5510de1de50d8e1a7987 --- /dev/null +++ b/model-00044-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0c2ebca97413f58be101ba40863e0c28648aecb96eb42eb32bad04de24fe642 +size 4278215736 diff --git a/model-00045-of-00049.safetensors b/model-00045-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..93e39175814065859f427db34a1a745ddb2a381a --- /dev/null +++ b/model-00045-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c452c6707c37021f09ab02be75fdca1ac21cc129d02f8fa36e7c4e30dae87d6 +size 4278215736 diff --git a/model-00046-of-00049.safetensors b/model-00046-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9fec0ba80fca1dc22eb98a51ee06413c5007ea4e --- /dev/null +++ b/model-00046-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f59be0bfde6fbe198b5b5aecc4cee1678a8b36195f1a916ad84299bff489b81 +size 4932552328 diff --git a/model-00047-of-00049.safetensors b/model-00047-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1f7fb0b3d2d79dfa90365632f662ec3647841f1c --- /dev/null +++ b/model-00047-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c88d99bf82a1073862e2b820fa0983b589e510e34d0325323cbc2a5a267b24df +size 4278215736 diff --git a/model-00048-of-00049.safetensors b/model-00048-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b5969397b9585c1fd7ecaad14a7ef08a77ae1c84 --- /dev/null +++ b/model-00048-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4fd602689d69f4941e5f8a20d90876d7edd143fdb44c489474b00469e8b5e6d5 +size 4278215736 diff --git a/model-00049-of-00049.safetensors b/model-00049-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1bd42ada391f41b7c2f178e225482a7e0b22d111 --- /dev/null +++ b/model-00049-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a41d789c38919957e24371aab5ce76cf1341bf2db019d99aacff751d63a657d8 +size 4278265088 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..847668e4f8456c745ef364f9eb8ce175139cd29b --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,521 @@ +{ + "metadata": { + "total_size": 222115160064 + }, + "weight_map": { + "model.embed_tokens.weight": "model-00001-of-00049.safetensors", + "model.layers.0.input_layernorm.weight": "model-00002-of-00049.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00002-of-00049.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00002-of-00049.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00002-of-00049.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00002-of-00049.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00002-of-00049.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00002-of-00049.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00002-of-00049.safetensors", + "model.layers.1.input_layernorm.weight": "model-00003-of-00049.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00003-of-00049.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00002-of-00049.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00003-of-00049.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00002-of-00049.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00002-of-00049.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00002-of-00049.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00002-of-00049.safetensors", + "model.layers.10.input_layernorm.weight": "model-00010-of-00049.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00010-of-00049.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00009-of-00049.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00009-of-00049.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00009-of-00049.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00009-of-00049.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00009-of-00049.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00009-of-00049.safetensors", + "model.layers.11.input_layernorm.weight": "model-00010-of-00049.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00010-of-00049.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00010-of-00049.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00010-of-00049.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00010-of-00049.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00010-of-00049.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00010-of-00049.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00010-of-00049.safetensors", + "model.layers.12.input_layernorm.weight": "model-00011-of-00049.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00011-of-00049.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00011-of-00049.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00011-of-00049.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00010-of-00049.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00010-of-00049.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00010-of-00049.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00010-of-00049.safetensors", + "model.layers.13.input_layernorm.weight": "model-00012-of-00049.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00012-of-00049.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00011-of-00049.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00012-of-00049.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00011-of-00049.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00011-of-00049.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00011-of-00049.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00011-of-00049.safetensors", + "model.layers.14.input_layernorm.weight": "model-00013-of-00049.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00013-of-00049.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00012-of-00049.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00012-of-00049.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00012-of-00049.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00012-of-00049.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00012-of-00049.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00012-of-00049.safetensors", + "model.layers.15.input_layernorm.weight": "model-00013-of-00049.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00013-of-00049.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00013-of-00049.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00013-of-00049.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00013-of-00049.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00013-of-00049.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00013-of-00049.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00013-of-00049.safetensors", + "model.layers.16.input_layernorm.weight": "model-00014-of-00049.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00014-of-00049.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00014-of-00049.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00014-of-00049.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00013-of-00049.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00013-of-00049.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00013-of-00049.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00013-of-00049.safetensors", + "model.layers.17.input_layernorm.weight": "model-00015-of-00049.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00015-of-00049.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00014-of-00049.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00015-of-00049.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00014-of-00049.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00014-of-00049.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00014-of-00049.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00014-of-00049.safetensors", + "model.layers.18.input_layernorm.weight": "model-00016-of-00049.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00016-of-00049.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00015-of-00049.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00015-of-00049.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00015-of-00049.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00015-of-00049.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00015-of-00049.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00015-of-00049.safetensors", + "model.layers.19.input_layernorm.weight": "model-00016-of-00049.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00016-of-00049.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00016-of-00049.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00016-of-00049.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00016-of-00049.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00016-of-00049.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00016-of-00049.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00016-of-00049.safetensors", + "model.layers.2.input_layernorm.weight": "model-00004-of-00049.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00004-of-00049.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00003-of-00049.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00003-of-00049.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00003-of-00049.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00003-of-00049.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00003-of-00049.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00003-of-00049.safetensors", + "model.layers.20.input_layernorm.weight": "model-00017-of-00049.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00017-of-00049.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00017-of-00049.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00017-of-00049.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00016-of-00049.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00016-of-00049.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00016-of-00049.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00016-of-00049.safetensors", + "model.layers.21.input_layernorm.weight": "model-00018-of-00049.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00018-of-00049.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00017-of-00049.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00018-of-00049.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00017-of-00049.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00017-of-00049.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00017-of-00049.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00017-of-00049.safetensors", + "model.layers.22.input_layernorm.weight": "model-00019-of-00049.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00019-of-00049.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00018-of-00049.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00018-of-00049.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00018-of-00049.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00018-of-00049.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00018-of-00049.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00018-of-00049.safetensors", + "model.layers.23.input_layernorm.weight": "model-00019-of-00049.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00019-of-00049.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00019-of-00049.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00019-of-00049.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00019-of-00049.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00019-of-00049.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00019-of-00049.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00019-of-00049.safetensors", + "model.layers.24.input_layernorm.weight": "model-00020-of-00049.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00020-of-00049.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00020-of-00049.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00020-of-00049.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00019-of-00049.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00019-of-00049.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00019-of-00049.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00019-of-00049.safetensors", + "model.layers.25.input_layernorm.weight": "model-00021-of-00049.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00021-of-00049.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00020-of-00049.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00021-of-00049.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00020-of-00049.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00020-of-00049.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00020-of-00049.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00020-of-00049.safetensors", + "model.layers.26.input_layernorm.weight": "model-00022-of-00049.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00022-of-00049.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00021-of-00049.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00021-of-00049.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00021-of-00049.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00021-of-00049.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00021-of-00049.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00021-of-00049.safetensors", + "model.layers.27.input_layernorm.weight": "model-00022-of-00049.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00022-of-00049.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00022-of-00049.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00022-of-00049.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00022-of-00049.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00022-of-00049.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00022-of-00049.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00022-of-00049.safetensors", + "model.layers.28.input_layernorm.weight": "model-00023-of-00049.safetensors", + "model.layers.28.mlp.down_proj.weight": "model-00023-of-00049.safetensors", + "model.layers.28.mlp.gate_proj.weight": "model-00023-of-00049.safetensors", + "model.layers.28.mlp.up_proj.weight": "model-00023-of-00049.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00022-of-00049.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00022-of-00049.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00022-of-00049.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00022-of-00049.safetensors", + "model.layers.29.input_layernorm.weight": "model-00024-of-00049.safetensors", + "model.layers.29.mlp.down_proj.weight": "model-00024-of-00049.safetensors", + "model.layers.29.mlp.gate_proj.weight": "model-00023-of-00049.safetensors", + "model.layers.29.mlp.up_proj.weight": "model-00024-of-00049.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00023-of-00049.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00023-of-00049.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00023-of-00049.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00023-of-00049.safetensors", + "model.layers.3.input_layernorm.weight": "model-00004-of-00049.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00004-of-00049.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00004-of-00049.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00004-of-00049.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00004-of-00049.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00004-of-00049.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00004-of-00049.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00004-of-00049.safetensors", + "model.layers.30.input_layernorm.weight": "model-00025-of-00049.safetensors", + "model.layers.30.mlp.down_proj.weight": "model-00025-of-00049.safetensors", + "model.layers.30.mlp.gate_proj.weight": "model-00024-of-00049.safetensors", + "model.layers.30.mlp.up_proj.weight": "model-00024-of-00049.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00024-of-00049.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00024-of-00049.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00024-of-00049.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00024-of-00049.safetensors", + "model.layers.31.input_layernorm.weight": "model-00025-of-00049.safetensors", + "model.layers.31.mlp.down_proj.weight": "model-00025-of-00049.safetensors", + "model.layers.31.mlp.gate_proj.weight": "model-00025-of-00049.safetensors", + "model.layers.31.mlp.up_proj.weight": "model-00025-of-00049.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00025-of-00049.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00025-of-00049.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00025-of-00049.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00025-of-00049.safetensors", + "model.layers.32.input_layernorm.weight": "model-00026-of-00049.safetensors", + "model.layers.32.mlp.down_proj.weight": "model-00026-of-00049.safetensors", + "model.layers.32.mlp.gate_proj.weight": "model-00026-of-00049.safetensors", + "model.layers.32.mlp.up_proj.weight": "model-00026-of-00049.safetensors", + "model.layers.32.self_attn.k_proj.weight": "model-00025-of-00049.safetensors", + "model.layers.32.self_attn.o_proj.weight": "model-00025-of-00049.safetensors", + "model.layers.32.self_attn.q_proj.weight": "model-00025-of-00049.safetensors", + "model.layers.32.self_attn.v_proj.weight": "model-00025-of-00049.safetensors", + "model.layers.33.input_layernorm.weight": "model-00027-of-00049.safetensors", + "model.layers.33.mlp.down_proj.weight": "model-00027-of-00049.safetensors", + "model.layers.33.mlp.gate_proj.weight": "model-00026-of-00049.safetensors", + "model.layers.33.mlp.up_proj.weight": "model-00027-of-00049.safetensors", + "model.layers.33.self_attn.k_proj.weight": "model-00026-of-00049.safetensors", + "model.layers.33.self_attn.o_proj.weight": "model-00026-of-00049.safetensors", + "model.layers.33.self_attn.q_proj.weight": "model-00026-of-00049.safetensors", + "model.layers.33.self_attn.v_proj.weight": "model-00026-of-00049.safetensors", + "model.layers.34.input_layernorm.weight": "model-00028-of-00049.safetensors", + "model.layers.34.mlp.down_proj.weight": "model-00028-of-00049.safetensors", + "model.layers.34.mlp.gate_proj.weight": "model-00027-of-00049.safetensors", + "model.layers.34.mlp.up_proj.weight": "model-00027-of-00049.safetensors", + "model.layers.34.self_attn.k_proj.weight": "model-00027-of-00049.safetensors", + "model.layers.34.self_attn.o_proj.weight": "model-00027-of-00049.safetensors", + "model.layers.34.self_attn.q_proj.weight": "model-00027-of-00049.safetensors", + "model.layers.34.self_attn.v_proj.weight": "model-00027-of-00049.safetensors", + "model.layers.35.input_layernorm.weight": "model-00028-of-00049.safetensors", + "model.layers.35.mlp.down_proj.weight": "model-00028-of-00049.safetensors", + "model.layers.35.mlp.gate_proj.weight": "model-00028-of-00049.safetensors", + "model.layers.35.mlp.up_proj.weight": "model-00028-of-00049.safetensors", + "model.layers.35.self_attn.k_proj.weight": "model-00028-of-00049.safetensors", + "model.layers.35.self_attn.o_proj.weight": "model-00028-of-00049.safetensors", + "model.layers.35.self_attn.q_proj.weight": "model-00028-of-00049.safetensors", + "model.layers.35.self_attn.v_proj.weight": "model-00028-of-00049.safetensors", + "model.layers.36.input_layernorm.weight": "model-00029-of-00049.safetensors", + "model.layers.36.mlp.down_proj.weight": "model-00029-of-00049.safetensors", + "model.layers.36.mlp.gate_proj.weight": "model-00029-of-00049.safetensors", + "model.layers.36.mlp.up_proj.weight": "model-00029-of-00049.safetensors", + "model.layers.36.self_attn.k_proj.weight": "model-00028-of-00049.safetensors", + "model.layers.36.self_attn.o_proj.weight": "model-00028-of-00049.safetensors", + "model.layers.36.self_attn.q_proj.weight": "model-00028-of-00049.safetensors", + "model.layers.36.self_attn.v_proj.weight": "model-00028-of-00049.safetensors", + "model.layers.37.input_layernorm.weight": "model-00030-of-00049.safetensors", + "model.layers.37.mlp.down_proj.weight": "model-00030-of-00049.safetensors", + "model.layers.37.mlp.gate_proj.weight": "model-00029-of-00049.safetensors", + "model.layers.37.mlp.up_proj.weight": "model-00030-of-00049.safetensors", + "model.layers.37.self_attn.k_proj.weight": "model-00029-of-00049.safetensors", + "model.layers.37.self_attn.o_proj.weight": "model-00029-of-00049.safetensors", + "model.layers.37.self_attn.q_proj.weight": "model-00029-of-00049.safetensors", + "model.layers.37.self_attn.v_proj.weight": "model-00029-of-00049.safetensors", + "model.layers.38.input_layernorm.weight": "model-00031-of-00049.safetensors", + "model.layers.38.mlp.down_proj.weight": "model-00031-of-00049.safetensors", + "model.layers.38.mlp.gate_proj.weight": "model-00030-of-00049.safetensors", + "model.layers.38.mlp.up_proj.weight": "model-00030-of-00049.safetensors", + "model.layers.38.self_attn.k_proj.weight": "model-00030-of-00049.safetensors", + "model.layers.38.self_attn.o_proj.weight": "model-00030-of-00049.safetensors", + "model.layers.38.self_attn.q_proj.weight": "model-00030-of-00049.safetensors", + "model.layers.38.self_attn.v_proj.weight": "model-00030-of-00049.safetensors", + "model.layers.39.input_layernorm.weight": "model-00031-of-00049.safetensors", + "model.layers.39.mlp.down_proj.weight": "model-00031-of-00049.safetensors", + "model.layers.39.mlp.gate_proj.weight": "model-00031-of-00049.safetensors", + "model.layers.39.mlp.up_proj.weight": "model-00031-of-00049.safetensors", + "model.layers.39.self_attn.k_proj.weight": "model-00031-of-00049.safetensors", + "model.layers.39.self_attn.o_proj.weight": "model-00031-of-00049.safetensors", + "model.layers.39.self_attn.q_proj.weight": "model-00031-of-00049.safetensors", + "model.layers.39.self_attn.v_proj.weight": "model-00031-of-00049.safetensors", + "model.layers.4.input_layernorm.weight": "model-00005-of-00049.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00005-of-00049.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00005-of-00049.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00005-of-00049.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00004-of-00049.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00004-of-00049.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00004-of-00049.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00004-of-00049.safetensors", + "model.layers.40.input_layernorm.weight": "model-00032-of-00049.safetensors", + "model.layers.40.mlp.down_proj.weight": "model-00032-of-00049.safetensors", + "model.layers.40.mlp.gate_proj.weight": "model-00032-of-00049.safetensors", + "model.layers.40.mlp.up_proj.weight": "model-00032-of-00049.safetensors", + "model.layers.40.self_attn.k_proj.weight": "model-00031-of-00049.safetensors", + "model.layers.40.self_attn.o_proj.weight": "model-00031-of-00049.safetensors", + "model.layers.40.self_attn.q_proj.weight": "model-00031-of-00049.safetensors", + "model.layers.40.self_attn.v_proj.weight": "model-00031-of-00049.safetensors", + "model.layers.41.input_layernorm.weight": "model-00033-of-00049.safetensors", + "model.layers.41.mlp.down_proj.weight": "model-00033-of-00049.safetensors", + "model.layers.41.mlp.gate_proj.weight": "model-00032-of-00049.safetensors", + "model.layers.41.mlp.up_proj.weight": "model-00033-of-00049.safetensors", + "model.layers.41.self_attn.k_proj.weight": "model-00032-of-00049.safetensors", + "model.layers.41.self_attn.o_proj.weight": "model-00032-of-00049.safetensors", + "model.layers.41.self_attn.q_proj.weight": "model-00032-of-00049.safetensors", + "model.layers.41.self_attn.v_proj.weight": "model-00032-of-00049.safetensors", + "model.layers.42.input_layernorm.weight": "model-00034-of-00049.safetensors", + "model.layers.42.mlp.down_proj.weight": "model-00034-of-00049.safetensors", + "model.layers.42.mlp.gate_proj.weight": "model-00033-of-00049.safetensors", + "model.layers.42.mlp.up_proj.weight": "model-00033-of-00049.safetensors", + "model.layers.42.self_attn.k_proj.weight": "model-00033-of-00049.safetensors", + "model.layers.42.self_attn.o_proj.weight": "model-00033-of-00049.safetensors", + "model.layers.42.self_attn.q_proj.weight": "model-00033-of-00049.safetensors", + "model.layers.42.self_attn.v_proj.weight": "model-00033-of-00049.safetensors", + "model.layers.43.input_layernorm.weight": "model-00034-of-00049.safetensors", + "model.layers.43.mlp.down_proj.weight": "model-00034-of-00049.safetensors", + "model.layers.43.mlp.gate_proj.weight": "model-00034-of-00049.safetensors", + "model.layers.43.mlp.up_proj.weight": "model-00034-of-00049.safetensors", + "model.layers.43.self_attn.k_proj.weight": "model-00034-of-00049.safetensors", + "model.layers.43.self_attn.o_proj.weight": "model-00034-of-00049.safetensors", + "model.layers.43.self_attn.q_proj.weight": "model-00034-of-00049.safetensors", + "model.layers.43.self_attn.v_proj.weight": "model-00034-of-00049.safetensors", + "model.layers.44.input_layernorm.weight": "model-00035-of-00049.safetensors", + "model.layers.44.mlp.down_proj.weight": "model-00035-of-00049.safetensors", + "model.layers.44.mlp.gate_proj.weight": "model-00035-of-00049.safetensors", + "model.layers.44.mlp.up_proj.weight": "model-00035-of-00049.safetensors", + "model.layers.44.self_attn.k_proj.weight": "model-00034-of-00049.safetensors", + "model.layers.44.self_attn.o_proj.weight": "model-00034-of-00049.safetensors", + "model.layers.44.self_attn.q_proj.weight": "model-00034-of-00049.safetensors", + "model.layers.44.self_attn.v_proj.weight": "model-00034-of-00049.safetensors", + "model.layers.45.input_layernorm.weight": "model-00036-of-00049.safetensors", + "model.layers.45.mlp.down_proj.weight": "model-00036-of-00049.safetensors", + "model.layers.45.mlp.gate_proj.weight": "model-00035-of-00049.safetensors", + "model.layers.45.mlp.up_proj.weight": "model-00036-of-00049.safetensors", + "model.layers.45.self_attn.k_proj.weight": "model-00035-of-00049.safetensors", + "model.layers.45.self_attn.o_proj.weight": "model-00035-of-00049.safetensors", + "model.layers.45.self_attn.q_proj.weight": "model-00035-of-00049.safetensors", + "model.layers.45.self_attn.v_proj.weight": "model-00035-of-00049.safetensors", + "model.layers.46.input_layernorm.weight": "model-00037-of-00049.safetensors", + "model.layers.46.mlp.down_proj.weight": "model-00037-of-00049.safetensors", + "model.layers.46.mlp.gate_proj.weight": "model-00036-of-00049.safetensors", + "model.layers.46.mlp.up_proj.weight": "model-00036-of-00049.safetensors", + "model.layers.46.self_attn.k_proj.weight": "model-00036-of-00049.safetensors", + "model.layers.46.self_attn.o_proj.weight": "model-00036-of-00049.safetensors", + "model.layers.46.self_attn.q_proj.weight": "model-00036-of-00049.safetensors", + "model.layers.46.self_attn.v_proj.weight": "model-00036-of-00049.safetensors", + "model.layers.47.input_layernorm.weight": "model-00037-of-00049.safetensors", + "model.layers.47.mlp.down_proj.weight": "model-00037-of-00049.safetensors", + "model.layers.47.mlp.gate_proj.weight": "model-00037-of-00049.safetensors", + "model.layers.47.mlp.up_proj.weight": "model-00037-of-00049.safetensors", + "model.layers.47.self_attn.k_proj.weight": "model-00037-of-00049.safetensors", + "model.layers.47.self_attn.o_proj.weight": "model-00037-of-00049.safetensors", + "model.layers.47.self_attn.q_proj.weight": "model-00037-of-00049.safetensors", + "model.layers.47.self_attn.v_proj.weight": "model-00037-of-00049.safetensors", + "model.layers.48.input_layernorm.weight": "model-00038-of-00049.safetensors", + "model.layers.48.mlp.down_proj.weight": "model-00038-of-00049.safetensors", + "model.layers.48.mlp.gate_proj.weight": "model-00038-of-00049.safetensors", + "model.layers.48.mlp.up_proj.weight": "model-00038-of-00049.safetensors", + "model.layers.48.self_attn.k_proj.weight": "model-00037-of-00049.safetensors", + "model.layers.48.self_attn.o_proj.weight": "model-00037-of-00049.safetensors", + "model.layers.48.self_attn.q_proj.weight": "model-00037-of-00049.safetensors", + "model.layers.48.self_attn.v_proj.weight": "model-00037-of-00049.safetensors", + "model.layers.49.input_layernorm.weight": "model-00039-of-00049.safetensors", + "model.layers.49.mlp.down_proj.weight": "model-00039-of-00049.safetensors", + "model.layers.49.mlp.gate_proj.weight": "model-00038-of-00049.safetensors", + "model.layers.49.mlp.up_proj.weight": "model-00039-of-00049.safetensors", + "model.layers.49.self_attn.k_proj.weight": "model-00038-of-00049.safetensors", + "model.layers.49.self_attn.o_proj.weight": "model-00038-of-00049.safetensors", + "model.layers.49.self_attn.q_proj.weight": "model-00038-of-00049.safetensors", + "model.layers.49.self_attn.v_proj.weight": "model-00038-of-00049.safetensors", + "model.layers.5.input_layernorm.weight": "model-00006-of-00049.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00006-of-00049.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00005-of-00049.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00006-of-00049.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00005-of-00049.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00005-of-00049.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00005-of-00049.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00005-of-00049.safetensors", + "model.layers.50.input_layernorm.weight": "model-00040-of-00049.safetensors", + "model.layers.50.mlp.down_proj.weight": "model-00040-of-00049.safetensors", + "model.layers.50.mlp.gate_proj.weight": "model-00039-of-00049.safetensors", + "model.layers.50.mlp.up_proj.weight": "model-00039-of-00049.safetensors", + "model.layers.50.self_attn.k_proj.weight": "model-00039-of-00049.safetensors", + "model.layers.50.self_attn.o_proj.weight": "model-00039-of-00049.safetensors", + "model.layers.50.self_attn.q_proj.weight": "model-00039-of-00049.safetensors", + "model.layers.50.self_attn.v_proj.weight": "model-00039-of-00049.safetensors", + "model.layers.51.input_layernorm.weight": "model-00040-of-00049.safetensors", + "model.layers.51.mlp.down_proj.weight": "model-00040-of-00049.safetensors", + "model.layers.51.mlp.gate_proj.weight": "model-00040-of-00049.safetensors", + "model.layers.51.mlp.up_proj.weight": "model-00040-of-00049.safetensors", + "model.layers.51.self_attn.k_proj.weight": "model-00040-of-00049.safetensors", + "model.layers.51.self_attn.o_proj.weight": "model-00040-of-00049.safetensors", + "model.layers.51.self_attn.q_proj.weight": "model-00040-of-00049.safetensors", + "model.layers.51.self_attn.v_proj.weight": "model-00040-of-00049.safetensors", + "model.layers.52.input_layernorm.weight": "model-00041-of-00049.safetensors", + "model.layers.52.mlp.down_proj.weight": "model-00041-of-00049.safetensors", + "model.layers.52.mlp.gate_proj.weight": "model-00041-of-00049.safetensors", + "model.layers.52.mlp.up_proj.weight": "model-00041-of-00049.safetensors", + "model.layers.52.self_attn.k_proj.weight": "model-00040-of-00049.safetensors", + "model.layers.52.self_attn.o_proj.weight": "model-00040-of-00049.safetensors", + "model.layers.52.self_attn.q_proj.weight": "model-00040-of-00049.safetensors", + "model.layers.52.self_attn.v_proj.weight": "model-00040-of-00049.safetensors", + "model.layers.53.input_layernorm.weight": "model-00042-of-00049.safetensors", + "model.layers.53.mlp.down_proj.weight": "model-00042-of-00049.safetensors", + "model.layers.53.mlp.gate_proj.weight": "model-00041-of-00049.safetensors", + "model.layers.53.mlp.up_proj.weight": "model-00042-of-00049.safetensors", + "model.layers.53.self_attn.k_proj.weight": "model-00041-of-00049.safetensors", + "model.layers.53.self_attn.o_proj.weight": "model-00041-of-00049.safetensors", + "model.layers.53.self_attn.q_proj.weight": "model-00041-of-00049.safetensors", + "model.layers.53.self_attn.v_proj.weight": "model-00041-of-00049.safetensors", + "model.layers.54.input_layernorm.weight": "model-00043-of-00049.safetensors", + "model.layers.54.mlp.down_proj.weight": "model-00043-of-00049.safetensors", + "model.layers.54.mlp.gate_proj.weight": "model-00042-of-00049.safetensors", + "model.layers.54.mlp.up_proj.weight": "model-00042-of-00049.safetensors", + "model.layers.54.self_attn.k_proj.weight": "model-00042-of-00049.safetensors", + "model.layers.54.self_attn.o_proj.weight": "model-00042-of-00049.safetensors", + "model.layers.54.self_attn.q_proj.weight": "model-00042-of-00049.safetensors", + "model.layers.54.self_attn.v_proj.weight": "model-00042-of-00049.safetensors", + "model.layers.55.input_layernorm.weight": "model-00043-of-00049.safetensors", + "model.layers.55.mlp.down_proj.weight": "model-00043-of-00049.safetensors", + "model.layers.55.mlp.gate_proj.weight": "model-00043-of-00049.safetensors", + "model.layers.55.mlp.up_proj.weight": "model-00043-of-00049.safetensors", + "model.layers.55.self_attn.k_proj.weight": "model-00043-of-00049.safetensors", + "model.layers.55.self_attn.o_proj.weight": "model-00043-of-00049.safetensors", + "model.layers.55.self_attn.q_proj.weight": "model-00043-of-00049.safetensors", + "model.layers.55.self_attn.v_proj.weight": "model-00043-of-00049.safetensors", + "model.layers.56.input_layernorm.weight": "model-00044-of-00049.safetensors", + "model.layers.56.mlp.down_proj.weight": "model-00044-of-00049.safetensors", + "model.layers.56.mlp.gate_proj.weight": "model-00044-of-00049.safetensors", + "model.layers.56.mlp.up_proj.weight": "model-00044-of-00049.safetensors", + "model.layers.56.self_attn.k_proj.weight": "model-00043-of-00049.safetensors", + "model.layers.56.self_attn.o_proj.weight": "model-00043-of-00049.safetensors", + "model.layers.56.self_attn.q_proj.weight": "model-00043-of-00049.safetensors", + "model.layers.56.self_attn.v_proj.weight": "model-00043-of-00049.safetensors", + "model.layers.57.input_layernorm.weight": "model-00045-of-00049.safetensors", + "model.layers.57.mlp.down_proj.weight": "model-00045-of-00049.safetensors", + "model.layers.57.mlp.gate_proj.weight": "model-00044-of-00049.safetensors", + "model.layers.57.mlp.up_proj.weight": "model-00045-of-00049.safetensors", + "model.layers.57.self_attn.k_proj.weight": "model-00044-of-00049.safetensors", + "model.layers.57.self_attn.o_proj.weight": "model-00044-of-00049.safetensors", + "model.layers.57.self_attn.q_proj.weight": "model-00044-of-00049.safetensors", + "model.layers.57.self_attn.v_proj.weight": "model-00044-of-00049.safetensors", + "model.layers.58.input_layernorm.weight": "model-00046-of-00049.safetensors", + "model.layers.58.mlp.down_proj.weight": "model-00046-of-00049.safetensors", + "model.layers.58.mlp.gate_proj.weight": "model-00045-of-00049.safetensors", + "model.layers.58.mlp.up_proj.weight": "model-00045-of-00049.safetensors", + "model.layers.58.self_attn.k_proj.weight": "model-00045-of-00049.safetensors", + "model.layers.58.self_attn.o_proj.weight": "model-00045-of-00049.safetensors", + "model.layers.58.self_attn.q_proj.weight": "model-00045-of-00049.safetensors", + "model.layers.58.self_attn.v_proj.weight": "model-00045-of-00049.safetensors", + "model.layers.59.input_layernorm.weight": "model-00046-of-00049.safetensors", + "model.layers.59.mlp.down_proj.weight": "model-00046-of-00049.safetensors", + "model.layers.59.mlp.gate_proj.weight": "model-00046-of-00049.safetensors", + "model.layers.59.mlp.up_proj.weight": "model-00046-of-00049.safetensors", + "model.layers.59.self_attn.k_proj.weight": "model-00046-of-00049.safetensors", + "model.layers.59.self_attn.o_proj.weight": "model-00046-of-00049.safetensors", + "model.layers.59.self_attn.q_proj.weight": "model-00046-of-00049.safetensors", + "model.layers.59.self_attn.v_proj.weight": "model-00046-of-00049.safetensors", + "model.layers.6.input_layernorm.weight": "model-00007-of-00049.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00007-of-00049.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00006-of-00049.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00006-of-00049.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00006-of-00049.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00006-of-00049.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00006-of-00049.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00006-of-00049.safetensors", + "model.layers.60.input_layernorm.weight": "model-00047-of-00049.safetensors", + "model.layers.60.mlp.down_proj.weight": "model-00047-of-00049.safetensors", + "model.layers.60.mlp.gate_proj.weight": "model-00047-of-00049.safetensors", + "model.layers.60.mlp.up_proj.weight": "model-00047-of-00049.safetensors", + "model.layers.60.self_attn.k_proj.weight": "model-00046-of-00049.safetensors", + "model.layers.60.self_attn.o_proj.weight": "model-00046-of-00049.safetensors", + "model.layers.60.self_attn.q_proj.weight": "model-00046-of-00049.safetensors", + "model.layers.60.self_attn.v_proj.weight": "model-00046-of-00049.safetensors", + "model.layers.61.input_layernorm.weight": "model-00048-of-00049.safetensors", + "model.layers.61.mlp.down_proj.weight": "model-00048-of-00049.safetensors", + "model.layers.61.mlp.gate_proj.weight": "model-00047-of-00049.safetensors", + "model.layers.61.mlp.up_proj.weight": "model-00048-of-00049.safetensors", + "model.layers.61.self_attn.k_proj.weight": "model-00047-of-00049.safetensors", + "model.layers.61.self_attn.o_proj.weight": "model-00047-of-00049.safetensors", + "model.layers.61.self_attn.q_proj.weight": "model-00047-of-00049.safetensors", + "model.layers.61.self_attn.v_proj.weight": "model-00047-of-00049.safetensors", + "model.layers.62.input_layernorm.weight": "model-00049-of-00049.safetensors", + "model.layers.62.mlp.down_proj.weight": "model-00049-of-00049.safetensors", + "model.layers.62.mlp.gate_proj.weight": "model-00048-of-00049.safetensors", + "model.layers.62.mlp.up_proj.weight": "model-00048-of-00049.safetensors", + "model.layers.62.self_attn.k_proj.weight": "model-00048-of-00049.safetensors", + "model.layers.62.self_attn.o_proj.weight": "model-00048-of-00049.safetensors", + "model.layers.62.self_attn.q_proj.weight": "model-00048-of-00049.safetensors", + "model.layers.62.self_attn.v_proj.weight": "model-00048-of-00049.safetensors", + "model.layers.63.input_layernorm.weight": "model-00049-of-00049.safetensors", + "model.layers.63.mlp.down_proj.weight": "model-00049-of-00049.safetensors", + "model.layers.63.mlp.gate_proj.weight": "model-00049-of-00049.safetensors", + "model.layers.63.mlp.up_proj.weight": "model-00049-of-00049.safetensors", + "model.layers.63.self_attn.k_proj.weight": "model-00049-of-00049.safetensors", + "model.layers.63.self_attn.o_proj.weight": "model-00049-of-00049.safetensors", + "model.layers.63.self_attn.q_proj.weight": "model-00049-of-00049.safetensors", + "model.layers.63.self_attn.v_proj.weight": "model-00049-of-00049.safetensors", + "model.layers.7.input_layernorm.weight": "model-00007-of-00049.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00007-of-00049.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00007-of-00049.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00007-of-00049.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00007-of-00049.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00007-of-00049.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00007-of-00049.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00007-of-00049.safetensors", + "model.layers.8.input_layernorm.weight": "model-00008-of-00049.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00008-of-00049.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00008-of-00049.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00008-of-00049.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00007-of-00049.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00007-of-00049.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00007-of-00049.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00007-of-00049.safetensors", + "model.layers.9.input_layernorm.weight": "model-00009-of-00049.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00009-of-00049.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00008-of-00049.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00009-of-00049.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00008-of-00049.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00008-of-00049.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00008-of-00049.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00008-of-00049.safetensors", + "model.norm.weight": "model-00049-of-00049.safetensors" + } +} diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..a8eb9ef63af01dfa2c350573b543b14275370944 --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,30 @@ +{ + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|END_OF_TURN_TOKEN|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..3e13c779896a87c94c9e0df9482975c8b8485265 --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:953b2730d23ca19e7dca96f75f3e10b497bb679290b06d8981190bff2039fc72 +size 20124922 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..58a110fc1da0cacbac440922392f8b73d9b0a30e --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,351 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "add_prefix_space": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "5": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "6": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "7": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "255000": { + "content": "<|START_OF_TURN_TOKEN|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255001": { + "content": "<|END_OF_TURN_TOKEN|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "255002": { + "content": "<|YES_TOKEN|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255003": { + "content": "<|NO_TOKEN|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255004": { + "content": "<|GOOD_TOKEN|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255005": { + "content": "<|BAD_TOKEN|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255006": { + "content": "<|USER_TOKEN|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255007": { + "content": "<|CHATBOT_TOKEN|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255008": { + "content": "<|SYSTEM_TOKEN|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255009": { + "content": "<|USER_0_TOKEN|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255010": { + "content": "<|USER_1_TOKEN|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255011": { + "content": "<|USER_2_TOKEN|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255012": { + "content": "<|USER_3_TOKEN|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255013": { + "content": "<|USER_4_TOKEN|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255014": { + "content": "<|USER_5_TOKEN|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255015": { + "content": "<|USER_6_TOKEN|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255016": { + "content": "<|USER_7_TOKEN|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255017": { + "content": "<|USER_8_TOKEN|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255018": { + "content": "<|USER_9_TOKEN|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255019": { + "content": "<|START_THINKING|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255020": { + "content": "<|END_THINKING|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255021": { + "content": "<|START_RESPONSE|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "255022": { + "content": "<|END_RESPONSE|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "255023": { + "content": "<|START_ACTION|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255024": { + "content": "<|END_ACTION|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255025": { + "content": "<|START_TOOL_RESULT|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255026": { + "content": "<|END_TOOL_RESULT|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255027": { + "content": "<|EXTRA_8_TOKEN|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255028": { + "content": "<|NEW_FILE|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "255029": { + "content": "<|BEGINNING_OF_PREFIX_FIM_TOKEN|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255030": { + "content": "<|BEGINNING_OF_MIDDLE_FIM_TOKEN|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255031": { + "content": "<|BEGINNING_OF_SUFFIX_FIM_TOKEN|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255032": { + "content": "<|END_OF_MIDDLE_FIM_TOKEN|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "bos_token": "", + "chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ messages[0]['content'] + '\n\n' }}{% set loop_messages = messages[1:] %}{% else %}{{ 'Below are some instructions that describe some tasks. Write responses that appropriately complete each request.' + '\n\n' }}{% set loop_messages = messages %}{% endif %}{% for message in loop_messages %}{% if message['role'] == 'user' %}{{ '### Instruction:\n' + message['content'] + '\n\n' }}{% elif message['role'] == 'assistant' %}{{ '### Response:\n' + message['content'] + eos_token + '\n\n' }}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '### Response:\n' }}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "<|END_OF_TURN_TOKEN|>", + "extra_special_tokens": {}, + "legacy": true, + "merges_file": null, + "model_max_length": 262144, + "pad_token": "", + "padding_side": "right", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "CohereTokenizer", + "unk_token": "", + "use_default_system_prompt": false, + "vocab_file": null +}