msr2000 committed on
Commit
ba658fb
·
verified ·
1 Parent(s): e91970e

Add files using upload-large-folder tool

Browse files
Files changed (50) hide show
  1. LICENSE +21 -0
  2. config.json +66 -0
  3. generation_config.json +9 -0
  4. model-00102-of-000163.safetensors +3 -0
  5. model-00103-of-000163.safetensors +3 -0
  6. model-00104-of-000163.safetensors +3 -0
  7. model-00105-of-000163.safetensors +3 -0
  8. model-00106-of-000163.safetensors +3 -0
  9. model-00107-of-000163.safetensors +3 -0
  10. model-00108-of-000163.safetensors +3 -0
  11. model-00109-of-000163.safetensors +3 -0
  12. model-00110-of-000163.safetensors +3 -0
  13. model-00111-of-000163.safetensors +3 -0
  14. model-00112-of-000163.safetensors +3 -0
  15. model-00113-of-000163.safetensors +3 -0
  16. model-00114-of-000163.safetensors +3 -0
  17. model-00115-of-000163.safetensors +3 -0
  18. model-00116-of-000163.safetensors +3 -0
  19. model-00117-of-000163.safetensors +3 -0
  20. model-00118-of-000163.safetensors +3 -0
  21. model-00119-of-000163.safetensors +3 -0
  22. model-00120-of-000163.safetensors +3 -0
  23. model-00121-of-000163.safetensors +3 -0
  24. model-00122-of-000163.safetensors +3 -0
  25. model-00123-of-000163.safetensors +3 -0
  26. model-00124-of-000163.safetensors +3 -0
  27. model-00125-of-000163.safetensors +3 -0
  28. model-00126-of-000163.safetensors +3 -0
  29. model-00127-of-000163.safetensors +3 -0
  30. model-00128-of-000163.safetensors +3 -0
  31. model-00129-of-000163.safetensors +3 -0
  32. model-00130-of-000163.safetensors +3 -0
  33. model-00131-of-000163.safetensors +3 -0
  34. model-00132-of-000163.safetensors +3 -0
  35. model-00133-of-000163.safetensors +3 -0
  36. model-00134-of-000163.safetensors +3 -0
  37. model-00135-of-000163.safetensors +3 -0
  38. model-00136-of-000163.safetensors +3 -0
  39. model-00137-of-000163.safetensors +3 -0
  40. model-00138-of-000163.safetensors +3 -0
  41. model-00139-of-000163.safetensors +3 -0
  42. model-00140-of-000163.safetensors +3 -0
  43. model-00141-of-000163.safetensors +3 -0
  44. model-00142-of-000163.safetensors +3 -0
  45. model-00143-of-000163.safetensors +3 -0
  46. model-00144-of-000163.safetensors +3 -0
  47. model-00145-of-000163.safetensors +3 -0
  48. model-00146-of-000163.safetensors +3 -0
  49. model-00147-of-000163.safetensors +3 -0
  50. model-00148-of-000163.safetensors +3 -0
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2023 DeepSeek
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
config.json ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "DeepseekV32ForCausalLM"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 0,
8
+ "eos_token_id": 1,
9
+ "ep_size": 1,
10
+ "first_k_dense_replace": 3,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 7168,
13
+ "index_head_dim": 128,
14
+ "index_n_heads": 64,
15
+ "index_topk": 2048,
16
+ "initializer_range": 0.02,
17
+ "intermediate_size": 18432,
18
+ "kv_lora_rank": 512,
19
+ "max_position_embeddings": 163840,
20
+ "model_type": "deepseek_v32",
21
+ "moe_intermediate_size": 2048,
22
+ "moe_layer_freq": 1,
23
+ "n_group": 8,
24
+ "n_routed_experts": 256,
25
+ "n_shared_experts": 1,
26
+ "norm_topk_prob": true,
27
+ "num_attention_heads": 128,
28
+ "num_experts_per_tok": 8,
29
+ "num_hidden_layers": 61,
30
+ "num_key_value_heads": 128,
31
+ "num_nextn_predict_layers": 1,
32
+ "q_lora_rank": 1536,
33
+ "qk_nope_head_dim": 128,
34
+ "qk_rope_head_dim": 64,
35
+ "quantization_config": {
36
+ "activation_scheme": "dynamic",
37
+ "fmt": "e4m3",
38
+ "quant_method": "fp8",
39
+ "scale_fmt": "ue8m0",
40
+ "weight_block_size": [
41
+ 128,
42
+ 128
43
+ ]
44
+ },
45
+ "rms_norm_eps": 1e-06,
46
+ "rope_scaling": {
47
+ "beta_fast": 32,
48
+ "beta_slow": 1,
49
+ "factor": 40,
50
+ "mscale": 1.0,
51
+ "mscale_all_dim": 1.0,
52
+ "original_max_position_embeddings": 4096,
53
+ "type": "yarn"
54
+ },
55
+ "rope_theta": 10000,
56
+ "routed_scaling_factor": 2.5,
57
+ "scoring_func": "sigmoid",
58
+ "tie_word_embeddings": false,
59
+ "topk_group": 4,
60
+ "topk_method": "noaux_tc",
61
+ "torch_dtype": "bfloat16",
62
+ "transformers_version": "4.44.2",
63
+ "use_cache": true,
64
+ "v_head_dim": 128,
65
+ "vocab_size": 129280
66
+ }
generation_config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 0,
4
+ "eos_token_id": 1,
5
+ "do_sample": true,
6
+ "temperature": 0.6,
7
+ "top_p": 0.95,
8
+ "transformers_version": "4.46.3"
9
+ }
model-00102-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ffd588fd7aaaaaebfbb451ee3d2c85326374001ca1dc2d7f5be480b1bce78c9d
3
+ size 4302384914
model-00103-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3acb6068a0f4dbe7747a4ebf0f440fae261670834ed97d01b1b7c42ecdc11a73
3
+ size 4302122764
model-00104-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7ee6148cc8563cd92fa76dd914b36a75c6536156f7f8266a11ad4bf1226e6de
3
+ size 4302384516
model-00105-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88dd1665e59e94fc21f35904c89ccc63bae0dc45370b6b145e12cdef60e425bf
3
+ size 4302384961
model-00106-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cec388cd8f9d432ee018ba6f27c749ef22a70cfc00422612abed6976b5c9f8db
3
+ size 4302122576
model-00107-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:897f8e2dd87c98d68e4bb106da491cea3378d553bfe3a379d8a5e8110a767f70
3
+ size 4302384704
model-00108-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a25ec05acd29f3fcc6233962a9f69e07b3c9c8008bfa78c530e5c95dc5e6292
3
+ size 4302384963
model-00109-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8be69408eacbb01af73a218409646d90d5c0c27fd0bc79f909c862e8288b5464
3
+ size 4302122398
model-00110-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a65a239cc05d061877e0a9e2b1ccb238d010c0f3c80240cc5a0f56309442b194
3
+ size 4302384890
model-00111-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e483a7229f00f791e14c0c5999452893371ce5ab3d1acb26b2a1099490ac437
3
+ size 4302122786
model-00112-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64cf509f9304eb7e1abd67e10e687aa8942d5401076540702355cc3a7bfe15c6
3
+ size 4302384494
model-00113-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f07054b0b122a7bc972db459e894dbcaa7aad3b5fa6dd75722e178d9d248fdb
3
+ size 4302384963
model-00114-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c5f9b51692a50761880c6845182210611214b1e7e9e769f645d6538a4314c9ce
3
+ size 4302122598
model-00115-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb5b8951313bf70ade90691bf7f870b49d2aa4d9440ed1fe101eda3b00f1c7f4
3
+ size 4302384680
model-00116-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e059ed5c2238758326e3d993e5100ac875fee2212dc81cacc0503d12dd3ccb7
3
+ size 4302384963
model-00117-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9eb47e82aeae4ef6c5b20b243fc628b6de3d6ae8c4a7d63e0dc8c907ebdb00e6
3
+ size 4302122420
model-00118-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8667166977c09c6b8b2ff212c1dd85414b3ede6a8b887c63b9d52622e6c64fa1
3
+ size 4302384870
model-00119-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd011f63a9d861bccbcd7510a646e1dc13bb47a44e0fc29f94ea45032bdf02a0
3
+ size 4302122808
model-00120-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d3e84b88c644b3f32604c23d5526ff74f9def70a291bbf7643c8572bc1a88b63
3
+ size 4302384470
model-00121-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:de00ff563eae8ec1f459d966be372c96302b73e3998a74b2b71da2c521787a54
3
+ size 4302384963
model-00122-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d7db77a4dd01a7ade0d78be052a0ad69e60d7b5b6eb6d5134dc923124f1d2fd9
3
+ size 1864917414
model-00123-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c0fc1ff16dcf3a05925f0f69d80e19b5eb2c50c8ab78d41f10971188c5c2e321
3
+ size 4302061105
model-00124-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:59ecab8be5b95a333252e3067465f66e50cffbd356575b79dafcd88e3a50f3b2
3
+ size 4302384914
model-00125-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:38a59f70eb21a2f80599ca43cfaa573c09d50d8ec9a876eecfeef7aec22e67a4
3
+ size 4302122764
model-00126-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:021a8c97b7ab0f7c631c9c9ffa3eab0e7650079dceb68d7461e1a2dd1b901a7a
3
+ size 4302384516
model-00127-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:416271db6327f2d28193ef06d5750de3a7e5326f28dea74b4f9ebc84f01777c9
3
+ size 4302384961
model-00128-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:81f47704507f4ff9580e6e090e11545d0841a0a338ea6f1deb2486151c8fc472
3
+ size 4302122576
model-00129-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9bae27016f20e43578b3a1341dcb3d4ebaa1d3b295f3da8e11f511709057cea3
3
+ size 4302384704
model-00130-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d16fefaead36baba56ea16e142a686fa7cb63b5025e7bc9a006fa1db73372ffc
3
+ size 4302384963
model-00131-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e7ea72470022c61d534fa534dc71b0767659708b4247e935fa343a35b2dc1a82
3
+ size 4302122398
model-00132-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca13cf0c562ab38251b09c1b65bd00cd350a201a4160b0a93bbaf30da4616873
3
+ size 4302384890
model-00133-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c3c419e475733fafb612e59a266212109ac6d1ae5d9c8e0ae909fe0aa6cead16
3
+ size 4302122786
model-00134-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b0dd13676ced9f26ce7c092353b1152898a2592a6eb7913b141a586430ea7bb8
3
+ size 4302384494
model-00135-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b6979bb49b28a77db606491550e27ea6018305aa398bf9e6820ddd20525eab58
3
+ size 4302384963
model-00136-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f43ec1b90698dd18cc5f7fd5671cc8f98f47c02cf711ed14022bcbf5e05139fb
3
+ size 4302122598
model-00137-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7e4a0dc5e67a1e0f39dab37674fd31cbfda1e782ef72f719b7ee95a8227a7fbf
3
+ size 4302384680
model-00138-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:74769133c5f5ea0505286799bfd18387c46094418cae846a379c229d9d810e65
3
+ size 4302384963
model-00139-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:582e2f9d71249b963278a22c5aacfa91eb3e99d966872b8372818d56e6c747ec
3
+ size 4302122420
model-00140-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a07d2c16ad9b89c9efac647bcbcb3175f5897856f045620d2e689af74c879420
3
+ size 4302384870
model-00141-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:267a3bd6abccfef3c60def0bf3b11e5e7d1a74e31d12b8c972730d40c216d6ae
3
+ size 3245204828
model-00142-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:00290593119bbd683fc7f60450a74773f2c7cff86a62df3965bb3542acf1924f
3
+ size 4302061105
model-00143-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:54717d9742a5265bb19eaeef3dccc4869dda4ef559178f569387d67e994aface
3
+ size 4302384914
model-00144-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d02691e8c9fb2f252eaaa381938128c1fa689b39262f654d560daf98b61e0c0a
3
+ size 4302122764
model-00145-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:063580b988345e9d295e8d186db35ae4c25af4d688381b8d0195af040b84582f
3
+ size 4302384516
model-00146-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:062e0dc7bcab035f296c8b3964a36397c156213de4fd9f987fc6c8dd9f3390a3
3
+ size 4302384961
model-00147-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:18ce8926102d836d20d87575cd809bdd245407d1467fba29970d2d5b11d7f0e0
3
+ size 4302122576
model-00148-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:42dec5e5e2c6da561bd4064be318ea7940da1a881999af826a0bb74640b79cf6
3
+ size 4302384704